diff options
Diffstat (limited to 'net/sched/sch_fq_pie.c')
| -rw-r--r-- | net/sched/sch_fq_pie.c | 124 |
1 files changed, 78 insertions, 46 deletions
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 6980796d435d..7b96bc3ff891 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -10,6 +10,7 @@ */ #include <linux/jhash.h> +#include <linux/module.h> #include <linux/sizes.h> #include <linux/vmalloc.h> #include <net/pkt_cls.h> @@ -61,6 +62,7 @@ struct fq_pie_sched_data { struct pie_params p_params; u32 ecn_prob; u32 flows_cnt; + u32 flows_cursor; u32 quantum; u32 memory_limit; u32 new_flow_count; @@ -128,6 +130,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; int ret; @@ -159,6 +162,8 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->overmemory++; } + reason = SKB_DROP_REASON_QDISC_CONGESTED; + if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, sel_flow->backlog, skb->len)) { enqueue = true; @@ -196,11 +201,15 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; sel_flow->vars.accu_prob = 0; - __qdisc_drop(skb, to_free); - qdisc_qstats_drop(sch); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; } +static const struct netlink_range_validation fq_pie_q_range = { + .min = 1, + .max = 1 << 20, +}; + static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, @@ -208,7 +217,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, - [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32}, + [TCA_FQ_PIE_QUANTUM] = + NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, @@ -277,10 +287,9 @@ begin: static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; - unsigned int len_dropped = 0; - unsigned int num_dropped = 0; int err; err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); @@ -291,8 +300,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_PIE_LIMIT]) { u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]); - q->p_params.limit = limit; - sch->limit = limit; + WRITE_ONCE(q->p_params.limit, limit); + WRITE_ONCE(sch->limit, limit); } if (tb[TCA_FQ_PIE_FLOWS]) { if (q->flows) { @@ -314,49 +323,58 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]); /* convert to pschedtime */ - q->p_params.target = - PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + WRITE_ONCE(q->p_params.target, + PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC)); } /* tupdate is in jiffies */ if (tb[TCA_FQ_PIE_TUPDATE]) - q->p_params.tupdate = - usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])); + WRITE_ONCE(q->p_params.tupdate, + usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]))); if (tb[TCA_FQ_PIE_ALPHA]) - q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]); + WRITE_ONCE(q->p_params.alpha, + nla_get_u32(tb[TCA_FQ_PIE_ALPHA])); if (tb[TCA_FQ_PIE_BETA]) - q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]); + WRITE_ONCE(q->p_params.beta, + nla_get_u32(tb[TCA_FQ_PIE_BETA])); if (tb[TCA_FQ_PIE_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]); + WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM])); if (tb[TCA_FQ_PIE_MEMORY_LIMIT]) - q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]); + WRITE_ONCE(q->memory_limit, + nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT])); if (tb[TCA_FQ_PIE_ECN_PROB]) - q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]); + WRITE_ONCE(q->ecn_prob, + nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB])); if (tb[TCA_FQ_PIE_ECN]) - q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]); + WRITE_ONCE(q->p_params.ecn, + nla_get_u32(tb[TCA_FQ_PIE_ECN])); if (tb[TCA_FQ_PIE_BYTEMODE]) - q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]); + WRITE_ONCE(q->p_params.bytemode, + nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE])); if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]) - q->p_params.dq_rate_estimator = - nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]); + WRITE_ONCE(q->p_params.dq_rate_estimator, + nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])); /* Drop excess packets if new limit is lower */ while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = fq_pie_qdisc_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + + if (!skb) + break; - len_dropped += qdisc_pkt_len(skb); - num_dropped += 1; + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } - qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; @@ -368,23 +386,35 @@ flow_error: static void fq_pie_timer(struct timer_list *t) { - struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer); + struct fq_pie_sched_data *q = timer_container_of(q, t, adapt_timer); + unsigned long next, tupdate; struct Qdisc *sch = q->sch; spinlock_t *root_lock; /* to lock qdisc for probability calculations */ - u32 idx; + int max_cnt, i; + rcu_read_lock(); root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); - for (idx = 0; idx < q->flows_cnt; idx++) - pie_calculate_probability(&q->p_params, &q->flows[idx].vars, - q->flows[idx].backlog); - - /* reset the timer to fire after 'tupdate' jiffies. */ - if (q->p_params.tupdate) - mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate); + /* Limit this expensive loop to 2048 flows per round. */ + max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048); + for (i = 0; i < max_cnt; i++) { + pie_calculate_probability(&q->p_params, + &q->flows[q->flows_cursor].vars, + q->flows[q->flows_cursor].backlog); + q->flows_cursor++; + } + tupdate = q->p_params.tupdate; + next = 0; + if (q->flows_cursor >= q->flows_cnt) { + q->flows_cursor = 0; + next = tupdate; + } + if (tupdate) + mod_timer(&q->adapt_timer, jiffies + next); spin_unlock(root_lock); + rcu_read_unlock(); } static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, @@ -451,22 +481,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb) return -EMSGSIZE; /* convert target from pschedtime to us */ - if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) || - nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) || + if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) || + nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) || nla_put_u32(skb, TCA_FQ_PIE_TARGET, - ((u32)PSCHED_TICKS2NS(q->p_params.target)) / + ((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) / NSEC_PER_USEC) || nla_put_u32(skb, TCA_FQ_PIE_TUPDATE, - jiffies_to_usecs(q->p_params.tupdate)) || - nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) || - nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) || - nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) || - nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) || - nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) || - nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) || - nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) || + jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) || + nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) || + nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) || + nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) || + nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, + READ_ONCE(q->memory_limit)) || + nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) || + nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) || + nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) || nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR, - q->p_params.dq_rate_estimator)) + READ_ONCE(q->p_params.dq_rate_estimator))) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -526,7 +557,7 @@ static void fq_pie_destroy(struct Qdisc *sch) tcf_block_put(q->block); q->p_params.tupdate = 0; - del_timer_sync(&q->adapt_timer); + timer_delete_sync(&q->adapt_timer); kvfree(q->flows); } @@ -544,6 +575,7 @@ static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = { .dump_stats = fq_pie_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("fq_pie"); static int __init fq_pie_module_init(void) { |
