diff options
Diffstat (limited to 'net/sched/sch_mqprio.c')
| -rw-r--r-- | net/sched/sch_mqprio.c | 649 |
1 files changed, 508 insertions, 141 deletions
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index accec33c454c..f3e5ef9a9592 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -1,13 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_mqprio.c * * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. */ +#include <linux/ethtool_netlink.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> @@ -18,12 +16,82 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> +#include <net/pkt_cls.h> + +#include "sch_mqprio_lib.h" struct mqprio_sched { struct Qdisc **qdiscs; - int hw_owned; + u16 mode; + u16 shaper; + int hw_offload; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; + u32 fp[TC_QOPT_MAX_QUEUE]; }; +static int mqprio_enable_offload(struct Qdisc *sch, + const struct tc_mqprio_qopt *qopt, + struct netlink_ext_ack *extack) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt_offload mqprio = { + .qopt = *qopt, + .extack = extack, + }; + int err, i; + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + if (priv->shaper != TC_MQPRIO_SHAPER_DCB) + return -EINVAL; + break; + case TC_MQPRIO_MODE_CHANNEL: + mqprio.flags = priv->flags; + if (priv->flags & TC_MQPRIO_F_MODE) + mqprio.mode = priv->mode; + if (priv->flags & TC_MQPRIO_F_SHAPER) + mqprio.shaper = priv->shaper; + if (priv->flags & TC_MQPRIO_F_MIN_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.min_rate[i] = priv->min_rate[i]; + if (priv->flags & TC_MQPRIO_F_MAX_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.max_rate[i] = priv->max_rate[i]; + break; + default: + return -EINVAL; + } + + mqprio_fp_to_offload(priv->fp, &mqprio); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO, + &mqprio); + if (err) + return err; + + priv->hw_offload = mqprio.qopt.hw; + + return 0; +} + +static void mqprio_disable_offload(struct Qdisc *sch) +{ + struct tc_mqprio_qopt_offload mqprio = { { 0 } }; + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + case TC_MQPRIO_MODE_CHANNEL: + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO, + &mqprio); + break; + } +} + static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); @@ -34,62 +102,248 @@ static void mqprio_destroy(struct Qdisc *sch) for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } - if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, 0); + if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) + mqprio_disable_offload(sch); else netdev_set_num_tc(dev, 0); } -static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) +static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt, + const struct tc_mqprio_caps *caps, + struct netlink_ext_ack *extack) { - int i, j; + int err; - /* Verify num_tc is not out of max range */ - if (qopt->num_tc > TC_MAX_QUEUE) + /* Limit qopt->hw to maximum supported offload value. Drivers have + * the option of overriding this later if they don't support the a + * given offload type. + */ + if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX) + qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX; + + /* If hardware offload is requested, we will leave 3 options to the + * device driver: + * - populate the queue counts itself (and ignore what was requested) + * - validate the provided queue counts by itself (and apply them) + * - request queue count validation here (and apply them) + */ + err = mqprio_validate_qopt(dev, qopt, + !qopt->hw || caps->validate_queue_counts, + false, extack); + if (err) + return err; + + /* If ndo_setup_tc is not present then hardware doesn't support offload + * and we should return an error. + */ + if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, + "Device does not support hardware offload"); return -EINVAL; + } - /* Verify priority mapping uses valid tcs */ - for (i = 0; i < TC_BITMASK + 1; i++) { - if (qopt->prio_tc_map[i] >= qopt->num_tc) - return -EINVAL; + return 0; +} + +static const struct +nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = { + [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE - 1), + [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32, + TC_FP_EXPRESS, + TC_FP_PREEMPTIBLE), +}; + +static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = { + [TCA_MQPRIO_MODE] = { .len = sizeof(u16) }, + [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) }, + [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_TC_ENTRY] = { .type = NLA_NESTED }, +}; + +static int mqprio_parse_tc_entry(u32 fp[TC_QOPT_MAX_QUEUE], + struct nlattr *opt, + unsigned long *seen_tcs, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_MQPRIO_TC_ENTRY_MAX + 1]; + int err, tc; + + err = nla_parse_nested(tb, TCA_MQPRIO_TC_ENTRY_MAX, opt, + mqprio_tc_entry_policy, extack); + if (err < 0) + return err; + + if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_MQPRIO_TC_ENTRY_INDEX)) { + NL_SET_ERR_MSG(extack, "TC entry index missing"); + return -EINVAL; } - /* net_device does not support requested operation */ - if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) + tc = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_INDEX]); + if (*seen_tcs & BIT(tc)) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_TC_ENTRY_INDEX], + "Duplicate tc entry"); return -EINVAL; + } - /* if hw owned qcount and qoffset are taken from LLD so - * no reason to verify them here - */ - if (qopt->hw) - return 0; + *seen_tcs |= BIT(tc); - for (i = 0; i < qopt->num_tc; i++) { - unsigned int last = qopt->offset[i] + qopt->count[i]; + if (tb[TCA_MQPRIO_TC_ENTRY_FP]) + fp[tc] = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_FP]); - /* Verify the queue count is in tx range being equal to the - * real_num_tx_queues indicates the last queue is in use. - */ - if (qopt->offset[i] >= dev->real_num_tx_queues || - !qopt->count[i] || - last > dev->real_num_tx_queues) + return 0; +} + +static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt, + int nlattr_opt_len, + struct netlink_ext_ack *extack) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + bool have_preemption = false; + unsigned long seen_tcs = 0; + u32 fp[TC_QOPT_MAX_QUEUE]; + struct nlattr *n; + int tc, rem; + int err = 0; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + fp[tc] = priv->fp[tc]; + + nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt, + nlattr_opt_len, rem) { + err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack); + if (err) + goto out; + } + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + priv->fp[tc] = fp[tc]; + if (fp[tc] == TC_FP_PREEMPTIBLE) + have_preemption = true; + } + + if (have_preemption && !ethtool_dev_mm_supported(dev)) { + NL_SET_ERR_MSG(extack, "Device does not support preemption"); + return -EOPNOTSUPP; + } +out: + return err; +} + +/* Parse the other netlink attributes that represent the payload of + * TCA_OPTIONS, which are appended right after struct tc_mqprio_qopt. + */ +static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct nlattr *nlattr_opt = nla_data(opt) + NLA_ALIGN(sizeof(*qopt)); + int nlattr_opt_len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + struct mqprio_sched *priv = qdisc_priv(sch); + struct nlattr *tb[TCA_MQPRIO_MAX + 1] = {}; + struct nlattr *attr; + int i, rem, err; + + if (nlattr_opt_len >= nla_attr_size(0)) { + err = nla_parse_deprecated(tb, TCA_MQPRIO_MAX, nlattr_opt, + nlattr_opt_len, mqprio_policy, + NULL); + if (err < 0) + return err; + } + + if (!qopt->hw) { + NL_SET_ERR_MSG(extack, + "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode"); + return -EINVAL; + } + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = nla_get_u16(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = nla_get_u16(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64], + "min_rate accepted only when shaper is in bw_rlimit mode"); return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MIN_RATE64"); + return -EINVAL; + } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = nla_get_u64(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64], + "max_rate accepted only when shaper is in bw_rlimit mode"); + return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MAX_RATE64"); + return -EINVAL; + } - /* Verify that the offset and counts do not overlap */ - for (j = i + 1; j < qopt->num_tc; j++) { - if (last > qopt->offset[j]) + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length"); return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = nla_get_u64(attr); + i++; } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + + if (tb[TCA_MQPRIO_TC_ENTRY]) { + err = mqprio_parse_tc_entries(sch, nlattr_opt, nlattr_opt_len, + extack); + if (err) + return err; } return 0; } -static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) +static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); @@ -97,6 +351,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; + struct tc_mqprio_caps caps; + int len, tc; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -107,32 +363,47 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; + /* make certain can allocate enough classids to handle queues */ + if (dev->num_tx_queues >= TC_H_MIN_PRIORITY) + return -ENOMEM; + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + priv->fp[tc] = TC_FP_EXPRESS; + + qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO, + &caps, sizeof(caps)); + qopt = nla_data(opt); - if (mqprio_parse_opt(dev, qopt)) + if (mqprio_parse_opt(dev, qopt, &caps, extack)) return -EINVAL; + len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + if (len > 0) { + err = mqprio_parse_nlattr(sch, qopt, opt, extack); + if (err) + return err; + } + /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) { - err = -ENOMEM; - goto err; - } + if (!priv->qdiscs) + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); - qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev_queue, + get_default_qdisc_ops(dev, i), TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1))); - if (qdisc == NULL) { - err = -ENOMEM; - goto err; - } + TC_H_MIN(i + 1)), extack); + if (!qdisc) + return -ENOMEM; + priv->qdiscs[i] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } /* If the mqprio options indicate that hardware should own @@ -140,10 +411,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); + err = mqprio_enable_offload(sch, qopt, extack); if (err) - goto err; + return err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -157,25 +427,23 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mqprio_destroy(sch); - return err; } static void mqprio_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - struct Qdisc *qdisc; + struct Qdisc *qdisc, *old; unsigned int ntx; /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; - qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (qdisc) - qdisc_destroy(qdisc); + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (old) + qdisc_put(old); + if (ntx < dev->real_num_tx_queues) + qdisc_hash_add(qdisc, false); } kfree(priv->qdiscs); priv->qdiscs = NULL; @@ -185,7 +453,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); - unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); + unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; @@ -193,7 +461,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, } static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); @@ -207,7 +475,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); @@ -215,48 +483,129 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return 0; } +static int dump_rates(struct mqprio_sched *priv, + struct tc_mqprio_qopt *opt, struct sk_buff *skb) +{ + struct nlattr *nest; + int i; + + if (priv->flags & TC_MQPRIO_F_MIN_RATE) { + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MIN_RATE64, + sizeof(priv->min_rate[i]), + &priv->min_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + + if (priv->flags & TC_MQPRIO_F_MAX_RATE) { + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MAX_RATE64, + sizeof(priv->max_rate[i]), + &priv->max_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static int mqprio_dump_tc_entries(struct mqprio_sched *priv, + struct sk_buff *skb) +{ + struct nlattr *n; + int tc; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + n = nla_nest_start(skb, TCA_MQPRIO_TC_ENTRY); + if (!n) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_INDEX, tc)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_FP, priv->fp[tc])) + goto nla_put_failure; + + nla_nest_end(skb, n); + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, n); + return -EMSGSIZE; +} + static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; - unsigned int i; + unsigned int ntx; sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); + gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); - for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + /* MQ supports lockless qdiscs. However, statistics accounting needs + * to account for all, none, or a mix of locked and unlocked child + * qdiscs. Percpu stats are added to counters in-band and locking + * qdisc totals are added at end. + */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.qlen += qdisc->qstats.qlen; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; - spin_unlock_bh(qdisc_lock(qdisc)); - } - opt.num_tc = netdev_get_num_tc(dev); - memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); - opt.hw = priv->hw_owned; + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); - for (i = 0; i < netdev_get_num_tc(dev); i++) { - opt.count[i] = dev->tc_to_txq[i].count; - opt.offset[i] = dev->tc_to_txq[i].offset; + spin_unlock_bh(qdisc_lock(qdisc)); } + mqprio_qopt_reconstruct(dev, &opt); + opt.hw = priv->hw_offload; + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; - return skb->len; + if ((priv->flags & TC_MQPRIO_F_MODE) && + nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_SHAPER) && + nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MIN_RATE || + priv->flags & TC_MQPRIO_F_MAX_RATE) && + (dump_rates(priv, &opt, skb) != 0)) + goto nla_put_failure; + + if (mqprio_dump_tc_entries(priv, skb)) + goto nla_put_failure; + + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } @@ -267,50 +616,43 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } -static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) +static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) { struct net_device *dev = qdisc_dev(sch); unsigned int ntx = TC_H_MIN(classid); - if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) - return 0; - return ntx; -} + /* There are essentially two regions here that have valid classid + * values. The first region will have a classid value of 1 through + * num_tx_queues. All of these are backed by actual Qdiscs. + */ + if (ntx < TC_H_MIN_PRIORITY) + return (ntx <= dev->num_tx_queues) ? ntx : 0; -static void mqprio_put(struct Qdisc *sch, unsigned long cl) -{ + /* The second region represents the hardware traffic classes. These + * are represented by classid values of TC_H_MIN_PRIORITY through + * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1 + */ + return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0; } static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct net_device *dev = qdisc_dev(sch); + if (cl < TC_H_MIN_PRIORITY) { + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + int tc = netdev_txq_to_tc(dev, cl - 1); - if (cl <= netdev_get_num_tc(dev)) { + tcm->tcm_parent = (tc < 0) ? 0 : + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(tc + TC_H_MIN_PRIORITY)); + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; + } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; - } else { - int i; - struct netdev_queue *dev_queue; - - dev_queue = mqprio_queue_get(sch, cl); - tcm->tcm_parent = 0; - for (i = 0; i < netdev_get_num_tc(dev); i++) { - struct netdev_tc_txq tc = dev->tc_to_txq[i]; - int q_idx = cl - netdev_get_num_tc(dev); - - if (q_idx > tc.offset && - q_idx <= tc.offset + tc.count) { - tcm->tcm_parent = - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)); - break; - } - } - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; } tcm->tcm_handle |= TC_H_MIN(cl); return 0; @@ -321,46 +663,52 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct net_device *dev = qdisc_dev(sch); - - if (cl <= netdev_get_num_tc(dev)) { + if (cl >= TC_H_MIN_PRIORITY) { int i; - struct Qdisc *qdisc; + __u32 qlen; struct gnet_stats_queue qstats = {0}; - struct gnet_stats_basic_packed bstats = {0}; - struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; + struct gnet_stats_basic_sync bstats; + struct net_device *dev = qdisc_dev(sch); + struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; + gnet_stats_basic_sync_init(&bstats); /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we * hold here is the look on dev_queue->qdisc_sleeping * also acquired below. */ - spin_unlock_bh(d->lock); + if (d->lock) + spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + struct netdev_queue *q = netdev_get_tx_queue(dev, i); + struct Qdisc *qdisc = rtnl_dereference(q->qdisc); + spin_lock_bh(qdisc_lock(qdisc)); - bstats.bytes += qdisc->bstats.bytes; - bstats.packets += qdisc->bstats.packets; - qstats.qlen += qdisc->qstats.qlen; - qstats.backlog += qdisc->qstats.backlog; - qstats.drops += qdisc->qstats.drops; - qstats.requeues += qdisc->qstats.requeues; - qstats.overlimits += qdisc->qstats.overlimits; + + gnet_stats_add_basic(&bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); + gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); } + qlen = qdisc_qlen(sch) + qstats.qlen; + /* Reclaim root sleeping lock before completing stats */ - spin_lock_bh(d->lock); - if (gnet_stats_copy_basic(d, &bstats) < 0 || - gnet_stats_copy_queue(d, &qstats) < 0) + if (d->lock) + spin_lock_bh(d->lock); + if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 || + gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) return -1; } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; - sch->qstats.qlen = sch->q.qlen; - if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, &sch->qstats) < 0) + sch = rtnl_dereference(dev_queue->qdisc_sleeping); + if (gnet_stats_copy_basic(d, sch->cpu_bstats, + &sch->bstats, true) < 0 || + qdisc_qstats_copy(d, sch) < 0) return -1; } return 0; @@ -376,25 +724,41 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; - for (ntx = arg->skip; - ntx < dev->num_tx_queues + netdev_get_num_tc(dev); - ntx++) { + for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { + if (!tc_qdisc_stats_dump(sch, ntx + TC_H_MIN_PRIORITY, arg)) + return; + } + + /* Pad the values and skip over unused traffic classes */ + if (ntx < TC_MAX_QUEUE) { + arg->count = TC_MAX_QUEUE; + ntx = TC_MAX_QUEUE; + } + + /* Reset offset, sort out remaining per-queue qdiscs */ + for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) { if (arg->fn(sch, ntx + 1, arg) < 0) { arg->stop = 1; - break; + return; } arg->count++; } } +static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch, + struct tcmsg *tcm) +{ + return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); +} + static const struct Qdisc_class_ops mqprio_class_ops = { .graft = mqprio_graft, .leaf = mqprio_leaf, - .get = mqprio_get, - .put = mqprio_put, + .find = mqprio_find, .walk = mqprio_walk, .dump = mqprio_dump_class, .dump_stats = mqprio_dump_class_stats, + .select_queue = mqprio_select_queue, }; static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { @@ -404,9 +768,11 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .init = mqprio_init, .destroy = mqprio_destroy, .attach = mqprio_attach, + .change_real_num_tx = mq_change_real_num_tx, .dump = mqprio_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("mqprio"); static int __init mqprio_module_init(void) { @@ -422,3 +788,4 @@ module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); |
