diff options
Diffstat (limited to 'net/sched/sch_mqprio.c')
| -rw-r--r-- | net/sched/sch_mqprio.c | 471 |
1 files changed, 320 insertions, 151 deletions
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 4c68abaa289b..f3e5ef9a9592 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -5,6 +5,7 @@ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com> */ +#include <linux/ethtool_netlink.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> @@ -17,6 +18,8 @@ #include <net/sch_generic.h> #include <net/pkt_cls.h> +#include "sch_mqprio_lib.h" + struct mqprio_sched { struct Qdisc **qdiscs; u16 mode; @@ -25,8 +28,70 @@ struct mqprio_sched { u32 flags; u64 min_rate[TC_QOPT_MAX_QUEUE]; u64 max_rate[TC_QOPT_MAX_QUEUE]; + u32 fp[TC_QOPT_MAX_QUEUE]; }; +static int mqprio_enable_offload(struct Qdisc *sch, + const struct tc_mqprio_qopt *qopt, + struct netlink_ext_ack *extack) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt_offload mqprio = { + .qopt = *qopt, + .extack = extack, + }; + int err, i; + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + if (priv->shaper != TC_MQPRIO_SHAPER_DCB) + return -EINVAL; + break; + case TC_MQPRIO_MODE_CHANNEL: + mqprio.flags = priv->flags; + if (priv->flags & TC_MQPRIO_F_MODE) + mqprio.mode = priv->mode; + if (priv->flags & TC_MQPRIO_F_SHAPER) + mqprio.shaper = priv->shaper; + if (priv->flags & TC_MQPRIO_F_MIN_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.min_rate[i] = priv->min_rate[i]; + if (priv->flags & TC_MQPRIO_F_MAX_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.max_rate[i] = priv->max_rate[i]; + break; + default: + return -EINVAL; + } + + mqprio_fp_to_offload(priv->fp, &mqprio); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO, + &mqprio); + if (err) + return err; + + priv->hw_offload = mqprio.qopt.hw; + + return 0; +} + +static void mqprio_disable_offload(struct Qdisc *sch) +{ + struct tc_mqprio_qopt_offload mqprio = { { 0 } }; + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + case TC_MQPRIO_MODE_CHANNEL: + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO, + &mqprio); + break; + } +} + static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); @@ -41,37 +106,17 @@ static void mqprio_destroy(struct Qdisc *sch) kfree(priv->qdiscs); } - if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { - struct tc_mqprio_qopt_offload mqprio = { { 0 } }; - - switch (priv->mode) { - case TC_MQPRIO_MODE_DCB: - case TC_MQPRIO_MODE_CHANNEL: - dev->netdev_ops->ndo_setup_tc(dev, - TC_SETUP_QDISC_MQPRIO, - &mqprio); - break; - default: - return; - } - } else { + if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) + mqprio_disable_offload(sch); + else netdev_set_num_tc(dev, 0); - } } -static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) +static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt, + const struct tc_mqprio_caps *caps, + struct netlink_ext_ack *extack) { - int i, j; - - /* Verify num_tc is not out of max range */ - if (qopt->num_tc > TC_MAX_QUEUE) - return -EINVAL; - - /* Verify priority mapping uses valid tcs */ - for (i = 0; i < TC_BITMASK + 1; i++) { - if (qopt->prio_tc_map[i] >= qopt->num_tc) - return -EINVAL; - } + int err; /* Limit qopt->hw to maximum supported offload value. Drivers have * the option of overriding this later if they don't support the a @@ -80,53 +125,220 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX) qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX; - /* If hardware offload is requested we will leave it to the device - * to either populate the queue counts itself or to validate the - * provided queue counts. If ndo_setup_tc is not present then - * hardware doesn't support offload and we should return an error. + /* If hardware offload is requested, we will leave 3 options to the + * device driver: + * - populate the queue counts itself (and ignore what was requested) + * - validate the provided queue counts by itself (and apply them) + * - request queue count validation here (and apply them) */ - if (qopt->hw) - return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL; - - for (i = 0; i < qopt->num_tc; i++) { - unsigned int last = qopt->offset[i] + qopt->count[i]; - - /* Verify the queue count is in tx range being equal to the - * real_num_tx_queues indicates the last queue is in use. - */ - if (qopt->offset[i] >= dev->real_num_tx_queues || - !qopt->count[i] || - last > dev->real_num_tx_queues) - return -EINVAL; - - /* Verify that the offset and counts do not overlap */ - for (j = i + 1; j < qopt->num_tc; j++) { - if (last > qopt->offset[j]) - return -EINVAL; - } + err = mqprio_validate_qopt(dev, qopt, + !qopt->hw || caps->validate_queue_counts, + false, extack); + if (err) + return err; + + /* If ndo_setup_tc is not present then hardware doesn't support offload + * and we should return an error. + */ + if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, + "Device does not support hardware offload"); + return -EINVAL; } return 0; } +static const struct +nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = { + [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE - 1), + [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32, + TC_FP_EXPRESS, + TC_FP_PREEMPTIBLE), +}; + static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = { [TCA_MQPRIO_MODE] = { .len = sizeof(u16) }, [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) }, [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED }, [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_TC_ENTRY] = { .type = NLA_NESTED }, }; -static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, - const struct nla_policy *policy, int len) +static int mqprio_parse_tc_entry(u32 fp[TC_QOPT_MAX_QUEUE], + struct nlattr *opt, + unsigned long *seen_tcs, + struct netlink_ext_ack *extack) { - int nested_len = nla_len(nla) - NLA_ALIGN(len); + struct nlattr *tb[TCA_MQPRIO_TC_ENTRY_MAX + 1]; + int err, tc; + + err = nla_parse_nested(tb, TCA_MQPRIO_TC_ENTRY_MAX, opt, + mqprio_tc_entry_policy, extack); + if (err < 0) + return err; + + if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_MQPRIO_TC_ENTRY_INDEX)) { + NL_SET_ERR_MSG(extack, "TC entry index missing"); + return -EINVAL; + } + + tc = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_INDEX]); + if (*seen_tcs & BIT(tc)) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_TC_ENTRY_INDEX], + "Duplicate tc entry"); + return -EINVAL; + } - if (nested_len >= nla_attr_size(0)) - return nla_parse_deprecated(tb, maxtype, - nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + *seen_tcs |= BIT(tc); + + if (tb[TCA_MQPRIO_TC_ENTRY_FP]) + fp[tc] = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_FP]); + + return 0; +} + +static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt, + int nlattr_opt_len, + struct netlink_ext_ack *extack) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + bool have_preemption = false; + unsigned long seen_tcs = 0; + u32 fp[TC_QOPT_MAX_QUEUE]; + struct nlattr *n; + int tc, rem; + int err = 0; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + fp[tc] = priv->fp[tc]; + + nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt, + nlattr_opt_len, rem) { + err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack); + if (err) + goto out; + } + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + priv->fp[tc] = fp[tc]; + if (fp[tc] == TC_FP_PREEMPTIBLE) + have_preemption = true; + } + + if (have_preemption && !ethtool_dev_mm_supported(dev)) { + NL_SET_ERR_MSG(extack, "Device does not support preemption"); + return -EOPNOTSUPP; + } +out: + return err; +} + +/* Parse the other netlink attributes that represent the payload of + * TCA_OPTIONS, which are appended right after struct tc_mqprio_qopt. + */ +static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct nlattr *nlattr_opt = nla_data(opt) + NLA_ALIGN(sizeof(*qopt)); + int nlattr_opt_len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + struct mqprio_sched *priv = qdisc_priv(sch); + struct nlattr *tb[TCA_MQPRIO_MAX + 1] = {}; + struct nlattr *attr; + int i, rem, err; + + if (nlattr_opt_len >= nla_attr_size(0)) { + err = nla_parse_deprecated(tb, TCA_MQPRIO_MAX, nlattr_opt, + nlattr_opt_len, mqprio_policy, + NULL); + if (err < 0) + return err; + } + + if (!qopt->hw) { + NL_SET_ERR_MSG(extack, + "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode"); + return -EINVAL; + } + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = nla_get_u16(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = nla_get_u16(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64], + "min_rate accepted only when shaper is in bw_rlimit mode"); + return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MIN_RATE64"); + return -EINVAL; + } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = nla_get_u64(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64], + "max_rate accepted only when shaper is in bw_rlimit mode"); + return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MAX_RATE64"); + return -EINVAL; + } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = nla_get_u64(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + + if (tb[TCA_MQPRIO_TC_ENTRY]) { + err = mqprio_parse_tc_entries(sch, nlattr_opt, nlattr_opt_len, + extack); + if (err) + return err; + } - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; } @@ -139,10 +351,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; - struct nlattr *tb[TCA_MQPRIO_MAX + 1]; - struct nlattr *attr; - int rem; - int len; + struct tc_mqprio_caps caps; + int len, tc; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -160,61 +370,21 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + priv->fp[tc] = TC_FP_EXPRESS; + + qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO, + &caps, sizeof(caps)); + qopt = nla_data(opt); - if (mqprio_parse_opt(dev, qopt)) + if (mqprio_parse_opt(dev, qopt, &caps, extack)) return -EINVAL; len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); if (len > 0) { - err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, - sizeof(*qopt)); - if (err < 0) + err = mqprio_parse_nlattr(sch, qopt, opt, extack); + if (err) return err; - - if (!qopt->hw) - return -EINVAL; - - if (tb[TCA_MQPRIO_MODE]) { - priv->flags |= TC_MQPRIO_F_MODE; - priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); - } - - if (tb[TCA_MQPRIO_SHAPER]) { - priv->flags |= TC_MQPRIO_F_SHAPER; - priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); - } - - if (tb[TCA_MQPRIO_MIN_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->min_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MIN_RATE; - } - - if (tb[TCA_MQPRIO_MAX_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->max_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MAX_RATE; - } } /* pre-allocate qdisc, attachment can't fail */ @@ -241,36 +411,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, * supplied and verified mapping */ if (qopt->hw) { - struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt}; - - switch (priv->mode) { - case TC_MQPRIO_MODE_DCB: - if (priv->shaper != TC_MQPRIO_SHAPER_DCB) - return -EINVAL; - break; - case TC_MQPRIO_MODE_CHANNEL: - mqprio.flags = priv->flags; - if (priv->flags & TC_MQPRIO_F_MODE) - mqprio.mode = priv->mode; - if (priv->flags & TC_MQPRIO_F_SHAPER) - mqprio.shaper = priv->shaper; - if (priv->flags & TC_MQPRIO_F_MIN_RATE) - for (i = 0; i < mqprio.qopt.num_tc; i++) - mqprio.min_rate[i] = priv->min_rate[i]; - if (priv->flags & TC_MQPRIO_F_MAX_RATE) - for (i = 0; i < mqprio.qopt.num_tc; i++) - mqprio.max_rate[i] = priv->max_rate[i]; - break; - default: - return -EINVAL; - } - err = dev->netdev_ops->ndo_setup_tc(dev, - TC_SETUP_QDISC_MQPRIO, - &mqprio); + err = mqprio_enable_offload(sch, qopt, extack); if (err) return err; - - priv->hw_offload = mqprio.qopt.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -380,6 +523,33 @@ nla_put_failure: return -1; } +static int mqprio_dump_tc_entries(struct mqprio_sched *priv, + struct sk_buff *skb) +{ + struct nlattr *n; + int tc; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + n = nla_nest_start(skb, TCA_MQPRIO_TC_ENTRY); + if (!n) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_INDEX, tc)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_FP, priv->fp[tc])) + goto nla_put_failure; + + nla_nest_end(skb, n); + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, n); + return -EMSGSIZE; +} + static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); @@ -387,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; - unsigned int ntx, tc; + unsigned int ntx; sch->q.qlen = 0; gnet_stats_basic_sync_init(&sch->bstats); @@ -399,7 +569,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -411,15 +581,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) spin_unlock_bh(qdisc_lock(qdisc)); } - opt.num_tc = netdev_get_num_tc(dev); - memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); + mqprio_qopt_reconstruct(dev, &opt); opt.hw = priv->hw_offload; - for (tc = 0; tc < netdev_get_num_tc(dev); tc++) { - opt.count[tc] = dev->tc_to_txq[tc].count; - opt.offset[tc] = dev->tc_to_txq[tc].offset; - } - if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; @@ -436,6 +600,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) (dump_rates(priv, &opt, skb) != 0)) goto nla_put_failure; + if (mqprio_dump_tc_entries(priv, skb)) + goto nla_put_failure; + return nla_nest_end(skb, nla); nla_put_failure: nlmsg_trim(skb, nla); @@ -449,7 +616,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) @@ -482,7 +649,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = (tc < 0) ? 0 : TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(tc + TC_H_MIN_PRIORITY)); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; @@ -538,7 +705,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) @@ -605,6 +772,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .dump = mqprio_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("mqprio"); static int __init mqprio_module_init(void) { @@ -620,3 +788,4 @@ module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); |
