diff options
Diffstat (limited to 'net/sched/sch_taprio.c')
| -rw-r--r-- | net/sched/sch_taprio.c | 257 |
1 file changed, 141 insertions, 116 deletions
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 717ae51d94a0..300d577b3286 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -40,7 +40,14 @@ static struct static_key_false taprio_have_working_mqprio; #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) +#define TAPRIO_SUPPORTED_FLAGS \ + (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) #define TAPRIO_FLAGS_INVALID U32_MAX +/* Minimum value for picos_per_byte to ensure non-zero duration + * for minimum-sized Ethernet frames (ETH_ZLEN = 60). + * 60 * 17 > PSEC_PER_NSEC (1000) + */ +#define TAPRIO_PICOS_PER_BYTE_MIN 17 struct sched_entry { /* Durations between this GCL entry and the GCL entry where the @@ -408,19 +415,6 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } -static bool taprio_flags_valid(u32 flags) -{ - /* Make sure no other flag bits are set. */ - if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | - TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) - return false; - /* txtime-assist and full offload are mutually exclusive */ - if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && - (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) - return false; - return true; -} - /* This returns the tstamp value set by TCP in terms of the set clock. 
*/ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -601,6 +595,7 @@ static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch, skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; + qdisc_skb_cb(segs)->pkt_segs = 1; slen += segs->len; /* FIXME: we should be segmenting to a smaller size @@ -1008,13 +1003,19 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { - [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, + TC_QOPT_MAX_QUEUE - 1), [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32, TC_FP_EXPRESS, TC_FP_PREEMPTIBLE), }; +static const struct netlink_range_validation_signed taprio_cycle_time_range = { + .min = 0, + .max = INT_MAX, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -1023,9 +1024,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, - [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = + NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range), [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, - [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_FLAGS] = + NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS), [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, }; @@ -1154,14 +1157,19 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, list_for_each_entry(entry, 
&new->entries, list) cycle = ktime_add_ns(cycle, entry->interval); - if (!cycle) { - NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0"); + if (cycle < 0 || cycle > INT_MAX) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); return -EINVAL; } new->cycle_time = cycle; } + if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too small"); + return -EINVAL; + } + taprio_calculate_gate_durations(q, new); return 0; @@ -1174,16 +1182,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags); - if (!qopt && !dev->num_tc) { - NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); - return -EINVAL; - } - - /* If num_tc is already set, it means that the user already - * configured the mqprio part - */ - if (dev->num_tc) + if (!qopt) { + if (!dev->num_tc) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } return 0; + } /* taprio imposes that traffic classes map 1:n to tx queues */ if (qopt->num_tc > dev->num_tx_queues) { @@ -1285,7 +1290,8 @@ static void taprio_start_sched(struct Qdisc *sch, } static void taprio_set_picos_per_byte(struct net_device *dev, - struct taprio_sched *q) + struct taprio_sched *q, + struct netlink_ext_ack *extack) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; @@ -1301,6 +1307,15 @@ static void taprio_set_picos_per_byte(struct net_device *dev, skip: picos_per_byte = (USEC_PER_SEC * 8) / speed; + if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) { + if (!extack) + pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n", + speed); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Link speed %d is too high. 
Schedule may be inaccurate.", + speed); + picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN; + } atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", @@ -1325,17 +1340,19 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, if (dev != qdisc_dev(q->root)) continue; - taprio_set_picos_per_byte(dev, q); + taprio_set_picos_per_byte(dev, q, NULL); stab = rtnl_dereference(q->root->stab); - oper = rtnl_dereference(q->oper_sched); + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); if (oper) taprio_update_queue_max_sdu(q, oper, stab); - admin = rtnl_dereference(q->admin_sched); + admin = rcu_dereference(q->admin_sched); if (admin) taprio_update_queue_max_sdu(q, admin, stab); + rcu_read_unlock(); break; } @@ -1347,7 +1364,7 @@ static void setup_txtime(struct taprio_sched *q, struct sched_gate_list *sched, ktime_t base) { struct sched_entry *entry; - u32 interval = 0; + u64 interval = 0; list_for_each_entry(entry, &sched->entries, list) { entry->next_txtime = ktime_add_ns(base, interval); @@ -1611,7 +1628,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_ts_info info = { + struct kernel_ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO, .phc_index = -1, }; @@ -1697,19 +1714,15 @@ static int taprio_parse_tc_entry(struct Qdisc *sch, if (err < 0) return err; - if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) { + if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_TAPRIO_TC_ENTRY_INDEX)) { NL_SET_ERR_MSG_MOD(extack, "TC entry index missing"); return -EINVAL; } tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); - if (tc >= TC_QOPT_MAX_QUEUE) { - NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); - return -ERANGE; - } - if (*seen_tcs & BIT(tc)) { - NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry"); + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_TC_ENTRY_INDEX], + 
"Duplicate tc entry"); return -EINVAL; } @@ -1750,10 +1763,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch, fp[tc] = q->fp[tc]; } - nla_for_each_nested(n, opt, rem) { - if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) - continue; - + nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) { err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs, extack); if (err) @@ -1804,33 +1814,6 @@ static int taprio_mqprio_cmp(const struct net_device *dev, return 0; } -/* The semantics of the 'flags' argument in relation to 'change()' - * requests, are interpreted following two rules (which are applied in - * this order): (1) an omitted 'flags' argument is interpreted as - * zero; (2) the 'flags' of a "running" taprio instance cannot be - * changed. - */ -static int taprio_new_flags(const struct nlattr *attr, u32 old, - struct netlink_ext_ack *extack) -{ - u32 new = 0; - - if (attr) - new = nla_get_u32(attr); - - if (old != TAPRIO_FLAGS_INVALID && old != new) { - NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); - return -EOPNOTSUPP; - } - - if (!taprio_flags_valid(new)) { - NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); - return -EINVAL; - } - - return new; -} - static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1841,6 +1824,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; unsigned long flags; + u32 taprio_flags; ktime_t start; int i, err; @@ -1852,12 +1836,31 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); - err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS], - q->flags, extack); - if (err < 0) - return err; + /* The semantics of the 'flags' argument in relation to 'change()' + * requests, are interpreted following two rules (which are applied in + * this order): 
(1) an omitted 'flags' argument is interpreted as + * zero; (2) the 'flags' of a "running" taprio instance cannot be + * changed. + */ + taprio_flags = nla_get_u32_default(tb[TCA_TAPRIO_ATTR_FLAGS], 0); - q->flags = err; + /* txtime-assist and full offload are mutually exclusive */ + if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS], + "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive"); + return -EINVAL; + } + + if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) { + NL_SET_ERR_MSG_MOD(extack, + "Changing 'flags' of a running schedule is not supported"); + return -EOPNOTSUPP; + } + q->flags = taprio_flags; + + /* Needed for length_to_duration() during netlink attribute parsing */ + taprio_set_picos_per_byte(dev, q, extack); err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) @@ -1918,7 +1921,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) goto free_sched; - taprio_set_picos_per_byte(dev, q); taprio_update_queue_max_sdu(q, new_admin, stab); if (FULL_OFFLOAD_IS_ENABLED(q->flags)) @@ -1944,8 +1946,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && !FULL_OFFLOAD_IS_ENABLED(q->flags) && !hrtimer_active(&q->advance_timer)) { - hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, q->clockid, HRTIMER_MODE_ABS); } err = taprio_get_start_time(sch, new_admin, &start); @@ -1964,7 +1965,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - rcu_assign_pointer(q->admin_sched, new_admin); + /* Not going to race against advance_sched(), but still */ + admin = rcu_replace_pointer(q->admin_sched, new_admin, + lockdep_rtnl_is_held()); if (admin) call_rcu(&admin->rcu, taprio_free_sched_cb); } else 
{ @@ -1975,7 +1978,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_start_sched(sch, start, new_admin); - rcu_assign_pointer(q->admin_sched, new_admin); + admin = rcu_replace_pointer(q->admin_sched, new_admin, + lockdep_rtnl_is_held()); if (admin) call_rcu(&admin->rcu, taprio_free_sched_cb); @@ -2065,8 +2069,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, spin_lock_init(&q->current_entry_lock); - hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, CLOCK_TAI, HRTIMER_MODE_ABS); q->root = sch; @@ -2088,11 +2091,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, return -EOPNOTSUPP; } - /* pre-allocate qdisc, attachment can't fail */ - q->qdiscs = kcalloc(dev->num_tx_queues, - sizeof(q->qdiscs[0]), + q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]), GFP_KERNEL); - if (!q->qdiscs) return -ENOMEM; @@ -2134,25 +2134,32 @@ static void taprio_attach(struct Qdisc *sch) /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - struct Qdisc *qdisc = q->qdiscs[ntx]; - struct Qdisc *old; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old, *dev_queue_qdisc; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + struct Qdisc *qdisc = q->qdiscs[ntx]; + + /* In offload mode, the root taprio qdisc is bypassed + * and the netdev TX queues see the children directly + */ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + dev_queue_qdisc = qdisc; } else { - old = dev_graft_qdisc(qdisc->dev_queue, sch); - qdisc_refcount_inc(sch); + /* In software mode, attach the root taprio qdisc + * to all netdev TX queues, so that dev_qdisc_enqueue() + * goes through taprio_enqueue(). 
+ */ + dev_queue_qdisc = sch; } + old = dev_graft_qdisc(dev_queue, dev_queue_qdisc); + /* The qdisc's refcount requires to be elevated once + * for each netdev TX queue it is grafted onto + */ + qdisc_refcount_inc(dev_queue_qdisc); if (old) qdisc_put(old); } - - /* access to the child qdiscs is not needed in offload mode */ - if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - kfree(q->qdiscs); - q->qdiscs = NULL; - } } static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, @@ -2181,13 +2188,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (dev->flags & IFF_UP) dev_deactivate(dev); + /* In offload mode, the child Qdisc is directly attached to the netdev + * TX queue, and thus, we need to keep its refcount elevated in order + * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue. + * However, save the reference to the new qdisc in the private array in + * both software and offload cases, to have an up-to-date reference to + * our children. + */ + *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - *old = dev_graft_qdisc(dev_queue, new); - } else { - *old = q->qdiscs[cl - 1]; - q->qdiscs[cl - 1] = new; + WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); + if (new) + qdisc_refcount_inc(new); + if (*old) + qdisc_put(*old); } + q->qdiscs[cl - 1] = new; if (new) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; @@ -2369,9 +2386,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_mqprio_qopt opt = { 0 }; struct nlattr *nest, *sched_nest; - oper = rtnl_dereference(q->oper_sched); - admin = rtnl_dereference(q->admin_sched); - mqprio_qopt_reconstruct(dev, &opt); nest = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -2392,18 +2406,23 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + rcu_read_lock(); + + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); + if 
(oper && taprio_dump_tc_entries(skb, q, oper)) - goto options_error; + goto options_error_rcu; if (oper && dump_schedule(skb, oper)) - goto options_error; + goto options_error_rcu; if (!admin) goto done; sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); if (!sched_nest) - goto options_error; + goto options_error_rcu; if (dump_schedule(skb, admin)) goto admin_error; @@ -2411,11 +2430,15 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_nest_end(skb, sched_nest); done: + rcu_read_unlock(); return nla_nest_end(skb, nest); admin_error: nla_nest_cancel(skb, sched_nest); +options_error_rcu: + rcu_read_unlock(); + options_error: nla_nest_cancel(skb, nest); @@ -2425,12 +2448,14 @@ start_error: static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx = cl - 1; - if (!dev_queue) + if (ntx >= dev->num_tx_queues) return NULL; - return rtnl_dereference(dev_queue->qdisc_sleeping); + return q->qdiscs[ntx]; } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2445,11 +2470,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid) static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; + tcm->tcm_info = child->handle; return 0; } @@ -2459,16 +2484,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); struct tc_taprio_qopt_offload offload = { .cmd = TAPRIO_CMD_QUEUE_STATS, 
.queue_stats = { .queue = cl - 1, }, }; - struct Qdisc *child; - child = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 || qdisc_qstats_copy(d, child) < 0) return -1; @@ -2523,6 +2546,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .dump_stats = taprio_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("taprio"); static struct notifier_block taprio_device_notifier = { .notifier_call = taprio_dev_notifier, @@ -2547,3 +2571,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Time Aware Priority qdisc"); |
