summaryrefslogtreecommitdiff
path: root/net/sched/sch_api.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/sch_api.c')
-rw-r--r--net/sched/sch_api.c667
1 files changed, 436 insertions, 231 deletions
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 72d2c204d5f3..f56b18c8aebf 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -25,7 +25,9 @@
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
+#include <linux/bpf.h>
+#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -206,7 +208,7 @@ static struct Qdisc_ops *qdisc_lookup_default(const char *name)
for (q = qdisc_base; q; q = q->next) {
if (!strcmp(name, q->id)) {
- if (!try_module_get(q->owner))
+ if (!bpf_try_module_get(q, q->owner))
q = NULL;
break;
}
@@ -228,7 +230,7 @@ int qdisc_set_default(const char *name)
if (!ops) {
/* Not found, drop lock and try to load module */
write_unlock(&qdisc_mod_lock);
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
write_lock(&qdisc_mod_lock);
ops = qdisc_lookup_default(name);
@@ -236,7 +238,7 @@ int qdisc_set_default(const char *name)
if (ops) {
/* Set new default */
- module_put(default_qdisc_ops->owner);
+ bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
default_qdisc_ops = ops;
}
write_unlock(&qdisc_mod_lock);
@@ -309,7 +311,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (dev_ingress_queue(dev))
q = qdisc_match_from_root(
- dev_ingress_queue(dev)->qdisc_sleeping,
+ rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
handle);
out:
return q;
@@ -328,22 +330,28 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
nq = dev_ingress_queue_rcu(dev);
if (nq)
- q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+ q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
+ handle);
out:
return q;
}
-static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
+ struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
- if (cops == NULL)
- return NULL;
+ if (cops == NULL) {
+ NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
cl = cops->find(p, classid);
- if (cl == 0)
- return NULL;
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
+ return ERR_PTR(-ENOENT);
+ }
return cops->leaf(p, cl);
}
@@ -357,7 +365,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
read_lock(&qdisc_mod_lock);
for (q = qdisc_base; q; q = q->next) {
if (nla_strcmp(kind, q->id) == 0) {
- if (!try_module_get(q->owner))
+ if (!bpf_try_module_get(q, q->owner))
q = NULL;
break;
}
@@ -423,7 +431,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
- !memcmp(&rtab->data, nla_data(tab), 1024)) {
+ !memcmp(&rtab->data, nla_data(tab), TC_RTAB_SIZE)) {
rtab->refcnt++;
return rtab;
}
@@ -433,7 +441,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
if (rtab) {
rtab->rate = *r;
rtab->refcnt = 1;
- memcpy(rtab->data, nla_data(tab), 1024);
+ memcpy(rtab->data, nla_data(tab), TC_RTAB_SIZE);
if (r->linklayer == TC_LINKLAYER_UNAWARE)
r->linklayer = __detect_linklayer(r, rtab->data);
rtab->next = qdisc_rtab_list;
@@ -592,17 +600,6 @@ out:
pkt_len = 1;
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
-
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
-{
- if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
- pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
- txt, qdisc->ops->id, qdisc->handle >> 16);
- qdisc->flags |= TCQ_F_WARN_NONWC;
- }
-}
-EXPORT_SYMBOL(qdisc_warn_nonwc);
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
@@ -619,8 +616,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid)
{
- hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
- wd->timer.function = qdisc_watchdog;
+ hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
@@ -634,19 +630,26 @@ EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
u64 delta_ns)
{
- if (test_bit(__QDISC_STATE_DEACTIVATED,
- &qdisc_root_sleeping(wd->qdisc)->state))
+ bool deactivated;
+
+ rcu_read_lock();
+ deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(wd->qdisc)->state);
+ rcu_read_unlock();
+ if (deactivated)
return;
if (hrtimer_is_queued(&wd->timer)) {
+ u64 softexpires;
+
+ softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
/* If timer is already set in [expires, expires + delta_ns],
* do not reprogram it.
*/
- if (wd->last_expires - expires <= delta_ns)
+ if (softexpires - expires <= delta_ns)
return;
}
- wd->last_expires = expires;
hrtimer_start_range_ns(&wd->timer,
ns_to_ktime(expires),
delta_ns,
@@ -772,42 +775,33 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
- if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
+ if (parentid == TC_H_ROOT)
break;
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON_ONCE(parentid != TC_H_ROOT);
break;
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
@@ -902,7 +896,8 @@ static void qdisc_offload_graft_root(struct net_device *dev,
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ struct netlink_ext_ack *extack)
{
struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
struct gnet_stats_queue __percpu *cpu_qstats = NULL;
@@ -970,7 +965,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -989,25 +989,55 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
return false;
}
+static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, u32 clid, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (!tc_qdisc_dump_ignore(q, false)) {
+ if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
+ RTM_NEWQDISC, extack) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
- 0, RTM_DELQDISC) < 0)
+ 0, RTM_DELQDISC, extack) < 0)
goto err_out;
}
if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
- old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
goto err_out;
}
@@ -1022,10 +1052,11 @@ err_out:
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
if (new || old)
- qdisc_notify(net, skb, n, clid, old, new);
+ qdisc_notify(net, skb, n, clid, old, new, extack);
if (old)
qdisc_put(old);
@@ -1063,17 +1094,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (parent == NULL) {
unsigned int i, num_q, ingress;
+ struct netdev_queue *dev_queue;
ingress = 0;
num_q = dev->num_tx_queues;
if ((q && q->flags & TCQ_F_INGRESS) ||
(new && new->flags & TCQ_F_INGRESS)) {
- num_q = 1;
ingress = 1;
- if (!dev_ingress_queue(dev)) {
+ dev_queue = dev_ingress_queue(dev);
+ if (!dev_queue) {
NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
return -ENOENT;
}
+
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
+
+ /* This is the counterpart of that qdisc_refcount_inc_nz() call in
+ * __tcf_qdisc_find() for filter requests.
+ */
+ if (!qdisc_refcount_dec_if_one(q)) {
+ NL_SET_ERR_MSG(extack,
+ "Current ingress or clsact Qdisc has ongoing filter requests");
+ return -EBUSY;
+ }
}
if (dev->flags & IFF_UP)
@@ -1084,18 +1127,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (new && new->ops->attach && !ingress)
goto skip;
- for (i = 0; i < num_q; i++) {
- struct netdev_queue *dev_queue = dev_ingress_queue(dev);
-
- if (!ingress)
+ if (!ingress) {
+ for (i = 0; i < num_q; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
+ old = dev_graft_qdisc(dev_queue, new);
- old = dev_graft_qdisc(dev_queue, new);
- if (new && i > 0)
- qdisc_refcount_inc(new);
-
- if (!ingress)
+ if (new && i > 0)
+ qdisc_refcount_inc(new);
qdisc_put(old);
+ }
+ } else {
+ old = dev_graft_qdisc(dev_queue, NULL);
+
+ /* {ingress,clsact}_destroy() @old before grafting @new to avoid
+ * unprotected concurrent accesses to net_device::miniq_{in,e}gress
+ * pointer(s) in mini_qdisc_pair_swap().
+ */
+ qdisc_notify(net, skb, n, classid, old, new, extack);
+ qdisc_destroy(old);
+
+ dev_graft_qdisc(dev_queue, new);
}
skip:
@@ -1105,12 +1156,10 @@ skip:
qdisc_refcount_inc(new);
rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
if (new && new->ops->attach)
new->ops->attach(new);
- } else {
- notify_and_destroy(net, skb, n, classid, old, new);
}
if (dev->flags & IFF_UP)
@@ -1138,10 +1187,16 @@ skip:
return -EINVAL;
}
+ if (new &&
+ !(parent->flags & TCQ_F_MQROOT) &&
+ rcu_access_pointer(new->stab)) {
+ NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
+ return -EINVAL;
+ }
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
}
return 0;
}
@@ -1199,36 +1254,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
struct qdisc_size_table *stab;
ops = qdisc_lookup_ops(kind);
-#ifdef CONFIG_MODULES
- if (ops == NULL && kind != NULL) {
- char name[IFNAMSIZ];
- if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
- /* We dropped the RTNL semaphore in order to
- * perform the module load. So, even if we
- * succeeded in loading the module we have to
- * tell the caller to replay the request. We
- * indicate this using -EAGAIN.
- * We replay the request because the device may
- * go away in the mean time.
- */
- rtnl_unlock();
- request_module("sch_%s", name);
- rtnl_lock();
- ops = qdisc_lookup_ops(kind);
- if (ops != NULL) {
- /* We will try again qdisc_lookup_ops,
- * so don't keep a reference.
- */
- module_put(ops->owner);
- err = -EAGAIN;
- goto err_out;
- }
- }
- }
-#endif
-
- err = -ENOENT;
if (!ops) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1242,7 +1269,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->parent = parent;
if (handle == TC_H_INGRESS) {
- sch->flags |= TCQ_F_INGRESS;
+ if (!(sch->flags & TCQ_F_INGRESS)) {
+ NL_SET_ERR_MSG(extack,
+ "Specified parent ID is reserved for ingress and clsact Qdiscs");
+ err = -EINVAL;
+ goto err_out3;
+ }
handle = TC_H_MAKE(TC_H_INGRESS, 0);
} else {
if (handle == 0) {
@@ -1266,7 +1298,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* before again attaching a qdisc.
*/
if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
- dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
@@ -1274,20 +1306,21 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (err)
goto err_out3;
- if (ops->init) {
- err = ops->init(sch, tca[TCA_OPTIONS], extack);
- if (err != 0)
- goto err_out5;
- }
-
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB], extack);
if (IS_ERR(stab)) {
err = PTR_ERR(stab);
- goto err_out4;
+ goto err_out3;
}
rcu_assign_pointer(sch->stab, stab);
}
+
+ if (ops->init) {
+ err = ops->init(sch, tca[TCA_OPTIONS], extack);
+ if (err != 0)
+ goto err_out4;
+ }
+
if (tca[TCA_RATE]) {
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
@@ -1312,28 +1345,22 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
-err_out5:
- /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+err_out4:
+ /* Even if ops->init() failed, we call ops->destroy()
+ * like qdisc_create_dflt().
+ */
if (ops->destroy)
ops->destroy(sch);
+ qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ lockdep_unregister_key(&sch->root_lock_key);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
- module_put(ops->owner);
+ bpf_module_put(ops, ops->owner);
err_out:
*errp = err;
return NULL;
-
-err_out4:
- /*
- * Any broken qdiscs that would require a ops->reset() here?
- * The qdisc was never in action so it shouldn't be necessary.
- */
- qdisc_put_stab(rtnl_dereference(sch->stab));
- if (ops->destroy)
- ops->destroy(sch);
- goto err_out3;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
@@ -1437,27 +1464,18 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
* Delete/get qdisc.
*/
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = nlmsg_data(n);
- struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
- u32 clid;
struct Qdisc *q = NULL;
struct Qdisc *p = NULL;
+ u32 clid;
int err;
- err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
- rtm_tca_policy, extack);
- if (err < 0)
- return err;
-
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
clid = tcm->tcm_parent;
if (clid) {
if (clid != TC_H_ROOT) {
@@ -1467,9 +1485,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1478,6 +1496,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
+ if (IS_ERR(q))
+ return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@@ -1492,7 +1512,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
}
if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
- NL_SET_ERR_MSG(extack, "Invalid qdisc name");
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
return -EINVAL;
}
@@ -1509,41 +1529,67 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (err != 0)
return err;
} else {
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_get_notify(net, skb, n, clid, q, NULL);
}
return 0;
}
-/*
- * Create/change qdisc.
- */
-
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm;
+ struct tcmsg *tcm = nlmsg_data(n);
struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
- u32 clid;
- struct Qdisc *q, *p;
int err;
-replay:
- /* Reinit, just in case something touches this. */
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
return err;
- tcm = nlmsg_data(n);
- clid = tcm->tcm_parent;
- q = p = NULL;
-
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return -ENODEV;
+ netdev_lock_ops(dev);
+ err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+ !(n->nlmsg_flags & NLM_F_REPLACE) &&
+ !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
+static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
+{
+ struct Qdisc *q = NULL;
+ struct Qdisc *p = NULL;
+ u32 clid;
+ int err;
+
+ clid = tcm->tcm_parent;
if (clid) {
if (clid != TC_H_ROOT) {
@@ -1553,9 +1599,16 @@ replay:
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ if (p->flags & TCQ_F_INGRESS) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot add children to ingress/clsact qdisc");
+ return -EOPNOTSUPP;
+ }
+ q = qdisc_leaf(p, clid, extack);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1578,13 +1631,22 @@ replay:
q = qdisc_lookup(dev, tcm->tcm_handle);
if (!q)
goto create_n_graft;
+ if (q->parent != tcm->tcm_parent) {
+ NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
+ return -EINVAL;
+ }
if (n->nlmsg_flags & NLM_F_EXCL) {
NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
return -EEXIST;
}
if (tca[TCA_KIND] &&
nla_strcmp(tca[TCA_KIND], q->ops->id)) {
- NL_SET_ERR_MSG(extack, "Invalid qdisc name");
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
+ return -EINVAL;
+ }
+ if (q->flags & TCQ_F_INGRESS) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot regraft ingress or clsact Qdiscs");
return -EINVAL;
}
if (q == p ||
@@ -1592,6 +1654,10 @@ replay:
NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
return -ELOOP;
}
+ if (clid == TC_H_INGRESS) {
+ NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
+ return -EINVAL;
+ }
qdisc_refcount_inc(q);
goto graft;
} else {
@@ -1602,27 +1668,35 @@ replay:
*
* We know, that some child q is already
* attached to this parent and have choice:
- * either to change it or to create/graft new one.
+ * 1) change it or 2) create/graft new one.
+ * If the requested qdisc kind is different
+ * than the existing one, then we choose graft.
+ * If they are the same then this is "change"
+ * operation - just let it fallthrough..
*
* 1. We are allowed to create/graft only
- * if CREATE and REPLACE flags are set.
+ * if the request is explicitly stating
+ * "please create if it doesn't exist".
*
- * 2. If EXCL is set, requestor wanted to say,
- * that qdisc tcm_handle is not expected
+ * 2. If the request is to exclusive create
+ * then the qdisc tcm_handle is not expected
* to exist, so that we choose create/graft too.
*
* 3. The last case is when no flags are set.
+ * This will happen when for example tc
+ * utility issues a "change" command.
* Alas, it is sort of hole in API, we
* cannot decide what to do unambiguously.
- * For now we select create/graft, if
- * user gave KIND, which does not match existing.
+ * For now we select create/graft.
*/
- if ((n->nlmsg_flags & NLM_F_CREATE) &&
- (n->nlmsg_flags & NLM_F_REPLACE) &&
- ((n->nlmsg_flags & NLM_F_EXCL) ||
- (tca[TCA_KIND] &&
- nla_strcmp(tca[TCA_KIND], q->ops->id))))
- goto create_n_graft;
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ if (req_create_or_replace(n) ||
+ req_create_exclusive(n))
+ goto create_n_graft;
+ else if (req_change(n))
+ goto create_n_graft2;
+ }
}
}
} else {
@@ -1643,12 +1717,12 @@ replay:
return -EEXIST;
}
if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
- NL_SET_ERR_MSG(extack, "Invalid qdisc name");
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
return -EINVAL;
}
err = qdisc_change(q, tca, extack);
if (err == 0)
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
return err;
create_n_graft:
@@ -1656,6 +1730,7 @@ create_n_graft:
NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
}
+create_n_graft2:
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev),
@@ -1679,11 +1754,8 @@ create_n_graft:
tcm->tcm_parent, tcm->tcm_handle,
tca, &err, extack);
}
- if (q == NULL) {
- if (err == -EAGAIN)
- goto replay;
+ if (!q)
return err;
- }
graft:
err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
@@ -1696,6 +1768,58 @@ graft:
return 0;
}
+static void request_qdisc_module(struct nlattr *kind)
+{
+ struct Qdisc_ops *ops;
+ char name[IFNAMSIZ];
+
+ if (!kind)
+ return;
+
+ ops = qdisc_lookup_ops(kind);
+ if (ops) {
+ bpf_module_put(ops, ops->owner);
+ return;
+ }
+
+ if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
+ rtnl_unlock();
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
+ rtnl_lock();
+ }
+}
+
+/*
+ * Create/change qdisc.
+ */
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ struct tcmsg *tcm;
+ int err;
+
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+ return err;
+
+ request_qdisc_module(tca[TCA_KIND]);
+
+ tcm = nlmsg_data(n);
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ netdev_lock_ops(dev);
+ err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx, bool recur,
@@ -1715,7 +1839,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1737,7 +1861,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1780,17 +1904,23 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
+ netdev_lock_ops(dev);
if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
skb, cb, &q_idx, s_q_idx,
- true, tca[TCA_DUMP_INVISIBLE]) < 0)
+ true, tca[TCA_DUMP_INVISIBLE]) < 0) {
+ netdev_unlock_ops(dev);
goto done;
+ }
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false,
- tca[TCA_DUMP_INVISIBLE]) < 0)
+ tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, cb, &q_idx, s_q_idx, false,
+ tca[TCA_DUMP_INVISIBLE]) < 0) {
+ netdev_unlock_ops(dev);
goto done;
+ }
+ netdev_unlock_ops(dev);
cont:
idx++;
@@ -1810,8 +1940,8 @@ done:
************************************************/
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
- unsigned long cl,
- u32 portid, u32 seq, u16 flags, int event)
+ unsigned long cl, u32 portid, u32 seq, u16 flags,
+ int event, struct netlink_ext_ack *extack)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1846,7 +1976,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1857,7 +1992,30 @@ nla_put_failure:
static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event)
+ unsigned long cl, int event, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct Qdisc *q,
+ unsigned long cl, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1866,7 +2024,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
+ extack) < 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1888,14 +2047,18 @@ static int tclass_del_notify(struct net *net,
if (!cops->delete)
return -EOPNOTSUPP;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
+ if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS) < 0) {
- kfree_skb(skb);
- return -EINVAL;
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS, extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ } else {
+ skb = NULL;
}
err = cops->delete(q, cl, extack);
@@ -1904,8 +2067,8 @@ static int tclass_del_notify(struct net *net,
return err;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
return err;
}
@@ -1994,15 +2157,15 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
#endif
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = nlmsg_data(n);
- struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
- struct Qdisc *q = NULL;
const struct Qdisc_class_ops *cops;
+ struct Qdisc *q = NULL;
unsigned long cl = 0;
unsigned long new_cl;
u32 portid;
@@ -2010,15 +2173,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
u32 qid;
int err;
- err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
- rtm_tca_policy, extack);
- if (err < 0)
- return err;
-
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
/*
parent == TC_H_UNSPEC - unspecified parent.
parent == TC_H_ROOT - class is root, which has no parent.
@@ -2100,7 +2254,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
- err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
+ err = tclass_get_notify(net, skb, n, q, cl, extack);
goto out;
default:
err = -EINVAL;
@@ -2113,12 +2267,18 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
return -EOPNOTSUPP;
}
+ /* Prevent creation of traffic classes with classid TC_H_ROOT */
+ if (clid == TC_H_ROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
+ return -EINVAL;
+ }
+
new_cl = cl;
err = -EOPNOTSUPP;
if (cops->change)
err = cops->change(q, clid, portid, tca, &new_cl, extack);
if (err == 0) {
- tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
+ tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
/* We just create a new class, need to do reverse binding. */
if (cl != new_cl)
tc_bind_tclass(q, portid, clid, new_cl);
@@ -2127,6 +2287,31 @@ out:
return err;
}
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = nlmsg_data(n);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+ return err;
+
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ netdev_lock_ops(dev);
+ err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
struct qdisc_dump_args {
struct qdisc_walker w;
struct sk_buff *skb;
@@ -2140,7 +2325,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTCLASS);
+ RTM_NEWTCLASS, NULL);
}
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
@@ -2203,20 +2388,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
return 0;
}
-static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tcmsg *tcm, struct net_device *dev)
{
- struct tcmsg *tcm = nlmsg_data(cb->nlh);
- struct net *net = sock_net(skb->sk);
struct netdev_queue *dev_queue;
- struct net_device *dev;
int t, s_t;
- if (nlmsg_len(cb->nlh) < sizeof(*tcm))
- return 0;
- dev = dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return 0;
-
s_t = cb->args[0];
t = 0;
@@ -2226,17 +2403,39 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t, false) < 0)
+ tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, tcm, cb, &t, s_t, false) < 0)
goto done;
done:
cb->args[0] = t;
- dev_put(dev);
return skb->len;
}
+static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int err;
+
+ if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+ return 0;
+
+ dev = dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return 0;
+
+ netdev_lock_ops(dev);
+ err = __tc_dump_tclass(skb, cb, tcm, dev);
+ netdev_unlock_ops(dev);
+
+ dev_put(dev);
+
+ return err;
+}
+
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
@@ -2279,7 +2478,20 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
+#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+#endif
+
+static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
+ {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
+ {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
+ .dumpit = tc_dump_qdisc},
+ {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
+ .dumpit = tc_dump_tclass},
+};
static int __init pktsched_init(void)
{
@@ -2299,14 +2511,7 @@ static int __init pktsched_init(void)
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
- 0);
- rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
- 0);
+ rtnl_register_many(psched_rtnl_msg_handlers);
tc_wrapper_init();