From 05cd271fd61a0bb64fc20c46c9c87b8272fb980c Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 30 Apr 2018 14:28:30 +0300 Subject: cls_flower: Support multiple masks per priority Currently flower doesn't support inserting filters with different masks on a single priority, even if the actual flows (key + mask) inserted aren't overlapping, as with the use case of offloading openvswitch datapath flows. Instead one must go up one level, and assign different priorities for each mask, which will create a different flower instances. This patch opens flower to support more than one mask per priority, and a single flower instance. It does so by adding another hash table on top of the existing one which will store the different masks, and the filters that share it. The user is left with the responsibility of ensuring non overlapping flows, otherwise precedence is not guaranteed. Signed-off-by: Paul Blakey Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 275 +++++++++++++++++++++++++++++++------------------ 1 file changed, 174 insertions(+), 101 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d964e60c730e..eacaaf803914 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -61,16 +61,18 @@ struct fl_flow_mask_range { struct fl_flow_mask { struct fl_flow_key key; struct fl_flow_mask_range range; - struct rcu_head rcu; + struct rhash_head ht_node; + struct rhashtable ht; + struct rhashtable_params filter_ht_params; + struct flow_dissector dissector; + struct list_head filters; + struct rcu_head rcu; + struct list_head list; }; struct cls_fl_head { struct rhashtable ht; - struct fl_flow_mask mask; - struct flow_dissector dissector; - bool mask_assigned; - struct list_head filters; - struct rhashtable_params ht_params; + struct list_head masks; union { struct work_struct work; struct rcu_head rcu; @@ -79,6 +81,7 @@ struct cls_fl_head { }; struct cls_fl_filter { + struct fl_flow_mask *mask; struct rhash_head ht_node; struct fl_flow_key mkey; struct tcf_exts exts; @@ -94,6 +97,13 @@ struct cls_fl_filter { struct net_device *hw_dev; }; +static const struct rhashtable_params mask_ht_params = { + .key_offset = offsetof(struct fl_flow_mask, key), + .key_len = sizeof(struct fl_flow_key), + .head_offset = offsetof(struct fl_flow_mask, ht_node), + .automatic_shrinking = true, +}; + static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) { return mask->range.end - mask->range.start; @@ -103,13 +113,19 @@ static void fl_mask_update_range(struct fl_flow_mask *mask) { const u8 *bytes = (const u8 *) &mask->key; size_t size = sizeof(mask->key); - size_t i, first = 0, last = size - 1; + size_t i, first = 0, last; - for (i = 0; i < sizeof(mask->key); i++) { + for (i = 0; i < size; i++) { + if (bytes[i]) { + first = i; + break; + } + } + last = first; + for (i = size - 1; i != first; i--) { if (bytes[i]) { - if (!first && i) - first = i; last = i; + break; } } mask->range.start = rounddown(first, sizeof(long)); @@ -140,12 +156,11 @@ static void fl_clear_masked_range(struct fl_flow_key *key, memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); } -static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head, +static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, struct fl_flow_key *mkey) { - return rhashtable_lookup_fast(&head->ht, - fl_key_get_start(mkey, &head->mask), - head->ht_params); + return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), + 
mask->filter_ht_params); } static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -153,28 +168,28 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_fl_head *head = rcu_dereference_bh(tp->root); struct cls_fl_filter *f; + struct fl_flow_mask *mask; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - if (!atomic_read(&head->ht.nelems)) - return -1; - - fl_clear_masked_range(&skb_key, &head->mask); + list_for_each_entry_rcu(mask, &head->masks, list) { + fl_clear_masked_range(&skb_key, mask); - skb_key.indev_ifindex = skb->skb_iif; - /* skb_flow_dissect() does not set n_proto in case an unknown protocol, - * so do it rather here. - */ - skb_key.basic.n_proto = skb->protocol; - skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key); - skb_flow_dissect(skb, &head->dissector, &skb_key, 0); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown + * protocol, so do it rather here. + */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); + skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); - fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + fl_set_masked_key(&skb_mkey, &skb_key, mask); - f = fl_lookup(head, &skb_mkey); - if (f && !tc_skip_sw(f->flags)) { - *res = f->res; - return tcf_exts_exec(skb, &f->exts, res); + f = fl_lookup(mask, &skb_mkey); + if (f && !tc_skip_sw(f->flags)) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } } return -1; } @@ -187,11 +202,28 @@ static int fl_init(struct tcf_proto *tp) if (!head) return -ENOBUFS; - INIT_LIST_HEAD_RCU(&head->filters); + INIT_LIST_HEAD_RCU(&head->masks); rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); - return 0; + return rhashtable_init(&head->ht, &mask_ht_params); +} + +static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, + bool async) +{ + if (!list_empty(&mask->filters)) + return false; + + rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); + rhashtable_destroy(&mask->ht); + list_del_rcu(&mask->list); + if (async) + kfree_rcu(mask, rcu); + else + kfree(mask); + + return true; } static void __fl_destroy_filter(struct cls_fl_filter *f) @@ -234,8 +266,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, } static int fl_hw_replace_filter(struct tcf_proto *tp, - struct flow_dissector *dissector, - struct fl_flow_key *mask, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { @@ -247,8 +277,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; - cls_flower.dissector = dissector; - cls_flower.mask = mask; + cls_flower.dissector = &f->mask->dissector; + cls_flower.mask = &f->mask->key; cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; @@ -283,28 +313,31 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) &cls_flower, false); } -static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, +static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); + bool async = tcf_exts_get_net(&f->exts); + bool last; idr_remove(&head->handle_idr, f->handle); list_del_rcu(&f->list); + last = fl_mask_put(head, f->mask, async); if 
(!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f, extack); tcf_unbind_filter(tp, &f->res); - if (tcf_exts_get_net(&f->exts)) + if (async) call_rcu(&f->rcu, fl_destroy_filter); else __fl_destroy_filter(f); + + return last; } static void fl_destroy_sleepable(struct work_struct *work) { struct cls_fl_head *head = container_of(work, struct cls_fl_head, work); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); kfree(head); module_put(THIS_MODULE); } @@ -320,10 +353,15 @@ static void fl_destroy_rcu(struct rcu_head *rcu) static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); + struct fl_flow_mask *mask, *next_mask; struct cls_fl_filter *f, *next; - list_for_each_entry_safe(f, next, &head->filters, list) - __fl_delete(tp, f, extack); + list_for_each_entry_safe(mask, next_mask, &head->masks, list) { + list_for_each_entry_safe(f, next, &mask->filters, list) { + if (__fl_delete(tp, f, extack)) + break; + } + } idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); @@ -715,14 +753,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, return ret; } -static bool fl_mask_eq(struct fl_flow_mask *mask1, - struct fl_flow_mask *mask2) +static void fl_mask_copy(struct fl_flow_mask *dst, + struct fl_flow_mask *src) { - const long *lmask1 = fl_key_get_start(&mask1->key, mask1); - const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + const void *psrc = fl_key_get_start(&src->key, src); + void *pdst = fl_key_get_start(&dst->key, src); - return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && - !memcmp(lmask1, lmask2, fl_mask_range(mask1)); + memcpy(pdst, psrc, fl_mask_range(src)); + dst->range = src->range; } static const struct rhashtable_params fl_ht_params = { @@ -731,14 +769,13 @@ static const struct rhashtable_params fl_ht_params = { .automatic_shrinking = true, }; -static int fl_init_hashtable(struct cls_fl_head *head, - struct fl_flow_mask *mask) +static int fl_init_mask_hashtable(struct fl_flow_mask *mask) { - head->ht_params = fl_ht_params; - head->ht_params.key_len = fl_mask_range(mask); - head->ht_params.key_offset += mask->range.start; + mask->filter_ht_params = fl_ht_params; + mask->filter_ht_params.key_len = fl_mask_range(mask); + mask->filter_ht_params.key_offset += mask->range.start; - return rhashtable_init(&head->ht, &head->ht_params); + return rhashtable_init(&mask->ht, &mask->filter_ht_params); } #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) @@ -761,8 +798,7 @@ static int fl_init_hashtable(struct cls_fl_head *head, FL_KEY_SET(keys, cnt, id, member); \ } while(0); -static void fl_init_dissector(struct cls_fl_head *head, - struct fl_flow_mask *mask) +static void fl_init_dissector(struct fl_flow_mask *mask) { struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; size_t cnt = 0; @@ -802,31 +838,66 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); - skb_flow_dissector_init(&head->dissector, keys, cnt); + skb_flow_dissector_init(&mask->dissector, keys, cnt); +} + +static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct fl_flow_mask *newmask; + int err; + + newmask = kzalloc(sizeof(*newmask), GFP_KERNEL); + if (!newmask) + return ERR_PTR(-ENOMEM); + + fl_mask_copy(newmask, mask); + + err = fl_init_mask_hashtable(newmask); + if (err) + goto errout_free; + + fl_init_dissector(newmask); + + 
INIT_LIST_HEAD_RCU(&newmask->filters); + + err = rhashtable_insert_fast(&head->ht, &newmask->ht_node, + mask_ht_params); + if (err) + goto errout_destroy; + + list_add_tail_rcu(&newmask->list, &head->masks); + + return newmask; + +errout_destroy: + rhashtable_destroy(&newmask->ht); +errout_free: + kfree(newmask); + + return ERR_PTR(err); } static int fl_check_assign_mask(struct cls_fl_head *head, + struct cls_fl_filter *fnew, + struct cls_fl_filter *fold, struct fl_flow_mask *mask) { - int err; + struct fl_flow_mask *newmask; - if (head->mask_assigned) { - if (!fl_mask_eq(&head->mask, mask)) + fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params); + if (!fnew->mask) { + if (fold) return -EINVAL; - else - return 0; - } - /* Mask is not assigned yet. So assign it and init hashtable - * according to that. - */ - err = fl_init_hashtable(head, mask); - if (err) - return err; - memcpy(&head->mask, mask, sizeof(head->mask)); - head->mask_assigned = true; + newmask = fl_create_new_mask(head, mask); + if (IS_ERR(newmask)) + return PTR_ERR(newmask); - fl_init_dissector(head, mask); + fnew->mask = newmask; + } else if (fold && fold->mask == fnew->mask) { + return -EINVAL; + } return 0; } @@ -924,30 +995,26 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout_idr; - err = fl_check_assign_mask(head, &mask); + err = fl_check_assign_mask(head, fnew, fold, &mask); if (err) goto errout_idr; if (!tc_skip_sw(fnew->flags)) { - if (!fold && fl_lookup(head, &fnew->mkey)) { + if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { err = -EEXIST; - goto errout_idr; + goto errout_mask; } - err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, - head->ht_params); + err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); if (err) - goto errout_idr; + goto errout_mask; } if (!tc_skip_hw(fnew->flags)) { - err = fl_hw_replace_filter(tp, - &head->dissector, - &mask.key, - fnew, - extack); + err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_idr; + goto errout_mask; } if (!tc_in_hw(fnew->flags)) @@ -955,8 +1022,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (fold) { if (!tc_skip_sw(fold->flags)) - rhashtable_remove_fast(&head->ht, &fold->ht_node, - head->ht_params); + rhashtable_remove_fast(&fold->mask->ht, + &fold->ht_node, + fold->mask->filter_ht_params); if (!tc_skip_hw(fold->flags)) fl_hw_destroy_filter(tp, fold, NULL); } @@ -970,12 +1038,15 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { - list_add_tail_rcu(&fnew->list, &head->filters); + list_add_tail_rcu(&fnew->list, &fnew->mask->filters); } kfree(tb); return 0; +errout_mask: + fl_mask_put(head, fnew->mask, false); + errout_idr: if (fnew->handle) idr_remove(&head->handle_idr, fnew->handle); @@ -994,10 +1065,10 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, struct cls_fl_filter *f = arg; if (!tc_skip_sw(f->flags)) - rhashtable_remove_fast(&head->ht, &f->ht_node, - head->ht_params); + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); __fl_delete(tp, f, extack); - *last = list_empty(&head->filters); + *last = list_empty(&head->masks); return 0; } @@ -1005,16 +1076,19 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; - - list_for_each_entry_rcu(f, &head->filters, list) { - if (arg->count < 
arg->skip) - goto skip; - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - break; - } + struct fl_flow_mask *mask; + + list_for_each_entry_rcu(mask, &head->masks, list) { + list_for_each_entry_rcu(f, &mask->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, f, arg) < 0) { + arg->stop = 1; + break; + } skip: - arg->count++; + arg->count++; + } } } @@ -1150,7 +1224,6 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; @@ -1169,7 +1242,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, goto nla_put_failure; key = &f->key; - mask = &head->mask.key; + mask = &f->mask->key; if (mask->indev_ifindex) { struct net_device *dev; -- cgit From 29e6eee192f31caf9e0af7713224fd171044cef4 Mon Sep 17 00:00:00 2001 From: Craig Dillabaugh Date: Tue, 1 May 2018 10:17:43 -0400 Subject: net sched: Implemented get_fill_size routine for act_csum. Signed-off-by: Craig Dillabaugh Signed-off-by: David S. Miller --- net/sched/act_csum.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/sched') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 7e28b2ce1437..526a8e491626 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -648,6 +648,11 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } +static size_t tcf_csum_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_csum)); +} + static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, @@ -658,6 +663,7 @@ static struct tc_action_ops act_csum_ops = { .cleanup = tcf_csum_cleanup, .walk = tcf_csum_walker, .lookup = tcf_csum_search, + .get_fill_size = tcf_csum_get_fill_size, .size = sizeof(struct tcf_csum), }; -- cgit From 32f7b44d0f5661044fcfa84e9ad402ed9d759107 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 May 2018 10:50:31 +0200 Subject: sched: manipulate __QDISC_STATE_RUNNING in qdisc_run_* helpers Currently NOLOCK qdiscs pay a measurable overhead to atomically manipulate the __QDISC_STATE_RUNNING. Such bit is flipped twice per packet in the uncontended scenario with packet rate below the line rate: on packed dequeue and on the next, failing dequeue attempt. This changeset moves the bit manipulation into the qdisc_run_{begin,end} helpers, so that the bit is now flipped only once per packet, with measurable performance improvement in the uncontended scenario. This also allows simplifying the qdisc teardown code path - since qdisc_is_running() is now effective for each qdisc type - and avoid a possible race between qdisc_run() and dev_deactivate_many(), as now the some_qdisc_is_busy() can properly detect NOLOCK qdiscs being busy dequeuing packets. Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 39c144b6ff98..ff3ce71aec93 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -373,33 +373,24 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, */ static inline bool qdisc_restart(struct Qdisc *q, int *packets) { - bool more, validate, nolock = q->flags & TCQ_F_NOLOCK; spinlock_t *root_lock = NULL; struct netdev_queue *txq; struct net_device *dev; struct sk_buff *skb; + bool validate; /* Dequeue packet */ - if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) - return false; - skb = dequeue_skb(q, &validate, packets); - if (unlikely(!skb)) { - if (nolock) - clear_bit(__QDISC_STATE_RUNNING, &q->state); + if (unlikely(!skb)) return false; - } - if (!nolock) + if (!(q->flags & TCQ_F_NOLOCK)) root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = skb_get_tx_queue(dev, skb); - more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate); - if (nolock) - clear_bit(__QDISC_STATE_RUNNING, &q->state); - return more; + return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); } void __qdisc_run(struct Qdisc *q) @@ -1131,17 +1122,13 @@ static bool some_qdisc_is_busy(struct net_device *dev) dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc_sleeping; - if (q->flags & TCQ_F_NOLOCK) { - val = test_bit(__QDISC_STATE_SCHED, &q->state); - } else { - root_lock = qdisc_lock(q); - spin_lock_bh(root_lock); + root_lock = qdisc_lock(q); + spin_lock_bh(root_lock); - val = (qdisc_is_running(q) || - test_bit(__QDISC_STATE_SCHED, &q->state)); + val = (qdisc_is_running(q) || + test_bit(__QDISC_STATE_SCHED, &q->state)); - spin_unlock_bh(root_lock); - } + spin_unlock_bh(root_lock); if (val) return true; -- cgit From 96009c7d500efdd5534e83b2e3eb2c58d4b137ae Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 May 2018 16:24:36 +0200 Subject: sched: replace __QDISC_STATE_RUNNING bit with a spin lock So that we can use lockdep on it. The newly introduced sequence lock has the same scope of busylock, so it shares the same lockdep annotation, but it's only used for NOLOCK qdiscs. With this changeset we acquire such lock in the control path around flushing operation (qdisc reset), to allow more NOLOCK qdisc perf improvement in the next patch. Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ff3ce71aec93..a126f16bc30b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -858,6 +858,11 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + /* seqlock has the same scope of busylock, for NOLOCK qdisc */ + spin_lock_init(&sch->seqlock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); lockdep_set_class(&sch->running, dev->qdisc_running_key ?: &qdisc_running_key); @@ -1097,6 +1102,10 @@ static void dev_deactivate_queue(struct net_device *dev, qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { + bool nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); if (!(qdisc->flags & TCQ_F_BUILTIN)) @@ -1106,6 +1115,8 @@ static void dev_deactivate_queue(struct net_device *dev, qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); } } -- cgit From 021a17ed796b62383f7623f4fea73787abddad77 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 May 2018 16:24:37 +0200 Subject: pfifo_fast: drop unneeded additional lock on dequeue After the previous patch, for NOLOCK qdiscs, q->seqlock is always held when the dequeue() is invoked, we can drop any additional locking to protect such operation. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a126f16bc30b..760ab1b09f8b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -656,7 +656,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (__skb_array_empty(q)) continue; - skb = skb_array_consume_bh(q); + skb = __skb_array_consume(q); } if (likely(skb)) { qdisc_qstats_cpu_backlog_dec(qdisc, skb); @@ -697,7 +697,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) if (!q->ring.queue) continue; - while ((skb = skb_array_consume_bh(q)) != NULL) + while ((skb = __skb_array_consume(q)) != NULL) kfree_skb(skb); } -- cgit From 290aa0ad74c995c60d94fb4f1d66d411efa13dd5 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 21 May 2018 23:03:04 +0300 Subject: net: sched: don't disable bh when accessing action idr Initial net_device implementation used ingress_lock spinlock to synchronize ingress path of device. This lock was used in both process and bh context. In some code paths action map lock was obtained while holding ingress_lock. Commit e1e992e52faa ("[NET_SCHED] protect action config/dump from irqs") modified actions to always disable bh, while using action map lock, in order to prevent deadlock on ingress_lock in softirq. This lock was removed from net_device, so disabling bh, while accessing action map, is no longer necessary. Replace all action idr spinlock usage with regular calls that do not disable bh. Signed-off-by: Vlad Buslov Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 72251241665a..3f4cf930f809 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -77,9 +77,9 @@ static void free_tcf(struct tc_action *p) static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) { - spin_lock_bh(&idrinfo->lock); + spin_lock(&idrinfo->lock); idr_remove(&idrinfo->action_idr, p->tcfa_index); - spin_unlock_bh(&idrinfo->lock); + spin_unlock(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -156,7 +156,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - spin_lock_bh(&idrinfo->lock); + spin_lock(&idrinfo->lock); s_i = cb->args[0]; @@ -191,7 +191,7 @@ done: if (index >= 0) cb->args[0] = index + 1; - spin_unlock_bh(&idrinfo->lock); + spin_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; @@ -261,9 +261,9 @@ static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo) { struct tc_action *p = NULL; - spin_lock_bh(&idrinfo->lock); + spin_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); - spin_unlock_bh(&idrinfo->lock); + spin_unlock(&idrinfo->lock); return p; } @@ -323,7 +323,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, } spin_lock_init(&p->tcfa_lock); idr_preload(GFP_KERNEL); - spin_lock_bh(&idrinfo->lock); + spin_lock(&idrinfo->lock); /* user doesn't specify an index */ if (!index) { index = 1; @@ -331,7 +331,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, } else { err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC); } - spin_unlock_bh(&idrinfo->lock); + spin_unlock(&idrinfo->lock); idr_preload_end(); if (err) goto err3; @@ -369,9 +369,9 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock_bh(&idrinfo->lock); + spin_lock(&idrinfo->lock); idr_replace(&idrinfo->action_idr, a, a->tcfa_index); - spin_unlock_bh(&idrinfo->lock); + spin_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_insert); -- cgit From aaa908ffbee18a65529b716efb346a626e81559a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 23 May 2018 15:26:53 -0700 Subject: net_sched: switch to rcu_work Commit 05f0fe6b74db ("RCU, workqueue: Implement rcu_work") introduces new API's for dispatching work in a RCU callback. Now we can just switch to the new API's for tc filters. This could get rid of a lot of code. Cc: Tejun Heo Cc: "Paul E. McKenney" Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 5 +++-- net/sched/cls_basic.c | 24 +++++++----------------- net/sched/cls_bpf.c | 22 ++++++---------------- net/sched/cls_cgroup.c | 23 +++++------------------ net/sched/cls_flow.c | 24 +++++++----------------- net/sched/cls_flower.c | 40 ++++++++++------------------------------ net/sched/cls_fw.c | 24 +++++++----------------- net/sched/cls_matchall.c | 21 +++++---------------- net/sched/cls_route.c | 23 +++++++++-------------- net/sched/cls_rsvp.h | 20 +++++--------------- net/sched/cls_tcindex.c | 41 ++++++++++------------------------------- net/sched/cls_u32.c | 37 ++++++++++--------------------------- 12 files changed, 84 insertions(+), 220 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 963e4bf0aab8..a4a5ace834c3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -103,9 +103,10 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); -bool tcf_queue_work(struct work_struct *work) +bool tcf_queue_work(struct rcu_work *rwork, work_func_t func) { - return queue_work(tc_filter_wq, work); + INIT_RCU_WORK(rwork, func); + return queue_rcu_work(tc_filter_wq, rwork); } EXPORT_SYMBOL(tcf_queue_work); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6b7ab3512f5b..95367f37098d 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -35,10 +35,7 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -97,21 +94,14 @@ static void __basic_delete_filter(struct basic_filter *f) static void basic_delete_filter_work(struct work_struct *work) { - struct basic_filter *f = container_of(work, struct basic_filter, work); - + struct basic_filter *f = container_of(to_rcu_work(work), + struct basic_filter, + rwork); rtnl_lock(); __basic_delete_filter(f); rtnl_unlock(); } -static void basic_delete_filter(struct rcu_head *head) -{ - struct basic_filter *f = container_of(head, struct basic_filter, rcu); - - INIT_WORK(&f->work, basic_delete_filter_work); - tcf_queue_work(&f->work); -} - static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); @@ -122,7 +112,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); if (tcf_exts_get_net(&f->exts)) - call_rcu(&f->rcu, basic_delete_filter); + tcf_queue_work(&f->rwork, basic_delete_filter_work); else __basic_delete_filter(f); } @@ -140,7 +130,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); tcf_exts_get_net(&f->exts); - call_rcu(&f->rcu, basic_delete_filter); + tcf_queue_work(&f->rwork, basic_delete_filter_work); *last = list_empty(&head->flist); return 0; } @@ -234,7 +224,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); - call_rcu(&fold->rcu, basic_delete_filter); + tcf_queue_work(&fold->rwork, basic_delete_filter_work); } else { list_add_rcu(&fnew->link, &head->flist); } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b07c1fa8bc0d..1aa7f6511065 100644 --- a/net/sched/cls_bpf.c +++ 
b/net/sched/cls_bpf.c @@ -49,10 +49,7 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -275,21 +272,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) static void cls_bpf_delete_prog_work(struct work_struct *work) { - struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); - + struct cls_bpf_prog *prog = container_of(to_rcu_work(work), + struct cls_bpf_prog, + rwork); rtnl_lock(); __cls_bpf_delete_prog(prog); rtnl_unlock(); } -static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) -{ - struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); - - INIT_WORK(&prog->work, cls_bpf_delete_prog_work); - tcf_queue_work(&prog->work); -} - static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct netlink_ext_ack *extack) { @@ -300,7 +290,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); if (tcf_exts_get_net(&prog->exts)) - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + tcf_queue_work(&prog->rwork, cls_bpf_delete_prog_work); else __cls_bpf_delete_prog(prog); } @@ -526,7 +516,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); tcf_exts_get_net(&oldprog->exts); - call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); + tcf_queue_work(&oldprog->rwork, cls_bpf_delete_prog_work); } else { list_add_rcu(&prog->link, &head->plist); } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 762da5c0cf5e..3bc01bdde165 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,10 +23,7 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -70,24 +67,14 @@ static void __cls_cgroup_destroy(struct cls_cgroup_head *head) static void cls_cgroup_destroy_work(struct work_struct *work) { - struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head *head = container_of(to_rcu_work(work), struct cls_cgroup_head, - work); + rwork); rtnl_lock(); __cls_cgroup_destroy(head); rtnl_unlock(); } -static void cls_cgroup_destroy_rcu(struct rcu_head *root) -{ - struct cls_cgroup_head *head = container_of(root, - struct cls_cgroup_head, - rcu); - - INIT_WORK(&head->work, cls_cgroup_destroy_work); - tcf_queue_work(&head->work); -} - static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -134,7 +121,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, rcu_assign_pointer(tp->root, new); if (head) { tcf_exts_get_net(&head->exts); - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); } return 0; errout: @@ -151,7 +138,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp, /* Head can still be NULL due to cls_cgroup_init(). 
*/ if (head) { if (tcf_exts_get_net(&head->exts)) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); else __cls_cgroup_destroy(head); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index cd5fe383afdd..2bb043cd436b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,10 +57,7 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static inline u32 addr_fold(void *addr) @@ -383,21 +380,14 @@ static void __flow_destroy_filter(struct flow_filter *f) static void flow_destroy_filter_work(struct work_struct *work) { - struct flow_filter *f = container_of(work, struct flow_filter, work); - + struct flow_filter *f = container_of(to_rcu_work(work), + struct flow_filter, + rwork); rtnl_lock(); __flow_destroy_filter(f); rtnl_unlock(); } -static void flow_destroy_filter(struct rcu_head *head) -{ - struct flow_filter *f = container_of(head, struct flow_filter, rcu); - - INIT_WORK(&f->work, flow_destroy_filter_work); - tcf_queue_work(&f->work); -} - static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -563,7 +553,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (fold) { tcf_exts_get_net(&fold->exts); - call_rcu(&fold->rcu, flow_destroy_filter); + tcf_queue_work(&fold->rwork, flow_destroy_filter_work); } return 0; @@ -583,7 +573,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, list_del_rcu(&f->list); tcf_exts_get_net(&f->exts); - call_rcu(&f->rcu, flow_destroy_filter); + tcf_queue_work(&f->rwork, flow_destroy_filter_work); *last = list_empty(&head->filters); return 0; } @@ -608,7 +598,7 @@ static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); if (tcf_exts_get_net(&f->exts)) - call_rcu(&f->rcu, flow_destroy_filter); + tcf_queue_work(&f->rwork, flow_destroy_filter_work); else __flow_destroy_filter(f); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index eacaaf803914..4e74508515f4 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -73,10 +73,7 @@ struct fl_flow_mask { struct cls_fl_head { struct rhashtable ht; struct list_head masks; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; struct idr handle_idr; }; @@ -90,10 +87,7 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; struct net_device *hw_dev; }; @@ -235,21 +229,14 @@ static void __fl_destroy_filter(struct cls_fl_filter *f) static void fl_destroy_filter_work(struct work_struct *work) { - struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); + struct cls_fl_filter *f = container_of(to_rcu_work(work), + struct cls_fl_filter, rwork); rtnl_lock(); __fl_destroy_filter(f); rtnl_unlock(); } -static void fl_destroy_filter(struct rcu_head *head) -{ - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); - - INIT_WORK(&f->work, fl_destroy_filter_work); - tcf_queue_work(&f->work); -} - static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { @@ -327,7 +314,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, fl_hw_destroy_filter(tp, f, extack); 
tcf_unbind_filter(tp, &f->res); if (async) - call_rcu(&f->rcu, fl_destroy_filter); + tcf_queue_work(&f->rwork, fl_destroy_filter_work); else __fl_destroy_filter(f); @@ -336,20 +323,13 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, static void fl_destroy_sleepable(struct work_struct *work) { - struct cls_fl_head *head = container_of(work, struct cls_fl_head, - work); + struct cls_fl_head *head = container_of(to_rcu_work(work), + struct cls_fl_head, + rwork); kfree(head); module_put(THIS_MODULE); } -static void fl_destroy_rcu(struct rcu_head *rcu) -{ - struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); - - INIT_WORK(&head->work, fl_destroy_sleepable); - schedule_work(&head->work); -} - static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -365,7 +345,7 @@ static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); - call_rcu(&head->rcu, fl_destroy_rcu); + tcf_queue_work(&head->rwork, fl_destroy_sleepable); } static void *fl_get(struct tcf_proto *tp, u32 handle) @@ -1036,7 +1016,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); - call_rcu(&fold->rcu, fl_destroy_filter); + tcf_queue_work(&fold->rwork, fl_destroy_filter_work); } else { list_add_tail_rcu(&fnew->list, &fnew->mask->filters); } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8b207723fbc2..29eeeaf3ea44 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -47,10 +47,7 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static u32 fw_hash(u32 handle) @@ -134,21 +131,14 @@ static void __fw_delete_filter(struct fw_filter *f) static void fw_delete_filter_work(struct work_struct *work) { - struct fw_filter *f = container_of(work, struct fw_filter, work); - + struct fw_filter *f = container_of(to_rcu_work(work), + struct fw_filter, + rwork); rtnl_lock(); __fw_delete_filter(f); rtnl_unlock(); } -static void fw_delete_filter(struct rcu_head *head) -{ - struct fw_filter *f = container_of(head, struct fw_filter, rcu); - - INIT_WORK(&f->work, fw_delete_filter_work); - tcf_queue_work(&f->work); -} - static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); @@ -164,7 +154,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) - call_rcu(&f->rcu, fw_delete_filter); + tcf_queue_work(&f->rwork, fw_delete_filter_work); else __fw_delete_filter(f); } @@ -193,7 +183,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); tcf_exts_get_net(&f->exts); - call_rcu(&f->rcu, fw_delete_filter); + tcf_queue_work(&f->rwork, fw_delete_filter_work); ret = 0; break; } @@ -316,7 +306,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); tcf_exts_get_net(&f->exts); - call_rcu(&f->rcu, fw_delete_filter); + tcf_queue_work(&f->rwork, fw_delete_filter_work); *arg = fnew; return err; diff --git a/net/sched/cls_matchall.c 
b/net/sched/cls_matchall.c index 2ba721a590a7..47b207ef7762 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,10 +21,7 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -53,22 +50,14 @@ static void __mall_destroy(struct cls_mall_head *head) static void mall_destroy_work(struct work_struct *work) { - struct cls_mall_head *head = container_of(work, struct cls_mall_head, - work); + struct cls_mall_head *head = container_of(to_rcu_work(work), + struct cls_mall_head, + rwork); rtnl_lock(); __mall_destroy(head); rtnl_unlock(); } -static void mall_destroy_rcu(struct rcu_head *rcu) -{ - struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, - rcu); - - INIT_WORK(&head->work, mall_destroy_work); - tcf_queue_work(&head->work); -} - static void mall_destroy_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie, @@ -126,7 +115,7 @@ static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) mall_destroy_hw_filter(tp, head, (unsigned long) head, extack); if (tcf_exts_get_net(&head->exts)) - call_rcu(&head->rcu, mall_destroy_rcu); + tcf_queue_work(&head->rwork, mall_destroy_work); else __mall_destroy(head); } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 21a03a8ee029..0404aa5fa7cb 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,10 +57,7 @@ struct route4_filter { u32 handle; struct route4_bucket *bkt; struct tcf_proto *tp; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -266,19 +263,17 @@ static void __route4_delete_filter(struct route4_filter *f) static void route4_delete_filter_work(struct work_struct *work) { - struct route4_filter *f = container_of(work, struct route4_filter, work); - + struct route4_filter *f = container_of(to_rcu_work(work), + struct route4_filter, + rwork); rtnl_lock(); __route4_delete_filter(f); rtnl_unlock(); } -static void route4_delete_filter(struct rcu_head *head) +static void route4_queue_work(struct route4_filter *f) { - struct route4_filter *f = container_of(head, struct route4_filter, rcu); - - INIT_WORK(&f->work, route4_delete_filter_work); - tcf_queue_work(&f->work); + tcf_queue_work(&f->rwork, route4_delete_filter_work); } static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) @@ -304,7 +299,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) - call_rcu(&f->rcu, route4_delete_filter); + route4_queue_work(f); else __route4_delete_filter(f); } @@ -349,7 +344,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, /* Delete it */ tcf_unbind_filter(tp, &f->res); tcf_exts_get_net(&f->exts); - call_rcu(&f->rcu, route4_delete_filter); + tcf_queue_work(&f->rwork, route4_delete_filter_work); /* Strip RTNL protected tree */ for (i = 0; i <= 32; i++) { @@ -554,7 +549,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); - call_rcu(&fold->rcu, route4_delete_filter); + tcf_queue_work(&fold->rwork, route4_delete_filter_work); } return 0; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 
4f1297657c27..e9ccf7daea7d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,10 +97,7 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -294,21 +291,14 @@ static void __rsvp_delete_filter(struct rsvp_filter *f) static void rsvp_delete_filter_work(struct work_struct *work) { - struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); - + struct rsvp_filter *f = container_of(to_rcu_work(work), + struct rsvp_filter, + rwork); rtnl_lock(); __rsvp_delete_filter(f); rtnl_unlock(); } -static void rsvp_delete_filter_rcu(struct rcu_head *head) -{ - struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); - - INIT_WORK(&f->work, rsvp_delete_filter_work); - tcf_queue_work(&f->work); -} - static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) { tcf_unbind_filter(tp, &f->res); @@ -317,7 +307,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * in cleanup() callback */ if (tcf_exts_get_net(&f->exts)) - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + tcf_queue_work(&f->rwork, rsvp_delete_filter_work); else __rsvp_delete_filter(f); } diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index b49cc990a000..32f4bbd82f35 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -28,20 +28,14 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; }; @@ -152,21 +146,14 @@ static void tcindex_destroy_rexts_work(struct work_struct *work) { struct tcindex_filter_result *r; - r = container_of(work, struct tcindex_filter_result, work); + r = container_of(to_rcu_work(work), + struct tcindex_filter_result, + rwork); rtnl_lock(); __tcindex_destroy_rexts(r); rtnl_unlock(); } -static void tcindex_destroy_rexts(struct rcu_head *head) -{ - struct tcindex_filter_result *r; - - r = container_of(head, struct tcindex_filter_result, rcu); - INIT_WORK(&r->work, tcindex_destroy_rexts_work); - tcf_queue_work(&r->work); -} - static void __tcindex_destroy_fexts(struct tcindex_filter *f) { tcf_exts_destroy(&f->result.exts); @@ -176,23 +163,15 @@ static void __tcindex_destroy_fexts(struct tcindex_filter *f) static void tcindex_destroy_fexts_work(struct work_struct *work) { - struct tcindex_filter *f = container_of(work, struct tcindex_filter, - work); + struct tcindex_filter *f = container_of(to_rcu_work(work), + struct tcindex_filter, + rwork); rtnl_lock(); __tcindex_destroy_fexts(f); rtnl_unlock(); } -static void tcindex_destroy_fexts(struct rcu_head *head) -{ - struct tcindex_filter *f = container_of(head, struct tcindex_filter, - rcu); - - INIT_WORK(&f->work, tcindex_destroy_fexts_work); - tcf_queue_work(&f->work); -} - static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, struct netlink_ext_ack *extack) { @@ -228,12 +207,12 @@ found: */ if (f) { if (tcf_exts_get_net(&f->result.exts)) - call_rcu(&f->rcu, tcindex_destroy_fexts); + tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work); else __tcindex_destroy_fexts(f); } else { if (tcf_exts_get_net(&r->exts)) - call_rcu(&r->rcu, tcindex_destroy_rexts); + 
tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work); else __tcindex_destroy_rexts(r); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index bac47b5d18fd..fb861f90fde6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -68,10 +68,7 @@ struct tc_u_knode { u32 __percpu *pcpu_success; #endif struct tcf_proto *tp; - union { - struct work_struct work; - struct rcu_head rcu; - }; + struct rcu_work rwork; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ @@ -436,21 +433,14 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, */ static void u32_delete_key_work(struct work_struct *work) { - struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); - + struct tc_u_knode *key = container_of(to_rcu_work(work), + struct tc_u_knode, + rwork); rtnl_lock(); u32_destroy_key(key->tp, key, false); rtnl_unlock(); } -static void u32_delete_key_rcu(struct rcu_head *rcu) -{ - struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - - INIT_WORK(&key->work, u32_delete_key_work); - tcf_queue_work(&key->work); -} - /* u32_delete_key_freepf_rcu is the rcu callback variant * that free's the entire structure including the statistics * percpu variables. Only use this if the key is not a copy @@ -460,21 +450,14 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) */ static void u32_delete_key_freepf_work(struct work_struct *work) { - struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); - + struct tc_u_knode *key = container_of(to_rcu_work(work), + struct tc_u_knode, + rwork); rtnl_lock(); u32_destroy_key(key->tp, key, true); rtnl_unlock(); } -static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) -{ - struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - - INIT_WORK(&key->work, u32_delete_key_freepf_work); - tcf_queue_work(&key->work); -} - static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_knode __rcu **kp; @@ -491,7 +474,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) tcf_unbind_filter(tp, &key->res); idr_remove(&ht->handle_idr, key->handle); tcf_exts_get_net(&key->exts); - call_rcu(&key->rcu, u32_delete_key_freepf_rcu); + tcf_queue_work(&key->rwork, u32_delete_key_freepf_work); return 0; } } @@ -611,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, u32_remove_hw_knode(tp, n, extack); idr_remove(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + tcf_queue_work(&n->rwork, u32_delete_key_freepf_work); else u32_destroy_key(n->tp, n, true); } @@ -995,7 +978,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); tcf_exts_get_net(&n->exts); - call_rcu(&n->rcu, u32_delete_key_rcu); + tcf_queue_work(&n->rwork, u32_delete_key_work); return 0; } -- cgit From f971b132300fb0df63a8de631947adc74a7b3db1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 May 2018 21:53:35 -0700 Subject: net: sched: mq: add simple offload notification mq offload is trivial, we just need to let the device know that the root qdisc is mq. Alternative approach would be to export qdisc_lookup() and make drivers check the root type themselves, but notification via ndo_setup_tc is more in line with other qdiscs. Note that mq doesn't hold any stats on it's own, it just adds up stats of its children. 
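
For illustration only, here is a minimal, hypothetical sketch of the driver side of this notification: how a NIC driver's ndo_setup_tc callback might consume TC_SETUP_QDISC_MQ. The foo_* names, struct foo_priv and the mq_handle bookkeeping are invented for the example and are not part of this patch; only the TC_MQ_CREATE/TC_MQ_DESTROY commands, the opt->handle field and the TC_SETUP_QDISC_MQ type come from this series (TC_MQ_STATS is only added by a later patch below).

#include <linux/netdevice.h>
#include <net/pkt_cls.h>

/* Illustrative private state; a real driver would keep this in its own priv. */
struct foo_priv {
	u32 mq_handle;	/* handle of the offloaded mq root qdisc, 0 if none */
};

/* Hypothetical consumer of the TC_SETUP_QDISC_MQ notification. */
static int foo_setup_tc_mq(struct foo_priv *priv, struct tc_mq_qopt_offload *opt)
{
	switch (opt->command) {
	case TC_MQ_CREATE:
		/* Root qdisc is mq: remember its handle so later requests
		 * for this handle can be treated as offloaded.
		 */
		priv->mq_handle = opt->handle;
		return 0;
	case TC_MQ_DESTROY:
		priv->mq_handle = 0;
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

/* Hypothetical ndo_setup_tc implementation dispatching on the setup type. */
static int foo_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			    void *type_data)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_QDISC_MQ:
		return foo_setup_tc_mq(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}
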
Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/sch_mq.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'net/sched') diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f062a18e9162..6ccf6daa2503 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -23,12 +24,28 @@ struct mq_sched { struct Qdisc **qdiscs; }; +static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_mq_qopt_offload opt = { + .command = cmd, + .handle = sch->handle, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); +} + static void mq_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); unsigned int ntx; + mq_offload(sch, TC_MQ_DESTROY); + if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) @@ -70,6 +87,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt, } sch->flags |= TCQ_F_MQROOT; + + mq_offload(sch, TC_MQ_CREATE); return 0; } -- cgit From 47c669a406d8621c69b1c199ce099b54b17b9902 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 25 May 2018 21:53:37 -0700 Subject: net: sched: mq: request stats from offloads MQ doesn't hold any statistics on its own, however, statistic from offloads are requested starting from the root, hence MQ will read the old values for its sums. Call into the drivers, because of the additive nature of the stats drivers are aware of how much "pending updates" they have to children of the MQ. Since MQ reset its stats on every dump we can simply offset the stats, predicting how stats of offloaded children will change. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/sched/sch_mq.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/sched') diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 6ccf6daa2503..d6b8ae4ed7a3 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -38,6 +38,22 @@ static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd) return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); } +static void mq_offload_stats(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_mq_qopt_offload opt = { + .command = TC_MQ_STATS, + .handle = sch->handle, + .stats = { + .bstats = &sch->bstats, + .qstats = &sch->qstats, + }, + }; + + if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); +} + static void mq_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); @@ -146,6 +162,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) sch->q.qlen += qdisc->q.qlen; sch->bstats.bytes += qdisc->bstats.bytes; sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.qlen += qdisc->qstats.qlen; sch->qstats.backlog += qdisc->qstats.backlog; sch->qstats.drops += qdisc->qstats.drops; sch->qstats.requeues += qdisc->qstats.requeues; @@ -154,6 +171,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) spin_unlock_bh(qdisc_lock(qdisc)); } + mq_offload_stats(sch); return 0; } -- cgit From 4341f8308d53f95a4a1ab54f45fa9af2c30ff596 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 29 May 2018 10:03:21 -0700 Subject: net: remove bypassed check in sch_direct_xmit() Checking netif_xmit_frozen_or_stopped() at the end of sch_direct_xmit() is being bypassed. This is because "ret" from sch_direct_xmit() will be either NETDEV_TX_OK or NETDEV_TX_BUSY, and only ret == NETDEV_TX_OK == 0 will reach the condition: if (ret && netif_xmit_frozen_or_stopped(txq)) return false; This patch cleans up the code by removing the whole condition. For more discussion about this, please refer to https://marc.info/?t=152727195700008 Signed-off-by: Song Liu Cc: John Fastabend Cc: Alexei Starovoitov Cc: David S. Miller Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 760ab1b09f8b..69078c82963e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -346,9 +346,6 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, return false; } - if (ret && netif_xmit_frozen_or_stopped(txq)) - return false; - return true; } -- cgit From c431f89b18a2bd62f74174829565e6433fc0c109 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 31 May 2018 09:52:53 +0300 Subject: net: sched: split tc_ctl_tfilter into three handlers tc_ctl_tfilter handles three netlink message types: RTM_NEWTFILTER, RTM_DELTFILTER, RTM_GETTFILTER. However, implementation of this function involves a lot of branching on specific message type because most of the code is message-specific. This significantly complicates adding new functionality and doesn't provide much benefit of code reuse. Split tc_ctl_tfilter to three standalone functions that handle filter new, delete and get requests. The only truly protocol independent part of tc_ctl_tfilter is code that looks up queue, class, and block. Refactor this code to standalone tcf_block_find function that is used by all three new handlers. Signed-off-by: Vlad Buslov Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 438 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 293 insertions(+), 145 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 76303c45db19..c06585fb2dc6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -437,6 +437,78 @@ static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) return idr_find(&tn->idr, block_index); } +/* Find tcf block. + * Set q, parent, cl when appropriate. + */ + +static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, + u32 *parent, unsigned long *cl, + int ifindex, u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_lookup(net, block_index); + if (!block) { + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + return ERR_PTR(-EINVAL); + } + } else { + const struct Qdisc_class_ops *cops; + struct net_device *dev; + + /* Find link */ + dev = __dev_get_by_index(net, ifindex); + if (!dev) + return ERR_PTR(-ENODEV); + + /* Find qdisc */ + if (!*parent) { + *q = dev->qdisc; + *parent = (*q)->handle; + } else { + *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + return ERR_PTR(-EINVAL); + } + } + + /* Is it classful? */ + cops = (*q)->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + return ERR_PTR(-EINVAL); + } + + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(*parent)) { + *cl = cops->find(*q, *parent); + if (*cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return ERR_PTR(-ENOENT); + } + } + + /* And the last stroke */ + block = cops->tcf_block(*q, *cl, extack); + if (!block) + return ERR_PTR(-EINVAL); + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); + return ERR_PTR(-EOPNOTSUPP); + } + } + + return block; +} + static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block) { return list_first_entry(&block->chain_list, struct tcf_chain, list); @@ -984,9 +1056,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, q, parent, 0, event, false); } -/* Add/change/delete/get a filter node */ - -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, +static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); @@ -1007,8 +1077,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; int tp_created; - if ((n->nlmsg_type != RTM_GETTFILTER) && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1026,24 +1095,13 @@ replay: cl = 0; if (prio == 0) { - switch (n->nlmsg_type) { - case RTM_DELTFILTER: - if (protocol || t->tcm_handle || tca[TCA_KIND]) { - NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); - return -ENOENT; - } - break; - case RTM_NEWTFILTER: - /* If no priority is provided by the user, - * we allocate one. 
- */ - if (n->nlmsg_flags & NLM_F_CREATE) { - prio = TC_H_MAKE(0x80000000U, 0U); - prio_allocate = true; - break; - } - /* fall-through */ - default: + /* If no priority is provided by the user, + * we allocate one. + */ + if (n->nlmsg_flags & NLM_F_CREATE) { + prio = TC_H_MAKE(0x80000000U, 0U); + prio_allocate = true; + } else { NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } @@ -1051,66 +1109,11 @@ replay: /* Find head of filter chain. */ - if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, t->tcm_block_index); - if (!block) { - NL_SET_ERR_MSG(extack, "Block of given index was not found"); - err = -EINVAL; - goto errout; - } - } else { - const struct Qdisc_class_ops *cops; - struct net_device *dev; - - /* Find link */ - dev = __dev_get_by_index(net, t->tcm_ifindex); - if (!dev) - return -ENODEV; - - /* Find qdisc */ - if (!parent) { - q = dev->qdisc; - parent = q->handle; - } else { - q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); - if (!q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return -EINVAL; - } - } - - /* Is it classful? */ - cops = q->ops->cl_ops; - if (!cops) { - NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return -EINVAL; - } - - if (!cops->tcf_block) { - NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return -EOPNOTSUPP; - } - - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(parent)) { - cl = cops->find(q, parent); - if (cl == 0) { - NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return -ENOENT; - } - } - - /* And the last stroke */ - block = cops->tcf_block(q, cl, extack); - if (!block) { - err = -EINVAL; - goto errout; - } - if (tcf_block_shared(block)) { - NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - err = -EOPNOTSUPP; - goto errout; - } + block = tcf_block_find(net, &q, &parent, &cl, + t->tcm_ifindex, t->tcm_block_index, extack); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto errout; } chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; @@ -1119,19 +1122,10 @@ replay: err = -EINVAL; goto errout; } - chain = tcf_chain_get(block, chain_index, - n->nlmsg_type == RTM_NEWTFILTER); + chain = tcf_chain_get(block, chain_index, true); if (!chain) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - err = n->nlmsg_type == RTM_NEWTFILTER ? 
-ENOMEM : -EINVAL; - goto errout; - } - - if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { - tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); - tcf_chain_flush(chain); - err = 0; + err = -ENOMEM; goto errout; } @@ -1152,8 +1146,7 @@ replay: goto errout; } - if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) { + if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; @@ -1178,56 +1171,15 @@ replay: fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { - if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); - err = 0; - goto errout; - } - - if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) { + if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } - } else { - bool last; - - switch (n->nlmsg_type) { - case RTM_NEWTFILTER: - if (n->nlmsg_flags & NLM_F_EXCL) { - if (tp_created) - tcf_proto_destroy(tp, NULL); - NL_SET_ERR_MSG(extack, "Filter already exists"); - err = -EEXIST; - goto errout; - } - break; - case RTM_DELTFILTER: - err = tfilter_del_notify(net, skb, n, tp, block, - q, parent, fh, false, &last, - extack); - if (err) - goto errout; - if (last) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp, extack); - } - goto errout; - case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, block, q, parent, - fh, RTM_NEWTFILTER, true); - if (err < 0) - NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); - goto errout; - default: - NL_SET_ERR_MSG(extack, "Invalid netlink message type"); - err = -EINVAL; - goto errout; - } + } else if (n->nlmsg_flags & NLM_F_EXCL) { + NL_SET_ERR_MSG(extack, "Filter already exists"); + err = -EEXIST; + goto errout; } err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, @@ -1252,6 +1204,202 @@ errout: return err; } +static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct tcmsg *t; + u32 protocol; + u32 prio; + u32 parent; + u32 chain_index; + struct Qdisc *q = NULL; + struct tcf_chain_info chain_info; + struct tcf_chain *chain = NULL; + struct tcf_block *block; + struct tcf_proto *tp = NULL; + unsigned long cl = 0; + void *fh = NULL; + int err; + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + if (err < 0) + return err; + + t = nlmsg_data(n); + protocol = TC_H_MIN(t->tcm_info); + prio = TC_H_MAJ(t->tcm_info); + parent = t->tcm_parent; + + if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) { + NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); + return -ENOENT; + } + + /* Find head of filter chain. */ + + block = tcf_block_find(net, &q, &parent, &cl, + t->tcm_ifindex, t->tcm_block_index, extack); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto errout; + } + + chain_index = tca[TCA_CHAIN] ? 
nla_get_u32(tca[TCA_CHAIN]) : 0; + if (chain_index > TC_ACT_EXT_VAL_MASK) { + NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); + err = -EINVAL; + goto errout; + } + chain = tcf_chain_get(block, chain_index, false); + if (!chain) { + NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); + err = -EINVAL; + goto errout; + } + + if (prio == 0) { + tfilter_notify_chain(net, skb, block, q, parent, n, + chain, RTM_DELTFILTER); + tcf_chain_flush(chain); + err = 0; + goto errout; + } + + tp = tcf_chain_tp_find(chain, &chain_info, protocol, + prio, false); + if (!tp || IS_ERR(tp)) { + NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); + err = PTR_ERR(tp); + goto errout; + } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); + err = -EINVAL; + goto errout; + } + + fh = tp->ops->get(tp, t->tcm_handle); + + if (!fh) { + if (t->tcm_handle == 0) { + tcf_chain_tp_remove(chain, &chain_info, tp); + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, false); + tcf_proto_destroy(tp, extack); + err = 0; + } else { + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; + } + } else { + bool last; + + err = tfilter_del_notify(net, skb, n, tp, block, + q, parent, fh, false, &last, + extack); + if (err) + goto errout; + if (last) { + tcf_chain_tp_remove(chain, &chain_info, tp); + tcf_proto_destroy(tp, extack); + } + } + +errout: + if (chain) + tcf_chain_put(chain); + return err; +} + +static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct tcmsg *t; + u32 protocol; + u32 prio; + u32 parent; + u32 chain_index; + struct Qdisc *q = NULL; + struct tcf_chain_info chain_info; + struct tcf_chain *chain = NULL; + struct tcf_block *block; + struct tcf_proto *tp = NULL; + unsigned long cl = 0; + void *fh = NULL; + int err; + + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + if (err < 0) + return err; + + t = nlmsg_data(n); + protocol = TC_H_MIN(t->tcm_info); + prio = TC_H_MAJ(t->tcm_info); + parent = t->tcm_parent; + + if (prio == 0) { + NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); + return -ENOENT; + } + + /* Find head of filter chain. */ + + block = tcf_block_find(net, &q, &parent, &cl, + t->tcm_ifindex, t->tcm_block_index, extack); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto errout; + } + + chain_index = tca[TCA_CHAIN] ? 
nla_get_u32(tca[TCA_CHAIN]) : 0; + if (chain_index > TC_ACT_EXT_VAL_MASK) { + NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); + err = -EINVAL; + goto errout; + } + chain = tcf_chain_get(block, chain_index, false); + if (!chain) { + NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); + err = -EINVAL; + goto errout; + } + + tp = tcf_chain_tp_find(chain, &chain_info, protocol, + prio, false); + if (!tp || IS_ERR(tp)) { + NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); + err = PTR_ERR(tp); + goto errout; + } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); + err = -EINVAL; + goto errout; + } + + fh = tp->ops->get(tp, t->tcm_handle); + + if (!fh) { + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; + } else { + err = tfilter_notify(net, skb, n, tp, block, q, parent, + fh, RTM_NEWTFILTER, true); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); + } + +errout: + if (chain) + tcf_chain_put(chain); + return err; +} + struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; @@ -1634,9 +1782,9 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, tc_dump_tfilter, 0); return 0; -- cgit From de9dc650f05f5c427a623797342bb2870ceedd38 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 3 Jun 2018 10:06:13 +0300 Subject: cls_flower: Fix missing free of rhashtable When destroying the instance, destroy the head rhashtable. Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") Reported-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Jiri Pirko Signed-off-by: Paul Blakey Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/sched') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 3786feab0b83..159efd98ee9a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -326,6 +326,8 @@ static void fl_destroy_sleepable(struct work_struct *work) struct cls_fl_head *head = container_of(to_rcu_work(work), struct cls_fl_head, rwork); + + rhashtable_destroy(&head->ht); kfree(head); module_put(THIS_MODULE); } -- cgit From f6521c587a586de5fdafb0a322072e5ff67f8e15 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 3 Jun 2018 10:06:14 +0300 Subject: cls_flower: Fix comparing of old filter mask with new filter We incorrectly compare the mask and the result is that we can't modify an already existing rule. Fix that by comparing correctly. Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") Reported-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Jiri Pirko Signed-off-by: Paul Blakey Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 159efd98ee9a..2b5be42a9f1c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -877,7 +877,7 @@ static int fl_check_assign_mask(struct cls_fl_head *head, return PTR_ERR(newmask); fnew->mask = newmask; - } else if (fold && fold->mask == fnew->mask) { + } else if (fold && fold->mask != fnew->mask) { return -EINVAL; } -- cgit From 0e3990356d2518691e17c1ecbf7868833b6e6704 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 4 Jun 2018 18:32:23 +0300 Subject: net: sched: return error code when tcf proto is not found If requested tcf proto is not found, get and del filter netlink protocol handlers output error message to extack, but do not return actual error code. Add check to return ENOENT when result of tp find function is NULL pointer. Fixes: c431f89b18a2 ("net: sched: split tc_ctl_tfilter into three handlers") Reported-by: Dan Carpenter Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c06585fb2dc6..cdc3c87c53e6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1274,7 +1274,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); - err = PTR_ERR(tp); + err = tp ? PTR_ERR(tp) : -ENOENT; goto errout; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); @@ -1374,7 +1374,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); - err = PTR_ERR(tp); + err = tp ? PTR_ERR(tp) : -ENOENT; goto errout; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); -- cgit From d96a43c66464cdf0b249fdf47b6dcd65b83af8c0 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 5 Jun 2018 11:04:03 +0300 Subject: net: sched: cls: Fix offloading when ingress dev is vxlan When using a vxlan device as the ingress dev, we count it as a "no offload dev", so when such a rule comes and err stop is true, we fail early and don't try the egdev route which can offload it through the egress device. Fix that by not calling the block offload if one of the devices attached to it is not offload capable, but make sure egress on such case is capable instead. Fixes: caa7260156eb ("net: sched: keep track of offloaded filters [..]") Reviewed-by: Roi Dayan Acked-by: Jiri Pirko Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- net/sched/cls_api.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index cdc3c87c53e6..29fb4d68a144 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -807,10 +807,6 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, int ok_count = 0; int err; - /* Make sure all netdevs sharing this block are offload-capable. 
*/ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -1729,21 +1725,31 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop) { - int ok_count; + int ok_count = 0; int ret; - ret = tcf_block_cb_call(block, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count = ret; + if (!block->nooffloaddevcnt) { + ret = tcf_block_cb_call(block, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count = ret; + } if (!exts || ok_count) - return ok_count; + goto skip_egress; + ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); if (ret < 0) return ret; ok_count += ret; +skip_egress: + /* if one of the netdevs sharing this block are not offload-capable + * make sure we succeeded in egress instead. + */ + if (block->nooffloaddevcnt && !ok_count && err_stop) + return -EOPNOTSUPP; + return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); -- cgit From 9a99dc1c41772e0b24348f089a7a7edb91fc7723 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 6 Jun 2018 13:55:47 -0400 Subject: Revert "net: sched: cls: Fix offloading when ingress dev is vxlan" This reverts commit d96a43c66464cdf0b249fdf47b6dcd65b83af8c0. This potentially breaks things, so reverting as per request by Jakub Kicinski. Signed-off-by: David S. Miller --- net/sched/cls_api.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 29fb4d68a144..cdc3c87c53e6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -807,6 +807,10 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, int ok_count = 0; int err; + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) + return -EOPNOTSUPP; + list_for_each_entry(block_cb, &block->cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -1725,31 +1729,21 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop) { - int ok_count = 0; + int ok_count; int ret; - if (!block->nooffloaddevcnt) { - ret = tcf_block_cb_call(block, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count = ret; - } + ret = tcf_block_cb_call(block, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count = ret; if (!exts || ok_count) - goto skip_egress; - + return ok_count; ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); if (ret < 0) return ret; ok_count += ret; -skip_egress: - /* if one of the netdevs sharing this block are not offload-capable - * make sure we succeeded in egress instead. - */ - if (block->nooffloaddevcnt && !ok_count && err_stop) - return -EOPNOTSUPP; - return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); -- cgit
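
The handler split earlier in this series replaces the single multiplexed tc_ctl_tfilter() with dedicated tc_new_tfilter()/tc_del_tfilter()/tc_get_tfilter() handlers registered via rtnl_register(). The userspace sketch below is illustrative only and not part of the patches: the interface index, filter handle, priority, and the presence of a clsact qdisc on the device are all assumptions. It builds a bare RTM_GETTFILTER request over rtnetlink, which the kernel now routes to tc_get_tfilter() rather than the old combined handler.

/*
 * Minimal sketch: send one RTM_GETTFILTER request to the kernel.
 * Assumptions (for illustration only): the target device has ifindex 2,
 * a clsact qdisc is attached, and a filter with handle 1 and priority 1
 * exists on its ingress hook.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>		/* htons() */
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>	/* RTM_GETTFILTER, struct tcmsg */
#include <linux/pkt_sched.h>	/* TC_H_MAKE, TC_H_CLSACT, TC_H_MIN_INGRESS */
#include <linux/if_ether.h>	/* ETH_P_ALL */

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct tcmsg tcm;
	} req;
	char buf[4096];
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
	req.nlh.nlmsg_type = RTM_GETTFILTER;	/* dispatched to tc_get_tfilter() */
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.tcm.tcm_family = AF_UNSPEC;
	req.tcm.tcm_ifindex = 2;		/* assumed ifindex of the target device */
	req.tcm.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
	req.tcm.tcm_handle = 1;			/* assumed filter handle */
	/* Priority lives in the upper 16 bits of tcm_info, protocol in the
	 * lower 16; a priority of zero is rejected by tc_get_tfilter() with
	 * "Invalid filter command with priority of zero".
	 */
	req.tcm.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_ALL));

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		perror("sendto");
	else if (recv(fd, buf, sizeof(buf), 0) < 0)
		/* on success the reply is an RTM_NEWTFILTER message,
		 * otherwise an NLMSG_ERROR carrying the extack string
		 */
		perror("recv");

	close(fd);
	return 0;
}

The same request issued as RTM_NEWTFILTER or RTM_DELTFILTER would instead reach tc_new_tfilter() or tc_del_tfilter(); only those two paths require CAP_NET_ADMIN, while the get path, as before the split, does not.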