author     Linus Torvalds <torvalds@linux-foundation.org>  2014-10-08 21:40:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-08 21:40:54 -0400
commit     35a9ad8af0bb0fa3525e6d0d20e32551d226f38e (patch)
tree       15b4b33206818886d9cff371fd2163e073b70568 /net/sched/cls_u32.c
parent     d5935b07da53f74726e2a65dd4281d0f2c70e5d4 (diff)
parent     64b1f00a0830e1c53874067273a096b228d83d36 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Most notable changes in here:

  1) By far the biggest accomplishment, thanks to a large range of
     contributors, is the addition of multi-send for transmit.  This is
     the result of discussions back in Chicago, and the hard work of
     several individuals.

     Now, when the ->ndo_start_xmit() method of a driver sees
     skb->xmit_more as true, it can choose to defer the doorbell telling
     the driver to start processing the new TX queue entries.

     skb->xmit_more means that the generic networking is guaranteed to
     call the driver immediately with another SKB to send.

     There is logic added to the qdisc layer to dequeue multiple packets
     at a time, and the handling of mis-predicted offloads in software
     is now done with no locks held.

     Finally, pktgen is extended to have a "burst" parameter that can be
     used to test a multi-send implementation.

     Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4,
     virtio_net.  Adding support is almost trivial, so expect more
     drivers to support this optimization soon.

     I want to thank, in no particular or implied order, Jesper Dangaard
     Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal Hadi
     Salim, John Fastabend, Florian Westphal, Daniel Borkmann, David
     Tat, Hannes Frederic Sowa, and Rusty Russell.

  2) PTP and timestamping support in bnx2x, from Michal Kalderon.

  3) Allow adjusting the rx_copybreak threshold for a driver via
     ethtool, and add rx_copybreak support to the enic driver.  From
     Govindarajulu Varadarajan.

  4) Significant enhancements to the generic PHY layer and the bcm7xxx
     driver in particular (EEE support, auto power down, etc.) from
     Florian Fainelli.

  5) Allow raw buffers to be used for flow dissection, allowing drivers
     to determine the optimal "linear pull" size for devices that DMA
     into pools of pages.  The objective is to get exactly the necessary
     amount of headers into the linear SKB area pre-pulled, but no more.
     The new interface drivers use is eth_get_headlen().  From WANG
     Cong, with driver conversions (several had their own by-hand
     duplicated implementations) by Alexander Duyck and Eric Dumazet.

  6) Support checksumming more smoothly and efficiently for
     encapsulations, and add the "foo over UDP" facility.  From Tom
     Herbert.

  7) Add Broadcom SF2 switch driver to the DSA layer, from Florian
     Fainelli.

  8) eBPF can now load programs via a system call and has an extensive
     test suite.  From Alexei Starovoitov and Daniel Borkmann.

  9) Major overhaul of the packet scheduler to use RCU in several major
     areas such as the classifiers and rate estimators.  From John
     Fastabend.

 10) Add driver for the Intel FM10000 Ethernet Switch, from Alexander
     Duyck.

 11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric
     Dumazet.

 12) Add Datacenter TCP congestion control algorithm support, from
     Florian Westphal.

 13) Reorganize sk_buff so that __copy_skb_header() is significantly
     faster.  From Eric Dumazet"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits)
  netlabel: directly return netlbl_unlabel_genl_init()
  net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers
  net: description of dma_cookie cause make xmldocs warning
  cxgb4: clean up a type issue
  cxgb4: potential shift wrapping bug
  i40e: skb->xmit_more support
  net: fs_enet: Add NAPI TX
  net: fs_enet: Remove non NAPI RX
  r8169:add support for RTL8168EP
  net_sched: copy exts->type in tcf_exts_change()
  wimax: convert printk to pr_foo()
  af_unix: remove 0 assignment on static
  ipv6: Do not warn for informational ICMP messages, regardless of type.
  Update Intel Ethernet Driver maintainers list
  bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING
  tipc: fix bug in multicast congestion handling
  net: better IFF_XMIT_DST_RELEASE support
  net/mlx4_en: remove NETDEV_TX_BUSY
  3c59x: fix bad split of cpu_to_le32(pci_map_single())
  net: bcmgenet: fix Tx ring priority programming
  ...
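The xmit_more contract in item 1 reduces to one check in the driver's transmit
routine. A minimal sketch of the pattern, assuming a hypothetical driver:
foo_priv, foo_queue_desc() and foo_ring_doorbell() are illustrative stand-ins,
not taken from any of the drivers listed above.

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	/* Post the descriptor to the hardware TX ring. */
	foo_queue_desc(priv, skb);

	/* skb->xmit_more guarantees the stack will immediately call us
	 * again with another skb, so the doorbell (an expensive MMIO
	 * write) can be deferred to the last packet of the batch --
	 * unless the queue is being stopped.
	 */
	if (!skb->xmit_more || netif_xmit_stopped(txq))
		foo_ring_doorbell(priv);

	return NETDEV_TX_OK;
}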
Diffstat (limited to 'net/sched/cls_u32.c')
 -rw-r--r--  net/sched/cls_u32.c | 407
 1 file changed, 312 insertions(+), 95 deletions(-)
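The cls_u32 diff below belongs to item 9, the RCU conversion of the packet
scheduler. The recurring pattern throughout it: pointers gain the __rcu
annotation; the classify fast path (softirq context) walks them with
rcu_dereference_bh(); the slow path, already serialized by the RTNL lock,
reads with rtnl_dereference(), publishes with rcu_assign_pointer() (or
RCU_INIT_POINTER() when no reader can see the pointer yet), and defers frees
with call_rcu() or kfree_rcu(). A self-contained sketch of that pattern on a
toy list -- struct foo_node and the foo_* helpers are illustrative, not from
the patch:

struct foo_node {
	struct foo_node __rcu *next;
	u32 key;
	struct rcu_head rcu;
};

static struct foo_node __rcu *foo_list;

/* Reader, called from BH context under rcu_read_lock_bh(). */
static struct foo_node *foo_lookup(u32 key)
{
	struct foo_node *n;

	for (n = rcu_dereference_bh(foo_list); n;
	     n = rcu_dereference_bh(n->next))
		if (n->key == key)
			return n;
	return NULL;
}

/* Writer: caller holds RTNL, so updaters cannot race each other. */
static void foo_insert(struct foo_node *n)
{
	/* n is not yet visible to readers: no barrier needed. */
	RCU_INIT_POINTER(n->next, rtnl_dereference(foo_list));
	/* Publish with the memory barrier rcu_assign_pointer() implies. */
	rcu_assign_pointer(foo_list, n);
}

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo_node, rcu));
}

/* Writer-side removal: unlink now, free after a grace period. */
static void foo_remove(struct foo_node *n, struct foo_node __rcu **slot)
{
	RCU_INIT_POINTER(*slot, rtnl_dereference(n->next));
	call_rcu(&n->rcu, foo_free_rcu);
}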
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 70c0be8d0121..0472909bb014 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
@@ -44,40 +45,49 @@
#include <net/pkt_cls.h>
struct tc_u_knode {
- struct tc_u_knode *next;
+ struct tc_u_knode __rcu *next;
u32 handle;
- struct tc_u_hnode *ht_up;
+ struct tc_u_hnode __rcu *ht_up;
struct tcf_exts exts;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif
u8 fshift;
struct tcf_result res;
- struct tc_u_hnode *ht_down;
+ struct tc_u_hnode __rcu *ht_down;
#ifdef CONFIG_CLS_U32_PERF
- struct tc_u32_pcnt *pf;
+ struct tc_u32_pcnt __percpu *pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
- struct tc_u32_mark mark;
+ u32 val;
+ u32 mask;
+ u32 __percpu *pcpu_success;
#endif
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
+ /* The 'sel' field MUST be the last field in structure to allow for
+ * tc_u32_keys allocated at end of structure.
+ */
struct tc_u32_sel sel;
};
struct tc_u_hnode {
- struct tc_u_hnode *next;
+ struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
- struct tc_u_knode *ht[1];
+ struct tc_u_knode __rcu *ht[1];
+ struct rcu_head rcu;
};
struct tc_u_common {
- struct tc_u_hnode *hlist;
+ struct tc_u_hnode __rcu *hlist;
struct Qdisc *q;
int refcnt;
u32 hgenerator;
+ struct rcu_head rcu;
};
static inline unsigned int u32_hash_fold(__be32 key,
@@ -96,7 +106,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = tp->root;
+ struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -108,23 +118,23 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
int i, r;
next_ht:
- n = ht->ht[sel];
+ n = rcu_dereference_bh(ht->ht[sel]);
next_knode:
if (n) {
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rcnt += 1;
+ __this_cpu_inc(n->pf->rcnt);
j = 0;
#endif
#ifdef CONFIG_CLS_U32_MARK
- if ((skb->mark & n->mark.mask) != n->mark.val) {
- n = n->next;
+ if ((skb->mark & n->mask) != n->val) {
+ n = rcu_dereference_bh(n->next);
goto next_knode;
} else {
- n->mark.success++;
+ __this_cpu_inc(*n->pcpu_success);
}
#endif
@@ -139,37 +149,39 @@ next_knode:
if (!data)
goto out;
if ((*data ^ key->val) & key->mask) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
- n->pf->kcnts[j] += 1;
+ __this_cpu_inc(n->pf->kcnts[j]);
j++;
#endif
}
- if (n->ht_down == NULL) {
+
+ ht = rcu_dereference_bh(n->ht_down);
+ if (!ht) {
check_terminal:
if (n->sel.flags & TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
if (!tcf_match_indev(skb, n->ifindex)) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#endif
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rhit += 1;
+ __this_cpu_inc(n->pf->rhit);
#endif
r = tcf_exts_exec(skb, &n->exts, res);
if (r < 0) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
return r;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
@@ -180,7 +192,7 @@ check_terminal:
stack[sdepth].off = off;
sdepth++;
- ht = n->ht_down;
+ ht = rcu_dereference_bh(n->ht_down);
sel = 0;
if (ht->divisor) {
__be32 *data, hdata;
@@ -222,7 +234,7 @@ check_terminal:
/* POP */
if (sdepth--) {
n = stack[sdepth].knode;
- ht = n->ht_up;
+ ht = rcu_dereference_bh(n->ht_up);
off = stack[sdepth].off;
goto check_terminal;
}
@@ -239,7 +251,9 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
- for (ht = tp_c->hlist; ht; ht = ht->next)
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
if (ht->handle == handle)
break;
@@ -256,7 +270,9 @@ u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
if (sel > ht->divisor)
goto out;
- for (n = ht->ht[sel]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[sel]);
+ n;
+ n = rtnl_dereference(n->next))
if (n->handle == handle)
break;
out:
@@ -270,7 +286,7 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
struct tc_u_common *tp_c = tp->data;
if (TC_U32_HTID(handle) == TC_U32_ROOT)
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
else
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
@@ -291,6 +307,9 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
{
int i = 0x800;
+ /* hgenerator only used inside rtnl lock it is safe to increment
+ * without read _copy_ update semantics
+ */
do {
if (++tp_c->hgenerator == 0x7FF)
tp_c->hgenerator = 1;
@@ -326,41 +345,78 @@ static int u32_init(struct tcf_proto *tp)
}
tp_c->refcnt++;
- root_ht->next = tp_c->hlist;
- tp_c->hlist = root_ht;
+ RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, root_ht);
root_ht->tp_c = tp_c;
- tp->root = root_ht;
+ rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+static int u32_destroy_key(struct tcf_proto *tp,
+ struct tc_u_knode *n,
+ bool free_pf)
{
- tcf_unbind_filter(tp, &n->res);
- tcf_exts_destroy(tp, &n->exts);
+ tcf_exts_destroy(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ if (free_pf)
+ free_percpu(n->pf);
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+ if (free_pf)
+ free_percpu(n->pcpu_success);
#endif
kfree(n);
return 0;
}
+/* u32_delete_key_rcu should be called when free'ing a copied
+ * version of a tc_u_knode obtained from u32_init_knode(). When
+ * copies are obtained from u32_init_knode() the statistics are
+ * shared between the old and new copies to allow readers to
+ * continue to update the statistics during the copy. To support
+ * this the u32_delete_key_rcu variant does not free the percpu
+ * statistics.
+ */
+static void u32_delete_key_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, false);
+}
+
+/* u32_delete_key_freepf_rcu is the rcu callback variant
+ * that free's the entire structure including the statistics
+ * percpu variables. Only use this if the key is not a copy
+ * returned by u32_init_knode(). See u32_delete_key_rcu()
+ * for the variant that should be used with keys return from
+ * u32_init_knode()
+ */
+static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, true);
+}
+
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
- struct tc_u_knode **kp;
- struct tc_u_hnode *ht = key->ht_up;
+ struct tc_u_knode __rcu **kp;
+ struct tc_u_knode *pkp;
+ struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
if (ht) {
- for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
- if (*kp == key) {
- tcf_tree_lock(tp);
- *kp = key->next;
- tcf_tree_unlock(tp);
-
- u32_destroy_key(tp, key);
+ kp = &ht->ht[TC_U32_HASH(key->handle)];
+ for (pkp = rtnl_dereference(*kp); pkp;
+ kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
+ if (pkp == key) {
+ RCU_INIT_POINTER(*kp, key->next);
+
+ tcf_unbind_filter(tp, &key->res);
+ call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
}
@@ -375,10 +431,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
unsigned int h;
for (h = 0; h <= ht->divisor; h++) {
- while ((n = ht->ht[h]) != NULL) {
- ht->ht[h] = n->next;
-
- u32_destroy_key(tp, n);
+ while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
+ RCU_INIT_POINTER(ht->ht[h],
+ rtnl_dereference(n->next));
+ tcf_unbind_filter(tp, &n->res);
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
}
@@ -386,28 +443,31 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode **hn;
+ struct tc_u_hnode __rcu **hn;
+ struct tc_u_hnode *phn;
WARN_ON(ht->refcnt);
u32_clear_hnode(tp, ht);
- for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
- if (*hn == ht) {
- *hn = ht->next;
- kfree(ht);
+ hn = &tp_c->hlist;
+ for (phn = rtnl_dereference(*hn);
+ phn;
+ hn = &phn->next, phn = rtnl_dereference(*hn)) {
+ if (phn == ht) {
+ RCU_INIT_POINTER(*hn, ht->next);
+ kfree_rcu(ht, rcu);
return 0;
}
}
- WARN_ON(1);
return -ENOENT;
}
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = tp->root;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
@@ -419,17 +479,16 @@ static void u32_destroy(struct tcf_proto *tp)
tp->q->u32_node = NULL;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
u32_clear_hnode(tp, ht);
}
- while ((ht = tp_c->hlist) != NULL) {
- tp_c->hlist = ht->next;
-
- WARN_ON(ht->refcnt != 0);
-
- kfree(ht);
+ while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
+ RCU_INIT_POINTER(tp_c->hlist, ht->next);
+ kfree_rcu(ht, rcu);
}
kfree(tp_c);
@@ -441,6 +500,7 @@ static void u32_destroy(struct tcf_proto *tp)
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
if (ht == NULL)
return 0;
@@ -448,7 +508,7 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
if (TC_U32_KEY(ht->handle))
return u32_delete_key(tp, (struct tc_u_knode *)ht);
- if (tp->root == ht)
+ if (root_ht == ht)
return -EINVAL;
if (ht->refcnt == 1) {
@@ -471,7 +531,9 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
if (!bitmap)
return handle | 0xFFF;
- for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
+ n;
+ n = rtnl_dereference(n->next))
set_bit(TC_U32_NODE(n->handle), bitmap);
i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
@@ -521,10 +583,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
ht_down->refcnt++;
}
- tcf_tree_lock(tp);
- ht_old = n->ht_down;
- n->ht_down = ht_down;
- tcf_tree_unlock(tp);
+ ht_old = rtnl_dereference(n->ht_down);
+ rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
ht_old->refcnt--;
@@ -547,10 +607,86 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
errout:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
+static void u32_replace_knode(struct tcf_proto *tp,
+ struct tc_u_common *tp_c,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+ struct tc_u_hnode *ht;
+
+ if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
+ ht = rtnl_dereference(tp->root);
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
+
+ ins = &ht->ht[TC_U32_HASH(n->handle)];
+
+ /* The node must always exist for it to be replaced if this is not the
+ * case then something went very wrong elsewhere.
+ */
+ for (pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (pins->handle == n->handle)
+ break;
+
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+}
+
+static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode *new;
+ struct tc_u32_sel *s = &n->sel;
+
+ new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+ GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ RCU_INIT_POINTER(new->next, n->next);
+ new->handle = n->handle;
+ RCU_INIT_POINTER(new->ht_up, n->ht_up);
+
+#ifdef CONFIG_NET_CLS_IND
+ new->ifindex = n->ifindex;
+#endif
+ new->fshift = n->fshift;
+ new->res = n->res;
+ RCU_INIT_POINTER(new->ht_down, n->ht_down);
+
+ /* bump reference count as long as we hold pointer to structure */
+ if (new->ht_down)
+ new->ht_down->refcnt++;
+
+#ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+ * a special destroy call must be made to not free the pf memory.
+ */
+ new->pf = n->pf;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ new->val = n->val;
+ new->mask = n->mask;
+ /* Similarly success statistics must be moved as pointers */
+ new->pcpu_success = n->pcpu_success;
+#endif
+ new->tp = tp;
+ memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+
+ tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+
+ return new;
+}
+
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
@@ -564,6 +700,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_U32_MAX + 1];
u32 htid;
int err;
+#ifdef CONFIG_CLS_U32_PERF
+ size_t size;
+#endif
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -574,11 +713,28 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n = (struct tc_u_knode *)*arg;
if (n) {
+ struct tc_u_knode *new;
+
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(net, tp, base, n->ht_up, n, tb,
- tca[TCA_RATE], ovr);
+ new = u32_init_knode(tp, n);
+ if (!new)
+ return -ENOMEM;
+
+ err = u32_set_parms(net, tp, base,
+ rtnl_dereference(n->ht_up), new, tb,
+ tca[TCA_RATE], ovr);
+
+ if (err) {
+ u32_destroy_key(tp, new, false);
+ return err;
+ }
+
+ u32_replace_knode(tp, tp_c, new);
+ tcf_unbind_filter(tp, &n->res);
+ call_rcu(&n->rcu, u32_delete_key_rcu);
+ return 0;
}
if (tb[TCA_U32_DIVISOR]) {
@@ -601,8 +757,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
- ht->next = tp_c->hlist;
- tp_c->hlist = ht;
+ RCU_INIT_POINTER(ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, ht);
*arg = (unsigned long)ht;
return 0;
}
@@ -610,7 +766,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_HASH]) {
htid = nla_get_u32(tb[TCA_U32_HASH]);
if (TC_U32_HTID(htid) == TC_U32_ROOT) {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
@@ -618,7 +774,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
} else {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
@@ -642,46 +798,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
#ifdef CONFIG_CLS_U32_PERF
- n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
- if (n->pf == NULL) {
+ size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
+ n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
+ if (!n->pf) {
kfree(n);
return -ENOBUFS;
}
#endif
memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- n->ht_up = ht;
+ RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ n->tp = tp;
#ifdef CONFIG_CLS_U32_MARK
+ n->pcpu_success = alloc_percpu(u32);
+ if (!n->pcpu_success) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
if (tb[TCA_U32_MARK]) {
struct tc_u32_mark *mark;
mark = nla_data(tb[TCA_U32_MARK]);
- memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
- n->mark.success = 0;
+ n->val = mark->val;
+ n->mask = mark->mask;
}
#endif
err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
- struct tc_u_knode **ins;
- for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
- if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ ins = &ht->ht[TC_U32_HASH(handle)];
+ for (pins = rtnl_dereference(*ins); pins;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
break;
- n->next = *ins;
- tcf_tree_lock(tp);
- *ins = n;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(n->next, pins);
+ rcu_assign_pointer(*ins, n);
*arg = (unsigned long)n;
return 0;
}
+
+#ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+errout:
+#endif
+
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ free_percpu(n->pf);
#endif
kfree(n);
return err;
@@ -697,7 +869,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->stop)
return;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
@@ -708,7 +882,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
arg->count++;
for (h = 0; h <= ht->divisor; h++) {
- for (n = ht->ht[h]; n; n = n->next) {
+ for (n = rtnl_dereference(ht->ht[h]);
+ n;
+ n = rtnl_dereference(n->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -727,6 +903,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
if (n == NULL)
@@ -745,11 +922,18 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
goto nla_put_failure;
} else {
+#ifdef CONFIG_CLS_U32_PERF
+ struct tc_u32_pcnt *gpf;
+ int cpu;
+#endif
+
if (nla_put(skb, TCA_U32_SEL,
sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
&n->sel))
goto nla_put_failure;
- if (n->ht_up) {
+
+ ht_up = rtnl_dereference(n->ht_up);
+ if (ht_up) {
u32 htid = n->handle & 0xFFFFF000;
if (nla_put_u32(skb, TCA_U32_HASH, htid))
goto nla_put_failure;
@@ -757,14 +941,28 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (n->res.classid &&
nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
goto nla_put_failure;
- if (n->ht_down &&
- nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
+
+ ht_down = rtnl_dereference(n->ht_down);
+ if (ht_down &&
+ nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
goto nla_put_failure;
#ifdef CONFIG_CLS_U32_MARK
- if ((n->mark.val || n->mark.mask) &&
- nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
- goto nla_put_failure;
+ if ((n->val || n->mask)) {
+ struct tc_u32_mark mark = {.val = n->val,
+ .mask = n->mask,
+ .success = 0};
+ int cpum;
+
+ for_each_possible_cpu(cpum) {
+ __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
+
+ mark.success += cnt;
+ }
+
+ if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
+ goto nla_put_failure;
+ }
#endif
if (tcf_exts_dump(skb, &n->exts) < 0)
@@ -779,10 +977,29 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
}
#endif
#ifdef CONFIG_CLS_U32_PERF
+ gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
+ n->sel.nkeys * sizeof(u64),
+ GFP_KERNEL);
+ if (!gpf)
+ goto nla_put_failure;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
+
+ gpf->rcnt += pf->rcnt;
+ gpf->rhit += pf->rhit;
+ for (i = 0; i < n->sel.nkeys; i++)
+ gpf->kcnts[i] += pf->kcnts[i];
+ }
+
if (nla_put(skb, TCA_U32_PCNT,
sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- n->pf))
+ gpf)) {
+ kfree(gpf);
goto nla_put_failure;
+ }
+ kfree(gpf);
#endif
}
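The CONFIG_CLS_U32_PERF and CONFIG_CLS_U32_MARK hunks follow the same per-CPU
counter recipe: allocate one copy per CPU, let the lock-free classify path
bump its local copy with __this_cpu_inc() (safe there because softirq context
already runs with preemption disabled), and have the dump path fold all
copies into a single total, exactly as the gpf loop above does. Condensed
into a sketch -- struct foo_stats and the foo_stats_* helpers are
illustrative names, not from the patch:

struct foo_stats {
	u32 __percpu *hits;
};

static int foo_stats_init(struct foo_stats *s)
{
	s->hits = alloc_percpu(u32);	/* one counter per possible CPU */
	return s->hits ? 0 : -ENOMEM;
}

/* Fast path (softirq): bump this CPU's copy, no locks or atomics. */
static void foo_stats_hit(struct foo_stats *s)
{
	__this_cpu_inc(*s->hits);
}

/* Slow path (RTNL/dump): fold the per-CPU copies into one total. */
static u64 foo_stats_sum(struct foo_stats *s)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += *per_cpu_ptr(s->hits, cpu);
	return total;
}

static void foo_stats_free(struct foo_stats *s)
{
	free_percpu(s->hits);
}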