summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig27
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c36
-rw-r--r--net/sched/act_bpf.c5
-rw-r--r--net/sched/act_csum.c36
-rw-r--r--net/sched/act_gact.c3
-rw-r--r--net/sched/act_ife.c26
-rw-r--r--net/sched/act_meta_skbtcindex.c79
-rw-r--r--net/sched/act_mirred.c11
-rw-r--r--net/sched/act_police.c12
-rw-r--r--net/sched/act_skbmod.c301
-rw-r--r--net/sched/act_tunnel_key.c342
-rw-r--r--net/sched/act_vlan.c29
-rw-r--r--net/sched/cls_api.c18
-rw-r--r--net/sched/cls_bpf.c126
-rw-r--r--net/sched/cls_flow.c27
-rw-r--r--net/sched/cls_flower.c124
-rw-r--r--net/sched/cls_fw.c10
-rw-r--r--net/sched/cls_route.c12
-rw-r--r--net/sched/cls_tcindex.c12
-rw-r--r--net/sched/cls_u32.c30
-rw-r--r--net/sched/sch_api.c41
-rw-r--r--net/sched/sch_codel.c4
-rw-r--r--net/sched/sch_fifo.c4
-rw-r--r--net/sched/sch_fq.c71
-rw-r--r--net/sched/sch_generic.c28
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_netem.c20
-rw-r--r--net/sched/sch_pie.c4
29 files changed, 1294 insertions, 171 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index ccf931b3b94c..87956a768d1b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
To compile this code as a module, choose M here: the
module will be called act_connmark.
+config NET_ACT_SKBMOD
+ tristate "skb data modification action"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to allow modification of skb data
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_skbmod.
+
config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
@@ -761,6 +772,17 @@ config NET_ACT_IFE
To compile this code as a module, choose M here: the
module will be called act_ife.
+config NET_ACT_TUNNEL_KEY
+ tristate "IP tunnel metadata manipulation"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to set/release ip tunnel metadata.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_tunnel_key.
+
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
@@ -771,6 +793,11 @@ config NET_IFE_SKBPRIO
depends on NET_ACT_IFE
---help---
+config NET_IFE_SKBTCINDEX
+ tristate "Support to encoding decoding skb tcindex on IFE action"
+ depends on NET_ACT_IFE
+ ---help---
+
config NET_CLS_IND
bool "Incoming device classification"
depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ae088a5a9d95..4bdda3634e0b 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,9 +19,12 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o
obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
+obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
+obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d09d0687594b..c9102172ce3b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -592,9 +592,19 @@ err_out:
return ERR_PTR(err);
}
-int tcf_action_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr,
- int bind, struct list_head *actions)
+/* Drop the temporary reference that tcf_action_init() takes on every action
+ * on @actions when @ovr is set.  Does nothing when @ovr is zero.
+ */
+static void cleanup_a(struct list_head *actions, int ovr)
+{
+	struct tc_action *act;
+
+	if (ovr) {
+		list_for_each_entry(act, actions, list)
+			act->tcfa_refcnt--;
+	}
+}
+
+int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind, struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
@@ -612,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla,
goto err;
}
act->order = i;
+ if (ovr)
+ act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
+
+ /* Remove the temp refcnt which was necessary to protect against
+ * destroying an existing action which was being replaced
+ */
+ cleanup_a(actions, ovr);
return 0;
err:
@@ -883,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
+ if (event == RTM_GETACTION)
+ act->tcfa_refcnt++;
list_add_tail(&act->list, &actions);
}
@@ -923,9 +942,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
return err;
}
-static int
-tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 portid, int ovr)
+static int tcf_action_add(struct net *net, struct nlattr *nla,
+ struct nlmsghdr *n, u32 portid, int ovr)
{
int ret = 0;
LIST_HEAD(actions);
@@ -988,8 +1006,7 @@ replay:
return ret;
}
-static struct nlattr *
-find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1016,8 +1033,7 @@ find_dump_kind(const struct nlmsghdr *n)
return kind;
}
-static int
-tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nlmsghdr *nlh;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index bfa870731e74..1d3960033f61 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops;
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
+ bool at_ingress = skb_at_tc_ingress(skb);
struct tcf_bpf *prog = to_bpf(act);
struct bpf_prog *filter;
int action, filter_res;
- bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
-
- if (unlikely(!skb_mac_header_was_set(skb)))
- return TC_ACT_UNSPEC;
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b5dbf633a863..e0defcef376d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
return (void *)(skb_network_header(skb) + ihl);
}
-static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl)
{
struct icmphdr *icmph;
@@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl)
{
struct icmp6hdr *icmp6h;
const struct ipv6hdr *ip6h;
@@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl)
{
struct tcphdr *tcph;
const struct iphdr *iph;
@@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl)
{
struct tcphdr *tcph;
const struct ipv6hdr *ip6h;
@@ -217,8 +217,8 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv4_udp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl, int udplite)
+static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl, int udplite)
{
struct udphdr *udph;
const struct iphdr *iph;
@@ -270,8 +270,8 @@ ignore_obscure_skb:
return 1;
}
-static int tcf_csum_ipv6_udp(struct sk_buff *skb,
- unsigned int ihl, unsigned int ipl, int udplite)
+static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
+ unsigned int ipl, int udplite)
{
struct udphdr *udph;
const struct ipv6hdr *ip6h;
@@ -380,8 +380,8 @@ fail:
return 0;
}
-static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
- unsigned int ixhl, unsigned int *pl)
+static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl,
+ unsigned int *pl)
{
int off, len, optlen;
unsigned char *xh = (void *)ip6xh;
@@ -494,8 +494,8 @@ fail:
return 0;
}
-static int tcf_csum(struct sk_buff *skb,
- const struct tc_action *a, struct tcf_result *res)
+static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
int action;
@@ -531,8 +531,8 @@ drop:
return TC_ACT_SHOT;
}
-static int tcf_csum_dump(struct sk_buff *skb,
- struct tc_action *a, int bind, int ref)
+static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+ int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_csum *p = to_tcf_csum(a);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e24a4093d6f6..e0aa30f83c6c 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
int action = READ_ONCE(gact->tcf_action);
struct tcf_t *tm = &gact->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets);
+ _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
+ packets);
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index e87cd81315e1..ccf7b4b655fe 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
}
EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
+/* Encode a 16-bit metadatum as a TLV at @skbdata.
+ *
+ * A value configured in @mi->metaval takes precedence over @metaval.  A
+ * resulting value of zero is never encoded.
+ *
+ * Returns 0 when nothing is encoded, otherwise the return value of
+ * ife_tlv_meta_encode().
+ */
+int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
+{
+	u16 edata = mi->metaval ? *(u16 *)mi->metaval : metaval;
+
+	if (edata == 0)	/* will not encode */
+		return 0;
+
+	edata = htons(edata);
+	return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata);
+}
+EXPORT_SYMBOL_GPL(ife_encode_meta_u16);
+
int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
{
if (mi->metaval)
@@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi)
}
EXPORT_SYMBOL_GPL(ife_check_meta_u32);
+/* Report the encoded size of a 16-bit metadatum: 8 when either @metaval or
+ * @mi->metaval is set, 0 when there is nothing to encode.
+ */
+int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi)
+{
+	/* T+L+(V) == 2+2+(2+2bytepad) */
+	return (metaval || mi->metaval) ? 8 : 0;
+}
+EXPORT_SYMBOL_GPL(ife_check_meta_u16);
+
int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
{
u32 edata = metaval;
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
new file mode 100644
index 000000000000..3b35774ce890
--- /dev/null
+++ b/net/sched/act_meta_skbtcindex.c
@@ -0,0 +1,79 @@
+/*
+ * net/sched/act_meta_skbtcindex.c IFE skb->tc_index metadata module
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * copyright Jamal Hadi Salim (2016)
+ *
+*/
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <uapi/linux/tc_act/tc_ife.h>
+#include <net/tc_act/tc_ife.h>
+#include <linux/rtnetlink.h>
+
+/* Encode callback: hand skb->tc_index to ife_encode_meta_u16(). */
+static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
+			     struct tcf_meta_info *e)
+{
+	return ife_encode_meta_u16(skb->tc_index, skbdata, e);
+}
+
+/* Decode callback: read a 16-bit value from @data, convert it with ntohs()
+ * and store it in skb->tc_index.  @len is not consulted.  Always returns 0.
+ */
+static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len)
+{
+	const u16 *wire = data;
+
+	skb->tc_index = ntohs(*wire);
+	return 0;
+}
+
+/* Presence callback: returns whatever ife_check_meta_u16() reports for the
+ * current skb->tc_index and the configured metadata @e.
+ */
+static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e)
+{
+	u16 tc_index = skb->tc_index;
+
+	return ife_check_meta_u16(tc_index, e);
+}
+
+/* IFE metadata ops for skb->tc_index: the check/encode/decode callbacks are
+ * defined in this file, the remaining callbacks are the ife_*_meta_u16 /
+ * ife_release_meta_gen helpers.  Registered in ifetc_index_init_module().
+ */
+static struct tcf_meta_ops ife_skbtcindex_ops = {
+ .metaid = IFE_META_TCINDEX,
+ .metatype = NLA_U16,
+ .name = "tc_index",
+ .synopsis = "skb tc_index 16 bit metadata",
+ .check_presence = skbtcindex_check,
+ .encode = skbtcindex_encode,
+ .decode = skbtcindex_decode,
+ .get = ife_get_meta_u16,
+ .alloc = ife_alloc_meta_u16,
+ .release = ife_release_meta_gen,
+ .validate = ife_validate_meta_u16,
+ .owner = THIS_MODULE,
+};
+
+/* Module init: register the tc_index metadata ops; returns the result of
+ * register_ife_op().
+ */
+static int __init ifetc_index_init_module(void)
+{
+ return register_ife_op(&ife_skbtcindex_ops);
+}
+
+/* Module exit: unregister the tc_index metadata ops. */
+static void __exit ifetc_index_cleanup_module(void)
+{
+ unregister_ife_op(&ife_skbtcindex_ops);
+}
+
+module_init(ifetc_index_init_module);
+module_exit(ifetc_index_cleanup_module);
+
+MODULE_AUTHOR("Jamal Hadi Salim(2016)");
+MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
+MODULE_LICENSE("GPL");
+/* NOTE(review): the alias is built from IFE_META_SKBTCINDEX, whereas the ops
+ * above register .metaid = IFE_META_TCINDEX.  Confirm against the
+ * MODULE_ALIAS_IFE_META() definition that this alias matches the string the
+ * IFE core requests when autoloading this module.
+ */
+MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6038c85d92f5..667dc382df82 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -204,7 +204,15 @@ out:
return retval;
}
-static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+/* stats_update callback for mirred: calls tcf_lastuse_update() on the
+ * action's timestamps and adds @bytes/@packets to this CPU's basic stats.
+ * @lastuse is accepted to match the callback signature but is not used here.
+ */
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse)
+{
+ tcf_lastuse_update(&a->tcfa_tm);
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+}
+
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+ int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = to_mirred(a);
@@ -280,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = {
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
+ .stats_update = tcf_stats_update,
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8a3be1d99775..d1bd248fe146 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
police->tcfp_t_c = now;
police->tcfp_toks = toks;
police->tcfp_ptoks = ptoks;
+ if (police->tcfp_result == TC_ACT_SHOT)
+ police->tcf_qstats.drops++;
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
}
@@ -261,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
return police->tcf_action;
}
-static int
-tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_police *police = to_police(a);
@@ -347,14 +349,12 @@ static struct pernet_operations police_net_ops = {
.size = sizeof(struct tc_action_net),
};
-static int __init
-police_init_module(void)
+static int __init police_init_module(void)
{
return tcf_register_action(&act_police_ops, &police_net_ops);
}
-static void __exit
-police_cleanup_module(void)
+static void __exit police_cleanup_module(void)
{
tcf_unregister_action(&act_police_ops, &police_net_ops);
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 000000000000..e7d96381c908
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,301 @@
+/*
+ * net/sched/act_skbmod.c skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK 15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+/* Per-packet handler for the skbmod action.
+ *
+ * Updates the last-use timestamp and per-CPU byte/packet stats, calls
+ * skb_ensure_writable() for MAX_EDIT_LEN bytes, then applies the edits
+ * selected by the flags of the RCU-protected parameter block: overwrite the
+ * destination MAC, the source MAC and/or the ethertype, or swap the two MAC
+ * addresses.
+ *
+ * Returns TC_ACT_SHOT when the skb cannot be made writable or when the
+ * configured action is TC_ACT_SHOT (no edit is performed in either case);
+ * otherwise returns the configured action.
+ */
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_skbmod *d = to_skbmod(a);
+ int action;
+ struct tcf_skbmod_params *p;
+ u64 flags;
+ int err;
+
+ tcf_lastuse_update(&d->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+ /* XXX: if you are going to edit more fields beyond ethernet header
+ * (example when you add IP header replacement or vlan swap)
+ * then MAX_EDIT_LEN needs to change appropriately
+ */
+ err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+ if (unlikely(err)) { /* best policy is to drop on the floor */
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ return TC_ACT_SHOT;
+ }
+
+ rcu_read_lock();
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT)) {
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ rcu_read_unlock();
+ return action;
+ }
+
+ p = rcu_dereference(d->skbmod_p);
+ /* Snapshot the flags so the swap below can run after rcu_read_unlock() */
+ flags = p->flags;
+ if (flags & SKBMOD_F_DMAC)
+ ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+ if (flags & SKBMOD_F_SMAC)
+ ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+ if (flags & SKBMOD_F_ETYPE)
+ eth_hdr(skb)->h_proto = p->eth_type;
+ rcu_read_unlock();
+
+ /* The swap only touches the packet and the local flags copy, not *p */
+ if (flags & SKBMOD_F_SWAPMAC) {
+ u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+ /*XXX: I am sure we can come up with more efficient swapping*/
+ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+ ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+ ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+ }
+
+ return action;
+}
+
+/* Netlink attribute policy handed to nla_parse_nested() in
+ * tcf_skbmod_init().
+ */
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+ [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) },
+ [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN },
+ [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN },
+ [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 },
+};
+
+/* Create a new skbmod action or replace the parameters of an existing one.
+ *
+ * @nla carries the nested TCA_SKBMOD_* attributes; TCA_SKBMOD_PARMS is
+ * mandatory.  DMAC/SMAC/ETYPE attributes select the corresponding edits,
+ * unless SKBMOD_F_SWAPMAC is set in the parms flags, in which case only the
+ * MAC swap is performed.
+ *
+ * Returns 0 when binding to or replacing an existing action, ACT_P_CREATED
+ * when a new action was created, or a negative errno:
+ *   -EINVAL  missing/invalid attributes or no edit requested
+ *   -EEXIST  the action exists and @ovr is not set
+ *   -ENOMEM  parameter allocation failed
+ *
+ * Error handling: the reference taken by tcf_hash_check() on an existing
+ * action is dropped exactly once on every path, and the parameter block is
+ * allocated before a new action is created so that an allocation failure
+ * never leaves a half-initialised action behind.
+ */
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+	struct tcf_skbmod_params *p, *p_old;
+	struct tc_skbmod *parm;
+	struct tcf_skbmod *d;
+	bool exists = false;
+	u8 *daddr = NULL;
+	u8 *saddr = NULL;
+	u16 eth_type = 0;
+	u32 lflags = 0;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_SKBMOD_PARMS])
+		return -EINVAL;
+
+	if (tb[TCA_SKBMOD_DMAC]) {
+		daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+		lflags |= SKBMOD_F_DMAC;
+	}
+
+	if (tb[TCA_SKBMOD_SMAC]) {
+		saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+		lflags |= SKBMOD_F_SMAC;
+	}
+
+	if (tb[TCA_SKBMOD_ETYPE]) {
+		eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+		lflags |= SKBMOD_F_ETYPE;
+	}
+
+	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+	/* A requested MAC swap overrides every other edit */
+	if (parm->flags & SKBMOD_F_SWAPMAC)
+		lflags = SKBMOD_F_SWAPMAC;
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!lflags) {
+		/* Do not leak the reference taken by tcf_hash_check() */
+		if (exists)
+			tcf_hash_release(*a, bind);
+		return -EINVAL;
+	}
+
+	if (exists) {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	ASSERT_RTNL();
+	/* Allocate before creating a new action: on failure there is then
+	 * nothing to undo (the existing action's reference is already dropped
+	 * above and no new action exists yet).
+	 */
+	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+	if (unlikely(!p))
+		return -ENOMEM;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_skbmod_ops, bind, true);
+		if (ret) {
+			kfree(p);
+			return ret;
+		}
+
+		ret = ACT_P_CREATED;
+	}
+
+	d = to_skbmod(*a);
+
+	p->flags = lflags;
+	d->tcf_action = parm->action;
+
+	p_old = rtnl_dereference(d->skbmod_p);
+
+	if (ovr)
+		spin_lock_bh(&d->tcf_lock);
+
+	if (lflags & SKBMOD_F_DMAC)
+		ether_addr_copy(p->eth_dst, daddr);
+	if (lflags & SKBMOD_F_SMAC)
+		ether_addr_copy(p->eth_src, saddr);
+	if (lflags & SKBMOD_F_ETYPE)
+		p->eth_type = htons(eth_type);
+
+	/* Publish the new parameters to tcf_skbmod_run() */
+	rcu_assign_pointer(d->skbmod_p, p);
+	if (ovr)
+		spin_unlock_bh(&d->tcf_lock);
+
+	/* Readers may still hold the old block; free it after a grace period */
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+/* cleanup callback: free the action's parameter block after an RCU grace
+ * period.
+ *
+ * skbmod_p can still be NULL when the action is torn down before any
+ * parameters were published, so it must be checked before kfree_rcu().
+ */
+static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	struct tcf_skbmod_params *p;
+
+	p = rcu_dereference_protected(d->skbmod_p, 1);
+	if (p)
+		kfree_rcu(p, rcu);
+}
+
+/* dump callback: emit the action's configuration as netlink attributes.
+ *
+ * Writes TCA_SKBMOD_PARMS, then DMAC/SMAC/ETYPE for each edit enabled in the
+ * parameter flags, then the timestamps as TCA_SKBMOD_TM.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to where
+ * it was on entry and -1 is returned.
+ *
+ * The parameter block is read with rtnl_dereference(); no RCU read-side
+ * section is entered here, so the failure path must not call
+ * rcu_read_unlock().
+ */
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p);
+	struct tc_skbmod opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	opt.flags = p->flags;
+	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_DMAC) &&
+	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_SMAC) &&
+	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_ETYPE) &&
+	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &d->tcf_tm);
+	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+/* walk callback: run tcf_generic_walker() over this netns' skbmod table. */
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	return tcf_generic_walker(net_generic(net, skbmod_net_id),
+				  skb, cb, type, ops);
+}
+
+/* lookup callback: search this netns' skbmod table for @index via
+ * tcf_hash_search().
+ */
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+	return tcf_hash_search(net_generic(net, skbmod_net_id), a, index);
+}
+
+/* Action ops for "skbmod", wiring up the callbacks defined in this file.
+ * Registered in skbmod_init_module().
+ */
+static struct tc_action_ops act_skbmod_ops = {
+ .kind = "skbmod",
+ .type = TCA_ACT_SKBMOD,
+ .owner = THIS_MODULE,
+ .act = tcf_skbmod_run,
+ .dump = tcf_skbmod_dump,
+ .init = tcf_skbmod_init,
+ .cleanup = tcf_skbmod_cleanup,
+ .walk = tcf_skbmod_walker,
+ .lookup = tcf_skbmod_search,
+ .size = sizeof(struct tcf_skbmod),
+};
+
+/* Per-netns init: set up this namespace's skbmod action table. */
+static __net_init int skbmod_init_net(struct net *net)
+{
+	return tc_action_net_init(net_generic(net, skbmod_net_id),
+				  &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+/* Per-netns exit: tear down this namespace's skbmod action table. */
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+	tc_action_net_exit(net_generic(net, skbmod_net_id));
+}
+
+/* Per-netns hooks; .id/.size describe the tc_action_net instance that
+ * net_generic(net, skbmod_net_id) returns in the callbacks above.
+ */
+static struct pernet_operations skbmod_net_ops = {
+ .init = skbmod_init_net,
+ .exit = skbmod_exit_net,
+ .id = &skbmod_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+/* Module init: register the skbmod action together with its pernet ops. */
+static int __init skbmod_init_module(void)
+{
+ return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+/* Module exit: unregister the skbmod action and its pernet ops. */
+static void __exit skbmod_cleanup_module(void)
+{
+ tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
new file mode 100644
index 000000000000..af47bdf2f483
--- /dev/null
+++ b/net/sched/act_tunnel_key.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_tunnel_key.h>
+
+#define TUNNEL_KEY_TAB_MASK 15
+
+static int tunnel_key_net_id;
+static struct tc_action_ops act_tunnel_key_ops;
+
+/* Per-packet handler for the tunnel_key action.
+ *
+ * Under rcu_read_lock(), reads the current parameter block, updates the
+ * last-use timestamp and per-CPU byte/packet stats, and then either drops
+ * the skb's dst (RELEASE) or replaces it with a new reference to the
+ * configured tunnel metadata dst (SET).  An unknown tunnel action triggers a
+ * one-time warning and leaves the skb untouched.
+ *
+ * Returns the action verdict stored in the parameter block.
+ */
+static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_tunnel_key *t = to_tunnel_key(a);
+ struct tcf_tunnel_key_params *params;
+ int action;
+
+ rcu_read_lock();
+
+ params = rcu_dereference(t->params);
+
+ tcf_lastuse_update(&t->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+ /* Copy the verdict while still inside the RCU read-side section */
+ action = params->action;
+
+ switch (params->tcft_action) {
+ case TCA_TUNNEL_KEY_ACT_RELEASE:
+ skb_dst_drop(skb);
+ break;
+ case TCA_TUNNEL_KEY_ACT_SET:
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
+ break;
+ default:
+ WARN_ONCE(1, "Bad tunnel_key action %d.\n",
+ params->tcft_action);
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return action;
+}
+
+/* Netlink attribute policy handed to nla_parse_nested() in
+ * tunnel_key_init().
+ */
+static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
+ [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) },
+ [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
+};
+
+/* Create a new tunnel_key action or replace the parameters of an existing
+ * one.
+ *
+ * @nla carries the nested TCA_TUNNEL_KEY_* attributes; TCA_TUNNEL_KEY_PARMS
+ * is mandatory.  For TCA_TUNNEL_KEY_ACT_SET a key id plus either both IPv4
+ * or both IPv6 endpoint addresses are required and a tunnel metadata dst is
+ * built from them; TCA_TUNNEL_KEY_ACT_RELEASE needs no further attributes.
+ *
+ * Returns 0 when binding to or replacing an existing action, ACT_P_CREATED
+ * when a new action was created, or a negative errno:
+ *   -EINVAL  missing/invalid attributes or unknown tunnel action
+ *   -EEXIST  the action exists and @ovr is not set
+ *   -ENOMEM  parameter allocation failed
+ *
+ * Error handling: the reference taken by tcf_hash_check() is dropped exactly
+ * once on every path, the metadata dst built for SET is released on every
+ * failure after it was allocated, and the parameter block is allocated
+ * before a new action is created so that an allocation failure never leaves
+ * an action without parameters behind.
+ */
+static int tunnel_key_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
+	struct tcf_tunnel_key_params *params_old;
+	struct tcf_tunnel_key_params *params_new;
+	struct metadata_dst *metadata = NULL;
+	struct tc_tunnel_key *parm;
+	struct tcf_tunnel_key *t;
+	bool exists = false;
+	__be64 key_id;
+	int ret = 0;
+	int err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_TUNNEL_KEY_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	switch (parm->t_action) {
+	case TCA_TUNNEL_KEY_ACT_RELEASE:
+		break;
+	case TCA_TUNNEL_KEY_ACT_SET:
+		if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+
+		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
+		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
+			__be32 saddr;
+			__be32 daddr;
+
+			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
+			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
+
+			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+						    TUNNEL_KEY, key_id, 0);
+		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
+			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
+			struct in6_addr saddr;
+			struct in6_addr daddr;
+
+			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
+			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
+
+			metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
+						      TUNNEL_KEY, key_id, 0);
+		}
+
+		if (!metadata) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
+		break;
+	default:
+		/* Unknown tunnel action: must not be reported as success */
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (exists) {
+		tcf_hash_release(*a, bind);
+		if (!ovr) {
+			ret = -EEXIST;
+			goto err_put_metadata;
+		}
+	}
+
+	ASSERT_RTNL();
+	/* Allocate before creating a new action so that a failure here never
+	 * leaves an action without a parameter block.
+	 */
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		ret = -ENOMEM;
+		goto err_put_metadata;
+	}
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_tunnel_key_ops, bind, true);
+		if (ret) {
+			kfree(params_new);
+			goto err_put_metadata;
+		}
+
+		ret = ACT_P_CREATED;
+	}
+
+	t = to_tunnel_key(*a);
+
+	params_old = rtnl_dereference(t->params);
+
+	params_new->action = parm->action;
+	params_new->tcft_action = parm->t_action;
+	params_new->tcft_enc_metadata = metadata;
+
+	/* Publish the new parameters to tunnel_key_act() */
+	rcu_assign_pointer(t->params, params_new);
+
+	/* NOTE(review): only the old parameter block is freed here; the
+	 * reference it holds on its tcft_enc_metadata dst is not dropped.
+	 * Releasing it safely needs to be deferred past an RCU grace period
+	 * because tunnel_key_act() may still dst_clone() it - TODO confirm
+	 * and fix with an RCU callback.
+	 */
+	if (params_old)
+		kfree_rcu(params_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+
+	return ret;
+
+err_put_metadata:
+	/* The metadata dst was never published; drop our only reference.
+	 * Any reference on an existing action has already been released.
+	 */
+	if (metadata)
+		dst_release(&metadata->dst);
+	return ret;
+
+err_out:
+	/* Reached only before any metadata dst was successfully built */
+	if (exists)
+		tcf_hash_release(*a, bind);
+	return ret;
+}
+
+/* cleanup callback: drop the tunnel metadata reference held by a SET action
+ * and free the parameter block after an RCU grace period.
+ *
+ * t->params can still be NULL when the action is torn down before any
+ * parameters were published, so it must be checked before being
+ * dereferenced.
+ */
+static void tunnel_key_release(struct tc_action *a, int bind)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+
+	params = rcu_dereference_protected(t->params, 1);
+	if (!params)
+		return;
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+		dst_release(&params->tcft_enc_metadata->dst);
+
+	kfree_rcu(params, rcu);
+}
+
+/* Emit the tunnel endpoint addresses of @info as netlink attributes.
+ *
+ * Writes the IPv4 or IPv6 source/destination pair depending on the address
+ * family reported by ip_tunnel_info_af().  Returns 0 when both attributes
+ * were added, -EINVAL if either put fails or the family is neither AF_INET
+ * nor AF_INET6.
+ */
+static int tunnel_key_dump_addresses(struct sk_buff *skb,
+				     const struct ip_tunnel_info *info)
+{
+	unsigned short family = ip_tunnel_info_af(info);
+
+	if (family == AF_INET) {
+		if (nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC,
+				    info->key.u.ipv4.src) ||
+		    nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST,
+				    info->key.u.ipv4.dst))
+			return -EINVAL;
+		return 0;
+	}
+
+	if (family == AF_INET6) {
+		if (nla_put_in6_addr(skb, TCA_TUNNEL_KEY_ENC_IPV6_SRC,
+				     &info->key.u.ipv6.src) ||
+		    nla_put_in6_addr(skb, TCA_TUNNEL_KEY_ENC_IPV6_DST,
+				     &info->key.u.ipv6.dst))
+			return -EINVAL;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* dump callback: emit the action's configuration as netlink attributes.
+ *
+ * Writes TCA_TUNNEL_KEY_PARMS, for a SET action also the key id and the
+ * tunnel endpoint addresses, then the timestamps as TCA_TUNNEL_KEY_TM.
+ *
+ * Returns skb->len on success.  On failure the skb is trimmed back to where
+ * it was on entry and -1 is returned.
+ */
+static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	unsigned char *tail = skb_tail_pointer(skb);
+	struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+	struct tc_tunnel_key opt = {
+		.index    = t->tcf_index,
+		.refcnt   = t->tcf_refcnt - ref,
+		.bindcnt  = t->tcf_bindcnt - bind,
+		.t_action = params->tcft_action,
+		.action   = params->action,
+	};
+	struct tcf_t tm;
+
+	if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+		struct ip_tunnel_info *info =
+			&params->tcft_enc_metadata->u.tun_info;
+		__be32 key_id = tunnel_id_to_key32(info->key.tun_id);
+
+		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+		    tunnel_key_dump_addresses(skb, info))
+			goto nla_put_failure;
+	}
+
+	tcf_tm_dump(&tm, &t->tcf_tm);
+	if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
+			  &tm, TCA_TUNNEL_KEY_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, tail);
+	return -1;
+}
+
+/* walk callback: run tcf_generic_walker() over this netns' tunnel_key
+ * table.
+ */
+static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	return tcf_generic_walker(net_generic(net, tunnel_key_net_id),
+				  skb, cb, type, ops);
+}
+
+/* lookup callback: search this netns' tunnel_key table for @index via
+ * tcf_hash_search().
+ */
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+{
+	return tcf_hash_search(net_generic(net, tunnel_key_net_id), a, index);
+}
+
+/* Action ops for "tunnel_key", wiring up the callbacks defined in this
+ * file.  Registered in tunnel_key_init_module().
+ */
+static struct tc_action_ops act_tunnel_key_ops = {
+ .kind = "tunnel_key",
+ .type = TCA_ACT_TUNNEL_KEY,
+ .owner = THIS_MODULE,
+ .act = tunnel_key_act,
+ .dump = tunnel_key_dump,
+ .init = tunnel_key_init,
+ .cleanup = tunnel_key_release,
+ .walk = tunnel_key_walker,
+ .lookup = tunnel_key_search,
+ .size = sizeof(struct tcf_tunnel_key),
+};
+
+/* Per-netns init: set up this namespace's tunnel_key action table. */
+static __net_init int tunnel_key_init_net(struct net *net)
+{
+	return tc_action_net_init(net_generic(net, tunnel_key_net_id),
+				  &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+}
+
+/* Per-netns exit: tear down this namespace's tunnel_key action table. */
+static void __net_exit tunnel_key_exit_net(struct net *net)
+{
+	tc_action_net_exit(net_generic(net, tunnel_key_net_id));
+}
+
+/* Per-netns hooks; .id/.size describe the tc_action_net instance that
+ * net_generic(net, tunnel_key_net_id) returns in the callbacks above.
+ */
+static struct pernet_operations tunnel_key_net_ops = {
+ .init = tunnel_key_init_net,
+ .exit = tunnel_key_exit_net,
+ .id = &tunnel_key_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+/* Module init: register the tunnel_key action together with its pernet
+ * ops.
+ */
+static int __init tunnel_key_init_module(void)
+{
+ return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+/* Module exit: unregister the tunnel_key action and its pernet ops. */
+static void __exit tunnel_key_cleanup_module(void)
+{
+ tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+module_init(tunnel_key_init_module);
+module_exit(tunnel_key_cleanup_module);
+
+MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
+MODULE_DESCRIPTION("ip tunnel manipulation actions");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 59a8d3150ae2..a95c00b119da 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_vlan *v = to_vlan(a);
int action;
int err;
+ u16 tci;
spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
@@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
break;
+ case TCA_VLAN_ACT_MODIFY:
+ /* No-op if no vlan tag (either hw-accel or in-payload) */
+ if (!skb_vlan_tagged(skb))
+ goto unlock;
+ /* extract existing tag (and guarantee no hw-accel tag) */
+ if (skb_vlan_tag_present(skb)) {
+ tci = skb_vlan_tag_get(skb);
+ skb->vlan_tci = 0;
+ } else {
+ /* in-payload vlan tag, pop it */
+ err = __skb_vlan_pop(skb, &tci);
+ if (err)
+ goto drop;
+ }
+ /* replace the vid */
+ tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+ /* replace prio bits, if tcfv_push_prio specified */
+ if (v->tcfv_push_prio) {
+ tci &= ~VLAN_PRIO_MASK;
+ tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+ }
+ /* put updated tci as hwaccel tag */
+ __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+ break;
default:
BUG();
}
@@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
case TCA_VLAN_ACT_POP:
break;
case TCA_VLAN_ACT_PUSH:
+ case TCA_VLAN_ACT_MODIFY:
if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
if (exists)
tcf_hash_release(*a, bind);
@@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
+ if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
+ v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
v->tcfv_push_proto) ||
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a7c5645373af..11da7da0b7c4 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -344,13 +344,15 @@ replay:
if (err == 0) {
struct tcf_proto *next = rtnl_dereference(tp->next);
- tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ tfilter_notify(net, skb, n, tp, fh,
+ RTM_DELTFILTER);
if (tcf_destroy(tp, false))
RCU_INIT_POINTER(*back, next);
}
goto errout;
case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
+ err = tfilter_notify(net, skb, n, tp, fh,
+ RTM_NEWTFILTER);
goto errout;
default:
err = -EINVAL;
@@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
struct net *net = sock_net(a->skb->sk);
return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
- a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
+ a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWTFILTER);
}
/* called with RTNL */
@@ -552,7 +555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
+ struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
- "police", ovr,
- TCA_ACT_BIND);
+ "police", ovr, TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
int err, i = 0;
err = tcf_action_init(net, tb[exts->action], rate_tlv,
- NULL, ovr,
- TCA_ACT_BIND, &actions);
+ NULL, ovr, TCA_ACT_BIND,
+ &actions);
if (err)
return err;
list_for_each_entry(act, &actions, list)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 4742f415ee5b..bb1d5a487081 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
#define CLS_BPF_NAME_LEN 256
+#define CLS_BPF_SUPPORTED_GEN_FLAGS \
+ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
struct cls_bpf_head {
struct list_head plist;
@@ -39,6 +41,8 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
+ bool offloaded;
+ u32 gen_flags;
struct tcf_exts exts;
u32 handle;
union {
@@ -54,8 +58,10 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
[TCA_BPF_FLAGS] = { .type = NLA_U32 },
+ [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
- [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
+ [TCA_BPF_NAME] = { .type = NLA_NUL_STRING,
+ .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -83,9 +89,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_bpf_prog *prog;
int ret = -1;
- if (unlikely(!skb_mac_header_was_set(skb)))
- return -1;
-
/* Needed here for accessing maps. */
rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
@@ -93,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
- if (at_ingress) {
+ if (tc_skip_sw(prog->gen_flags)) {
+ filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
+ } else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_end(skb);
@@ -140,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
return !prog->bpf_ops;
}
+static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ enum tc_clsbpf_command cmd)
+{
+ /* Describe @prog and the requested @cmd for the driver. */
+ struct tc_cls_bpf_offload bpf_offload = {
+ .command = cmd,
+ .exts = &prog->exts,
+ .prog = prog->filter,
+ .name = prog->bpf_name,
+ .exts_integrated = prog->exts_integrated,
+ .gen_flags = prog->gen_flags,
+ };
+ struct net_device *netdev = tp->q->dev_queue->dev;
+ struct tc_to_netdev offload;
+
+ offload.type = TC_SETUP_CLSBPF;
+ offload.cls_bpf = &bpf_offload;
+ return netdev->netdev_ops->ndo_setup_tc(netdev, tp->q->handle,
+ tp->protocol, &offload);
+}
+
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct cls_bpf_prog *oldprog)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct cls_bpf_prog *obj = prog; /* program the command is issued for */
+ enum tc_clsbpf_command cmd;
+ bool skip_sw;
+ int ret;
+ /* Pick ADD/REPLACE/DESTROY for @prog vs @oldprog and issue it via cls_bpf_offload_cmd(). */
+ skip_sw = tc_skip_sw(prog->gen_flags) ||
+ (oldprog && tc_skip_sw(oldprog->gen_flags));
+
+ if (oldprog && oldprog->offloaded) {
+ if (tc_should_offload(dev, tp, prog->gen_flags)) {
+ cmd = TC_CLSBPF_REPLACE;
+ } else if (!tc_skip_sw(prog->gen_flags)) {
+ obj = oldprog; /* new prog is not offloaded: destroy the old one instead */
+ cmd = TC_CLSBPF_DESTROY;
+ } else {
+ return -EINVAL; /* prog is skip_sw but tc_should_offload() said no */
+ }
+ } else {
+ if (!tc_should_offload(dev, tp, prog->gen_flags))
+ return skip_sw ? -EINVAL : 0;
+ cmd = TC_CLSBPF_ADD;
+ }
+
+ ret = cls_bpf_offload_cmd(tp, obj, cmd);
+ if (ret)
+ return skip_sw ? ret : 0; /* error is reported only when skip_sw is set */
+
+ obj->offloaded = true;
+ if (oldprog)
+ oldprog->offloaded = false; /* also clears obj when obj == oldprog */
+
+ return 0;
+}
+
+static void cls_bpf_stop_offload(struct tcf_proto *tp,
+ struct cls_bpf_prog *prog)
+{
+ int rc;
+
+ /* Nothing to undo unless @prog is currently marked offloaded. */
+ if (!prog->offloaded)
+ return;
+
+ rc = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ /* Leave the flag set when the destroy command fails. */
+ if (rc)
+ pr_err("Stopping hardware offload failed: %d\n", rc);
+ else
+ prog->offloaded = false;
+}
+
+static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
+ struct cls_bpf_prog *prog)
+{
+ /* Issue TC_CLSBPF_STATS for offloaded programs only; the command's
+ * result is ignored. */
+ if (prog->offloaded)
+ cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+}
+
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
@@ -179,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
+ cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -195,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
return false;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+ cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -304,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
{
bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
+ u32 gen_flags = 0;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@@ -328,8 +421,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
+ if (tb[TCA_BPF_FLAGS_GEN]) {
+ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+ !tc_flags_valid(gen_flags)) {
+ ret = -EINVAL;
+ goto errout;
+ }
+ }
prog->exts_integrated = have_exts;
+ prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
@@ -412,10 +514,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
+ ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
+ ovr);
if (ret < 0)
goto errout;
+ ret = cls_bpf_offload(tp, prog, oldprog);
+ if (ret) {
+ cls_bpf_delete_prog(tp, prog);
+ return ret;
+ }
+
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
@@ -477,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
tm->tcm_handle = prog->handle;
+ cls_bpf_offload_update_stats(tp, prog);
+
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -499,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
goto nla_put_failure;
+ if (prog->gen_flags &&
+ nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2c1ae549edbf..e39672394c7b 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -29,7 +29,7 @@
#include <net/route.h>
#include <net/flow_dissector.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -87,12 +87,14 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
-static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
return flow->basic.ip_proto;
}
-static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto_src(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
if (flow->ports.ports)
return ntohs(flow->ports.src);
@@ -100,7 +102,8 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys
return addr_fold(skb->sk);
}
-static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto_dst(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
if (flow->ports.ports)
return ntohs(flow->ports.dst);
@@ -125,14 +128,14 @@ static u32 flow_get_mark(const struct sk_buff *skb)
static u32 flow_get_nfct(const struct sk_buff *skb)
{
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
return addr_fold(skb->nfct);
#else
return 0;
#endif
}
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#define CTTUPLE(skb, member) \
({ \
enum ip_conntrack_info ctinfo; \
@@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
})
#endif
-static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_src(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
@@ -161,7 +165,8 @@ fallback:
return flow_get_src(skb, flow);
}
-static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
@@ -173,14 +178,16 @@ fallback:
return flow_get_dst(skb, flow);
}
-static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, src.u.all));
fallback:
return flow_get_proto_src(skb, flow);
}
-static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
+ const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index cf9ad5b50889..2af09c872a1a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -23,18 +23,26 @@
#include <net/ip.h>
#include <net/flow_dissector.h>
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
struct fl_flow_key {
int indev_ifindex;
struct flow_dissector_key_control control;
+ struct flow_dissector_key_control enc_control; /* addr_type set in fl_set_key() from the ENC_IPV4/IPV6 attrs */
struct flow_dissector_key_basic basic;
struct flow_dissector_key_eth_addrs eth;
struct flow_dissector_key_vlan vlan;
- struct flow_dissector_key_addrs ipaddrs;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_keyid enc_key_id; /* keyid filled from the tunnel id in fl_classify() */
+ union { /* tunnel addresses, filled from skb_tunnel_info() in fl_classify() */
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -124,11 +132,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
+ struct ip_tunnel_info *info;
if (!atomic_read(&head->ht.nelems))
return -1;
fl_clear_masked_range(&skb_key, &head->mask);
+
+ info = skb_tunnel_info(skb);
+ if (info) {
+ struct ip_tunnel_key *key = &info->key;
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET:
+ skb_key.enc_ipv4.src = key->u.ipv4.src;
+ skb_key.enc_ipv4.dst = key->u.ipv4.dst;
+ break;
+ case AF_INET6:
+ skb_key.enc_ipv6.src = key->u.ipv6.src;
+ skb_key.enc_ipv6.dst = key->u.ipv6.dst;
+ break;
+ }
+
+ skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+ }
+
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
* so do it rather here.
@@ -213,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
tc.type = TC_SETUP_CLSFLOWER;
tc.cls_flower = &offload;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+ &tc);
if (tc_skip_sw(flags))
return err;
@@ -297,7 +326,19 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 },
-
+ [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -395,20 +436,54 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (key->basic.ip_proto == IPPROTO_TCP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
sizeof(key->tp.dst));
} else if (key->basic.ip_proto == IPPROTO_UDP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst));
}
+ if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
+ tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ fl_set_key_val(tb, &key->enc_ipv4.src,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC,
+ &mask->enc_ipv4.src,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+ sizeof(key->enc_ipv4.src));
+ fl_set_key_val(tb, &key->enc_ipv4.dst,
+ TCA_FLOWER_KEY_ENC_IPV4_DST,
+ &mask->enc_ipv4.dst,
+ TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+ sizeof(key->enc_ipv4.dst));
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
+ tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ fl_set_key_val(tb, &key->enc_ipv6.src,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC,
+ &mask->enc_ipv6.src,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+ sizeof(key->enc_ipv6.src));
+ fl_set_key_val(tb, &key->enc_ipv6.dst,
+ TCA_FLOWER_KEY_ENC_IPV6_DST,
+ &mask->enc_ipv6.dst,
+ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+ sizeof(key->enc_ipv6.dst));
+ }
+
+ fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+ &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+ sizeof(key->enc_key_id.keyid));
+
return 0;
}
@@ -806,21 +881,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (key->basic.ip_proto == IPPROTO_TCP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
else if (key->basic.ip_proto == IPPROTO_UDP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+ (fl_dump_key_val(skb, &key->enc_ipv4.src,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
+ TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+ sizeof(key->enc_ipv4.src)) ||
+ fl_dump_key_val(skb, &key->enc_ipv4.dst,
+ TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
+ TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+ sizeof(key->enc_ipv4.dst))))
+ goto nla_put_failure;
+ else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+ (fl_dump_key_val(skb, &key->enc_ipv6.src,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
+ TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+ sizeof(key->enc_ipv6.src)) ||
+ fl_dump_key_val(skb, &key->enc_ipv6.dst,
+ TCA_FLOWER_KEY_ENC_IPV6_DST,
+ &mask->enc_ipv6.dst,
+ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+ sizeof(key->enc_ipv6.dst))))
+ goto nla_put_failure;
+
+ if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+ &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+ sizeof(key->enc_key_id)))
+ goto nla_put_failure;
+
nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
if (tcf_exts_dump(skb, &f->exts))
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index cc0bda945800..9dc63d54e167 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle)
}
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+ struct tcf_result *res)
{
struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
@@ -188,7 +188,8 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
- struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
+ struct nlattr **tb, struct nlattr **tca, unsigned long base,
+ bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct tcf_exts e;
@@ -237,9 +238,8 @@ errout:
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ u32 handle, struct nlattr **tca, unsigned long *arg,
+ bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *) *arg;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index c91e65d81a48..455fc8f83d0a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp)
return 0;
}
-static void
-route4_delete_filter(struct rcu_head *head)
+static void route4_delete_filter(struct rcu_head *head)
{
struct route4_filter *f = container_of(head, struct route4_filter, rcu);
@@ -474,10 +473,8 @@ errout:
}
static int route4_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ struct tcf_proto *tp, unsigned long base, u32 handle,
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -562,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
- tcf_exts_destroy(&f->exts);
+ if (f)
+ tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index d9500709831f..96144bdf30db 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -50,14 +50,13 @@ struct tcindex_data {
struct rcu_head rcu;
};
-static inline int
-tcindex_filter_is_set(struct tcindex_filter_result *r)
+static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
{
return tcf_exts_is_predicative(&r->exts) || r->res.classid;
}
-static struct tcindex_filter_result *
-tcindex_lookup(struct tcindex_data *p, u16 key)
+static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
+ u16 key)
{
if (p->perfect) {
struct tcindex_filter_result *f = p->perfect + key;
@@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head)
static void tcindex_destroy_fexts(struct rcu_head *head)
{
- struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
+ struct tcindex_filter *f = container_of(head, struct tcindex_filter,
+ rcu);
tcf_exts_destroy(&f->result.exts);
kfree(f);
@@ -550,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force)
static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index a29263a9d8c1..ae83c3aec308 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key,
return h;
}
-static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res)
+static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct {
struct tc_u_knode *knode;
@@ -256,8 +257,7 @@ deadloop:
return -1;
}
-static struct tc_u_hnode *
-u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
+static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
@@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
return ht;
}
-static struct tc_u_knode *
-u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
+static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
unsigned int sel;
struct tc_u_knode *n = NULL;
@@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp,
- struct tc_u_knode *n,
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
bool free_pf)
{
tcf_exts_destroy(&n->exts);
@@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
}
}
-static int u32_replace_hw_hnode(struct tcf_proto *tp,
- struct tc_u_hnode *h,
- u32 flags)
+static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+ u32 flags)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_u32_offload u32_offload = {0};
@@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
}
}
-static int u32_replace_hw_knode(struct tcf_proto *tp,
- struct tc_u_knode *n,
- u32 flags)
+static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+ u32 flags)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_u32_offload u32_offload = {0};
@@ -763,8 +759,7 @@ errout:
return err;
}
-static void u32_replace_knode(struct tcf_proto *tp,
- struct tc_u_common *tp_c,
+static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
struct tc_u_knode *n)
{
struct tc_u_knode __rcu **ins;
@@ -845,8 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -1088,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d677b3484d81..206dc24add3a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -389,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
static struct qdisc_rate_table *qdisc_rtab_list;
-struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
+ struct nlattr *tab)
{
struct qdisc_rate_table *rtab;
@@ -541,7 +542,8 @@ nla_put_failure:
return -1;
}
-void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+ const struct qdisc_size_table *stab)
{
int pkt_len, slot;
@@ -888,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock;
Parameters are passed via opt.
*/
-static struct Qdisc *
-qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
- struct Qdisc *p, u32 parent, u32 handle,
- struct nlattr **tca, int *errp)
+static struct Qdisc *qdisc_create(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ struct Qdisc *p, u32 parent, u32 handle,
+ struct nlattr **tca, int *errp)
{
int err;
struct nlattr *kind = tca[TCA_KIND];
@@ -1073,7 +1075,8 @@ struct check_loop_arg {
int depth;
};
-static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
+static int check_loop_fn(struct Qdisc *q, unsigned long cl,
+ struct qdisc_walker *w);
static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
@@ -1450,7 +1453,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
} else {
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}
@@ -1471,7 +1475,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
}
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}
@@ -1505,7 +1510,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0)
+ if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+ true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
@@ -1640,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
if (cops->delete)
err = cops->delete(q, cl);
if (err == 0)
- tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
+ tclass_notify(net, skb, n, q, cl,
+ RTM_DELTCLASS);
goto out;
case RTM_GETTCLASS:
err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1738,12 +1745,14 @@ struct qdisc_dump_args {
struct netlink_callback *cb;
};
-static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
+static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
+ struct qdisc_walker *arg)
{
struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
- a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
+ a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWTCLASS);
}
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
@@ -1976,10 +1985,12 @@ static int __init pktsched_init(void)
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
+ NULL);
rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
+ NULL);
return 0;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 4002df3c7d9f..5bfa79ee657c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -69,7 +69,7 @@ struct codel_sched_data {
static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
{
struct Qdisc *sch = ctx;
- struct sk_buff *skb = __skb_dequeue(&sch->q);
+ struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
if (skb)
sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __skb_dequeue(&sch->q);
+ struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index baeed6a78d28..1e37247656f8 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- if (likely(skb_queue_len(&sch->q) < sch->limit))
+ if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
@@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
unsigned int prev_backlog;
- if (likely(skb_queue_len(&sch->q) < sch->limit))
+ if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index e5458b99e09c..18e752439f6f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -86,6 +86,7 @@ struct fq_sched_data {
struct rb_root delayed; /* for rate limited flows */
u64 time_next_delayed_flow;
+ unsigned long unthrottle_latency_ns;
struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
@@ -94,6 +95,7 @@ struct fq_sched_data {
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
u32 orphan_mask; /* mask for orphaned skb */
+ u32 low_rate_threshold;
struct rb_root *fq_root;
u8 rate_enable;
u8 fq_trees_log;
@@ -407,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static void fq_check_throttled(struct fq_sched_data *q, u64 now)
{
+ unsigned long sample;
struct rb_node *p;
if (q->time_next_delayed_flow > now)
return;
+ /* Update unthrottle latency EWMA.
+ * This is cheap and can help diagnosing timer/latency problems.
+ */
+ sample = (unsigned long)(now - q->time_next_delayed_flow);
+ q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+ q->unthrottle_latency_ns += sample >> 3;
+
q->time_next_delayed_flow = ~0ULL;
while ((p = rb_first(&q->delayed)) != NULL) {
struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
@@ -433,7 +443,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
- u32 rate;
+ u32 rate, plen;
skb = fq_dequeue_head(sch, &q->internal);
if (skb)
@@ -482,7 +492,7 @@ begin:
prefetch(&skb->end);
f->credit -= qdisc_pkt_len(skb);
- if (f->credit > 0 || !q->rate_enable)
+ if (!q->rate_enable)
goto out;
/* Do not pace locally generated ack packets */
@@ -493,8 +503,15 @@ begin:
if (skb->sk)
rate = min(skb->sk->sk_pacing_rate, rate);
+ if (rate <= q->low_rate_threshold) {
+ f->credit = 0;
+ plen = qdisc_pkt_len(skb);
+ } else {
+ plen = max(qdisc_pkt_len(skb), q->quantum);
+ if (f->credit > 0)
+ goto out;
+ }
if (rate != ~0U) {
- u32 plen = max(qdisc_pkt_len(skb), q->quantum);
u64 len = (u64)plen * NSEC_PER_SEC;
if (likely(rate))
@@ -507,7 +524,12 @@ begin:
len = NSEC_PER_SEC;
q->stat_pkts_too_long++;
}
-
+ /* Account for schedule/timers drifts.
+ * f->time_next_packet was set when prior packet was sent,
+ * and current time (@now) can be too late by tens of us.
+ */
+ if (f->time_next_packet)
+ len -= min(len/2, now - f->time_next_packet);
f->time_next_packet = now + len;
}
out:
@@ -662,6 +684,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
+ [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +739,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_FQ_FLOW_MAX_RATE])
q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+ if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
+ q->low_rate_threshold =
+ nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+
if (tb[TCA_FQ_RATE_ENABLE]) {
u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
@@ -774,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
q->flow_refill_delay = msecs_to_jiffies(40);
q->flow_max_rate = ~0U;
+ q->time_next_delayed_flow = ~0ULL;
q->rate_enable = 1;
q->new_flows.first = NULL;
q->old_flows.first = NULL;
@@ -781,6 +809,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
q->fq_root = NULL;
q->fq_trees_log = ilog2(1024);
q->orphan_mask = 1024 - 1;
+ q->low_rate_threshold = 550000 / 8;
qdisc_watchdog_init(&q->watchdog, sch);
if (opt)
@@ -811,6 +840,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
jiffies_to_usecs(q->flow_refill_delay)) ||
nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+ nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
+ q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
@@ -823,20 +854,24 @@ nla_put_failure:
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_get_ns();
- struct tc_fq_qd_stats st = {
- .gc_flows = q->stat_gc_flows,
- .highprio_packets = q->stat_internal_packets,
- .tcp_retrans = q->stat_tcp_retrans,
- .throttled = q->stat_throttled,
- .flows_plimit = q->stat_flows_plimit,
- .pkts_too_long = q->stat_pkts_too_long,
- .allocation_errors = q->stat_allocation_errors,
- .flows = q->flows,
- .inactive_flows = q->inactive_flows,
- .throttled_flows = q->throttled_flows,
- .time_next_delayed_flow = q->time_next_delayed_flow - now,
- };
+ struct tc_fq_qd_stats st;
+
+ sch_tree_lock(sch);
+
+ st.gc_flows = q->stat_gc_flows;
+ st.highprio_packets = q->stat_internal_packets;
+ st.tcp_retrans = q->stat_tcp_retrans;
+ st.throttled = q->stat_throttled;
+ st.flows_plimit = q->stat_flows_plimit;
+ st.pkts_too_long = q->stat_pkts_too_long;
+ st.allocation_errors = q->stat_allocation_errors;
+ st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+ st.flows = q->flows;
+ st.inactive_flows = q->inactive_flows;
+ st.throttled_flows = q->throttled_flows;
+ st.unthrottle_latency_ns = min_t(unsigned long,
+ q->unthrottle_latency_ns, ~0U);
+ sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0d21b567ff27..6cfb6e9038c2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
*/
struct pfifo_fast_priv {
u32 bitmap;
- struct sk_buff_head q[PFIFO_FAST_BANDS];
+ struct qdisc_skb_head q[PFIFO_FAST_BANDS];
};
/*
@@ -477,7 +477,7 @@ struct pfifo_fast_priv {
*/
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
+static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
int band)
{
return priv->q + band;
@@ -486,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+ if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
int band = prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- struct sk_buff_head *list = band2list(priv, band);
+ struct qdisc_skb_head *list = band2list(priv, band);
priv->bitmap |= (1 << band);
qdisc->q.qlen++;
@@ -505,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
int band = bitmap2band[priv->bitmap];
if (likely(band >= 0)) {
- struct sk_buff_head *list = band2list(priv, band);
- struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
+ struct qdisc_skb_head *qh = band2list(priv, band);
+ struct sk_buff *skb = __qdisc_dequeue_head(qh);
+
+ if (likely(skb != NULL)) {
+ qdisc_qstats_backlog_dec(qdisc, skb);
+ qdisc_bstats_update(qdisc, skb);
+ }
qdisc->q.qlen--;
- if (skb_queue_empty(list))
+ if (qh->qlen == 0)
priv->bitmap &= ~(1 << band);
return skb;
@@ -524,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
int band = bitmap2band[priv->bitmap];
if (band >= 0) {
- struct sk_buff_head *list = band2list(priv, band);
+ struct qdisc_skb_head *qh = band2list(priv, band);
- return skb_peek(list);
+ return qh->head;
}
return NULL;
@@ -564,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __skb_queue_head_init(band2list(priv, prio));
+ qdisc_skb_head_init(band2list(priv, prio));
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
@@ -612,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p;
}
- skb_queue_head_init(&sch->q);
+ qdisc_skb_head_init(&sch->q);
+ spin_lock_init(&sch->q.lock);
spin_lock_init(&sch->busylock);
lockdep_set_class(&sch->busylock,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 53dbfa187870..c798d0de8a9d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -162,7 +162,7 @@ struct htb_sched {
struct work_struct work;
/* non shaped skbs; let them go directly thru */
- struct sk_buff_head direct_queue;
+ struct qdisc_skb_head direct_queue;
long direct_pkts;
struct qdisc_watchdog watchdog;
@@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
list_del_init(&cl->un.leaf.drop_list);
}
+static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
+ struct qdisc_skb_head *qh)
+{
+ struct sk_buff *last = qh->tail;
+
+ if (last) {
+ skb->next = NULL;
+ last->next = skb;
+ qh->tail = skb;
+ } else {
+ qh->tail = skb;
+ qh->head = skb;
+ }
+ qh->qlen++;
+}
+
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (cl == HTB_DIRECT) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen) {
- __skb_queue_tail(&q->direct_queue, skb);
+ htb_enqueue_tail(skb, sch, &q->direct_queue);
q->direct_pkts++;
} else {
return qdisc_drop(skb, sch, to_free);
@@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
unsigned long start_at;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
- skb = __skb_dequeue(&q->direct_queue);
+ skb = __qdisc_dequeue_head(&q->direct_queue);
if (skb != NULL) {
ok:
qdisc_bstats_update(sch, skb);
@@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
INIT_WORK(&q->work, htb_work_func);
- __skb_queue_head_init(&q->direct_queue);
+ qdisc_skb_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index aaaf02175338..9f7b380cf0a3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
return segs;
}
+static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
+{
+ skb->next = qh->head;
+
+ if (!qh->head)
+ qh->tail = skb;
+ qh->head = skb;
+ qh->qlen++;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -502,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1<<(prandom_u32() % 8);
}
- if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
+ if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
qdisc_qstats_backlog_inc(sch, skb);
@@ -522,8 +532,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (q->rate) {
struct sk_buff *last;
- if (!skb_queue_empty(&sch->q))
- last = skb_peek_tail(&sch->q);
+ if (sch->q.qlen)
+ last = sch->q.tail;
else
last = netem_rb_to_skb(rb_last(&q->t_root));
if (last) {
@@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb->time_to_send = psched_get_time();
q->counter = 0;
- __skb_queue_head(&sch->q, skb);
+ netem_enqueue_skb_head(&sch->q, skb);
sch->qstats.requeues++;
}
@@ -587,7 +597,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
struct rb_node *p;
tfifo_dequeue:
- skb = __skb_dequeue(&sch->q);
+ skb = __qdisc_dequeue_head(&sch->q);
if (skb) {
qdisc_qstats_backlog_dec(sch, skb);
deliver:
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index a570b0bb254c..5c3a99d6aa82 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __skb_dequeue(&sch->q);
+ struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
@@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
struct sk_buff *skb;
- skb = __qdisc_dequeue_head(sch, &sch->q);
+ skb = qdisc_dequeue_head(sch);
if (!skb)
return NULL;