Diffstat (limited to 'net/netfilter/nf_flow_table_offload.c')
 net/netfilter/nf_flow_table_offload.c | 330 ++++++++++++++++++++++++--------
 1 file changed, 248 insertions(+), 82 deletions(-)
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index f2c22c682851..e3b099c14eff 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -7,13 +7,13 @@
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-static struct work_struct nf_flow_offload_work;
-static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
-static LIST_HEAD(flow_offload_pending_list);
+static struct workqueue_struct *nf_flow_offload_wq;
struct flow_offload_work {
struct list_head list;
@@ -21,40 +21,68 @@ struct flow_offload_work {
int priority;
struct nf_flowtable *flowtable;
struct flow_offload *flow;
-};
-
-struct nf_flow_key {
- struct flow_dissector_key_meta meta;
- struct flow_dissector_key_control control;
- struct flow_dissector_key_basic basic;
- union {
- struct flow_dissector_key_ipv4_addrs ipv4;
- struct flow_dissector_key_ipv6_addrs ipv6;
- };
- struct flow_dissector_key_tcp tcp;
- struct flow_dissector_key_ports tp;
-} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
-
-struct nf_flow_match {
- struct flow_dissector dissector;
- struct nf_flow_key key;
- struct nf_flow_key mask;
-};
-
-struct nf_flow_rule {
- struct nf_flow_match match;
- struct flow_rule *rule;
+ struct work_struct work;
};
#define NF_FLOW_DISSECTOR(__match, __type, __field) \
(__match)->dissector.offset[__type] = \
offsetof(struct nf_flow_key, __field)
+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+ unsigned int enc_keys;
+
+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ return;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
+ mask->enc_key_id.keyid = 0xffffffff;
+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+
+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ enc_ipv4);
+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
+ if (key->enc_ipv4.src)
+ mask->enc_ipv4.src = 0xffffffff;
+ if (key->enc_ipv4.dst)
+ mask->enc_ipv4.dst = 0xffffffff;
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else {
+		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+				  enc_ipv6);
+		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
+ sizeof(struct in6_addr));
+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
+ sizeof(struct in6_addr)))
+			memset(&mask->enc_ipv6.src, 0xff,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
+ sizeof(struct in6_addr)))
+			memset(&mask->enc_ipv6.dst, 0xff,
+ sizeof(struct in6_addr));
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+ match->dissector.used_keys |= enc_keys;
+}
+
static int nf_flow_rule_match(struct nf_flow_match *match,
- const struct flow_offload_tuple *tuple)
+ const struct flow_offload_tuple *tuple,
+ struct dst_entry *other_dst)
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
+ struct ip_tunnel_info *tun_info;
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
@@ -64,6 +92,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
+ if (other_dst && other_dst->lwtstate) {
+ tun_info = lwt_tun_info(other_dst->lwtstate);
+ nf_flow_rule_lwt_match(match, tun_info);
+ }
+
key->meta.ingress_ifindex = tuple->iifidx;
mask->meta.ingress_ifindex = 0xffffffff;
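
The encap match built by nf_flow_rule_lwt_match() mirrors the tunnel endpoints on purpose: tun_info comes from the other direction's route and describes how replies are transmitted (IP_TUNNEL_INFO_TX), so the packets this rule matches on ingress carry those outer addresses swapped. A minimal, userspace-runnable sketch of the swap; struct and field names below are illustrative, not the kernel's:

/*
 * Sketch (not from the patch): the ingress match uses the TX tunnel's
 * destination as its source and vice versa.
 */
#include <stdio.h>
#include <stdint.h>

struct tun_endpoints {
	uint32_t src;	/* outer source address */
	uint32_t dst;	/* outer destination address */
};

int main(void)
{
	/* TX view of the reply path: local 10.0.0.1 -> remote 10.0.0.2 */
	struct tun_endpoints tx = { .src = 0x0a000001, .dst = 0x0a000002 };
	struct tun_endpoints match;

	match.src = tx.dst;	/* received packets come from the remote end */
	match.dst = tx.src;	/* and are addressed to the local end */

	printf("ingress match: outer %#x -> %#x\n", match.src, match.dst);
	return 0;
}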
@@ -443,10 +476,52 @@ static void flow_offload_redirect(const struct flow_offload *flow,
dev_hold(rt->dst.dev);
}
+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ dst = flow->tuplehash[dir].tuple.dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tun_info;
+ }
+ }
+}
+
+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry;
+ struct dst_entry *dst;
+
+ dst = flow->tuplehash[!dir].tuple.dst_cache;
+ if (dst && dst->lwtstate) {
+ struct ip_tunnel_info *tun_info;
+
+ tun_info = lwt_tun_info(dst->lwtstate);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ }
+ }
+}
+
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
+ flow_offload_decap_tunnel(flow, dir, flow_rule);
+ flow_offload_encap_tunnel(flow, dir, flow_rule);
+
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
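
Note that flow_offload_decap_tunnel() indexes tuplehash[!dir]: the tunnel a direction must decapsulate is the one its opposite direction transmits on. Since the flowtable defines exactly two directions, logical NOT flips between them, as this runnable sketch checks (enum values mirror the kernel's flow_offload_tuple_dir):

/* Sketch (not from the patch): '!dir' selects the opposite direction. */
#include <assert.h>
#include <stdio.h>

enum flow_offload_tuple_dir {
	FLOW_OFFLOAD_DIR_ORIGINAL = 0,
	FLOW_OFFLOAD_DIR_REPLY    = 1,
};

int main(void)
{
	assert(!FLOW_OFFLOAD_DIR_ORIGINAL == FLOW_OFFLOAD_DIR_REPLY);
	assert(!FLOW_OFFLOAD_DIR_REPLY == FLOW_OFFLOAD_DIR_ORIGINAL);
	printf("opposite of ORIGINAL is %d, of REPLY is %d\n",
	       !FLOW_OFFLOAD_DIR_ORIGINAL, !FLOW_OFFLOAD_DIR_REPLY);
	return 0;
}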
@@ -473,6 +548,9 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
+ flow_offload_decap_tunnel(flow, dir, flow_rule);
+ flow_offload_encap_tunnel(flow, dir, flow_rule);
+
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
@@ -503,6 +581,7 @@ nf_flow_offload_rule_alloc(struct net *net,
const struct flow_offload *flow = offload->flow;
const struct flow_offload_tuple *tuple;
struct nf_flow_rule *flow_rule;
+ struct dst_entry *other_dst;
int err = -ENOMEM;
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
@@ -518,7 +597,8 @@ nf_flow_offload_rule_alloc(struct net *net,
flow_rule->rule->match.key = &flow_rule->match.key;
tuple = &flow->tuplehash[dir].tuple;
- err = nf_flow_rule_match(&flow_rule->match, tuple);
+ other_dst = flow->tuplehash[!dir].tuple.dst_cache;
+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
if (err < 0)
goto err_flow_match;
@@ -598,6 +678,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
struct nf_flow_rule *flow_rule,
enum flow_offload_tuple_dir dir,
int priority, int cmd,
+ struct flow_stats *stats,
struct list_head *block_cb_list)
{
struct flow_cls_offload cls_flow = {};
@@ -611,6 +692,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
if (cmd == FLOW_CLS_REPLACE)
cls_flow.rule = flow_rule->rule;
+ down_read(&flowtable->flow_block_lock);
list_for_each_entry(block_cb, block_cb_list, list) {
err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
block_cb->cb_priv);
@@ -619,6 +701,10 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
i++;
}
+ up_read(&flowtable->flow_block_lock);
+
+ if (cmd == FLOW_CLS_STATS)
+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
return i;
}
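
The callback walk above is now bracketed by flow_block_lock taken for read, so concurrent add/delete/stats walkers need not serialize against each other, while binding or unbinding a block, which edits cb_list, takes the semaphore for write. A reduced sketch of that pattern (kernel-style, illustrative demo_* names, builds only against a kernel tree):

/* Sketch: readers iterate the callback list concurrently; writers mutate it. */
#include <linux/rwsem.h>
#include <linux/list.h>

struct demo_block {
	struct rw_semaphore lock;	/* cf. flowtable->flow_block_lock */
	struct list_head cb_list;	/* cf. flow_block.cb_list */
};

static int demo_walk(struct demo_block *blk)
{
	struct list_head *cb;
	int i = 0;

	down_read(&blk->lock);		/* concurrent walkers allowed */
	list_for_each(cb, &blk->cb_list)
		i++;			/* a real walker invokes block_cb->cb() */
	up_read(&blk->lock);

	return i;
}

static void demo_bind(struct demo_block *blk, struct list_head *cb)
{
	down_write(&blk->lock);		/* excludes walkers and other writers */
	list_add_tail(cb, &blk->cb_list);
	up_write(&blk->lock);
}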
@@ -629,7 +715,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload,
{
return nf_flow_offload_tuple(offload->flowtable, offload->flow,
flow_rule, dir, offload->priority,
- FLOW_CLS_REPLACE,
+ FLOW_CLS_REPLACE, NULL,
&offload->flowtable->flow_block.cb_list);
}
@@ -637,7 +723,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir)
{
nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
- offload->priority, FLOW_CLS_DESTROY,
+ offload->priority, FLOW_CLS_DESTROY, NULL,
&offload->flowtable->flow_block.cb_list);
}
@@ -683,19 +769,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir,
struct flow_stats *stats)
{
- struct nf_flowtable *flowtable = offload->flowtable;
- struct flow_cls_offload cls_flow = {};
- struct flow_block_cb *block_cb;
- struct netlink_ext_ack extack;
- __be16 proto = ETH_P_ALL;
-
- nf_flow_offload_init(&cls_flow, proto, offload->priority,
- FLOW_CLS_STATS,
- &offload->flow->tuplehash[dir].tuple, &extack);
-
- list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
- block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
- memcpy(stats, &cls_flow.stats, sizeof(*stats));
+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
+ offload->priority, FLOW_CLS_STATS, stats,
+ &offload->flowtable->flow_block.cb_list);
}
static void flow_offload_work_stats(struct flow_offload_work *offload)
@@ -709,19 +785,25 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,
lastused + NF_FLOW_TIMEOUT);
+
+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
+ if (stats[0].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ stats[0].pkts, stats[0].bytes);
+ if (stats[1].pkts)
+ nf_ct_acct_add(offload->flow->ct,
+ FLOW_OFFLOAD_DIR_REPLY,
+ stats[1].pkts, stats[1].bytes);
+ }
}
static void flow_offload_work_handler(struct work_struct *work)
{
- struct flow_offload_work *offload, *next;
- LIST_HEAD(offload_pending_list);
-
- spin_lock_bh(&flow_offload_pending_list_lock);
- list_replace_init(&flow_offload_pending_list, &offload_pending_list);
- spin_unlock_bh(&flow_offload_pending_list_lock);
+ struct flow_offload_work *offload;
- list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
- switch (offload->cmd) {
+ offload = container_of(work, struct flow_offload_work, work);
+ switch (offload->cmd) {
case FLOW_CLS_REPLACE:
flow_offload_work_add(offload);
break;
@@ -733,19 +815,14 @@ static void flow_offload_work_handler(struct work_struct *work)
break;
default:
WARN_ON_ONCE(1);
- }
- list_del(&offload->list);
- kfree(offload);
}
+
+ kfree(offload);
}
static void flow_offload_queue_work(struct flow_offload_work *offload)
{
- spin_lock_bh(&flow_offload_pending_list_lock);
- list_add_tail(&offload->list, &flow_offload_pending_list);
- spin_unlock_bh(&flow_offload_pending_list_lock);
-
- schedule_work(&nf_flow_offload_work);
+ queue_work(nf_flow_offload_wq, &offload->work);
}
static struct flow_offload_work *
@@ -762,6 +839,7 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
offload->flow = flow;
offload->priority = flowtable->priority;
offload->flowtable = flowtable;
+ INIT_WORK(&offload->work, flow_offload_work_handler);
return offload;
}
@@ -812,7 +890,7 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable))
- flush_work(&nf_flow_offload_work);
+ flush_workqueue(nf_flow_offload_wq);
}
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
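
This hunk and the handler/queue changes above share one theme: the global pending list and its spinlock are gone, each command carries its own work item on a dedicated workqueue, and nf_flow_table_offload_flush() can drain everything with flush_workqueue(). A reduced sketch of the pattern (kernel-style; illustrative demo_* names, builds only against a kernel tree):

/* Sketch: one self-contained work item per command. */
#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_work {
	struct work_struct work;
	int cmd;
};

static struct workqueue_struct *demo_wq;	/* cf. nf_flow_offload_wq */

static void demo_handler(struct work_struct *work)
{
	struct demo_work *dw = container_of(work, struct demo_work, work);

	/* ... dispatch on dw->cmd ... */
	kfree(dw);			/* each item cleans up after itself */
}

static int demo_queue(int cmd)
{
	struct demo_work *dw = kzalloc(sizeof(*dw), GFP_ATOMIC);

	if (!dw)
		return -ENOMEM;

	dw->cmd = cmd;
	INIT_WORK(&dw->work, demo_handler);
	queue_work(demo_wq, &dw->work);
	return 0;
}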
@@ -840,25 +918,47 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
return err;
}
-static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
- struct nf_flowtable *flowtable,
- struct net_device *dev,
- enum flow_block_command cmd,
- struct netlink_ext_ack *extack)
+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nf_flowtable *flowtable,
+ struct netlink_ext_ack *extack)
{
- int err;
-
- if (!dev->netdev_ops->ndo_setup_tc)
- return -EOPNOTSUPP;
-
memset(bo, 0, sizeof(*bo));
- bo->net = dev_net(dev);
+ bo->net = net;
bo->block = &flowtable->flow_block;
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
+ flow_indr_block_call(dev, bo, cmd, TC_SETUP_FT);
+
+ if (list_empty(&bo->cb_list))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
+ struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
if (err < 0)
return err;
@@ -877,7 +977,12 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
if (!nf_flowtable_hw_offload(flowtable))
return 0;
- err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
+ else
+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
+ &extack);
if (err < 0)
return err;
@@ -885,22 +990,83 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
-int nf_flow_table_offload_init(void)
+static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev,
+ struct nf_flowtable *flowtable,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
{
- INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- return 0;
+ if (!flowtable)
+ return;
+
+ nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable,
+ &extack);
+
+ cb(dev, cb_priv, TC_SETUP_FT, &bo);
+
+ nf_flow_table_block_setup(flowtable, &bo, cmd);
}
-void nf_flow_table_offload_exit(void)
+static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
{
- struct flow_offload_work *offload, *next;
- LIST_HEAD(offload_pending_list);
+ if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
+ return;
- cancel_work_sync(&nf_flow_offload_work);
+ nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd);
+}
- list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
- list_del(&offload->list);
- kfree(offload);
+static void nf_flow_table_indr_block_cb(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
+{
+ struct net *net = dev_net(dev);
+ struct nft_flowtable *nft_ft;
+ struct nft_table *table;
+ struct nft_hook *hook;
+
+ mutex_lock(&net->nft.commit_mutex);
+ list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(nft_ft, &table->flowtables, list) {
+ list_for_each_entry(hook, &nft_ft->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ nf_flow_table_indr_block_cb_cmd(&nft_ft->data,
+ dev, cb,
+ cb_priv, cmd);
+ }
+ }
}
+ mutex_unlock(&net->nft.commit_mutex);
+}
+
+static struct flow_indr_block_entry block_ing_entry = {
+ .cb = nf_flow_table_indr_block_cb,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
+int nf_flow_table_offload_init(void)
+{
+ nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ if (!nf_flow_offload_wq)
+ return -ENOMEM;
+
+ flow_indr_add_block_cb(&block_ing_entry);
+
+ return 0;
+}
+
+void nf_flow_table_offload_exit(void)
+{
+ flow_indr_del_block_cb(&block_ing_entry);
+ destroy_workqueue(nf_flow_offload_wq);
}