summaryrefslogtreecommitdiff
path: root/net/sched/act_ct.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/act_ct.c')
-rw-r--r--net/sched/act_ct.c807
1 files changed, 431 insertions, 376 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..2b6ac7069dc1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,6 +24,7 @@
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@@ -33,36 +34,36 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_conntrack_act_ct.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
+struct zones_ht_key {
+ struct net *net;
+ u16 zone;
+};
+
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
refcount_t ref;
- u16 zone;
+ struct zones_ht_key key;
bool dying;
};
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
- .key_offset = offsetof(struct tcf_ct_flow_table, zone),
- .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .key_offset = offsetof(struct tcf_ct_flow_table, key),
+ .key_len = offsetofend(struct zones_ht_key, zone),
.automatic_shrinking = true,
};
-static struct nf_ct_ext_type act_ct_extend __read_mostly = {
- .len = sizeof(struct nf_conn_act_ct_ext),
- .align = __alignof__(struct nf_conn_act_ct_ext),
- .id = NF_CT_EXT_ACT_CT,
-};
-
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
@@ -174,20 +175,18 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
+ enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
- enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- entry->ct_metadata.mark = ct->mark;
+ entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
- ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -241,22 +240,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
+ enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ if (ctinfo == IP_CT_ESTABLISHED)
+ set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@@ -266,7 +271,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
- tcf_ct_flow_table_add_action_meta(ct, dir, action);
+ tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@@ -278,18 +283,50 @@ err_nat:
return err;
}
+static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
+{
+ return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+ test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) &&
+ !test_bit(NF_FLOW_HW_PENDING, &flow->flags) &&
+ !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+}
+
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_get(struct nf_flowtable *ft)
+{
+ struct tcf_ct_flow_table *ct_ft =
+ container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+ tcf_ct_flow_table_get_ref(ct_ft);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_put(struct nf_flowtable *ft)
+{
+ struct tcf_ct_flow_table *ct_ft =
+ container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+ tcf_ct_flow_table_put(ct_ft);
+}
+
static struct nf_flowtable_type flowtable_ct = {
+ .gc = tcf_ct_flow_is_outdated,
.action = tcf_ct_flow_table_fill_actions,
+ .get = tcf_ct_nf_get,
+ .put = tcf_ct_nf_put,
.owner = THIS_MODULE,
};
-static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
+static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
+ struct zones_ht_key key = { .net = net, .zone = params->zone };
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
mutex_lock(&zones_mutex);
- ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params);
if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
goto out_unlock;
@@ -298,7 +335,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
goto err_alloc;
refcount_set(&ct_ft->ref, 1);
- ct_ft->zone = params->zone;
+ ct_ft->key = key;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
if (err)
goto err_insert;
@@ -309,6 +346,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
err = nf_flow_table_init(&ct_ft->nf_ft);
if (err)
goto err_init;
+ write_pnet(&ct_ft->nf_ft.net, net);
__module_get(THIS_MODULE);
out_unlock:
@@ -327,9 +365,13 @@ err_alloc:
return err;
}
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
+{
+ refcount_inc(&ct_ft->ref);
+}
+
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
- struct flow_block_cb *block_cb, *tmp_cb;
struct tcf_ct_flow_table *ct_ft;
struct flow_block *block;
@@ -337,33 +379,45 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
rwork);
nf_flow_table_free(&ct_ft->nf_ft);
- /* Remove any remaining callbacks before cleanup */
block = &ct_ft->nf_ft.flow_block;
down_write(&ct_ft->nf_ft.flow_block_lock);
- list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
- list_del(&block_cb->list);
- flow_block_cb_free(block_cb);
- }
+ WARN_ON(!list_empty(&block->cb_list));
up_write(&ct_ft->nf_ft.flow_block_lock);
kfree(ct_ft);
module_put(THIS_MODULE);
}
-static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft)
{
- struct tcf_ct_flow_table *ct_ft = params->ct_ft;
-
- if (refcount_dec_and_test(&params->ct_ft->ref)) {
+ if (refcount_dec_and_test(&ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
}
}
+static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+ struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
+{
+ entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
+ entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
+}
+
+static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry)
+{
+ struct nf_conn_act_ct_ext *act_ct_ext;
+
+ act_ct_ext = nf_conn_act_ct_ext_find(entry->ct);
+ if (act_ct_ext) {
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
+ }
+}
+
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
- bool tcp)
+ bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@@ -382,13 +436,13 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ if (bidirectional)
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
- entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
- entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
err = flow_offload_add(&ct_ft->nf_ft, entry);
@@ -407,20 +461,41 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- bool tcp = false;
-
- if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
- !test_bit(IPS_ASSURED_BIT, &ct->status))
- return;
+ bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
- tcp = true;
- if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
+
+ tcp = true;
break;
case IPPROTO_UDP:
+ if (!nf_ct_is_confirmed(ct))
+ return;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ bidirectional = false;
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: {
+ struct nf_conntrack_tuple *tuple;
+
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->status & IPS_NAT_MASK)
+ return;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ /* No support for GRE v1 */
+ if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+ return;
break;
+ }
+#endif
default:
return;
}
@@ -429,7 +504,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
- tcf_ct_flow_table_add(ct_ft, ct, tcp);
+ tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@@ -440,6 +515,8 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
+ size_t hdrsize;
+ u8 ipproto;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
return false;
@@ -451,29 +528,54 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
unlikely(thoff != sizeof(struct iphdr)))
return false;
- if (iph->protocol != IPPROTO_TCP &&
- iph->protocol != IPPROTO_UDP)
+ ipproto = iph->protocol;
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ hdrsize = sizeof(*ports);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
return false;
+ }
if (iph->ttl <= 1)
return false;
- if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
- thoff + sizeof(struct tcphdr) :
- thoff + sizeof(*ports)))
+ if (!pskb_network_may_pull(skb, thoff + hdrsize))
return false;
- iph = ip_hdr(skb);
- if (iph->protocol == IPPROTO_TCP)
+ switch (ipproto) {
+ case IPPROTO_TCP:
*tcph = (void *)(skb_network_header(skb) + thoff);
+ fallthrough;
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return false;
+ break;
+ }
+ }
+
+ iph = ip_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
- tuple->l4proto = iph->protocol;
+ tuple->l4proto = ipproto;
return true;
}
@@ -486,36 +588,63 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
+ size_t hdrsize;
+ u8 nexthdr;
if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
return false;
ip6h = ipv6_hdr(skb);
+ thoff = sizeof(*ip6h);
- if (ip6h->nexthdr != IPPROTO_TCP &&
- ip6h->nexthdr != IPPROTO_UDP)
+ nexthdr = ip6h->nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ hdrsize = sizeof(*ports);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
return false;
+ }
if (ip6h->hop_limit <= 1)
return false;
- thoff = sizeof(*ip6h);
- if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
- thoff + sizeof(struct tcphdr) :
- thoff + sizeof(*ports)))
+ if (!pskb_network_may_pull(skb, thoff + hdrsize))
return false;
- ip6h = ipv6_hdr(skb);
- if (ip6h->nexthdr == IPPROTO_TCP)
+ switch (nexthdr) {
+ case IPPROTO_TCP:
*tcph = (void *)(skb_network_header(skb) + thoff);
+ fallthrough;
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return false;
+ break;
+ }
+ }
+
+ ip6h = ipv6_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET6;
- tuple->l4proto = ip6h->nexthdr;
+ tuple->l4proto = nexthdr;
return true;
}
@@ -529,15 +658,11 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct flow_offload_tuple tuple = {};
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph = NULL;
+ bool force_refresh = false;
struct flow_offload *flow;
struct nf_conn *ct;
u8 dir;
- /* Previously seen or loopback */
- ct = nf_ct_get(skb, &ctinfo);
- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
- return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
@@ -559,15 +684,40 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
+ if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+ !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+ /* Only offload reply direction after connection became
+ * assured.
+ */
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+ else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+ /* If flow_table flow has already been updated to the
+ * established state, then don't refresh.
+ */
+ return false;
+ force_refresh = true;
+ }
+
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
- ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
+ if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ else
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
+
+ nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
+ tcf_ct_flow_ct_ext_ifidx_update(flow);
+ flow_offload_refresh(nf_ft, flow, force_refresh);
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
+ /* Process this flow in SW to allow promoting to ASSURED */
+ return false;
+ }
- flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
@@ -587,16 +737,14 @@ static void tcf_ct_flow_tables_uninit(void)
}
static struct tc_action_ops act_ct_ops;
-static unsigned int ct_net_id;
struct tc_ct_action_net {
struct tc_action_net tn; /* Must be first */
- bool labels;
};
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
- u16 zone_id, bool force)
+ struct tcf_ct_params *p)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -605,50 +753,33 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
if (!ct)
return false;
if (!net_eq(net, read_pnet(&ct->ct_net)))
- return false;
- if (nf_ct_zone(ct)->id != zone_id)
- return false;
+ goto drop_ct;
+ if (nf_ct_zone(ct)->id != p->zone)
+ goto drop_ct;
+ if (p->helper) {
+ struct nf_conn_help *help;
+
+ help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+ if (help && rcu_access_pointer(help->helper) != p->helper)
+ goto drop_ct;
+ }
/* Force conntrack entry direction. */
- if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+ if ((p->ct_action & TCA_CT_ACT_FORCE) &&
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
- nf_ct_put(ct);
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-
- return false;
+ goto drop_ct;
}
return true;
-}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
-{
- unsigned int len;
- int err;
+drop_ct:
+ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- switch (family) {
- case NFPROTO_IPV4:
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case NFPROTO_IPV6:
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
-
- return err;
+ return false;
}
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
@@ -710,6 +841,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
struct nf_conn *ct;
int err = 0;
bool frag;
+ u8 proto;
u16 mru;
/* Previously seen (loopback)? Ignore. */
@@ -724,148 +856,44 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (err || !frag)
return err;
- skb_get(skb);
- mru = tc_skb_cb(skb)->mru;
-
- if (family == NFPROTO_IPV4) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- local_bh_disable();
- err = ip_defrag(net, skb, user);
- local_bh_enable();
- if (err && err != -EINPROGRESS)
- return err;
-
- if (!err) {
- *defrag = true;
- mru = IPCB(skb)->frag_max_size;
- }
- } else { /* NFPROTO_IPV6 */
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err && err != -EINPROGRESS)
- goto out_free;
-
- if (!err) {
- *defrag = true;
- mru = IP6CB(skb)->frag_max_size;
- }
-#else
- err = -EOPNOTSUPP;
- goto out_free;
-#endif
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
+ if (err)
+ return err;
- if (err != -EINPROGRESS)
- tc_skb_cb(skb)->mru = mru;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
- return err;
+ *defrag = true;
+ tc_skb_cb(skb)->mru = mru;
-out_free:
- kfree_skb(skb);
- return err;
+ return 0;
}
-static void tcf_ct_params_free(struct rcu_head *head)
+static void tcf_ct_params_free(struct tcf_ct_params *params)
{
- struct tcf_ct_params *params = container_of(head,
- struct tcf_ct_params, rcu);
-
- tcf_ct_flow_table_put(params);
+ if (params->helper) {
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (params->ct_action & TCA_CT_ACT_NAT)
+ nf_nat_helper_put(params->helper);
+#endif
+ nf_conntrack_helper_put(params->helper);
+ }
+ if (params->ct_ft)
+ tcf_ct_flow_table_put(params->ct_ft);
+ if (params->tmpl) {
+ if (params->put_labels)
+ nf_connlabels_put(nf_ct_net(params->tmpl));
- if (params->tmpl)
nf_ct_put(params->tmpl);
+ }
+
kfree(params);
}
-#if IS_ENABLED(CONFIG_NF_NAT)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+static void tcf_ct_params_free_rcu(struct rcu_head *head)
{
- __be16 proto = skb_protocol(skb, true);
- int hooknum, err = NF_ACCEPT;
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (proto == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto out;
- } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto out;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- fallthrough;
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto out;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto out;
- }
+ struct tcf_ct_params *params;
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
- if (err == NF_ACCEPT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- tc_skb_cb(skb)->post_ct_snat = 1;
- if (maniptype == NF_NAT_MANIP_DST)
- tc_skb_cb(skb)->post_ct_dnat = 1;
- }
-out:
- return err;
+ params = container_of(head, struct tcf_ct_params, rcu);
+ tcf_ct_params_free(params);
}
-#endif /* CONFIG_NF_NAT */
static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
@@ -875,9 +903,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
if (!mask)
return;
- new_mark = mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
}
@@ -906,69 +934,42 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
- int err;
- enum nf_nat_manip_type maniptype;
+ int err, action = 0;
if (!(ct_action & TCA_CT_ACT_NAT))
return NF_ACCEPT;
+ if (ct_action & TCA_CT_ACT_NAT_SRC)
+ action |= BIT(NF_NAT_MANIP_SRC);
+ if (ct_action & TCA_CT_ACT_NAT_DST)
+ action |= BIT(NF_NAT_MANIP_DST);
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_DROP; /* Can't NAT. */
-
- if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
- (ctinfo != IP_CT_RELATED || commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (ct_action & TCA_CT_ACT_NAT_DST) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT;
- }
+ err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+ if (err != NF_ACCEPT)
+ return err & NF_VERDICT_MASK;
+
+ if (action & BIT(NF_NAT_MANIP_SRC))
+ qdisc_skb_cb(skb)->post_ct_snat = 1;
+ if (action & BIT(NF_NAT_MANIP_DST))
+ qdisc_skb_cb(skb)->post_ct_dnat = 1;
- err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ct_nat_execute(skb, ct, ctinfo, range,
- maniptype);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
- }
- }
return err;
#else
return NF_ACCEPT;
#endif
}
-static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
- bool cached, commit, clear, force;
enum ip_conntrack_info ctinfo;
struct tcf_ct *c = to_ct(a);
struct nf_conn *tmpl = NULL;
struct nf_hook_state state;
+ bool cached, commit, clear;
int nh_ofs, err, retval;
struct tcf_ct_params *p;
+ bool add_helper = false;
bool skip_add = false;
bool defrag = false;
struct nf_conn *ct;
@@ -976,17 +977,16 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
p = rcu_dereference_bh(c->params);
- retval = READ_ONCE(c->tcf_action);
+ retval = p->action;
commit = p->ct_action & TCA_CT_ACT_COMMIT;
clear = p->ct_action & TCA_CT_ACT_CLEAR;
- force = p->ct_action & TCA_CT_ACT_FORCE;
tmpl = p->tmpl;
tcf_lastuse_update(&c->tcf_tm);
tcf_action_update_bstats(&c->common, skb);
if (clear) {
- tc_skb_cb(skb)->post_ct = false;
+ qdisc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
nf_ct_put(ct);
@@ -1006,14 +1006,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
- if (err == -EINPROGRESS) {
- retval = TC_ACT_STOLEN;
- goto out_clear;
- }
if (err)
- goto drop;
+ goto out_frag;
- err = tcf_ct_skb_network_trim(skb, family);
+ err = nf_ct_skb_network_trim(skb, family);
if (err)
goto drop;
@@ -1022,7 +1018,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
* actually run the packet through conntrack twice unless it's for a
* different zone.
*/
- cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
+ cached = tcf_ct_skb_nfct_cached(net, skb, p);
if (!cached) {
if (tcf_ct_flow_table_lookup(p, skb, family)) {
skip_add = true;
@@ -1041,7 +1037,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
state.pf = family;
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- goto out_push;
+ goto nf_error;
}
do_nat:
@@ -1053,20 +1049,46 @@ do_nat:
err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
if (err != NF_ACCEPT)
- goto drop;
+ goto nf_error;
+
+ if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) {
+ err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC);
+ if (err)
+ goto drop;
+ add_helper = true;
+ if (p->ct_action & TCA_CT_ACT_NAT && !nfct_seqadj(ct)) {
+ if (!nfct_seqadj_ext_add(ct))
+ goto drop;
+ }
+ }
+
+ if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) {
+ err = nf_ct_helper(skb, ct, ctinfo, family);
+ if (err != NF_ACCEPT)
+ goto nf_error;
+ }
if (commit) {
tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
if (!nf_ct_is_confirmed(ct))
- nf_conn_act_ct_ext_add(ct);
+ nf_conn_act_ct_ext_add(skb, ct, ctinfo);
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- goto drop;
+ err = nf_conntrack_confirm(skb);
+ if (err != NF_ACCEPT)
+ goto nf_error;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
@@ -1075,16 +1097,36 @@ do_nat:
out_push:
skb_push_rcsum(skb, nh_ofs);
- tc_skb_cb(skb)->post_ct = true;
+ qdisc_skb_cb(skb)->post_ct = true;
tc_skb_cb(skb)->zone = p->zone;
out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
+out_frag:
+ if (err != -EINPROGRESS)
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+
drop:
tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
+
+nf_error:
+ /* some verdicts store extra data in upper bits, such
+ * as errno or queue number.
+ */
+ switch (err & NF_VERDICT_MASK) {
+ case NF_DROP:
+ goto drop;
+ case NF_STOLEN:
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ goto drop;
+ }
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
@@ -1103,6 +1145,9 @@ static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
[TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
+ [TCA_CT_HELPER_NAME] = { .type = NLA_STRING, .len = NF_CT_HELPER_NAME_LEN },
+ [TCA_CT_HELPER_FAMILY] = { .type = NLA_U8 },
+ [TCA_CT_HELPER_PROTO] = { .type = NLA_U8 },
};
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
@@ -1138,9 +1183,8 @@ static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
range->min_addr.ip =
nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);
- range->max_addr.ip = max_attr ?
- nla_get_in_addr(max_attr) :
- range->min_addr.ip;
+ range->max_addr.ip =
+ nla_get_in_addr_default(max_attr, range->min_addr.ip);
} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];
@@ -1190,10 +1234,11 @@ static int tcf_ct_fill_params(struct net *net,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
struct nf_conntrack_zone zone;
+ int err, family, proto, len;
+ bool put_labels = false;
struct nf_conn *tmpl;
- int err;
+ char *name;
p->zone = NF_CT_DEFAULT_ZONE_ID;
@@ -1221,15 +1266,20 @@ static int tcf_ct_fill_params(struct net *net,
}
if (tb[TCA_CT_LABELS]) {
+ unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
+
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
return -EOPNOTSUPP;
}
- if (!tn->labels) {
+ if (nf_connlabels_get(net, n_bits - 1)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
return -EOPNOTSUPP;
+ } else {
+ put_labels = true;
}
+
tcf_ct_set_key_val(tb,
p->labels, TCA_CT_LABELS,
p->labels_mask, TCA_CT_LABELS_MASK,
@@ -1254,10 +1304,38 @@ static int tcf_ct_fill_params(struct net *net,
NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
return -ENOMEM;
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
p->tmpl = tmpl;
+ if (tb[TCA_CT_HELPER_NAME]) {
+ name = nla_data(tb[TCA_CT_HELPER_NAME]);
+ len = nla_len(tb[TCA_CT_HELPER_NAME]);
+ if (len > 16 || name[len - 1] != '\0') {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse helper name.");
+ err = -EINVAL;
+ goto err;
+ }
+ family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET);
+ proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO],
+ IPPROTO_TCP);
+ err = nf_ct_add_helper(tmpl, name, family, proto,
+ p->ct_action & TCA_CT_ACT_NAT, &p->helper);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to add helper");
+ goto err;
+ }
+ }
+
+ p->put_labels = put_labels;
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
+err:
+ if (put_labels)
+ nf_connlabels_put(net);
+
+ nf_ct_put(p->tmpl);
+ p->tmpl = NULL;
+ return err;
}
static int tcf_ct_init(struct net *net, struct nlattr *nla,
@@ -1265,7 +1343,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_ct_params *params = NULL;
struct nlattr *tb[TCA_CT_MAX + 1];
@@ -1304,7 +1382,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
res = ACT_P_CREATED;
} else {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
@@ -1327,10 +1405,11 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (err)
goto cleanup;
- err = tcf_ct_flow_table_get(params);
+ err = tcf_ct_flow_table_get(net, params);
if (err)
goto cleanup;
+ params->action = parm->action;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params = rcu_replace_pointer(c->params, params,
@@ -1340,14 +1419,15 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- call_rcu(&params->rcu, tcf_ct_params_free);
+ call_rcu(&params->rcu, tcf_ct_params_free_rcu);
return res;
cleanup:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
- kfree(params);
+ if (params)
+ tcf_ct_params_free(params);
tcf_idr_release(*a, bind);
return err;
}
@@ -1359,12 +1439,12 @@ static void tcf_ct_cleanup(struct tc_action *a)
params = rcu_dereference_protected(c->params, 1);
if (params)
- call_rcu(&params->rcu, tcf_ct_params_free);
+ call_rcu(&params->rcu, tcf_ct_params_free_rcu);
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
- void *val, int val_type,
- void *mask, int mask_type,
+ const void *val, int val_type,
+ const void *mask, int mask_type,
int len)
{
int err;
@@ -1385,9 +1465,9 @@ static int tcf_ct_dump_key_val(struct sk_buff *skb,
return 0;
}
-static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
+static int tcf_ct_dump_nat(struct sk_buff *skb, const struct tcf_ct_params *p)
{
- struct nf_nat_range2 *range = &p->range;
+ const struct nf_nat_range2 *range = &p->range;
if (!(p->ct_action & TCA_CT_ACT_NAT))
return 0;
@@ -1425,13 +1505,26 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
return 0;
}
+static int tcf_ct_dump_helper(struct sk_buff *skb,
+ const struct nf_conntrack_helper *helper)
+{
+ if (!helper)
+ return 0;
+
+ if (nla_put_string(skb, TCA_CT_HELPER_NAME, helper->name) ||
+ nla_put_u8(skb, TCA_CT_HELPER_FAMILY, helper->tuple.src.l3num) ||
+ nla_put_u8(skb, TCA_CT_HELPER_PROTO, helper->tuple.dst.protonum))
+ return -1;
+
+ return 0;
+}
+
static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_ct *c = to_ct(a);
- struct tcf_ct_params *p;
-
+ const struct tcf_ct *c = to_ct(a);
+ const struct tcf_ct_params *p;
struct tc_ct opt = {
.index = c->tcf_index,
.refcnt = refcount_read(&c->tcf_refcnt) - ref,
@@ -1439,10 +1532,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- spin_lock_bh(&c->tcf_lock);
- p = rcu_dereference_protected(c->params,
- lockdep_is_held(&c->tcf_lock));
- opt.action = c->tcf_action;
+ rcu_read_lock();
+ p = rcu_dereference(c->params);
+ opt.action = p->action;
if (tcf_ct_dump_key_val(skb,
&p->ct_action, TCA_CT_ACTION,
@@ -1477,6 +1569,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
if (tcf_ct_dump_nat(skb, p))
goto nla_put_failure;
+ if (tcf_ct_dump_helper(skb, p->helper))
+ goto nla_put_failure;
+
skip_dump:
if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -1484,32 +1579,15 @@ skip_dump:
tcf_tm_dump(&t, &c->tcf_tm);
if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
goto nla_put_failure;
- spin_unlock_bh(&c->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&c->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
-static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
u64 drops, u64 lastuse, bool hw)
{
@@ -1520,11 +1598,15 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
}
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
+ if (tcf_ct_helper(act))
+ return -EOPNOTSUPP;
+
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
entry->ct.zone = tcf_ct_zone(act);
@@ -1547,48 +1629,28 @@ static struct tc_action_ops act_ct_ops = {
.dump = tcf_ct_dump,
.init = tcf_ct_init,
.cleanup = tcf_ct_cleanup,
- .walk = tcf_ct_walker,
- .lookup = tcf_ct_search,
.stats_update = tcf_stats_update,
.offload_act_setup = tcf_ct_offload_act_setup,
.size = sizeof(struct tcf_ct),
};
+MODULE_ALIAS_NET_ACT("ct");
static __net_init int ct_init_net(struct net *net)
{
- unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
-
- if (nf_connlabels_get(net, n_bits - 1)) {
- tn->labels = false;
- pr_err("act_ct: Failed to set connlabels length");
- } else {
- tn->labels = true;
- }
+ struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}
static void __net_exit ct_exit_net(struct list_head *net_list)
{
- struct net *net;
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
-
- if (tn->labels)
- nf_connlabels_put(net);
- }
- rtnl_unlock();
-
- tc_action_net_exit(net_list, ct_net_id);
+ tc_action_net_exit(net_list, act_ct_ops.net_id);
}
static struct pernet_operations ct_net_ops = {
.init = ct_init_net,
.exit_batch = ct_exit_net,
- .id = &ct_net_id,
+ .id = &act_ct_ops.net_id,
.size = sizeof(struct tc_ct_action_net),
};
@@ -1608,16 +1670,10 @@ static int __init ct_init_module(void)
if (err)
goto err_register;
- err = nf_ct_extend_register(&act_ct_extend);
- if (err)
- goto err_register_extend;
-
static_branch_inc(&tcf_frag_xmit_count);
return 0;
-err_register_extend:
- tcf_unregister_action(&act_ct_ops, &ct_net_ops);
err_register:
tcf_ct_flow_tables_uninit();
err_tbl_init:
@@ -1628,7 +1684,6 @@ err_tbl_init:
static void __exit ct_cleanup_module(void)
{
static_branch_dec(&tcf_frag_xmit_count);
- nf_ct_extend_unregister(&act_ct_extend);
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
tcf_ct_flow_tables_uninit();
destroy_workqueue(act_ct_wq);