summaryrefslogtreecommitdiff
path: root/net/openvswitch/conntrack.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch/conntrack.c')
-rw-r--r--net/openvswitch/conntrack.c530
1 files changed, 121 insertions, 409 deletions
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c07afff57dd3..a0811e1fba65 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -9,6 +9,7 @@
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/static_key.h>
+#include <linux/string_helpers.h>
#include <net/ip.h>
#include <net/genetlink.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -28,6 +29,7 @@
#include <net/netfilter/nf_conntrack_act_ct.h>
#include "datapath.h"
+#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
@@ -152,7 +154,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- return ct ? ct->mark : 0;
+ return ct ? READ_ONCE(ct->mark) : 0;
#else
return 0;
#endif
@@ -166,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
@@ -340,9 +347,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
u32 new_mark;
- new_mark = ct_mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
key->ct.mark = new_mark;
@@ -434,155 +441,26 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
-/* 'skb' should already be pulled to nh_ofs. */
-static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
-{
- const struct nf_conntrack_helper *helper;
- const struct nf_conn_help *help;
- enum ip_conntrack_info ctinfo;
- unsigned int protoff;
- struct nf_conn *ct;
- int err;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
-
- help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
-
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
-
- switch (proto) {
- case NFPROTO_IPV4:
- protoff = ip_hdrlen(skb);
- break;
- case NFPROTO_IPV6: {
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- __be16 frag_off;
- int ofs;
-
- ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
- &frag_off);
- if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
- pr_debug("proto header not found\n");
- return NF_ACCEPT;
- }
- protoff = ofs;
- break;
- }
- default:
- WARN_ONCE(1, "helper invoked on non-IP family!");
- return NF_DROP;
- }
-
- err = helper->help(skb, protoff, ct, ctinfo);
- if (err != NF_ACCEPT)
- return err;
-
- /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
- * FTP with NAT) adusting the TCP payload size when mangling IP
- * addresses and/or port numbers in the text-based control connection.
- */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-static int handle_fragments(struct net *net, struct sw_flow_key *key,
- u16 zone, struct sk_buff *skb)
+static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, int family, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
- if (key->eth.type == htons(ETH_P_IP)) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(net, skb, user);
- if (err)
- return err;
-
- ovs_cb.mru = IPCB(skb)->frag_max_size;
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err) {
- if (err != -EINPROGRESS)
- kfree_skb(skb);
- return err;
- }
-
- key->ip.proto = ipv6_hdr(skb)->nexthdr;
- ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#endif
- } else {
- kfree_skb(skb);
- return -EPFNOSUPPORT;
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
+ if (err)
+ return err;
/* The key extracted from the fragment that completed this datagram
* likely didn't have an L4 header, so regenerate it.
*/
ovs_flow_key_update_l3l4(skb, key);
-
key->ip.frag = OVS_FRAG_TYPE_NONE;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
*OVS_CB(skb) = ovs_cb;
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -734,88 +612,6 @@ static bool skb_nfct_cached(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_NAT)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
-{
- int hooknum, nh_off, err = NF_ACCEPT;
-
- nh_off = skb_network_offset(skb);
- skb_pull_rcsum(skb, nh_off);
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (IS_ENABLED(CONFIG_NF_NAT) &&
- skb->protocol == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto push;
- } else if (IS_ENABLED(CONFIG_IPV6) &&
- skb->protocol == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto push;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- fallthrough;
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto push;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto push;
- }
-
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
-push:
- skb_push_rcsum(skb, nh_off);
-
- return err;
-}
-
static void ovs_nat_update_key(struct sw_flow_key *key,
const struct sk_buff *skb,
enum nf_nat_manip_type maniptype)
@@ -873,59 +669,23 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- enum nf_nat_manip_type maniptype;
- int err;
+ int err, action = 0;
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_ACCEPT; /* Can't NAT. */
+ if (!(info->nat & OVS_CT_NAT))
+ return NF_ACCEPT;
+ if (info->nat & OVS_CT_SRC_NAT)
+ action |= BIT(NF_NAT_MANIP_SRC);
+ if (info->nat & OVS_CT_DST_NAT)
+ action |= BIT(NF_NAT_MANIP_DST);
- /* Determine NAT type.
- * Check if the NAT type can be deduced from the tracked connection.
- * Make sure new expected connections (IP_CT_RELATED) are NATted only
- * when committing.
- */
- if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
- ct->status & IPS_NAT_MASK &&
- (ctinfo != IP_CT_RELATED || info->commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (info->nat & OVS_CT_SRC_NAT) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (info->nat & OVS_CT_DST_NAT) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT; /* Connection is not NATed. */
- }
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
-
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
- }
- }
+ err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+ if (err != NF_ACCEPT)
+ return err;
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
+ if (action & BIT(NF_NAT_MANIP_SRC))
+ ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
+ if (action & BIT(NF_NAT_MANIP_DST))
+ ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST);
return err;
}
@@ -939,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
#endif
+static int verdict_to_errno(unsigned int verdict)
+{
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ return 0;
+ case NF_DROP:
+ return -EINVAL;
+ case NF_STOLEN:
+ return -EINPROGRESS;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet
* through conntrack.
@@ -977,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- return -ENOENT;
+ return verdict_to_errno(err);
/* Clear CT state NAT flags to mark that we have not yet done
* NAT after the nf_conntrack_in() call. We can actually clear
@@ -1004,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* the key->ct_state.
*/
if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
+ (nf_ct_is_confirmed(ct) || info->commit)) {
+ int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
/* Userspace may decide to perform a ct lookup without a helper
@@ -1015,7 +794,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* connections which we will commit, we may need to attach
* the helper here.
*/
- if (info->commit && info->helper && !nfct_help(ct)) {
+ if (!nf_ct_is_confirmed(ct) && info->commit &&
+ info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
@@ -1036,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* - When committing an unconfirmed connection.
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
- return -EINVAL;
+ info->commit)) {
+ int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -1060,36 +843,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1165,8 +928,8 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
}
static int ovs_ct_check_limit(struct net *net,
- const struct ovs_conntrack_info *info,
- const struct nf_conntrack_tuple *tuple)
+ const struct sk_buff *skb,
+ const struct ovs_conntrack_info *info)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
@@ -1179,8 +942,9 @@ static int ovs_ct_check_limit(struct net *net,
if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
return 0;
- connections = nf_conncount_count(net, ct_limit_info->data,
- &conncount_key, tuple, &info->zone);
+ connections = nf_conncount_count_skb(net, skb, info->family,
+ ct_limit_info->data,
+ &conncount_key);
if (connections > per_zone_limit)
return -ENOMEM;
@@ -1209,8 +973,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
if (!nf_ct_is_confirmed(ct)) {
- err = ovs_ct_check_limit(net, info,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ err = ovs_ct_check_limit(net, skb, info);
if (err) {
net_warn_ratelimited("openvswitch: zone: %u "
"exceeds conntrack limit\n",
@@ -1251,7 +1014,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (err)
return err;
- nf_conn_act_ct_ext_add(ct);
+ nf_conn_act_ct_ext_add(skb, ct, ctinfo);
} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
@@ -1262,40 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
/* This will take care of sending queued events even if the connection
* is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
-
- return 0;
-}
-
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int ovs_skb_network_trim(struct sk_buff *skb)
-{
- unsigned int len;
- int err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case htons(ETH_P_IPV6):
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
- if (err)
- kfree_skb(skb);
+ err = nf_conntrack_confirm(skb);
- return err;
+ return verdict_to_errno(err);
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1312,12 +1044,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
- err = ovs_skb_network_trim(skb);
- if (err)
+ err = nf_ct_skb_network_trim(skb, info->family);
+ if (err) {
+ kfree_skb(skb);
return err;
+ }
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
- err = handle_fragments(net, key, info->zone.id, skb);
+ err = ovs_ct_handle_fragments(net, key, info->zone.id,
+ info->family, skb);
if (err)
return err;
}
@@ -1327,9 +1062,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
else
err = ovs_ct_lookup(net, key, info, skb);
+ /* conntrack core returned NF_STOLEN */
+ if (err == -EINPROGRESS)
+ return err;
+
skb_push_rcsum(skb, nh_ofs);
if (err)
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
return err;
}
@@ -1342,46 +1081,11 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
-
- return 0;
-}
-
-static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
- const struct sw_flow_key *key, bool log)
-{
- struct nf_conntrack_helper *helper;
- struct nf_conn_help *help;
- int ret = 0;
-
- helper = nf_conntrack_helper_try_module_get(name, info->family,
- key->ip.proto);
- if (!helper) {
- OVS_NLERR(log, "Unknown helper \"%s\"", name);
- return -EINVAL;
- }
- help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
- if (!help) {
- nf_conntrack_helper_put(helper);
- return -ENOMEM;
- }
+ if (key)
+ ovs_ct_fill_key(skb, key, false);
-#if IS_ENABLED(CONFIG_NF_NAT)
- if (info->nat) {
- ret = nf_nat_helper_try_module_get(name, info->family,
- key->ip.proto);
- if (ret) {
- nf_conntrack_helper_put(helper);
- OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d",
- name, ret);
- return ret;
- }
- }
-#endif
- rcu_assign_pointer(help->helper, helper);
- info->helper = helper;
- return ret;
+ return 0;
}
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -1596,7 +1300,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#endif
case OVS_CT_ATTR_HELPER:
*helper = nla_data(a);
- if (!memchr(*helper, '\0', nla_len(a))) {
+ if (!string_is_terminated(*helper, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack helper");
return -EINVAL;
}
@@ -1617,7 +1321,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
case OVS_CT_ATTR_TIMEOUT:
memcpy(info->timeout, nla_data(a), nla_len(a));
- if (!memchr(info->timeout, '\0', nla_len(a))) {
+ if (!string_is_terminated(info->timeout, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack timeout");
return -EINVAL;
}
@@ -1708,8 +1412,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
@@ -1717,9 +1422,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
}
if (helper) {
- err = ovs_ct_add_helper(&ct_info, helper, key, log);
- if (err)
+ err = nf_ct_add_helper(ct_info.ct, helper, ct_info.family,
+ key->ip.proto, ct_info.nat, &ct_info.helper);
+ if (err) {
+ OVS_NLERR(log, "Failed to add %s helper %d", helper, err);
goto err_free_ct;
+ }
}
err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
@@ -1727,7 +1435,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
@@ -1894,8 +1603,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1912,13 +1620,13 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
+ struct hlist_node *next;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
+ hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
kfree_rcu(ct_limit, rcu);
}
kfree(info->limits);
@@ -1929,7 +1637,7 @@ static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *skb;
skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1980,7 +1688,8 @@ static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit,
} else {
struct ovs_ct_limit *ct_limit;
- ct_limit = kmalloc(sizeof(*ct_limit), GFP_KERNEL);
+ ct_limit = kmalloc(sizeof(*ct_limit),
+ GFP_KERNEL_ACCOUNT);
if (!ct_limit)
return -ENOMEM;
@@ -2061,8 +1770,8 @@ static int __ovs_ct_limit_get_zone_limit(struct net *net,
zone_limit.limit = limit;
nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
- zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
- &ct_zone);
+ zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
+ &conncount_key);
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
}
@@ -2250,14 +1959,16 @@ exit_err:
static const struct genl_small_ops ct_limit_genl_ops[] = {
{ .cmd = OVS_CT_LIMIT_CMD_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_ct_limit_cmd_set,
},
{ .cmd = OVS_CT_LIMIT_CMD_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_ct_limit_cmd_del,
},
{ .cmd = OVS_CT_LIMIT_CMD_GET,
@@ -2281,6 +1992,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = ct_limit_genl_ops,
.n_small_ops = ARRAY_SIZE(ct_limit_genl_ops),
+ .resv_start_op = OVS_CT_LIMIT_CMD_GET + 1,
.mcgrps = &ovs_ct_limit_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,