28 files changed, 13493 insertions, 3425 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 27ee56b688a3..e6aaee92dba4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -1,10 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Open vSwitch
 #
 
 config OPENVSWITCH
 	tristate "Open vSwitch"
-	---help---
+	depends on INET
+	depends on !NF_CONNTRACK || \
+		   (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
+				     (!NF_NAT || NF_NAT) && \
+				     (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
+	depends on PSAMPLE || !PSAMPLE
+	select MPLS
+	select NET_CRC32C
+	select NET_MPLS_GSO
+	select DST_CACHE
+	select NET_NSH
+	select NF_CONNTRACK_OVS if NF_CONNTRACK
+	select NF_NAT_OVS if NF_NAT
+	help
 	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
 	  environments.  In addition to supporting a variety of features
 	  expected in a traditional hardware switch, it enables fine-grained
@@ -28,15 +42,36 @@ config OPENVSWITCH
 	  If unsure, say N.
 
 config OPENVSWITCH_GRE
-	bool "Open vSwitch GRE tunneling support"
-	depends on INET
+	tristate "Open vSwitch GRE tunneling support"
 	depends on OPENVSWITCH
-	depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)
-	default y
-	---help---
+	depends on NET_IPGRE
+	default OPENVSWITCH
+	help
 	  If you say Y here, then the Open vSwitch will be able create GRE
 	  vport.
 
 	  Say N to exclude this support and reduce the binary size.
 
 	  If unsure, say Y.
+
+config OPENVSWITCH_VXLAN
+	tristate "Open vSwitch VXLAN tunneling support"
+	depends on OPENVSWITCH
+	depends on VXLAN
+	default OPENVSWITCH
+	help
+	  If you say Y here, then the Open vSwitch will be able create vxlan vport.
+
+	  Say N to exclude this support and reduce the binary size.
+
+	  If unsure, say Y.
+
+config OPENVSWITCH_GENEVE
+	tristate "Open vSwitch Geneve tunneling support"
+	depends on OPENVSWITCH
+	depends on GENEVE
+	default OPENVSWITCH
+	help
+	  If you say Y here, then the Open vSwitch will be able create geneve vport.
+
+	  Say N to exclude this support and reduce the binary size.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 01bddb2991e3..28982630bef3 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Open vSwitch.
 #
@@ -9,7 +10,20 @@ openvswitch-y := \
 	datapath.o \
 	dp_notify.o \
 	flow.o \
+	flow_netlink.o \
+	flow_table.o \
+	meter.o \
+	openvswitch_trace.o \
 	vport.o \
-	vport-gre.o \
 	vport-internal_dev.o \
 	vport-netdev.o
+
+ifneq ($(CONFIG_NF_CONNTRACK),)
+openvswitch-y += conntrack.o
+endif
+
+obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
+obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
+obj-$(CONFIG_OPENVSWITCH_GRE)	+= vport-gre.o
+
+CFLAGS_openvswitch_trace.o = -I$(src)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 22c5f399f1cf..792ca44a461d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2017 Nicira, Inc.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -22,148 +9,348 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/openvswitch.h>
+#include <linux/sctp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/in6.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
+
+#include <net/dst.h>
+#include <net/gso.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/ip6_fib.h>
 #include <net/checksum.h>
 #include <net/dsfield.h>
+#include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
+#include <net/sctp/checksum.h>
 
 #include "datapath.h"
+#include "drop.h"
+#include "flow.h"
+#include "conntrack.h"
 #include "vport.h"
+#include "flow_netlink.h"
+#include "openvswitch_trace.h"
+
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
+
+/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
+ * space. Return NULL if out of key spaces.
+ */
+static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
+{
+	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
+	struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
+	int level = ovs_pcpu->exec_level;
+	struct sw_flow_key *key = NULL;
+
+	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+		key = &keys->key[level - 1];
+		*key = *key_;
+	}
+
+	return key;
+}
+
+static void action_fifo_init(struct action_fifo *fifo)
+{
+	fifo->head = 0;
+	fifo->tail = 0;
+}
+
+static bool action_fifo_is_empty(const struct action_fifo *fifo)
+{
+	return (fifo->head == fifo->tail);
+}
+
+static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
+{
+	if (action_fifo_is_empty(fifo))
+		return NULL;
+
+	return &fifo->fifo[fifo->tail++];
+}
+
+static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
+{
+	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
+		return NULL;
+
+	return &fifo->fifo[fifo->head++];
+}
+
+/* Return true if fifo is not full */
+static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
+				    const struct sw_flow_key *key,
+				    const struct nlattr *actions,
+				    const int actions_len)
+{
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
+	struct deferred_action *da;
+
+	da = action_fifo_put(fifo);
+	if (da) {
+		da->skb = skb;
+		da->actions = actions;
+		da->actions_len = actions_len;
+		da->pkt_key = *key;
+	}
+
+	return da;
+}
+
+static void invalidate_flow_key(struct sw_flow_key *key)
+{
+	key->mac_proto |= SW_FLOW_KEY_INVALID;
+}
+
+static bool is_flow_key_valid(const struct sw_flow_key *key)
+{
+	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
+}
+
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+			 struct sw_flow_key *key,
+			 u32 recirc_id,
+			 const struct nlattr *actions, int len,
+			 bool last, bool clone_flow_key);
 
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			const struct nlattr *attr, int len, bool keep_skb);
+			      struct sw_flow_key *key,
+			      const struct nlattr *attr, int len);
 
-static int make_writable(struct sk_buff *skb, int write_len)
+static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+		     __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
 {
-	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
-		return 0;
+	int err;
+
+	err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
+	if (err)
+		return err;
+
+	if (!mac_len)
+		key->mac_proto = MAC_PROTO_NONE;
 
-	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+	invalidate_flow_key(key);
+	return 0;
 }
 
-/* remove VLAN header from packet and update csum accordingly. */
-static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+		    const __be16 ethertype)
 {
-	struct vlan_hdr *vhdr;
 	int err;
 
-	err = make_writable(skb, VLAN_ETH_HLEN);
-	if (unlikely(err))
+	err = skb_mpls_pop(skb, ethertype, skb->mac_len,
+			   ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
+	if (err)
 		return err;
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
-					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+	if (ethertype == htons(ETH_P_TEB))
+		key->mac_proto = MAC_PROTO_ETHERNET;
 
-	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
-	*current_tci = vhdr->h_vlan_TCI;
+	invalidate_flow_key(key);
+	return 0;
+}
 
-	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
-	__skb_pull(skb, VLAN_HLEN);
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const __be32 *mpls_lse, const __be32 *mask)
+{
+	struct mpls_shim_hdr *stack;
+	__be32 lse;
+	int err;
 
-	vlan_set_encap_proto(skb, vhdr);
-	skb->mac_header += VLAN_HLEN;
-	skb_reset_mac_len(skb);
+	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
+		return -ENOMEM;
+
+	stack = mpls_hdr(skb);
+	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
+	err = skb_mpls_update_lse(skb, lse);
+	if (err)
+		return err;
 
+	flow_key->mpls.lse[0] = lse;
 	return 0;
 }
 
-static int pop_vlan(struct sk_buff *skb)
+static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	__be16 tci;
 	int err;
 
-	if (likely(vlan_tx_tag_present(skb))) {
-		skb->vlan_tci = 0;
+	err = skb_vlan_pop(skb);
+	if (skb_vlan_tag_present(skb)) {
+		invalidate_flow_key(key);
 	} else {
-		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
-			     skb->len < VLAN_ETH_HLEN))
-			return 0;
+		key->eth.vlan.tci = 0;
+		key->eth.vlan.tpid = 0;
+	}
+	return err;
+}
 
-		err = __pop_vlan_tci(skb, &tci);
-		if (err)
-			return err;
+static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
+		     const struct ovs_action_push_vlan *vlan)
+{
+	int err;
+
+	if (skb_vlan_tag_present(skb)) {
+		invalidate_flow_key(key);
+	} else {
+		key->eth.vlan.tci = vlan->vlan_tci;
+		key->eth.vlan.tpid = vlan->vlan_tpid;
 	}
-	/* move next vlan tag to hw accel tag */
-	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
-		   skb->len < VLAN_ETH_HLEN))
-		return 0;
+	err = skb_vlan_push(skb, vlan->vlan_tpid,
+			    ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+	skb_reset_mac_len(skb);
+	return err;
+}
+
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+	u16 *dst = (u16 *)dst_;
+	const u16 *src = (const u16 *)src_;
+	const u16 *mask = (const u16 *)mask_;
+
+	OVS_SET_MASKED(dst[0], src[0], mask[0]);
+	OVS_SET_MASKED(dst[1], src[1], mask[1]);
+	OVS_SET_MASKED(dst[2], src[2], mask[2]);
+}
+
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
+			const struct ovs_key_ethernet *key,
+			const struct ovs_key_ethernet *mask)
+{
+	int err;
 
-	err = __pop_vlan_tci(skb, &tci);
+	err = skb_ensure_writable(skb, ETH_HLEN);
 	if (unlikely(err))
 		return err;
 
-	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+
+	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+			       mask->eth_src);
+	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+			       mask->eth_dst);
+
+	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+
+	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
 	return 0;
 }
 
-static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+/* pop_eth does not support VLAN packets as this action is never called
+ * for them.
+ */
+static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	if (unlikely(vlan_tx_tag_present(skb))) {
-		u16 current_tag;
+	int err;
 
-		/* push down current VLAN tag */
-		current_tag = vlan_tx_tag_get(skb);
+	err = skb_eth_pop(skb);
+	if (err)
+		return err;
 
-		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
-			return -ENOMEM;
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
 
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_action_push_eth *ethh)
+{
+	int err;
 
-	}
-	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+	err = skb_eth_push(skb, ethh->addresses.eth_dst,
+			   ethh->addresses.eth_src);
+	if (err)
+		return err;
+
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_ETHERNET;
+	invalidate_flow_key(key);
 	return 0;
 }
 
-static int set_eth_addr(struct sk_buff *skb,
-			const struct ovs_key_ethernet *eth_key)
+static noinline_for_stack int push_nsh(struct sk_buff *skb,
+				       struct sw_flow_key *key,
+				       const struct nlattr *a)
 {
+	u8 buffer[NSH_HDR_MAX_LEN];
+	struct nshhdr *nh = (struct nshhdr *)buffer;
 	int err;
-	err = make_writable(skb, ETH_HLEN);
-	if (unlikely(err))
+
+	err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
+	if (err)
 		return err;
 
-	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+	err = nsh_push(skb, nh);
+	if (err)
+		return err;
+
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
 
-	memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
-	memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int err;
 
-	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+	err = nsh_pop(skb);
+	if (err)
+		return err;
 
+	/* safe right before invalidate_flow_key */
+	if (skb->protocol == htons(ETH_P_TEB))
+		key->mac_proto = MAC_PROTO_ETHERNET;
+	else
+		key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
 	return 0;
 }
 
-static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
-				__be32 *addr, __be32 new_addr)
+static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
+				  __be32 addr, __be32 new_addr)
 {
 	int transport_len = skb->len - skb_transport_offset(skb);
 
+	if (nh->frag_off & htons(IP_OFFSET))
+		return;
+
 	if (nh->protocol == IPPROTO_TCP) {
 		if (likely(transport_len >= sizeof(struct tcphdr)))
 			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
-						 *addr, new_addr, 1);
+						 addr, new_addr, true);
 	} else if (nh->protocol == IPPROTO_UDP) {
 		if (likely(transport_len >= sizeof(struct udphdr))) {
 			struct udphdr *uh = udp_hdr(skb);
 
 			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 				inet_proto_csum_replace4(&uh->check, skb,
-							 *addr, new_addr, 1);
+							 addr, new_addr, true);
 				if (!uh->check)
 					uh->check = CSUM_MANGLED_0;
 			}
 		}
 	}
+}
 
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+			__be32 *addr, __be32 new_addr)
+{
+	update_ip_l4_checksum(skb, nh, *addr, new_addr);
 	csum_replace4(&nh->check, *addr, new_addr);
-	skb->rxhash = 0;
+	skb_clear_hash(skb);
+	ovs_ct_clear(skb, NULL);
 	*addr = new_addr;
 }
 
@@ -172,24 +359,37 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 {
 	int transport_len = skb->len - skb_transport_offset(skb);
 
-	if (l4_proto == IPPROTO_TCP) {
+	if (l4_proto == NEXTHDR_TCP) {
 		if (likely(transport_len >= sizeof(struct tcphdr)))
 			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
-						  addr, new_addr, 1);
-	} else if (l4_proto == IPPROTO_UDP) {
+						  addr, new_addr, true);
+	} else if (l4_proto == NEXTHDR_UDP) {
 		if (likely(transport_len >= sizeof(struct udphdr))) {
 			struct udphdr *uh = udp_hdr(skb);
 
 			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 				inet_proto_csum_replace16(&uh->check, skb,
-							  addr, new_addr, 1);
+							  addr, new_addr, true);
 				if (!uh->check)
 					uh->check = CSUM_MANGLED_0;
 			}
 		}
+	} else if (l4_proto == NEXTHDR_ICMP) {
+		if (likely(transport_len >= sizeof(struct icmp6hdr)))
+			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
+						  skb, addr, new_addr, true);
 	}
 }
 
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+			   const __be32 mask[4], __be32 masked[4])
+{
+	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
+	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
+	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
+	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
+}
+
 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 			  __be32 addr[4], const __be32 new_addr[4],
 			  bool recalculate_csum)
@@ -197,282 +397,860 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 	if (recalculate_csum)
 		update_ipv6_checksum(skb, l4_proto, addr, new_addr);
 
-	skb->rxhash = 0;
+	skb_clear_hash(skb);
+	ovs_ct_clear(skb, NULL);
 	memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
 {
-	nh->priority = tc >> 4;
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+	u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+	ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+			     (__force __wsum)(ipv6_tclass << 12));
+
+	ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
 {
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
-	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
-	nh->flow_lbl[2] = fl & 0x000000FF;
+	u32 ofl;
+
+	ofl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  nh->flow_lbl[2];
+	fl = OVS_MASKED(ofl, fl, mask);
+
+	/* Bits 21-24 are always unmasked, so this retains their values. */
+	nh->flow_lbl[0] = (u8)(fl >> 16);
+	nh->flow_lbl[1] = (u8)(fl >> 8);
+	nh->flow_lbl[2] = (u8)fl;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+	new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+			     (__force __wsum)(new_ttl << 8));
+	nh->hop_limit = new_ttl;
 }
 
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+		       u8 mask)
 {
+	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);
+
 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 	nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv4 *key,
+		    const struct ovs_key_ipv4 *mask)
 {
 	struct iphdr *nh;
+	__be32 new_addr;
 	int err;
 
-	err = make_writable(skb, skb_network_offset(skb) +
-				 sizeof(struct iphdr));
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				  sizeof(struct iphdr));
 	if (unlikely(err))
 		return err;
 
 	nh = ip_hdr(skb);
 
-	if (ipv4_key->ipv4_src != nh->saddr)
-		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
-
-	if (ipv4_key->ipv4_dst != nh->daddr)
-		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (mask->ipv4_src) {
+		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 
-	if (ipv4_key->ipv4_tos != nh->tos)
-		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+		if (unlikely(new_addr != nh->saddr)) {
+			set_ip_addr(skb, nh, &nh->saddr, new_addr);
+			flow_key->ipv4.addr.src = new_addr;
+		}
+	}
+	if (mask->ipv4_dst) {
+		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 
-	if (ipv4_key->ipv4_ttl != nh->ttl)
-		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+		if (unlikely(new_addr != nh->daddr)) {
+			set_ip_addr(skb, nh, &nh->daddr, new_addr);
+			flow_key->ipv4.addr.dst = new_addr;
+		}
+	}
+	if (mask->ipv4_tos) {
+		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
+		flow_key->ip.tos = nh->tos;
+	}
+	if (mask->ipv4_ttl) {
+		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+		flow_key->ip.ttl = nh->ttl;
+	}
 
 	return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv6 *key,
+		    const struct ovs_key_ipv6 *mask)
 {
 	struct ipv6hdr *nh;
 	int err;
-	__be32 *saddr;
-	__be32 *daddr;
 
-	err = make_writable(skb, skb_network_offset(skb) +
-			    sizeof(struct ipv6hdr));
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				  sizeof(struct ipv6hdr));
 	if (unlikely(err))
 		return err;
 
 	nh = ipv6_hdr(skb);
-	saddr = (__be32 *)&nh->saddr;
-	daddr = (__be32 *)&nh->daddr;
-
-	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
-			      ipv6_key->ipv6_src, true);
 
-	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+		__be32 *saddr = (__be32 *)&nh->saddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
+			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
+				      true);
+			memcpy(&flow_key->ipv6.addr.src, masked,
+			       sizeof(flow_key->ipv6.addr.src));
+		}
+	}
+	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 		unsigned int offset = 0;
 		int flags = IP6_FH_F_SKIP_RH;
 		bool recalc_csum = true;
-
-		if (ipv6_ext_hdr(nh->nexthdr))
-			recalc_csum = ipv6_find_hdr(skb, &offset,
-						    NEXTHDR_ROUTING, NULL,
-						    &flags) != NEXTHDR_ROUTING;
-
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
-			      ipv6_key->ipv6_dst, recalc_csum);
+		__be32 *daddr = (__be32 *)&nh->daddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
+
+		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
+			if (ipv6_ext_hdr(nh->nexthdr))
+				recalc_csum = (ipv6_find_hdr(skb, &offset,
+							     NEXTHDR_ROUTING,
+							     NULL, &flags)
+					       != NEXTHDR_ROUTING);
+
+			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
+				      recalc_csum);
+			memcpy(&flow_key->ipv6.addr.dst, masked,
+			       sizeof(flow_key->ipv6.addr.dst));
+		}
+	}
+	if (mask->ipv6_tclass) {
+		set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
+		flow_key->ip.tos = ipv6_get_dsfield(nh);
+	}
+	if (mask->ipv6_label) {
+		set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
+			    ntohl(mask->ipv6_label));
+		flow_key->ipv6.label =
+		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	}
+	if (mask->ipv6_hlimit) {
+		set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
+		flow_key->ip.ttl = nh->hop_limit;
 	}
-
-	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
-	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-	nh->hop_limit = ipv6_key->ipv6_hlimit;
-
 	return 0;
 }
 
-/* Must follow make_writable() since that can move the skb data. */
+/* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
-			 __be16 new_port, __sum16 *check)
+			__be16 new_port, __sum16 *check)
 {
-	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+	ovs_ct_clear(skb, NULL);
+	inet_proto_csum_replace2(check, skb, *port, new_port, false);
 	*port = new_port;
-	skb->rxhash = 0;
 }
 
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_udp *key,
+		   const struct ovs_key_udp *mask)
 {
-	struct udphdr *uh = udp_hdr(skb);
+	struct udphdr *uh;
+	__be16 src, dst;
+	int err;
+
+	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
+				  sizeof(struct udphdr));
+	if (unlikely(err))
+		return err;
+
+	uh = udp_hdr(skb);
+	/* Either of the masks is non-zero, so do not bother checking them. */
+	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
+	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 
 	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
-		set_tp_port(skb, port, new_port, &uh->check);
+		if (likely(src != uh->source)) {
+			set_tp_port(skb, &uh->source, src, &uh->check);
+			flow_key->tp.src = src;
+		}
+		if (likely(dst != uh->dest)) {
+			set_tp_port(skb, &uh->dest, dst, &uh->check);
+			flow_key->tp.dst = dst;
+		}
 
-		if (!uh->check)
+		if (unlikely(!uh->check))
 			uh->check = CSUM_MANGLED_0;
 	} else {
-		*port = new_port;
-		skb->rxhash = 0;
+		uh->source = src;
+		uh->dest = dst;
+		flow_key->tp.src = src;
+		flow_key->tp.dst = dst;
+		ovs_ct_clear(skb, NULL);
 	}
+
+	skb_clear_hash(skb);
+
+	return 0;
 }
 
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_tcp *key,
+		   const struct ovs_key_tcp *mask)
 {
-	struct udphdr *uh;
+	struct tcphdr *th;
+	__be16 src, dst;
 	int err;
 
-	err = make_writable(skb, skb_transport_offset(skb) +
-				 sizeof(struct udphdr));
+	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
+				  sizeof(struct tcphdr));
 	if (unlikely(err))
 		return err;
 
-	uh = udp_hdr(skb);
-	if (udp_port_key->udp_src != uh->source)
-		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
-
-	if (udp_port_key->udp_dst != uh->dest)
-		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+	th = tcp_hdr(skb);
+	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
+	if (likely(src != th->source)) {
+		set_tp_port(skb, &th->source, src, &th->check);
+		flow_key->tp.src = src;
+	}
+	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+	if (likely(dst != th->dest)) {
+		set_tp_port(skb, &th->dest, dst, &th->check);
+		flow_key->tp.dst = dst;
+	}
+	skb_clear_hash(skb);
 
 	return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_sctp *key,
+		    const struct ovs_key_sctp *mask)
 {
-	struct tcphdr *th;
+	unsigned int sctphoff = skb_transport_offset(skb);
+	struct sctphdr *sh;
+	__le32 old_correct_csum, new_csum, old_csum;
 	int err;
 
-	err = make_writable(skb, skb_transport_offset(skb) +
-				 sizeof(struct tcphdr));
+	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
 	if (unlikely(err))
 		return err;
 
-	th = tcp_hdr(skb);
-	if (tcp_port_key->tcp_src != th->source)
-		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+	sh = sctp_hdr(skb);
+	old_csum = sh->checksum;
+	old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 
-	if (tcp_port_key->tcp_dst != th->dest)
-		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
+	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
+
+	new_csum = sctp_compute_cksum(skb, sctphoff);
+
+	/* Carry any checksum errors through. */
+	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+	skb_clear_hash(skb);
+	ovs_ct_clear(skb, NULL);
+
+	flow_key->tp.src = sh->source;
+	flow_key->tp.dst = sh->dest;
 
 	return 0;
 }
 
-static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+static int ovs_vport_output(struct net *net, struct sock *sk,
+			    struct sk_buff *skb)
 {
-	struct vport *vport;
+	struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
+	struct vport *vport = data->vport;
 
-	if (unlikely(!skb))
+	if (skb_cow_head(skb, data->l2_len) < 0) {
+		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
 		return -ENOMEM;
+	}
 
-	vport = ovs_vport_rcu(dp, out_port);
-	if (unlikely(!vport)) {
-		kfree_skb(skb);
-		return -ENODEV;
+	__skb_dst_copy(skb, data->dst);
+	*OVS_CB(skb) = data->cb;
+	skb->inner_protocol = data->inner_protocol;
+	if (data->vlan_tci & VLAN_CFI_MASK)
+		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
+	else
+		__vlan_hwaccel_clear_tag(skb);
+
+	/* Reconstruct the MAC header.  */
+	skb_push(skb, data->l2_len);
+	memcpy(skb->data, &data->l2_data, data->l2_len);
+	skb_postpush_rcsum(skb, skb->data, data->l2_len);
+	skb_reset_mac_header(skb);
+
+	if (eth_p_mpls(skb->protocol)) {
+		skb->inner_network_header = skb->network_header;
+		skb_set_network_header(skb, data->network_offset);
+		skb_reset_mac_len(skb);
 	}
 
-	ovs_vport_send(vport, skb);
+	ovs_vport_send(vport, skb, data->mac_proto);
 	return 0;
 }
 
+static unsigned int
+ovs_dst_get_mtu(const struct dst_entry *dst)
+{
+	return dst->dev->mtu;
+}
+
+static struct dst_ops ovs_dst_ops = {
+	.family = AF_UNSPEC,
+	.mtu = ovs_dst_get_mtu,
+};
+
+/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
+ * ovs_vport_output(), which is called once per fragmented packet.
+ */
+static void prepare_frag(struct vport *vport, struct sk_buff *skb,
+			 u16 orig_network_offset, u8 mac_proto)
+{
+	unsigned int hlen = skb_network_offset(skb);
+	struct ovs_frag_data *data;
+
+	data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
+	data->dst = skb->_skb_refdst;
+	data->vport = vport;
+	data->cb = *OVS_CB(skb);
+	data->inner_protocol = skb->inner_protocol;
+	data->network_offset = orig_network_offset;
+	if (skb_vlan_tag_present(skb))
+		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
+	else
+		data->vlan_tci = 0;
+	data->vlan_proto = skb->vlan_proto;
+	data->mac_proto = mac_proto;
+	data->l2_len = hlen;
+	memcpy(&data->l2_data, skb->data, hlen);
+
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	skb_pull(skb, hlen);
+}
+
+static void ovs_fragment(struct net *net, struct vport *vport,
+			 struct sk_buff *skb, u16 mru,
+			 struct sw_flow_key *key)
+{
+	enum ovs_drop_reason reason;
+	u16 orig_network_offset = 0;
+
+	if (eth_p_mpls(skb->protocol)) {
+		orig_network_offset = skb_network_offset(skb);
+		skb->network_header = skb->inner_network_header;
+	}
+
+	if (skb_network_offset(skb) > MAX_L2_LEN) {
+		OVS_NLERR(1, "L2 header too long to fragment");
+		reason = OVS_DROP_FRAG_L2_TOO_LONG;
+		goto err;
+	}
+
+	if (key->eth.type == htons(ETH_P_IP)) {
+		struct rtable ovs_rt = { 0 };
+		unsigned long orig_dst;
+
+		prepare_frag(vport, skb, orig_network_offset,
+			     ovs_key_mac_proto(key));
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
+			 DST_OBSOLETE_NONE, DST_NOCOUNT);
+		ovs_rt.dst.dev = vport->dev;
+
+		orig_dst = skb->_skb_refdst;
+		skb_dst_set_noref(skb, &ovs_rt.dst);
+		IPCB(skb)->frag_max_size = mru;
+
+		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
+		refdst_drop(orig_dst);
+	} else if (key->eth.type == htons(ETH_P_IPV6)) {
+		unsigned long orig_dst;
+		struct rt6_info ovs_rt;
+
+		prepare_frag(vport, skb, orig_network_offset,
+			     ovs_key_mac_proto(key));
+		memset(&ovs_rt, 0, sizeof(ovs_rt));
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
+			 DST_OBSOLETE_NONE, DST_NOCOUNT);
+		ovs_rt.dst.dev = vport->dev;
+
+		orig_dst = skb->_skb_refdst;
+		skb_dst_set_noref(skb, &ovs_rt.dst);
+		IP6CB(skb)->frag_max_size = mru;
+
+		ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
+		refdst_drop(orig_dst);
+	} else {
+		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
+			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
+			  vport->dev->mtu);
+		reason = OVS_DROP_FRAG_INVALID_PROTO;
+		goto err;
+	}
+
+	return;
+err:
+	ovs_kfree_skb_reason(skb, reason);
+}
+
+static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
+		      struct sw_flow_key *key)
+{
+	struct vport *vport = ovs_vport_rcu(dp, out_port);
+
+	if (likely(vport &&
+		   netif_running(vport->dev) &&
+		   netif_carrier_ok(vport->dev))) {
+		u16 mru = OVS_CB(skb)->mru;
+		u32 cutlen = OVS_CB(skb)->cutlen;
+
+		if (unlikely(cutlen > 0)) {
+			if (skb->len - cutlen > ovs_mac_header_len(key))
+				pskb_trim(skb, skb->len - cutlen);
+			else
+				pskb_trim(skb, ovs_mac_header_len(key));
+		}
+
+		if (likely(!mru ||
+		           (skb->len <= mru + vport->dev->hard_header_len))) {
+			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
+		} else if (mru <= vport->dev->mtu) {
+			struct net *net = read_pnet(&dp->net);
+
+			ovs_fragment(net, vport, skb, mru, key);
+		} else {
+			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
+		}
+	} else {
+		kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
+	}
+}
+
 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
-			    const struct nlattr *attr)
+			    struct sw_flow_key *key, const struct nlattr *attr,
+			    const struct nlattr *actions, int actions_len,
+			    uint32_t cutlen)
 {
 	struct dp_upcall_info upcall;
 	const struct nlattr *a;
 	int rem;
 
+	memset(&upcall, 0, sizeof(upcall));
 	upcall.cmd = OVS_PACKET_CMD_ACTION;
-	upcall.key = &OVS_CB(skb)->flow->key;
-	upcall.userdata = NULL;
-	upcall.portid = 0;
+	upcall.mru = OVS_CB(skb)->mru;
 
-	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
-		 a = nla_next(a, &rem)) {
+	nla_for_each_nested(a, attr, rem) {
 		switch (nla_type(a)) {
 		case OVS_USERSPACE_ATTR_USERDATA:
 			upcall.userdata = a;
 			break;
 
 		case OVS_USERSPACE_ATTR_PID:
-			upcall.portid = nla_get_u32(a);
+			if (OVS_CB(skb)->upcall_pid)
+				upcall.portid = OVS_CB(skb)->upcall_pid;
+			else if (dp->user_features &
+				 OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+				upcall.portid =
+				  ovs_dp_get_upcall_portid(dp,
+							   smp_processor_id());
+			else
+				upcall.portid = nla_get_u32(a);
+			break;
+
+		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
+			/* Get out tunnel info. */
+			struct vport *vport;
+
+			vport = ovs_vport_rcu(dp, nla_get_u32(a));
+			if (vport) {
+				int err;
+
+				err = dev_fill_metadata_dst(vport->dev, skb);
+				if (!err)
+					upcall.egress_tun_info = skb_tunnel_info(skb);
+			}
+
 			break;
 		}
+
+		case OVS_USERSPACE_ATTR_ACTIONS: {
+			/* Include actions. */
+			upcall.actions = actions;
+			upcall.actions_len = actions_len;
+			break;
+		}
+
+		} /* End of switch. */
 	}
 
-	return ovs_dp_upcall(dp, skb, &upcall);
+	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
+}
+
+static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
+				     struct sw_flow_key *key,
+				     const struct nlattr *attr)
+{
+	/* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
+	struct nlattr *actions = nla_data(attr);
+
+	if (nla_len(actions))
+		return clone_execute(dp, skb, key, 0, nla_data(actions),
+				     nla_len(actions), true, false);
+
+	ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
+	return 0;
 }
 
+/* When 'last' is true, sample() should always consume the 'skb'.
+ * Otherwise, sample() should keep 'skb' intact regardless what
+ * actions are executed within sample().
+ */
 static int sample(struct datapath *dp, struct sk_buff *skb,
-		  const struct nlattr *attr)
+		  struct sw_flow_key *key, const struct nlattr *attr,
+		  bool last)
 {
-	const struct nlattr *acts_list = NULL;
-	const struct nlattr *a;
-	int rem;
+	struct nlattr *actions;
+	struct nlattr *sample_arg;
+	int rem = nla_len(attr);
+	const struct sample_arg *arg;
+	u32 init_probability;
+	bool clone_flow_key;
+	int err;
 
-	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
-		 a = nla_next(a, &rem)) {
-		switch (nla_type(a)) {
-		case OVS_SAMPLE_ATTR_PROBABILITY:
-			if (net_random() >= nla_get_u32(a))
-				return 0;
-			break;
+	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
+	sample_arg = nla_data(attr);
+	arg = nla_data(sample_arg);
+	actions = nla_next(sample_arg, &rem);
+	init_probability = OVS_CB(skb)->probability;
 
-		case OVS_SAMPLE_ATTR_ACTIONS:
-			acts_list = a;
-			break;
-		}
+	if ((arg->probability != U32_MAX) &&
+	    (!arg->probability || get_random_u32() > arg->probability)) {
+		if (last)
+			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
+		return 0;
+	}
+
+	OVS_CB(skb)->probability = arg->probability;
+
+	clone_flow_key = !arg->exec;
+	err = clone_execute(dp, skb, key, 0, actions, rem, last,
+			    clone_flow_key);
+
+	if (!last)
+		OVS_CB(skb)->probability = init_probability;
+
+	return err;
+}
+
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *attr,
+		 bool last)
+{
+	struct nlattr *actions;
+	struct nlattr *clone_arg;
+	int rem = nla_len(attr);
+	bool dont_clone_flow_key;
+
+	/* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
+	clone_arg = nla_data(attr);
+	dont_clone_flow_key = nla_get_u32(clone_arg);
+	actions = nla_next(clone_arg, &rem);
+
+	return clone_execute(dp, skb, key, 0, actions, rem, last,
+			     !dont_clone_flow_key);
+}
+
+static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
+			 const struct nlattr *attr)
+{
+	struct ovs_action_hash *hash_act = nla_data(attr);
+	u32 hash = 0;
+
+	if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+		/* OVS_HASH_ALG_L4 hasing type. */
+		hash = skb_get_hash(skb);
+	} else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
+		/* OVS_HASH_ALG_SYM_L4 hashing type.  NOTE: this doesn't
+		 * extend past an encapsulated header.
+		 */
+		hash = __skb_get_hash_symmetric(skb);
 	}
 
-	return do_execute_actions(dp, skb, nla_data(acts_list),
-						 nla_len(acts_list), true);
+	hash = jhash_1word(hash, hash_act->hash_basis);
+	if (!hash)
+		hash = 0x1;
+
+	key->ovs_flow_hash = hash;
 }
 
 static int execute_set_action(struct sk_buff *skb,
-				 const struct nlattr *nested_attr)
+			      struct sw_flow_key *flow_key,
+			      const struct nlattr *a)
+{
+	/* Only tunnel set execution is supported without a mask. */
+	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+		struct ovs_tunnel_info *tun = nla_data(a);
+
+		skb_dst_drop(skb);
+		dst_hold((struct dst_entry *)tun->tun_dst);
+		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+				     struct sw_flow_key *flow_key,
+				     const struct nlattr *a)
 {
 	int err = 0;
 
-	switch (nla_type(nested_attr)) {
+	switch (nla_type(a)) {
 	case OVS_KEY_ATTR_PRIORITY:
-		skb->priority = nla_get_u32(nested_attr);
+		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
+			       *get_mask(a, u32 *));
+		flow_key->phy.priority = skb->priority;
 		break;
 
 	case OVS_KEY_ATTR_SKB_MARK:
-		skb->mark = nla_get_u32(nested_attr);
+		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+		flow_key->phy.skb_mark = skb->mark;
 		break;
 
-	case OVS_KEY_ATTR_IPV4_TUNNEL:
-		OVS_CB(skb)->tun_key = nla_data(nested_attr);
+	case OVS_KEY_ATTR_TUNNEL_INFO:
+		/* Masked data not supported for tunnel. */
+		err = -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
-		err = set_eth_addr(skb, nla_data(nested_attr));
+		err = set_eth_addr(skb, flow_key, nla_data(a),
+				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
 	case OVS_KEY_ATTR_IPV4:
-		err = set_ipv4(skb, nla_data(nested_attr));
+		err = set_ipv4(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv4 *));
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
-		err = set_ipv6(skb, nla_data(nested_attr));
+		err = set_ipv6(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv6 *));
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp(skb, nla_data(nested_attr));
+		err = set_tcp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_tcp *));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp(skb, nla_data(nested_attr));
+		err = set_udp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_udp *));
+		break;
+
+	case OVS_KEY_ATTR_SCTP:
+		err = set_sctp(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_sctp *));
+		break;
+
+	case OVS_KEY_ATTR_MPLS:
+		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
+								    __be32 *));
+		break;
+
+	case OVS_KEY_ATTR_CT_STATE:
+	case OVS_KEY_ATTR_CT_ZONE:
+	case OVS_KEY_ATTR_CT_MARK:
+	case OVS_KEY_ATTR_CT_LABELS:
+	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
+	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
+	case OVS_KEY_ATTR_NSH:
+		err = -EINVAL;
 		break;
 	}
 
 	return err;
 }
 
+static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
+			  struct sw_flow_key *key,
+			  const struct nlattr *a, bool last)
+{
+	u32 recirc_id;
+
+	if (!is_flow_key_valid(key)) {
+		int err;
+
+		err = ovs_flow_key_update(skb, key);
+		if (err)
+			return err;
+	}
+	BUG_ON(!is_flow_key_valid(key));
+
+	recirc_id = nla_get_u32(a);
+	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
+}
+
+static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
+				 struct sw_flow_key *key,
+				 const struct nlattr *attr, bool last)
+{
+	struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
+	const struct nlattr *actions, *cpl_arg;
+	int len, max_len, rem = nla_len(attr);
+	const struct check_pkt_len_arg *arg;
+	bool clone_flow_key;
+
+	/* The first netlink attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
+	max_len = arg->pkt_len;
+
+	if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
+	    len <= max_len) {
+		/* Second netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		clone_flow_key = !arg->exec_for_lesser_equal;
+	} else {
+		/* Third netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		actions = nla_next(actions, &rem);
+		clone_flow_key = !arg->exec_for_greater;
+	}
+
+	return clone_execute(dp, skb, key, 0, nla_data(actions),
+			     nla_len(actions), last, clone_flow_key);
+}
+
+static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int err;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *nh;
+
+		err = skb_ensure_writable(skb, skb_network_offset(skb) +
+					  sizeof(*nh));
+		if (unlikely(err))
+			return err;
+
+		nh = ipv6_hdr(skb);
+
+		if (nh->hop_limit <= 1)
+			return -EHOSTUNREACH;
+
+		key->ip.ttl = --nh->hop_limit;
+	} else if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *nh;
+		u8 old_ttl;
+
+		err = skb_ensure_writable(skb, skb_network_offset(skb) +
+					  sizeof(*nh));
+		if (unlikely(err))
+			return err;
+
+		nh = ip_hdr(skb);
+		if (nh->ttl <= 1)
+			return -EHOSTUNREACH;
+
+		old_ttl = nh->ttl--;
+		csum_replace2(&nh->check, htons(old_ttl << 8),
+			      htons(nh->ttl << 8));
+		key->ip.ttl = nh->ttl;
+	}
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{
+	struct psample_group psample_group = {};
+	struct psample_metadata md = {};
+	const struct nlattr *a;
+	u32 rate;
+	int rem;
+
+	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+		switch (nla_type(a)) {
+		case OVS_PSAMPLE_ATTR_GROUP:
+			psample_group.group_num = nla_get_u32(a);
+			break;
+
+		case OVS_PSAMPLE_ATTR_COOKIE:
+			md.user_cookie = nla_data(a);
+			md.user_cookie_len = nla_len(a);
+			break;
+		}
+	}
+
+	psample_group.net = ovs_dp_get_net(dp);
+	md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+	md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+	md.rate_as_probability = 1;
+
+	rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+	psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{}
+#endif
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			const struct nlattr *attr, int len, bool keep_skb)
+			      struct sw_flow_key *key,
+			      const struct nlattr *attr, int len)
 {
-	/* Every output action needs a separate clone of 'skb', but the common
-	 * case is just a single output action, so that doing a clone and
-	 * then freeing the original skbuff is wasteful.  So the following code
-	 * is slightly obscure just to avoid that. */
-	int prev_port = -1;
 	const struct nlattr *a;
 	int rem;
 
@@ -480,61 +1258,340 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	     a = nla_next(a, &rem)) {
 		int err = 0;
 
-		if (prev_port != -1) {
-			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
-			prev_port = -1;
-		}
+		if (trace_ovs_do_execute_action_enabled())
+			trace_ovs_do_execute_action(dp, skb, key, a, rem);
 
+		/* Actions that rightfully have to consume the skb should do it
+		 * and return directly.
+		 */
 		switch (nla_type(a)) {
-		case OVS_ACTION_ATTR_OUTPUT:
-			prev_port = nla_get_u32(a);
+		case OVS_ACTION_ATTR_OUTPUT: {
+			int port = nla_get_u32(a);
+			struct sk_buff *clone;
+
+			/* Every output action needs a separate clone
+			 * of 'skb', In case the output action is the
+			 * last action, cloning can be avoided.
+			 */
+			if (nla_is_last(a, rem)) {
+				do_output(dp, skb, port, key);
+				/* 'skb' has been used for output.
+				 */
+				return 0;
+			}
+
+			clone = skb_clone(skb, GFP_ATOMIC);
+			if (clone)
+				do_output(dp, clone, port, key);
+			OVS_CB(skb)->cutlen = 0;
 			break;
+		}
+
+		case OVS_ACTION_ATTR_TRUNC: {
+			struct ovs_action_trunc *trunc = nla_data(a);
+
+			if (skb->len > trunc->max_len)
+				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
+			break;
+		}
 
 		case OVS_ACTION_ATTR_USERSPACE:
-			output_userspace(dp, skb, a);
+			output_userspace(dp, skb, key, a, attr,
+						     len, OVS_CB(skb)->cutlen);
+			OVS_CB(skb)->cutlen = 0;
+			if (nla_is_last(a, rem)) {
+				consume_skb(skb);
+				return 0;
+			}
+			break;
+
+		case OVS_ACTION_ATTR_HASH:
+			execute_hash(skb, key, a);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_MPLS: {
+			struct ovs_action_push_mpls *mpls = nla_data(a);
+
+			err = push_mpls(skb, key, mpls->mpls_lse,
+					mpls->mpls_ethertype, skb->mac_len);
+			break;
+		}
+		case OVS_ACTION_ATTR_ADD_MPLS: {
+			struct ovs_action_add_mpls *mpls = nla_data(a);
+			__u16 mac_len = 0;
+
+			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
+				mac_len = skb->mac_len;
+
+			err = push_mpls(skb, key, mpls->mpls_lse,
+					mpls->mpls_ethertype, mac_len);
+			break;
+		}
+		case OVS_ACTION_ATTR_POP_MPLS:
+			err = pop_mpls(skb, key, nla_get_be16(a));
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
-			err = push_vlan(skb, nla_data(a));
-			if (unlikely(err)) /* skb already freed. */
-				return err;
+			err = push_vlan(skb, key, nla_data(a));
 			break;
 
 		case OVS_ACTION_ATTR_POP_VLAN:
-			err = pop_vlan(skb);
+			err = pop_vlan(skb, key);
+			break;
+
+		case OVS_ACTION_ATTR_RECIRC: {
+			bool last = nla_is_last(a, rem);
+
+			err = execute_recirc(dp, skb, key, a, last);
+			if (last) {
+				/* If this is the last action, the skb has
+				 * been consumed or freed.
+				 * Return immediately.
+				 */
+				return err;
+			}
 			break;
+		}
 
 		case OVS_ACTION_ATTR_SET:
-			err = execute_set_action(skb, nla_data(a));
+			err = execute_set_action(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SET_MASKED:
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = execute_masked_set_action(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE: {
+			bool last = nla_is_last(a, rem);
+
+			err = sample(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
+
+		case OVS_ACTION_ATTR_CT:
+			if (!is_flow_key_valid(key)) {
+				err = ovs_flow_key_update(skb, key);
+				if (err)
+					return err;
+			}
+
+			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
+					     nla_data(a));
+
+			/* Hide stolen IP fragments from user space. */
+			if (err)
+				return err == -EINPROGRESS ? 0 : err;
+			break;
+
+		case OVS_ACTION_ATTR_CT_CLEAR:
+			err = ovs_ct_clear(skb, key);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			err = push_eth(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_ETH:
+			err = pop_eth(skb, key);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			err = push_nsh(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			err = pop_nsh(skb, key);
 			break;
 
-		case OVS_ACTION_ATTR_SAMPLE:
-			err = sample(dp, skb, a);
+		case OVS_ACTION_ATTR_METER:
+			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+				ovs_kfree_skb_reason(skb, OVS_DROP_METER);
+				return 0;
+			}
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = clone(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
+
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+			bool last = nla_is_last(a, rem);
+
+			err = execute_check_pkt_len(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = execute_dec_ttl(skb, key);
+			if (err == -EHOSTUNREACH)
+				return dec_ttl_exception_handler(dp, skb,
+								 key, a);
+			break;
+
+		case OVS_ACTION_ATTR_DROP: {
+			enum ovs_drop_reason reason = nla_get_u32(a)
+				? OVS_DROP_EXPLICIT_WITH_ERROR
+				: OVS_DROP_EXPLICIT;
+
+			ovs_kfree_skb_reason(skb, reason);
+			return 0;
+		}
+
+		case OVS_ACTION_ATTR_PSAMPLE:
+			execute_psample(dp, skb, a);
+			OVS_CB(skb)->cutlen = 0;
+			if (nla_is_last(a, rem)) {
+				consume_skb(skb);
+				return 0;
+			}
 			break;
 		}
 
 		if (unlikely(err)) {
-			kfree_skb(skb);
+			ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
 			return err;
 		}
 	}
 
-	if (prev_port != -1) {
-		if (keep_skb)
-			skb = skb_clone(skb, GFP_ATOMIC);
+	ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
+	return 0;
+}
 
-		do_output(dp, skb, prev_port);
-	} else if (!keep_skb)
-		consume_skb(skb);
+/* Execute the actions on the clone of the packet. The effect of the
+ * execution does not affect the original 'skb' nor the original 'key'.
+ *
+ * The execution may be deferred in case the actions can not be executed
+ * immediately.
+ */
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+			 struct sw_flow_key *key, u32 recirc_id,
+			 const struct nlattr *actions, int len,
+			 bool last, bool clone_flow_key)
+{
+	struct deferred_action *da;
+	struct sw_flow_key *clone;
+
+	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
+	if (!skb) {
+		/* Out of memory, skip this action.
+		 */
+		return 0;
+	}
+
+	/* When clone_flow_key is false, the 'key' will not be change
+	 * by the actions, then the 'key' can be used directly.
+	 * Otherwise, try to clone key from the next recursion level of
+	 * 'flow_keys'. If clone is successful, execute the actions
+	 * without deferring.
+	 */
+	clone = clone_flow_key ? clone_key(key) : key;
+	if (clone) {
+		int err = 0;
+		if (actions) { /* Sample action */
+			if (clone_flow_key)
+				__this_cpu_inc(ovs_pcpu_storage->exec_level);
+
+			err = do_execute_actions(dp, skb, clone,
+						 actions, len);
+
+			if (clone_flow_key)
+				__this_cpu_dec(ovs_pcpu_storage->exec_level);
+		} else { /* Recirc action */
+			clone->recirc_id = recirc_id;
+			ovs_dp_process_packet(skb, clone);
+		}
+		return err;
+	}
 
+	/* Out of 'flow_keys' space. Defer actions */
+	da = add_deferred_actions(skb, key, actions, len);
+	if (da) {
+		if (!actions) { /* Recirc action */
+			key = &da->pkt_key;
+			key->recirc_id = recirc_id;
+		}
+	} else {
+		/* Out of per CPU action FIFO space. Drop the 'skb' and
+		 * log an error.
+		 */
+		ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
+
+		if (net_ratelimit()) {
+			if (actions) { /* Sample action */
+				pr_warn("%s: deferred action limit reached, drop sample action\n",
+					ovs_dp_name(dp));
+			} else {  /* Recirc action */
+				pr_warn("%s: deferred action limit reached, drop recirc action (recirc_id=%#x)\n",
+					ovs_dp_name(dp), recirc_id);
+			}
+		}
+	}
 	return 0;
 }
 
+static void process_deferred_actions(struct datapath *dp)
+{
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
+
+	/* Do not touch the FIFO in case there is no deferred actions. */
+	if (action_fifo_is_empty(fifo))
+		return;
+
+	/* Finishing executing all deferred actions. */
+	do {
+		struct deferred_action *da = action_fifo_get(fifo);
+		struct sk_buff *skb = da->skb;
+		struct sw_flow_key *key = &da->pkt_key;
+		const struct nlattr *actions = da->actions;
+		int actions_len = da->actions_len;
+
+		if (actions)
+			do_execute_actions(dp, skb, key, actions, actions_len);
+		else
+			ovs_dp_process_packet(skb, key);
+	} while (!action_fifo_is_empty(fifo));
+
+	/* Reset FIFO for the next packet.  */
+	action_fifo_init(fifo);
+}
+
 /* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct sw_flow_actions *acts,
+			struct sw_flow_key *key)
 {
-	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+	int err, level;
+
+	level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
+	if (unlikely(level > OVS_RECURSION_LIMIT)) {
+		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
+				     ovs_dp_name(dp));
+		ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
+		err = -ENETDOWN;
+		goto out;
+	}
 
-	return do_execute_actions(dp, skb, acts->actions,
-					 acts->actions_len, false);
+	OVS_CB(skb)->acts_origlen = acts->orig_len;
+	err = do_execute_actions(dp, skb, key,
+				 acts->actions, acts->actions_len);
+
+	if (level == 1)
+		process_deferred_actions(dp);
+
+out:
+	__this_cpu_dec(ovs_pcpu_storage->exec_level);
+	return err;
 }
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
new file mode 100644
index 000000000000..a0811e1fba65
--- /dev/null
+++ b/net/openvswitch/conntrack.c
@@ -0,0 +1,2031 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/openvswitch.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+#include <linux/static_key.h>
+#include <linux/string_helpers.h>
+#include <net/ip.h>
+#include <net/genetlink.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/ipv6_frag.h>
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_nat.h>
+#endif
+
+#include <net/netfilter/nf_conntrack_act_ct.h>
+
+#include "datapath.h"
+#include "drop.h"
+#include "conntrack.h"
+#include "flow.h"
+#include "flow_netlink.h"
+
+struct ovs_ct_len_tbl {
+	int maxlen;
+	int minlen;
+};
+
+/* Metadata mark for masked write to conntrack mark */
+struct md_mark {
+	u32 value;
+	u32 mask;
+};
+
+/* Metadata label for masked write to conntrack label. */
+struct md_labels {
+	struct ovs_key_ct_labels value;
+	struct ovs_key_ct_labels mask;
+};
+
+enum ovs_ct_nat {
+	OVS_CT_NAT = 1 << 0,     /* NAT for committed connections only. */
+	OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
+	OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
+};
+
+/* Conntrack action context for execution. */
+struct ovs_conntrack_info {
+	struct nf_conntrack_helper *helper;
+	struct nf_conntrack_zone zone;
+	struct nf_conn *ct;
+	u8 commit : 1;
+	u8 nat : 3;                 /* enum ovs_ct_nat */
+	u8 force : 1;
+	u8 have_eventmask : 1;
+	u16 family;
+	u32 eventmask;              /* Mask of 1 << IPCT_*. */
+	struct md_mark mark;
+	struct md_labels labels;
+	char timeout[CTNL_TIMEOUT_NAME_MAX];
+	struct nf_ct_timeout *nf_ct_timeout;
+#if IS_ENABLED(CONFIG_NF_NAT)
+	struct nf_nat_range2 range;  /* Only present for SRC NAT and DST NAT. */
+#endif
+};
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+#define OVS_CT_LIMIT_UNLIMITED	0
+#define OVS_CT_LIMIT_DEFAULT OVS_CT_LIMIT_UNLIMITED
+#define CT_LIMIT_HASH_BUCKETS 512
+static DEFINE_STATIC_KEY_FALSE(ovs_ct_limit_enabled);
+
+struct ovs_ct_limit {
+	/* Elements in ovs_ct_limit_info->limits hash table */
+	struct hlist_node hlist_node;
+	struct rcu_head rcu;
+	u16 zone;
+	u32 limit;
+};
+
+struct ovs_ct_limit_info {
+	u32 default_limit;
+	struct hlist_head *limits;
+	struct nf_conncount_data *data;
+};
+
+static const struct nla_policy ct_limit_policy[OVS_CT_LIMIT_ATTR_MAX + 1] = {
+	[OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NLA_NESTED, },
+};
+#endif
+
+static bool labels_nonzero(const struct ovs_key_ct_labels *labels);
+
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
+
+static u16 key_to_nfproto(const struct sw_flow_key *key)
+{
+	switch (ntohs(key->eth.type)) {
+	case ETH_P_IP:
+		return NFPROTO_IPV4;
+	case ETH_P_IPV6:
+		return NFPROTO_IPV6;
+	default:
+		return NFPROTO_UNSPEC;
+	}
+}
+
+/* Map SKB connection state into the values used by flow definition. */
+static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
+{
+	u8 ct_state = OVS_CS_F_TRACKED;
+
+	switch (ctinfo) {
+	case IP_CT_ESTABLISHED_REPLY:
+	case IP_CT_RELATED_REPLY:
+		ct_state |= OVS_CS_F_REPLY_DIR;
+		break;
+	default:
+		break;
+	}
+
+	switch (ctinfo) {
+	case IP_CT_ESTABLISHED:
+	case IP_CT_ESTABLISHED_REPLY:
+		ct_state |= OVS_CS_F_ESTABLISHED;
+		break;
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		ct_state |= OVS_CS_F_RELATED;
+		break;
+	case IP_CT_NEW:
+		ct_state |= OVS_CS_F_NEW;
+		break;
+	default:
+		break;
+	}
+
+	return ct_state;
+}
+
+static u32 ovs_ct_get_mark(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	return ct ? READ_ONCE(ct->mark) : 0;
+#else
+	return 0;
+#endif
+}
+
+/* Guard against conntrack labels max size shrinking below 128 bits. */
+#if NF_CT_LABELS_MAX_SIZE < 16
+#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
+#endif
+
+static void ovs_ct_get_labels(const struct nf_conn *ct,
+			      struct ovs_key_ct_labels *labels)
+{
+	struct nf_conn_labels *cl = NULL;
+
+	if (ct) {
+		if (ct->master && !nf_ct_is_confirmed(ct))
+			ct = ct->master;
+		cl = nf_ct_labels_find(ct);
+	}
+	if (cl)
+		memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
+	else
+		memset(labels, 0, OVS_CT_LABELS_LEN);
+}
+
+static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
+					const struct nf_conntrack_tuple *orig,
+					u8 icmp_proto)
+{
+	key->ct_orig_proto = orig->dst.protonum;
+	if (orig->dst.protonum == icmp_proto) {
+		key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
+		key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
+	} else {
+		key->ct.orig_tp.src = orig->src.u.all;
+		key->ct.orig_tp.dst = orig->dst.u.all;
+	}
+}
+
+static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
+				const struct nf_conntrack_zone *zone,
+				const struct nf_conn *ct)
+{
+	key->ct_state = state;
+	key->ct_zone = zone->id;
+	key->ct.mark = ovs_ct_get_mark(ct);
+	ovs_ct_get_labels(ct, &key->ct.labels);
+
+	if (ct) {
+		const struct nf_conntrack_tuple *orig;
+
+		/* Use the master if we have one. */
+		if (ct->master)
+			ct = ct->master;
+		orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+		/* IP version must match with the master connection. */
+		if (key->eth.type == htons(ETH_P_IP) &&
+		    nf_ct_l3num(ct) == NFPROTO_IPV4) {
+			key->ipv4.ct_orig.src = orig->src.u3.ip;
+			key->ipv4.ct_orig.dst = orig->dst.u3.ip;
+			__ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
+			return;
+		} else if (key->eth.type == htons(ETH_P_IPV6) &&
+			   !sw_flow_key_is_nd(key) &&
+			   nf_ct_l3num(ct) == NFPROTO_IPV6) {
+			key->ipv6.ct_orig.src = orig->src.u3.in6;
+			key->ipv6.ct_orig.dst = orig->dst.u3.in6;
+			__ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
+			return;
+		}
+	}
+	/* Clear 'ct_orig_proto' to mark the non-existence of conntrack
+	 * original direction key fields.
+	 */
+	key->ct_orig_proto = 0;
+}
+
+/* Update 'key' based on skb->_nfct.  If 'post_ct' is true, then OVS has
+ * previously sent the packet to conntrack via the ct action.  If
+ * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
+ * initialized from the connection status.
+ */
+static void ovs_ct_update_key(const struct sk_buff *skb,
+			      const struct ovs_conntrack_info *info,
+			      struct sw_flow_key *key, bool post_ct,
+			      bool keep_nat_flags)
+{
+	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u8 state = 0;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		state = ovs_ct_get_state(ctinfo);
+		/* All unconfirmed entries are NEW connections. */
+		if (!nf_ct_is_confirmed(ct))
+			state |= OVS_CS_F_NEW;
+		/* OVS persists the related flag for the duration of the
+		 * connection.
+		 */
+		if (ct->master)
+			state |= OVS_CS_F_RELATED;
+		if (keep_nat_flags) {
+			state |= key->ct_state & OVS_CS_F_NAT_MASK;
+		} else {
+			if (ct->status & IPS_SRC_NAT)
+				state |= OVS_CS_F_SRC_NAT;
+			if (ct->status & IPS_DST_NAT)
+				state |= OVS_CS_F_DST_NAT;
+		}
+		zone = nf_ct_zone(ct);
+	} else if (post_ct) {
+		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+		if (info)
+			zone = &info->zone;
+	}
+	__ovs_ct_update_key(key, state, zone, ct);
+}
+
+/* This is called to initialize CT key fields possibly coming in from the local
+ * stack.
+ */
+void ovs_ct_fill_key(const struct sk_buff *skb,
+		     struct sw_flow_key *key,
+		     bool post_ct)
+{
+	ovs_ct_update_key(skb, NULL, key, post_ct, false);
+}
+
+int ovs_ct_put_key(const struct sw_flow_key *swkey,
+		   const struct sw_flow_key *output, struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state))
+		return -EMSGSIZE;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	    nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone))
+		return -EMSGSIZE;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	    nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
+		return -EMSGSIZE;
+
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	    nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
+		    &output->ct.labels))
+		return -EMSGSIZE;
+
+	if (swkey->ct_orig_proto) {
+		if (swkey->eth.type == htons(ETH_P_IP)) {
+			struct ovs_key_ct_tuple_ipv4 orig;
+
+			memset(&orig, 0, sizeof(orig));
+			orig.ipv4_src = output->ipv4.ct_orig.src;
+			orig.ipv4_dst = output->ipv4.ct_orig.dst;
+			orig.src_port = output->ct.orig_tp.src;
+			orig.dst_port = output->ct.orig_tp.dst;
+			orig.ipv4_proto = output->ct_orig_proto;
+
+			if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
+				    sizeof(orig), &orig))
+				return -EMSGSIZE;
+		} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+			struct ovs_key_ct_tuple_ipv6 orig;
+
+			memset(&orig, 0, sizeof(orig));
+			memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32,
+			       sizeof(orig.ipv6_src));
+			memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32,
+			       sizeof(orig.ipv6_dst));
+			orig.src_port = output->ct.orig_tp.src;
+			orig.dst_port = output->ct.orig_tp.dst;
+			orig.ipv6_proto = output->ct_orig_proto;
+
+			if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
+				    sizeof(orig), &orig))
+				return -EMSGSIZE;
+		}
+	}
+
+	return 0;
+}
+
+static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
+			   u32 ct_mark, u32 mask)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	u32 new_mark;
+
+	new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
+	if (READ_ONCE(ct->mark) != new_mark) {
+		WRITE_ONCE(ct->mark, new_mark);
+		if (nf_ct_is_confirmed(ct))
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		key->ct.mark = new_mark;
+	}
+
+	return 0;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
+static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
+{
+	struct nf_conn_labels *cl;
+
+	cl = nf_ct_labels_find(ct);
+	if (!cl) {
+		nf_ct_labels_ext_add(ct);
+		cl = nf_ct_labels_find(ct);
+	}
+
+	return cl;
+}
+
+/* Initialize labels for a new, yet to be committed conntrack entry.  Note that
+ * since the new connection is not yet confirmed, and thus no-one else has
+ * access to it's labels, we simply write them over.
+ */
+static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
+			      const struct ovs_key_ct_labels *labels,
+			      const struct ovs_key_ct_labels *mask)
+{
+	struct nf_conn_labels *cl, *master_cl;
+	bool have_mask = labels_nonzero(mask);
+
+	/* Inherit master's labels to the related connection? */
+	master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL;
+
+	if (!master_cl && !have_mask)
+		return 0;   /* Nothing to do. */
+
+	cl = ovs_ct_get_conn_labels(ct);
+	if (!cl)
+		return -ENOSPC;
+
+	/* Inherit the master's labels, if any. */
+	if (master_cl)
+		*cl = *master_cl;
+
+	if (have_mask) {
+		u32 *dst = (u32 *)cl->bits;
+		int i;
+
+		for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
+			dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
+				(labels->ct_labels_32[i]
+				 & mask->ct_labels_32[i]);
+	}
+
+	/* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
+	 * IPCT_LABEL bit is set in the event cache.
+	 */
+	nf_conntrack_event_cache(IPCT_LABEL, ct);
+
+	memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
+
+	return 0;
+}
+
+static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
+			     const struct ovs_key_ct_labels *labels,
+			     const struct ovs_key_ct_labels *mask)
+{
+	struct nf_conn_labels *cl;
+	int err;
+
+	cl = ovs_ct_get_conn_labels(ct);
+	if (!cl)
+		return -ENOSPC;
+
+	err = nf_connlabels_replace(ct, labels->ct_labels_32,
+				    mask->ct_labels_32,
+				    OVS_CT_LABELS_LEN_32);
+	if (err)
+		return err;
+
+	memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
+
+	return 0;
+}
+
+static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
+				   u16 zone, int family, struct sk_buff *skb)
+{
+	struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
+	int err;
+
+	err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
+	if (err)
+		return err;
+
+	/* The key extracted from the fragment that completed this datagram
+	 * likely didn't have an L4 header, so regenerate it.
+	 */
+	ovs_flow_key_update_l3l4(skb, key);
+	key->ip.frag = OVS_FRAG_TYPE_NONE;
+	*OVS_CB(skb) = ovs_cb;
+
+	return 0;
+}
+
+/* This replicates logic from nf_conntrack_core.c that is not exported. */
+static enum ip_conntrack_info
+ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
+{
+	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+		return IP_CT_ESTABLISHED_REPLY;
+	/* Once we've had two way comms, always ESTABLISHED. */
+	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+		return IP_CT_ESTABLISHED;
+	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+		return IP_CT_RELATED;
+	return IP_CT_NEW;
+}
+
+/* Find an existing connection which this packet belongs to without
+ * re-attributing statistics or modifying the connection state.  This allows an
+ * skb->_nfct lost due to an upcall to be recovered during actions execution.
+ *
+ * Must be called with rcu_read_lock.
+ *
+ * On success, populates skb->_nfct and returns the connection.  Returns NULL
+ * if there is no existing entry.
+ */
+static struct nf_conn *
+ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
+		     u8 l3num, struct sk_buff *skb, bool natted)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
+			       net, &tuple)) {
+		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
+		return NULL;
+	}
+
+	/* Must invert the tuple if skb has been transformed by NAT. */
+	if (natted) {
+		struct nf_conntrack_tuple inverse;
+
+		if (!nf_ct_invert_tuple(&inverse, &tuple)) {
+			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
+			return NULL;
+		}
+		tuple = inverse;
+	}
+
+	/* look for tuple match */
+	h = nf_conntrack_find_get(net, zone, &tuple);
+	if (!h)
+		return NULL;   /* Not found. */
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	/* Inverted packet tuple matches the reverse direction conntrack tuple,
+	 * select the other tuplehash to get the right 'ctinfo' bits for this
+	 * packet.
+	 */
+	if (natted)
+		h = &ct->tuplehash[!h->tuple.dst.dir];
+
+	nf_ct_set(skb, ct, ovs_ct_get_info(h));
+	return ct;
+}
+
+static
+struct nf_conn *ovs_ct_executed(struct net *net,
+				const struct sw_flow_key *key,
+				const struct ovs_conntrack_info *info,
+				struct sk_buff *skb,
+				bool *ct_executed)
+{
+	struct nf_conn *ct = NULL;
+
+	/* If no ct, check if we have evidence that an existing conntrack entry
+	 * might be found for this skb.  This happens when we lose a skb->_nfct
+	 * due to an upcall, or if the direction is being forced.  If the
+	 * connection was not confirmed, it is not cached and needs to be run
+	 * through conntrack again.
+	 */
+	*ct_executed = (key->ct_state & OVS_CS_F_TRACKED) &&
+		       !(key->ct_state & OVS_CS_F_INVALID) &&
+		       (key->ct_zone == info->zone.id);
+
+	if (*ct_executed || (!key->ct_state && info->force)) {
+		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
+					  !!(key->ct_state &
+					  OVS_CS_F_NAT_MASK));
+	}
+
+	return ct;
+}
+
+/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
+static bool skb_nfct_cached(struct net *net,
+			    const struct sw_flow_key *key,
+			    const struct ovs_conntrack_info *info,
+			    struct sk_buff *skb)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	bool ct_executed = true;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		ct = ovs_ct_executed(net, key, info, skb, &ct_executed);
+
+	if (ct)
+		nf_ct_get(skb, &ctinfo);
+	else
+		return false;
+
+	if (!net_eq(net, read_pnet(&ct->ct_net)))
+		return false;
+	if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
+		return false;
+	if (info->helper) {
+		struct nf_conn_help *help;
+
+		help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+		if (help && rcu_access_pointer(help->helper) != info->helper)
+			return false;
+	}
+	if (info->nf_ct_timeout) {
+		struct nf_conn_timeout *timeout_ext;
+
+		timeout_ext = nf_ct_timeout_find(ct);
+		if (!timeout_ext || info->nf_ct_timeout !=
+		    rcu_dereference(timeout_ext->timeout))
+			return false;
+	}
+	/* Force conntrack entry direction to the current packet? */
+	if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+		/* Delete the conntrack entry if confirmed, else just release
+		 * the reference.
+		 */
+		if (nf_ct_is_confirmed(ct))
+			nf_ct_delete(ct, 0, 0);
+
+		nf_ct_put(ct);
+		nf_ct_set(skb, NULL, 0);
+		return false;
+	}
+
+	return ct_executed;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static void ovs_nat_update_key(struct sw_flow_key *key,
+			       const struct sk_buff *skb,
+			       enum nf_nat_manip_type maniptype)
+{
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		__be16 src;
+
+		key->ct_state |= OVS_CS_F_SRC_NAT;
+		if (key->eth.type == htons(ETH_P_IP))
+			key->ipv4.addr.src = ip_hdr(skb)->saddr;
+		else if (key->eth.type == htons(ETH_P_IPV6))
+			memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+			       sizeof(key->ipv6.addr.src));
+		else
+			return;
+
+		if (key->ip.proto == IPPROTO_UDP)
+			src = udp_hdr(skb)->source;
+		else if (key->ip.proto == IPPROTO_TCP)
+			src = tcp_hdr(skb)->source;
+		else if (key->ip.proto == IPPROTO_SCTP)
+			src = sctp_hdr(skb)->source;
+		else
+			return;
+
+		key->tp.src = src;
+	} else {
+		__be16 dst;
+
+		key->ct_state |= OVS_CS_F_DST_NAT;
+		if (key->eth.type == htons(ETH_P_IP))
+			key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+		else if (key->eth.type == htons(ETH_P_IPV6))
+			memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+			       sizeof(key->ipv6.addr.dst));
+		else
+			return;
+
+		if (key->ip.proto == IPPROTO_UDP)
+			dst = udp_hdr(skb)->dest;
+		else if (key->ip.proto == IPPROTO_TCP)
+			dst = tcp_hdr(skb)->dest;
+		else if (key->ip.proto == IPPROTO_SCTP)
+			dst = sctp_hdr(skb)->dest;
+		else
+			return;
+
+		key->tp.dst = dst;
+	}
+}
+
+/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
+static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
+		      const struct ovs_conntrack_info *info,
+		      struct sk_buff *skb, struct nf_conn *ct,
+		      enum ip_conntrack_info ctinfo)
+{
+	int err, action = 0;
+
+	if (!(info->nat & OVS_CT_NAT))
+		return NF_ACCEPT;
+	if (info->nat & OVS_CT_SRC_NAT)
+		action |= BIT(NF_NAT_MANIP_SRC);
+	if (info->nat & OVS_CT_DST_NAT)
+		action |= BIT(NF_NAT_MANIP_DST);
+
+	err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+	if (err != NF_ACCEPT)
+		return err;
+
+	if (action & BIT(NF_NAT_MANIP_SRC))
+		ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
+	if (action & BIT(NF_NAT_MANIP_DST))
+		ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST);
+
+	return err;
+}
+#else /* !CONFIG_NF_NAT */
+static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
+		      const struct ovs_conntrack_info *info,
+		      struct sk_buff *skb, struct nf_conn *ct,
+		      enum ip_conntrack_info ctinfo)
+{
+	return NF_ACCEPT;
+}
+#endif
+
+static int verdict_to_errno(unsigned int verdict)
+{
+	switch (verdict & NF_VERDICT_MASK) {
+	case NF_ACCEPT:
+		return 0;
+	case NF_DROP:
+		return -EINVAL;
+	case NF_STOLEN:
+		return -EINPROGRESS;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
+ * not done already.  Update key with new CT state after passing the packet
+ * through conntrack.
+ * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
+ * set to NULL and 0 will be returned.
+ */
+static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
+			   const struct ovs_conntrack_info *info,
+			   struct sk_buff *skb)
+{
+	/* If we are recirculating packets to match on conntrack fields and
+	 * committing with a separate conntrack action,  then we don't need to
+	 * actually run the packet through conntrack twice unless it's for a
+	 * different zone.
+	 */
+	bool cached = skb_nfct_cached(net, key, info, skb);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	if (!cached) {
+		struct nf_hook_state state = {
+			.hook = NF_INET_PRE_ROUTING,
+			.pf = info->family,
+			.net = net,
+		};
+		struct nf_conn *tmpl = info->ct;
+		int err;
+
+		/* Associate skb with specified zone. */
+		if (tmpl) {
+			ct = nf_ct_get(skb, &ctinfo);
+			nf_ct_put(ct);
+			nf_conntrack_get(&tmpl->ct_general);
+			nf_ct_set(skb, tmpl, IP_CT_NEW);
+		}
+
+		err = nf_conntrack_in(skb, &state);
+		if (err != NF_ACCEPT)
+			return verdict_to_errno(err);
+
+		/* Clear CT state NAT flags to mark that we have not yet done
+		 * NAT after the nf_conntrack_in() call.  We can actually clear
+		 * the whole state, as it will be re-initialized below.
+		 */
+		key->ct_state = 0;
+
+		/* Update the key, but keep the NAT flags. */
+		ovs_ct_update_key(skb, info, key, true, true);
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		bool add_helper = false;
+
+		/* Packets starting a new connection must be NATted before the
+		 * helper, so that the helper knows about the NAT.  We enforce
+		 * this by delaying both NAT and helper calls for unconfirmed
+		 * connections until the committing CT action.  For later
+		 * packets NAT and Helper may be called in either order.
+		 *
+		 * NAT will be done only if the CT action has NAT, and only
+		 * once per packet (per zone), as guarded by the NAT bits in
+		 * the key->ct_state.
+		 */
+		if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
+		    (nf_ct_is_confirmed(ct) || info->commit)) {
+			int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+			err = verdict_to_errno(err);
+			if (err)
+				return err;
+		}
+
+		/* Userspace may decide to perform a ct lookup without a helper
+		 * specified followed by a (recirculate and) commit with one,
+		 * or attach a helper in a later commit.  Therefore, for
+		 * connections which we will commit, we may need to attach
+		 * the helper here.
+		 */
+		if (!nf_ct_is_confirmed(ct) && info->commit &&
+		    info->helper && !nfct_help(ct)) {
+			int err = __nf_ct_try_assign_helper(ct, info->ct,
+							    GFP_ATOMIC);
+			if (err)
+				return err;
+			add_helper = true;
+
+			/* helper installed, add seqadj if NAT is required */
+			if (info->nat && !nfct_seqadj(ct)) {
+				if (!nfct_seqadj_ext_add(ct))
+					return -EINVAL;
+			}
+		}
+
+		/* Call the helper only if:
+		 * - nf_conntrack_in() was executed above ("!cached") or a
+		 *   helper was just attached ("add_helper") for a confirmed
+		 *   connection, or
+		 * - When committing an unconfirmed connection.
+		 */
+		if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+					      info->commit)) {
+			int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+			err = verdict_to_errno(err);
+			if (err)
+				return err;
+		}
+
+		if (nf_ct_protonum(ct) == IPPROTO_TCP &&
+		    nf_ct_is_confirmed(ct) && nf_conntrack_tcp_established(ct)) {
+			/* Be liberal for tcp packets so that out-of-window
+			 * packets are not marked invalid.
+			 */
+			nf_ct_set_tcp_be_liberal(ct);
+		}
+
+		nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
+	}
+
+	return 0;
+}
+
+/* Lookup connection and read fields into key. */
+static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
+			 const struct ovs_conntrack_info *info,
+			 struct sk_buff *skb)
+{
+	struct nf_conn *ct;
+	int err;
+
+	err = __ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
+
+	ct = (struct nf_conn *)skb_nfct(skb);
+	if (ct)
+		nf_ct_deliver_cached_events(ct);
+
+	return 0;
+}
+
+static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
+{
+	size_t i;
+
+	for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
+		if (labels->ct_labels_32[i])
+			return true;
+
+	return false;
+}
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+static struct hlist_head *ct_limit_hash_bucket(
+	const struct ovs_ct_limit_info *info, u16 zone)
+{
+	return &info->limits[zone & (CT_LIMIT_HASH_BUCKETS - 1)];
+}
+
+/* Call with ovs_mutex */
+static void ct_limit_set(const struct ovs_ct_limit_info *info,
+			 struct ovs_ct_limit *new_ct_limit)
+{
+	struct ovs_ct_limit *ct_limit;
+	struct hlist_head *head;
+
+	head = ct_limit_hash_bucket(info, new_ct_limit->zone);
+	hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
+		if (ct_limit->zone == new_ct_limit->zone) {
+			hlist_replace_rcu(&ct_limit->hlist_node,
+					  &new_ct_limit->hlist_node);
+			kfree_rcu(ct_limit, rcu);
+			return;
+		}
+	}
+
+	hlist_add_head_rcu(&new_ct_limit->hlist_node, head);
+}
+
+/* Call with ovs_mutex */
+static void ct_limit_del(const struct ovs_ct_limit_info *info, u16 zone)
+{
+	struct ovs_ct_limit *ct_limit;
+	struct hlist_head *head;
+	struct hlist_node *n;
+
+	head = ct_limit_hash_bucket(info, zone);
+	hlist_for_each_entry_safe(ct_limit, n, head, hlist_node) {
+		if (ct_limit->zone == zone) {
+			hlist_del_rcu(&ct_limit->hlist_node);
+			kfree_rcu(ct_limit, rcu);
+			return;
+		}
+	}
+}
+
+/* Call with RCU read lock */
+static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
+{
+	struct ovs_ct_limit *ct_limit;
+	struct hlist_head *head;
+
+	head = ct_limit_hash_bucket(info, zone);
+	hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
+		if (ct_limit->zone == zone)
+			return ct_limit->limit;
+	}
+
+	return info->default_limit;
+}
+
+static int ovs_ct_check_limit(struct net *net,
+			      const struct sk_buff *skb,
+			      const struct ovs_conntrack_info *info)
+{
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
+	u32 per_zone_limit, connections;
+	u32 conncount_key;
+
+	conncount_key = info->zone.id;
+
+	per_zone_limit = ct_limit_get(ct_limit_info, info->zone.id);
+	if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
+		return 0;
+
+	connections = nf_conncount_count_skb(net, skb, info->family,
+					     ct_limit_info->data,
+					     &conncount_key);
+	if (connections > per_zone_limit)
+		return -ENOMEM;
+
+	return 0;
+}
+#endif
+
+/* Lookup connection and confirm if unconfirmed. */
+static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
+			 const struct ovs_conntrack_info *info,
+			 struct sk_buff *skb)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	int err;
+
+	err = __ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
+
+	/* The connection could be invalid, in which case this is a no-op.*/
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return 0;
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+	if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
+		if (!nf_ct_is_confirmed(ct)) {
+			err = ovs_ct_check_limit(net, skb, info);
+			if (err) {
+				net_warn_ratelimited("openvswitch: zone: %u "
+					"exceeds conntrack limit\n",
+					info->zone.id);
+				return err;
+			}
+		}
+	}
+#endif
+
+	/* Set the conntrack event mask if given.  NEW and DELETE events have
+	 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
+	 * typically would receive many kinds of updates.  Setting the event
+	 * mask allows those events to be filtered.  The set event mask will
+	 * remain in effect for the lifetime of the connection unless changed
+	 * by a further CT action with both the commit flag and the eventmask
+	 * option. */
+	if (info->have_eventmask) {
+		struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct);
+
+		if (cache)
+			cache->ctmask = info->eventmask;
+	}
+
+	/* Apply changes before confirming the connection so that the initial
+	 * conntrack NEW netlink event carries the values given in the CT
+	 * action.
+	 */
+	if (info->mark.mask) {
+		err = ovs_ct_set_mark(ct, key, info->mark.value,
+				      info->mark.mask);
+		if (err)
+			return err;
+	}
+	if (!nf_ct_is_confirmed(ct)) {
+		err = ovs_ct_init_labels(ct, key, &info->labels.value,
+					 &info->labels.mask);
+		if (err)
+			return err;
+
+		nf_conn_act_ct_ext_add(skb, ct, ctinfo);
+	} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+		   labels_nonzero(&info->labels.mask)) {
+		err = ovs_ct_set_labels(ct, key, &info->labels.value,
+					&info->labels.mask);
+		if (err)
+			return err;
+	}
+	/* This will take care of sending queued events even if the connection
+	 * is already confirmed.
+	 */
+	err = nf_conntrack_confirm(skb);
+
+	return verdict_to_errno(err);
+}
+
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
+int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+		   struct sw_flow_key *key,
+		   const struct ovs_conntrack_info *info)
+{
+	int nh_ofs;
+	int err;
+
+	/* The conntrack module expects to be working at L3. */
+	nh_ofs = skb_network_offset(skb);
+	skb_pull_rcsum(skb, nh_ofs);
+
+	err = nf_ct_skb_network_trim(skb, info->family);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
+		err = ovs_ct_handle_fragments(net, key, info->zone.id,
+					      info->family, skb);
+		if (err)
+			return err;
+	}
+
+	if (info->commit)
+		err = ovs_ct_commit(net, key, info, skb);
+	else
+		err = ovs_ct_lookup(net, key, info, skb);
+
+	/* conntrack core returned NF_STOLEN */
+	if (err == -EINPROGRESS)
+		return err;
+
+	skb_push_rcsum(skb, nh_ofs);
+	if (err)
+		ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
+	return err;
+}
+
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	nf_ct_put(ct);
+	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+	if (key)
+		ovs_ct_fill_key(skb, key, false);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static int parse_nat(const struct nlattr *attr,
+		     struct ovs_conntrack_info *info, bool log)
+{
+	struct nlattr *a;
+	int rem;
+	bool have_ip_max = false;
+	bool have_proto_max = false;
+	bool ip_vers = (info->family == NFPROTO_IPV6);
+
+	nla_for_each_nested(a, attr, rem) {
+		static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
+			[OVS_NAT_ATTR_SRC] = {0, 0},
+			[OVS_NAT_ATTR_DST] = {0, 0},
+			[OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
+						 sizeof(struct in6_addr)},
+			[OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
+						 sizeof(struct in6_addr)},
+			[OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
+			[OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
+			[OVS_NAT_ATTR_PERSISTENT] = {0, 0},
+			[OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
+			[OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
+		};
+		int type = nla_type(a);
+
+		if (type > OVS_NAT_ATTR_MAX) {
+			OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
+				  type, OVS_NAT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
+			OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
+				  type, nla_len(a),
+				  ovs_nat_attr_lens[type][ip_vers]);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NAT_ATTR_SRC:
+		case OVS_NAT_ATTR_DST:
+			if (info->nat) {
+				OVS_NLERR(log, "Only one type of NAT may be specified");
+				return -ERANGE;
+			}
+			info->nat |= OVS_CT_NAT;
+			info->nat |= ((type == OVS_NAT_ATTR_SRC)
+					? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
+			break;
+
+		case OVS_NAT_ATTR_IP_MIN:
+			nla_memcpy(&info->range.min_addr, a,
+				   sizeof(info->range.min_addr));
+			info->range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_IP_MAX:
+			have_ip_max = true;
+			nla_memcpy(&info->range.max_addr, a,
+				   sizeof(info->range.max_addr));
+			info->range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MIN:
+			info->range.min_proto.all = htons(nla_get_u16(a));
+			info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MAX:
+			have_proto_max = true;
+			info->range.max_proto.all = htons(nla_get_u16(a));
+			info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_PERSISTENT:
+			info->range.flags |= NF_NAT_RANGE_PERSISTENT;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_HASH:
+			info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_RANDOM:
+			info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
+			break;
+
+		default:
+			OVS_NLERR(log, "Unknown nat attribute (%d)", type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
+		return -EINVAL;
+	}
+	if (!info->nat) {
+		/* Do not allow flags if no type is given. */
+		if (info->range.flags) {
+			OVS_NLERR(log,
+				  "NAT flags may be given only when NAT range (SRC or DST) is also specified."
+				  );
+			return -EINVAL;
+		}
+		info->nat = OVS_CT_NAT;   /* NAT existing connections. */
+	} else if (!info->commit) {
+		OVS_NLERR(log,
+			  "NAT attributes may be specified only when CT COMMIT flag is also specified."
+			  );
+		return -EINVAL;
+	}
+	/* Allow missing IP_MAX. */
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
+		memcpy(&info->range.max_addr, &info->range.min_addr,
+		       sizeof(info->range.max_addr));
+	}
+	/* Allow missing PROTO_MAX. */
+	if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
+	    !have_proto_max) {
+		info->range.max_proto.all = info->range.min_proto.all;
+	}
+	return 0;
+}
+#endif
+
+static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
+	[OVS_CT_ATTR_COMMIT]	= { .minlen = 0, .maxlen = 0 },
+	[OVS_CT_ATTR_FORCE_COMMIT]	= { .minlen = 0, .maxlen = 0 },
+	[OVS_CT_ATTR_ZONE]	= { .minlen = sizeof(u16),
+				    .maxlen = sizeof(u16) },
+	[OVS_CT_ATTR_MARK]	= { .minlen = sizeof(struct md_mark),
+				    .maxlen = sizeof(struct md_mark) },
+	[OVS_CT_ATTR_LABELS]	= { .minlen = sizeof(struct md_labels),
+				    .maxlen = sizeof(struct md_labels) },
+	[OVS_CT_ATTR_HELPER]	= { .minlen = 1,
+				    .maxlen = NF_CT_HELPER_NAME_LEN },
+#if IS_ENABLED(CONFIG_NF_NAT)
+	/* NAT length is checked when parsing the nested attributes. */
+	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
+#endif
+	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
+				    .maxlen = sizeof(u32) },
+	[OVS_CT_ATTR_TIMEOUT] = { .minlen = 1,
+				  .maxlen = CTNL_TIMEOUT_NAME_MAX },
+};
+
+static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
+		    const char **helper, bool log)
+{
+	struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int maxlen;
+		int minlen;
+
+		if (type > OVS_CT_ATTR_MAX) {
+			OVS_NLERR(log,
+				  "Unknown conntrack attr (type=%d, max=%d)",
+				  type, OVS_CT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		maxlen = ovs_ct_attr_lens[type].maxlen;
+		minlen = ovs_ct_attr_lens[type].minlen;
+		if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+			OVS_NLERR(log,
+				  "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
+				  type, nla_len(a), maxlen);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_CT_ATTR_FORCE_COMMIT:
+			info->force = true;
+			fallthrough;
+		case OVS_CT_ATTR_COMMIT:
+			info->commit = true;
+			break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+		case OVS_CT_ATTR_ZONE:
+			info->zone.id = nla_get_u16(a);
+			break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+		case OVS_CT_ATTR_MARK: {
+			struct md_mark *mark = nla_data(a);
+
+			if (!mark->mask) {
+				OVS_NLERR(log, "ct_mark mask cannot be 0");
+				return -EINVAL;
+			}
+			info->mark = *mark;
+			break;
+		}
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+		case OVS_CT_ATTR_LABELS: {
+			struct md_labels *labels = nla_data(a);
+
+			if (!labels_nonzero(&labels->mask)) {
+				OVS_NLERR(log, "ct_labels mask cannot be 0");
+				return -EINVAL;
+			}
+			info->labels = *labels;
+			break;
+		}
+#endif
+		case OVS_CT_ATTR_HELPER:
+			*helper = nla_data(a);
+			if (!string_is_terminated(*helper, nla_len(a))) {
+				OVS_NLERR(log, "Invalid conntrack helper");
+				return -EINVAL;
+			}
+			break;
+#if IS_ENABLED(CONFIG_NF_NAT)
+		case OVS_CT_ATTR_NAT: {
+			int err = parse_nat(a, info, log);
+
+			if (err)
+				return err;
+			break;
+		}
+#endif
+		case OVS_CT_ATTR_EVENTMASK:
+			info->have_eventmask = true;
+			info->eventmask = nla_get_u32(a);
+			break;
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+		case OVS_CT_ATTR_TIMEOUT:
+			memcpy(info->timeout, nla_data(a), nla_len(a));
+			if (!string_is_terminated(info->timeout, nla_len(a))) {
+				OVS_NLERR(log, "Invalid conntrack timeout");
+				return -EINVAL;
+			}
+			break;
+#endif
+
+		default:
+			OVS_NLERR(log, "Unknown conntrack attr (%d)",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	if (!info->commit && info->mark.mask) {
+		OVS_NLERR(log,
+			  "Setting conntrack mark requires 'commit' flag.");
+		return -EINVAL;
+	}
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	if (!info->commit && labels_nonzero(&info->labels.mask)) {
+		OVS_NLERR(log,
+			  "Setting conntrack labels requires 'commit' flag.");
+		return -EINVAL;
+	}
+#endif
+	if (rem > 0) {
+		OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
+{
+	if (attr == OVS_KEY_ATTR_CT_STATE)
+		return true;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	    attr == OVS_KEY_ATTR_CT_ZONE)
+		return true;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	    attr == OVS_KEY_ATTR_CT_MARK)
+		return true;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	    attr == OVS_KEY_ATTR_CT_LABELS) {
+		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+		return ovs_net->xt_label;
+	}
+
+	return false;
+}
+
+int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+		       const struct sw_flow_key *key,
+		       struct sw_flow_actions **sfa,  bool log)
+{
+	struct ovs_conntrack_info ct_info;
+	const char *helper = NULL;
+	u16 family;
+	int err;
+
+	family = key_to_nfproto(key);
+	if (family == NFPROTO_UNSPEC) {
+		OVS_NLERR(log, "ct family unspecified");
+		return -EINVAL;
+	}
+
+	memset(&ct_info, 0, sizeof(ct_info));
+	ct_info.family = family;
+
+	nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
+			NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	err = parse_ct(attr, &ct_info, &helper, log);
+	if (err)
+		return err;
+
+	/* Set up template for tracking connections in specific zones. */
+	ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
+	if (!ct_info.ct) {
+		OVS_NLERR(log, "Failed to allocate conntrack template");
+		return -ENOMEM;
+	}
+
+	if (ct_info.timeout[0]) {
+		if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
+				      ct_info.timeout))
+			OVS_NLERR(log,
+				  "Failed to associated timeout policy '%s'",
+				  ct_info.timeout);
+		else
+			ct_info.nf_ct_timeout = rcu_dereference(
+				nf_ct_timeout_find(ct_info.ct)->timeout);
+
+	}
+
+	if (helper) {
+		err = nf_ct_add_helper(ct_info.ct, helper, ct_info.family,
+				       key->ip.proto, ct_info.nat, &ct_info.helper);
+		if (err) {
+			OVS_NLERR(log, "Failed to add %s helper %d", helper, err);
+			goto err_free_ct;
+		}
+	}
+
+	err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
+				 sizeof(ct_info), log);
+	if (err)
+		goto err_free_ct;
+
+	if (ct_info.commit)
+		__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+	return 0;
+err_free_ct:
+	__ovs_ct_free_action(&ct_info);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
+			       struct sk_buff *skb)
+{
+	struct nlattr *start;
+
+	start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT);
+	if (!start)
+		return false;
+
+	if (info->nat & OVS_CT_SRC_NAT) {
+		if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
+			return false;
+	} else if (info->nat & OVS_CT_DST_NAT) {
+		if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
+			return false;
+	} else {
+		goto out;
+	}
+
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+		if (IS_ENABLED(CONFIG_NF_NAT) &&
+		    info->family == NFPROTO_IPV4) {
+			if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
+					    info->range.min_addr.ip) ||
+			    (info->range.max_addr.ip
+			     != info->range.min_addr.ip &&
+			     (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
+					      info->range.max_addr.ip))))
+				return false;
+		} else if (IS_ENABLED(CONFIG_IPV6) &&
+			   info->family == NFPROTO_IPV6) {
+			if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
+					     &info->range.min_addr.in6) ||
+			    (memcmp(&info->range.max_addr.in6,
+				    &info->range.min_addr.in6,
+				    sizeof(info->range.max_addr.in6)) &&
+			     (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
+					       &info->range.max_addr.in6))))
+				return false;
+		} else {
+			return false;
+		}
+	}
+	if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
+	    (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
+			 ntohs(info->range.min_proto.all)) ||
+	     (info->range.max_proto.all != info->range.min_proto.all &&
+	      nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
+			  ntohs(info->range.max_proto.all)))))
+		return false;
+
+	if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
+	    nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
+		return false;
+	if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
+	    nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
+		return false;
+	if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
+	    nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
+		return false;
+out:
+	nla_nest_end(skb, start);
+
+	return true;
+}
+#endif
+
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
+			  struct sk_buff *skb)
+{
+	struct nlattr *start;
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (ct_info->commit && nla_put_flag(skb, ct_info->force
+					    ? OVS_CT_ATTR_FORCE_COMMIT
+					    : OVS_CT_ATTR_COMMIT))
+		return -EMSGSIZE;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	    nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
+		return -EMSGSIZE;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
+	    nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
+		    &ct_info->mark))
+		return -EMSGSIZE;
+	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	    labels_nonzero(&ct_info->labels.mask) &&
+	    nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
+		    &ct_info->labels))
+		return -EMSGSIZE;
+	if (ct_info->helper) {
+		if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
+				   ct_info->helper->name))
+			return -EMSGSIZE;
+	}
+	if (ct_info->have_eventmask &&
+	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
+		return -EMSGSIZE;
+	if (ct_info->timeout[0]) {
+		if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout))
+			return -EMSGSIZE;
+	}
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
+		return -EMSGSIZE;
+#endif
+	nla_nest_end(skb, start);
+
+	return 0;
+}
+
+void ovs_ct_free_action(const struct nlattr *a)
+{
+	struct ovs_conntrack_info *ct_info = nla_data(a);
+
+	__ovs_ct_free_action(ct_info);
+}
+
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
+{
+	if (ct_info->helper) {
+#if IS_ENABLED(CONFIG_NF_NAT)
+		if (ct_info->nat)
+			nf_nat_helper_put(ct_info->helper);
+#endif
+		nf_conntrack_helper_put(ct_info->helper);
+	}
+	if (ct_info->ct) {
+		if (ct_info->timeout[0])
+			nf_ct_destroy_timeout(ct_info->ct);
+		nf_ct_tmpl_free(ct_info->ct);
+	}
+}
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
+{
+	int i, err;
+
+	ovs_net->ct_limit_info = kmalloc(sizeof(*ovs_net->ct_limit_info),
+					 GFP_KERNEL);
+	if (!ovs_net->ct_limit_info)
+		return -ENOMEM;
+
+	ovs_net->ct_limit_info->default_limit = OVS_CT_LIMIT_DEFAULT;
+	ovs_net->ct_limit_info->limits =
+		kmalloc_array(CT_LIMIT_HASH_BUCKETS, sizeof(struct hlist_head),
+			      GFP_KERNEL);
+	if (!ovs_net->ct_limit_info->limits) {
+		kfree(ovs_net->ct_limit_info);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
+
+	ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
+
+	if (IS_ERR(ovs_net->ct_limit_info->data)) {
+		err = PTR_ERR(ovs_net->ct_limit_info->data);
+		kfree(ovs_net->ct_limit_info->limits);
+		kfree(ovs_net->ct_limit_info);
+		pr_err("openvswitch: failed to init nf_conncount %d\n", err);
+		return err;
+	}
+	return 0;
+}
+
+static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
+{
+	const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
+	int i;
+
+	nf_conncount_destroy(net, info->data);
+	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
+		struct hlist_head *head = &info->limits[i];
+		struct ovs_ct_limit *ct_limit;
+		struct hlist_node *next;
+
+		hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
+			kfree_rcu(ct_limit, rcu);
+	}
+	kfree(info->limits);
+	kfree(info);
+}
+
+static struct sk_buff *
+ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
+			     struct ovs_header **ovs_reply_header)
+{
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct sk_buff *skb;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+					info->snd_seq,
+					&dp_ct_limit_genl_family, 0, cmd);
+
+	if (!*ovs_reply_header) {
+		nlmsg_free(skb);
+		return ERR_PTR(-EMSGSIZE);
+	}
+	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+	return skb;
+}
+
+static bool check_zone_id(int zone_id, u16 *pzone)
+{
+	if (zone_id >= 0 && zone_id <= 65535) {
+		*pzone = (u16)zone_id;
+		return true;
+	}
+	return false;
+}
+
+static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit,
+				       struct ovs_ct_limit_info *info)
+{
+	struct ovs_zone_limit *zone_limit;
+	int rem;
+	u16 zone;
+
+	rem = NLA_ALIGN(nla_len(nla_zone_limit));
+	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
+
+	while (rem >= sizeof(*zone_limit)) {
+		if (unlikely(zone_limit->zone_id ==
+				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
+			ovs_lock();
+			info->default_limit = zone_limit->limit;
+			ovs_unlock();
+		} else if (unlikely(!check_zone_id(
+				zone_limit->zone_id, &zone))) {
+			OVS_NLERR(true, "zone id is out of range");
+		} else {
+			struct ovs_ct_limit *ct_limit;
+
+			ct_limit = kmalloc(sizeof(*ct_limit),
+					   GFP_KERNEL_ACCOUNT);
+			if (!ct_limit)
+				return -ENOMEM;
+
+			ct_limit->zone = zone;
+			ct_limit->limit = zone_limit->limit;
+
+			ovs_lock();
+			ct_limit_set(info, ct_limit);
+			ovs_unlock();
+		}
+		rem -= NLA_ALIGN(sizeof(*zone_limit));
+		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
+				NLA_ALIGN(sizeof(*zone_limit)));
+	}
+
+	if (rem)
+		OVS_NLERR(true, "set zone limit has %d unknown bytes", rem);
+
+	return 0;
+}
+
+static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit,
+				       struct ovs_ct_limit_info *info)
+{
+	struct ovs_zone_limit *zone_limit;
+	int rem;
+	u16 zone;
+
+	rem = NLA_ALIGN(nla_len(nla_zone_limit));
+	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
+
+	while (rem >= sizeof(*zone_limit)) {
+		if (unlikely(zone_limit->zone_id ==
+				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
+			ovs_lock();
+			info->default_limit = OVS_CT_LIMIT_DEFAULT;
+			ovs_unlock();
+		} else if (unlikely(!check_zone_id(
+				zone_limit->zone_id, &zone))) {
+			OVS_NLERR(true, "zone id is out of range");
+		} else {
+			ovs_lock();
+			ct_limit_del(info, zone);
+			ovs_unlock();
+		}
+		rem -= NLA_ALIGN(sizeof(*zone_limit));
+		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
+				NLA_ALIGN(sizeof(*zone_limit)));
+	}
+
+	if (rem)
+		OVS_NLERR(true, "del zone limit has %d unknown bytes", rem);
+
+	return 0;
+}
+
+static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info,
+					  struct sk_buff *reply)
+{
+	struct ovs_zone_limit zone_limit = {
+		.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE,
+		.limit   = info->default_limit,
+	};
+
+	return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
+}
+
+static int __ovs_ct_limit_get_zone_limit(struct net *net,
+					 struct nf_conncount_data *data,
+					 u16 zone_id, u32 limit,
+					 struct sk_buff *reply)
+{
+	struct nf_conntrack_zone ct_zone;
+	struct ovs_zone_limit zone_limit;
+	u32 conncount_key = zone_id;
+
+	zone_limit.zone_id = zone_id;
+	zone_limit.limit = limit;
+	nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
+						  &conncount_key);
+	return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
+}
+
+static int ovs_ct_limit_get_zone_limit(struct net *net,
+				       struct nlattr *nla_zone_limit,
+				       struct ovs_ct_limit_info *info,
+				       struct sk_buff *reply)
+{
+	struct ovs_zone_limit *zone_limit;
+	int rem, err;
+	u32 limit;
+	u16 zone;
+
+	rem = NLA_ALIGN(nla_len(nla_zone_limit));
+	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
+
+	while (rem >= sizeof(*zone_limit)) {
+		if (unlikely(zone_limit->zone_id ==
+				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
+			err = ovs_ct_limit_get_default_limit(info, reply);
+			if (err)
+				return err;
+		} else if (unlikely(!check_zone_id(zone_limit->zone_id,
+							&zone))) {
+			OVS_NLERR(true, "zone id is out of range");
+		} else {
+			rcu_read_lock();
+			limit = ct_limit_get(info, zone);
+			rcu_read_unlock();
+
+			err = __ovs_ct_limit_get_zone_limit(
+				net, info->data, zone, limit, reply);
+			if (err)
+				return err;
+		}
+		rem -= NLA_ALIGN(sizeof(*zone_limit));
+		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
+				NLA_ALIGN(sizeof(*zone_limit)));
+	}
+
+	if (rem)
+		OVS_NLERR(true, "get zone limit has %d unknown bytes", rem);
+
+	return 0;
+}
+
+static int ovs_ct_limit_get_all_zone_limit(struct net *net,
+					   struct ovs_ct_limit_info *info,
+					   struct sk_buff *reply)
+{
+	struct ovs_ct_limit *ct_limit;
+	struct hlist_head *head;
+	int i, err = 0;
+
+	err = ovs_ct_limit_get_default_limit(info, reply);
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
+		head = &info->limits[i];
+		hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
+			err = __ovs_ct_limit_get_zone_limit(net, info->data,
+				ct_limit->zone, ct_limit->limit, reply);
+			if (err)
+				goto exit_err;
+		}
+	}
+
+exit_err:
+	rcu_read_unlock();
+	return err;
+}
+
+static int ovs_ct_limit_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
+	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
+	int err;
+
+	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_SET,
+					     &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
+		err = -EINVAL;
+		goto exit_err;
+	}
+
+	err = ovs_ct_limit_set_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
+					  ct_limit_info);
+	if (err)
+		goto exit_err;
+
+	static_branch_enable(&ovs_ct_limit_enabled);
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_err:
+	nlmsg_free(reply);
+	return err;
+}
+
+static int ovs_ct_limit_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
+	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
+	int err;
+
+	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_DEL,
+					     &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
+		err = -EINVAL;
+		goto exit_err;
+	}
+
+	err = ovs_ct_limit_del_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
+					  ct_limit_info);
+	if (err)
+		goto exit_err;
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_err:
+	nlmsg_free(reply);
+	return err;
+}
+
+static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct nlattr *nla_reply;
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct net *net = sock_net(skb->sk);
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
+	int err;
+
+	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_GET,
+					     &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
+	if (!nla_reply) {
+		err = -EMSGSIZE;
+		goto exit_err;
+	}
+
+	if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
+		err = ovs_ct_limit_get_zone_limit(
+			net, a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], ct_limit_info,
+			reply);
+		if (err)
+			goto exit_err;
+	} else {
+		err = ovs_ct_limit_get_all_zone_limit(net, ct_limit_info,
+						      reply);
+		if (err)
+			goto exit_err;
+	}
+
+	nla_nest_end(reply, nla_reply);
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_err:
+	nlmsg_free(reply);
+	return err;
+}
+
+static const struct genl_small_ops ct_limit_genl_ops[] = {
+	{ .cmd = OVS_CT_LIMIT_CMD_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					       * privilege.
+					       */
+		.doit = ovs_ct_limit_cmd_set,
+	},
+	{ .cmd = OVS_CT_LIMIT_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					       * privilege.
+					       */
+		.doit = ovs_ct_limit_cmd_del,
+	},
+	{ .cmd = OVS_CT_LIMIT_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = 0,		  /* OK for unprivileged users. */
+		.doit = ovs_ct_limit_cmd_get,
+	},
+};
+
+static const struct genl_multicast_group ovs_ct_limit_multicast_group = {
+	.name = OVS_CT_LIMIT_MCGROUP,
+};
+
+struct genl_family dp_ct_limit_genl_family __ro_after_init = {
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_CT_LIMIT_FAMILY,
+	.version = OVS_CT_LIMIT_VERSION,
+	.maxattr = OVS_CT_LIMIT_ATTR_MAX,
+	.policy = ct_limit_policy,
+	.netnsok = true,
+	.parallel_ops = true,
+	.small_ops = ct_limit_genl_ops,
+	.n_small_ops = ARRAY_SIZE(ct_limit_genl_ops),
+	.resv_start_op = OVS_CT_LIMIT_CMD_GET + 1,
+	.mcgrps = &ovs_ct_limit_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
+};
+#endif
+
+int ovs_ct_init(struct net *net)
+{
+	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+	if (nf_connlabels_get(net, n_bits - 1)) {
+		ovs_net->xt_label = false;
+		OVS_NLERR(true, "Failed to set connlabel length");
+	} else {
+		ovs_net->xt_label = true;
+	}
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+	return ovs_ct_limit_init(net, ovs_net);
+#else
+	return 0;
+#endif
+}
+
+void ovs_ct_exit(struct net *net)
+{
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+	ovs_ct_limit_exit(net, ovs_net);
+#endif
+
+	if (ovs_net->xt_label)
+		nf_connlabels_put(net);
+}
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
new file mode 100644
index 000000000000..317e525c8a11
--- /dev/null
+++ b/net/openvswitch/conntrack.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ */
+
+#ifndef OVS_CONNTRACK_H
+#define OVS_CONNTRACK_H 1
+
+#include "flow.h"
+
+struct ovs_conntrack_info;
+struct ovs_ct_limit_info;
+enum ovs_key_attr;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+int ovs_ct_init(struct net *);
+void ovs_ct_exit(struct net *);
+bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
+int ovs_ct_copy_action(struct net *, const struct nlattr *,
+		       const struct sw_flow_key *, struct sw_flow_actions **,
+		       bool log);
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
+
+int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
+		   const struct ovs_conntrack_info *);
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
+
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key,
+		     bool post_ct);
+int ovs_ct_put_key(const struct sw_flow_key *swkey,
+		   const struct sw_flow_key *output, struct sk_buff *skb);
+void ovs_ct_free_action(const struct nlattr *a);
+
+#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
+			   OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
+			   OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \
+			   OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
+#else
+#include <linux/errno.h>
+
+static inline int ovs_ct_init(struct net *net) { return 0; }
+
+static inline void ovs_ct_exit(struct net *net) { }
+
+static inline bool ovs_ct_verify(struct net *net, int attr)
+{
+	return false;
+}
+
+static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
+				     const struct sw_flow_key *key,
+				     struct sw_flow_actions **acts, bool log)
+{
+	return -ENOTSUPP;
+}
+
+static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
+					struct sk_buff *skb)
+{
+	return -ENOTSUPP;
+}
+
+static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+				 struct sw_flow_key *key,
+				 const struct ovs_conntrack_info *info)
+{
+	kfree_skb(skb);
+	return -ENOTSUPP;
+}
+
+static inline int ovs_ct_clear(struct sk_buff *skb,
+			       struct sw_flow_key *key)
+{
+	return -ENOTSUPP;
+}
+
+static inline void ovs_ct_fill_key(const struct sk_buff *skb,
+				   struct sw_flow_key *key,
+				   bool post_ct)
+{
+	key->ct_state = 0;
+	key->ct_zone = 0;
+	key->ct.mark = 0;
+	memset(&key->ct.labels, 0, sizeof(key->ct.labels));
+	/* Clear 'ct_orig_proto' to mark the non-existence of original
+	 * direction key fields.
+	 */
+	key->ct_orig_proto = 0;
+}
+
+static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
+				 const struct sw_flow_key *output,
+				 struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline void ovs_ct_free_action(const struct nlattr *a) { }
+
+#define CT_SUPPORTED_MASK 0
+#endif /* CONFIG_NF_CONNTRACK */
+
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+extern struct genl_family dp_ct_limit_genl_family;
+#endif
+#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f7e3a0d84c40..d5b6e2002bc1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2014 Nicira, Inc.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -28,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/time.h>
 #include <linux/etherdevice.h>
-#include <linux/genetlink.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
@@ -44,32 +30,58 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
-#include <linux/lockdep.h>
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
-#include <linux/workqueue.h>
 #include <net/genetlink.h>
+#include <net/gso.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/pkt_cls.h>
 
 #include "datapath.h"
+#include "drop.h"
 #include "flow.h"
+#include "flow_table.h"
+#include "flow_netlink.h"
+#include "meter.h"
+#include "openvswitch_trace.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
+unsigned int ovs_net_id __read_mostly;
+
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
+
+static const struct nla_policy flow_policy[];
+
+static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP,
+};
+
+static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP,
+};
 
-#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-static void rehash_flow_table(struct work_struct *work);
-static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP,
+};
 
-int ovs_net_id __read_mostly;
+/* Check if need to build a reply message.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
+			    unsigned int group)
+{
+	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+	       genl_has_listeners(family, genl_info_net(info), group);
+}
 
-static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
-		       struct genl_multicast_group *grp)
+static void ovs_notify(struct genl_family *family,
+		       struct sk_buff *skb, struct genl_info *info)
 {
-	genl_notify(skb, genl_info_net(info), info->snd_portid,
-		    grp->id, info->nlhdr, GFP_KERNEL);
+	genl_notify(family, skb, info, 0, GFP_KERNEL);
 }
 
 /**
@@ -112,38 +124,27 @@ int lockdep_ovsl_is_held(void)
 #endif
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
-			     const struct dp_upcall_info *);
-static int queue_userspace_packet(struct net *, int dp_ifindex,
-				  struct sk_buff *,
-				  const struct dp_upcall_info *);
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+			     const struct sw_flow_key *,
+			     const struct dp_upcall_info *,
+			     uint32_t cutlen);
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+				  const struct sw_flow_key *,
+				  const struct dp_upcall_info *,
+				  uint32_t cutlen);
 
-/* Must be called with rcu_read_lock or ovs_mutex. */
-static struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
-	struct datapath *dp = NULL;
-	struct net_device *dev;
+static void ovs_dp_masks_rebalance(struct work_struct *work);
 
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, dp_ifindex);
-	if (dev) {
-		struct vport *vport = ovs_internal_dev_get_vport(dev);
-		if (vport)
-			dp = vport->dp;
-	}
-	rcu_read_unlock();
-
-	return dp;
-}
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
 
 /* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
-	return vport->ops->get_name(vport);
+	return ovs_vport_name(vport);
 }
 
-static int get_dpifindex(struct datapath *dp)
+static int get_dpifindex(const struct datapath *dp)
 {
 	struct vport *local;
 	int ifindex;
@@ -152,7 +153,7 @@ static int get_dpifindex(struct datapath *dp)
 
 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
 	if (local)
-		ifindex = netdev_vport_priv(local)->dev->ifindex;
+		ifindex = local->dev->ifindex;
 	else
 		ifindex = 0;
 
@@ -165,10 +166,11 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 {
 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
 
-	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+	ovs_flow_tbl_destroy(&dp->table);
 	free_percpu(dp->stats_percpu);
-	release_net(ovs_dp_get_net(dp));
 	kfree(dp->ports);
+	ovs_meters_exit(dp);
+	kfree(rcu_dereference_raw(dp->upcall_portids));
 	kfree(dp);
 }
 
@@ -178,13 +180,15 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
 	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
 }
 
+/* Called with ovs_mutex or RCU read lock. */
 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 {
 	struct vport *vport;
 	struct hlist_head *head;
 
 	head = vport_hash_bucket(dp, port_no);
-	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
+				 lockdep_ovsl_is_held()) {
 		if (vport->port_no == port_no)
 			return vport;
 	}
@@ -206,6 +210,26 @@ static struct vport *new_vport(const struct vport_parms *parms)
 	return vport;
 }
 
+static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
+					  const struct dp_upcall_info *upcall_info,
+					  bool upcall_result)
+{
+	struct vport *p = OVS_CB(skb)->input_vport;
+	struct vport_upcall_stats_percpu *stats;
+
+	if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
+	    upcall_info->cmd != OVS_PACKET_CMD_ACTION)
+		return;
+
+	stats = this_cpu_ptr(p->upcall_stats);
+	u64_stats_update_begin(&stats->syncp);
+	if (upcall_result)
+		u64_stats_inc(&stats->n_success);
+	else
+		u64_stats_inc(&stats->n_fail);
+	u64_stats_update_end(&stats->syncp);
+}
+
 void ovs_dp_detach_port(struct vport *p)
 {
 	ASSERT_OVSL();
@@ -218,85 +242,112 @@ void ovs_dp_detach_port(struct vport *p)
 }
 
 /* Must be called with rcu_read_lock. */
-void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 {
+	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
+	const struct vport *p = OVS_CB(skb)->input_vport;
 	struct datapath *dp = p->dp;
 	struct sw_flow *flow;
+	struct sw_flow_actions *sf_acts;
 	struct dp_stats_percpu *stats;
-	struct sw_flow_key key;
+	bool ovs_pcpu_locked = false;
 	u64 *stats_counter;
+	u32 n_mask_hit;
+	u32 n_cache_hit;
 	int error;
-	int key_len;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
-	/* Extract flow from 'skb' into 'key'. */
-	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
-	if (unlikely(error)) {
-		kfree_skb(skb);
-		return;
-	}
-
 	/* Look up flow. */
-	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+					 &n_mask_hit, &n_cache_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
 
+		memset(&upcall, 0, sizeof(upcall));
 		upcall.cmd = OVS_PACKET_CMD_MISS;
-		upcall.key = &key;
-		upcall.userdata = NULL;
-		upcall.portid = p->upcall_portid;
-		ovs_dp_upcall(dp, skb, &upcall);
-		consume_skb(skb);
+
+		if (OVS_CB(skb)->upcall_pid)
+			upcall.portid = OVS_CB(skb)->upcall_pid;
+		else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+			upcall.portid =
+			    ovs_dp_get_upcall_portid(dp, smp_processor_id());
+		else
+			upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+		upcall.mru = OVS_CB(skb)->mru;
+		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
+		switch (error) {
+		case 0:
+		case -EAGAIN:
+		case -ERESTARTSYS:
+		case -EINTR:
+			consume_skb(skb);
+			break;
+		default:
+			kfree_skb(skb);
+			break;
+		}
 		stats_counter = &stats->n_missed;
 		goto out;
 	}
 
-	OVS_CB(skb)->flow = flow;
+	ovs_flow_stats_update(flow, key->tp.flags, skb);
+	sf_acts = rcu_dereference(flow->sf_acts);
+	/* This path can be invoked recursively: Use the current task to
+	 * identify recursive invocation - the lock must be acquired only once.
+	 * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
+	 * Limit the locking to RT to avoid assigning `owner' if it can be
+	 * avoided.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
+		local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
+		ovs_pcpu->owner = current;
+		ovs_pcpu_locked = true;
+	}
+
+	error = ovs_execute_actions(dp, skb, sf_acts, key);
+	if (unlikely(error))
+		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+				    ovs_dp_name(dp), error);
+	if (ovs_pcpu_locked) {
+		ovs_pcpu->owner = NULL;
+		local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
+	}
 
 	stats_counter = &stats->n_hit;
-	ovs_flow_used(OVS_CB(skb)->flow, skb);
-	ovs_execute_actions(dp, skb);
 
 out:
 	/* Update datapath statistics. */
-	u64_stats_update_begin(&stats->sync);
+	u64_stats_update_begin(&stats->syncp);
 	(*stats_counter)++;
-	u64_stats_update_end(&stats->sync);
+	stats->n_mask_hit += n_mask_hit;
+	stats->n_cache_hit += n_cache_hit;
+	u64_stats_update_end(&stats->syncp);
 }
 
-static struct genl_family dp_packet_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_PACKET_FAMILY,
-	.version = OVS_PACKET_VERSION,
-	.maxattr = OVS_PACKET_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
-		  const struct dp_upcall_info *upcall_info)
+		  const struct sw_flow_key *key,
+		  const struct dp_upcall_info *upcall_info,
+		  uint32_t cutlen)
 {
 	struct dp_stats_percpu *stats;
-	int dp_ifindex;
 	int err;
 
+	if (trace_ovs_dp_upcall_enabled())
+		trace_ovs_dp_upcall(dp, skb, key, upcall_info);
+
 	if (upcall_info->portid == 0) {
 		err = -ENOTCONN;
 		goto err;
 	}
 
-	dp_ifindex = get_dpifindex(dp);
-	if (!dp_ifindex) {
-		err = -ENODEV;
-		goto err;
-	}
-
 	if (!skb_is_gso(skb))
-		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 	else
-		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
+
+	ovs_vport_update_upcall_stats(skb, upcall_info, !err);
 	if (err)
 		goto err;
 
@@ -305,117 +356,125 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 err:
 	stats = this_cpu_ptr(dp->stats_percpu);
 
-	u64_stats_update_begin(&stats->sync);
+	u64_stats_update_begin(&stats->syncp);
 	stats->n_lost++;
-	u64_stats_update_end(&stats->sync);
+	u64_stats_update_end(&stats->syncp);
 
 	return err;
 }
 
-static int queue_gso_packets(struct net *net, int dp_ifindex,
-			     struct sk_buff *skb,
-			     const struct dp_upcall_info *upcall_info)
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+			     const struct sw_flow_key *key,
+			     const struct dp_upcall_info *upcall_info,
+			     uint32_t cutlen)
 {
-	unsigned short gso_type = skb_shinfo(skb)->gso_type;
-	struct dp_upcall_info later_info;
+	unsigned int gso_type = skb_shinfo(skb)->gso_type;
 	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
+	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
+	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
+	if (segs == NULL)
+		return -EINVAL;
+
+	if (gso_type & SKB_GSO_UDP) {
+		/* The initial flow key extracted by ovs_flow_key_extract()
+		 * in this case is for a first fragment, so we need to
+		 * properly mark later fragments.
+		 */
+		later_key = *key;
+		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+	}
 
 	/* Queue all of the segments. */
-	skb = segs;
-	do {
-		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
+	skb_list_walk_safe(segs, skb, nskb) {
+		if (gso_type & SKB_GSO_UDP && skb != segs)
+			key = &later_key;
+
+		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 		if (err)
 			break;
 
-		if (skb == segs && gso_type & SKB_GSO_UDP) {
-			/* The initial flow key extracted by ovs_flow_extract()
-			 * in this case is for a first fragment, so we need to
-			 * properly mark later fragments.
-			 */
-			later_key = *upcall_info->key;
-			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-
-			later_info = *upcall_info;
-			later_info.key = &later_key;
-			upcall_info = &later_info;
-		}
-	} while ((skb = skb->next));
+	}
 
 	/* Free all of the segments. */
-	skb = segs;
-	do {
-		nskb = skb->next;
+	skb_list_walk_safe(segs, skb, nskb) {
 		if (err)
 			kfree_skb(skb);
 		else
 			consume_skb(skb);
-	} while ((skb = nskb));
+	}
 	return err;
 }
 
-static size_t key_attr_size(void)
-{
-	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
-		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
-		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
-		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
-		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
-		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
-		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
-}
-
-static size_t upcall_msg_size(const struct sk_buff *skb,
-			      const struct nlattr *userdata)
+static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
+			      unsigned int hdrlen, int actions_attrlen)
 {
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
-		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
+		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
 
 	/* OVS_PACKET_ATTR_USERDATA */
-	if (userdata)
-		size += NLA_ALIGN(userdata->nla_len);
+	if (upcall_info->userdata)
+		size += NLA_ALIGN(upcall_info->userdata->nla_len);
+
+	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+	if (upcall_info->egress_tun_info)
+		size += nla_total_size(ovs_tun_key_attr_size());
+
+	/* OVS_PACKET_ATTR_ACTIONS */
+	if (upcall_info->actions_len)
+		size += nla_total_size(actions_attrlen);
+
+	/* OVS_PACKET_ATTR_MRU */
+	if (upcall_info->mru)
+		size += nla_total_size(sizeof(upcall_info->mru));
 
 	return size;
 }
 
-static int queue_userspace_packet(struct net *net, int dp_ifindex,
-				  struct sk_buff *skb,
-				  const struct dp_upcall_info *upcall_info)
+static void pad_packet(struct datapath *dp, struct sk_buff *skb)
+{
+	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+		size_t plen = NLA_ALIGN(skb->len) - skb->len;
+
+		if (plen > 0)
+			skb_put_zero(skb, plen);
+	}
+}
+
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+				  const struct sw_flow_key *key,
+				  const struct dp_upcall_info *upcall_info,
+				  uint32_t cutlen)
 {
 	struct ovs_header *upcall;
 	struct sk_buff *nskb = NULL;
-	struct sk_buff *user_skb; /* to be queued to userspace */
+	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
 	struct nlattr *nla;
-	int err;
+	size_t len;
+	unsigned int hlen;
+	int err, dp_ifindex;
+	u64 hash;
+
+	dp_ifindex = get_dpifindex(dp);
+	if (!dp_ifindex)
+		return -ENODEV;
 
-	if (vlan_tx_tag_present(skb)) {
+	if (skb_vlan_tag_present(skb)) {
 		nskb = skb_clone(skb, GFP_ATOMIC);
 		if (!nskb)
 			return -ENOMEM;
 
-		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
+		nskb = __vlan_hwaccel_push_inside(nskb);
 		if (!nskb)
 			return -ENOMEM;
 
-		nskb->vlan_tci = 0;
 		skb = nskb;
 	}
 
@@ -424,7 +483,23 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 		goto out;
 	}
 
-	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
+	/* Complete checksum if needed */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_csum_hwoffload_help(skb, 0)))
+		goto out;
+
+	/* Older versions of OVS user space enforce alignment of the last
+	 * Netlink attribute to NLA_ALIGNTO which would require extensive
+	 * padding logic. Only perform zerocopy if padding is not required.
+	 */
+	if (dp->user_features & OVS_DP_F_UNALIGNED)
+		hlen = skb_zerocopy_headlen(skb);
+	else
+		hlen = skb->len;
+
+	len = upcall_msg_size(upcall_info, hlen - cutlen,
+			      OVS_CB(skb)->acts_origlen);
+	user_skb = genlmsg_new(len, GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
 		goto out;
@@ -432,432 +507,122 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 
 	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
 			     0, upcall_info->cmd);
+	if (!upcall) {
+		err = -EINVAL;
+		goto out;
+	}
 	upcall->dp_ifindex = dp_ifindex;
 
-	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
-	nla_nest_end(user_skb, nla);
+	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
+	if (err)
+		goto out;
 
 	if (upcall_info->userdata)
 		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
 			  nla_len(upcall_info->userdata),
 			  nla_data(upcall_info->userdata));
 
-	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
-
-	skb_copy_and_csum_dev(skb, nla_data(nla));
-
-	genlmsg_end(user_skb, upcall);
-	err = genlmsg_unicast(net, user_skb, upcall_info->portid);
-
-out:
-	kfree_skb(nskb);
-	return err;
-}
-
-/* Called with ovs_mutex. */
-static int flush_flows(struct datapath *dp)
-{
-	struct flow_table *old_table;
-	struct flow_table *new_table;
-
-	old_table = ovsl_dereference(dp->table);
-	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
-	if (!new_table)
-		return -ENOMEM;
-
-	rcu_assign_pointer(dp->table, new_table);
-
-	ovs_flow_tbl_deferred_destroy(old_table);
-	return 0;
-}
+	if (upcall_info->egress_tun_info) {
+		nla = nla_nest_start_noflag(user_skb,
+					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+		err = ovs_nla_put_tunnel_info(user_skb,
+					      upcall_info->egress_tun_info);
+		if (err)
+			goto out;
 
-static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
-{
+		nla_nest_end(user_skb, nla);
+	}
 
-	struct sw_flow_actions *acts;
-	int new_acts_size;
-	int req_size = NLA_ALIGN(attr_len);
-	int next_offset = offsetof(struct sw_flow_actions, actions) +
-					(*sfa)->actions_len;
+	if (upcall_info->actions_len) {
+		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		if (!nla) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+		err = ovs_nla_put_actions(upcall_info->actions,
+					  upcall_info->actions_len,
+					  user_skb);
+		if (!err)
+			nla_nest_end(user_skb, nla);
+		else
+			nla_nest_cancel(user_skb, nla);
+	}
 
-	if (req_size <= (ksize(*sfa) - next_offset))
+	/* Add OVS_PACKET_ATTR_MRU */
+	if (upcall_info->mru &&
+	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+		err = -ENOBUFS;
 		goto out;
-
-	new_acts_size = ksize(*sfa) * 2;
-
-	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
-			return ERR_PTR(-EMSGSIZE);
-		new_acts_size = MAX_ACTIONS_BUFSIZE;
 	}
 
-	acts = ovs_flow_actions_alloc(new_acts_size);
-	if (IS_ERR(acts))
-		return (void *)acts;
-
-	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
-	acts->actions_len = (*sfa)->actions_len;
-	kfree(*sfa);
-	*sfa = acts;
-
-out:
-	(*sfa)->actions_len += req_size;
-	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
-}
-
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
-{
-	struct nlattr *a;
-
-	a = reserve_sfa_size(sfa, nla_attr_size(len));
-	if (IS_ERR(a))
-		return PTR_ERR(a);
-
-	a->nla_type = attrtype;
-	a->nla_len = nla_attr_size(len);
-
-	if (data)
-		memcpy(nla_data(a), data, len);
-	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
-
-	return 0;
-}
-
-static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
-{
-	int used = (*sfa)->actions_len;
-	int err;
-
-	err = add_action(sfa, attrtype, NULL, 0);
-	if (err)
-		return err;
-
-	return used;
-}
-
-static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
-{
-	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
-
-	a->nla_len = sfa->actions_len - st_offset;
-}
+	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
+	if (cutlen > 0 &&
+	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+		err = -ENOBUFS;
+		goto out;
+	}
 
-static int validate_and_copy_actions(const struct nlattr *attr,
-				     const struct sw_flow_key *key, int depth,
-				     struct sw_flow_actions **sfa);
+	/* Add OVS_PACKET_ATTR_HASH */
+	hash = skb_get_hash_raw(skb);
+	if (skb->sw_hash)
+		hash |= OVS_PACKET_HASH_SW_BIT;
 
-static int validate_and_copy_sample(const struct nlattr *attr,
-				    const struct sw_flow_key *key, int depth,
-				    struct sw_flow_actions **sfa)
-{
-	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
-	const struct nlattr *probability, *actions;
-	const struct nlattr *a;
-	int rem, start, err, st_acts;
+	if (skb->l4_hash)
+		hash |= OVS_PACKET_HASH_L4_BIT;
 
-	memset(attrs, 0, sizeof(attrs));
-	nla_for_each_nested(a, attr, rem) {
-		int type = nla_type(a);
-		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
-			return -EINVAL;
-		attrs[type] = a;
+	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+		err = -ENOBUFS;
+		goto out;
 	}
-	if (rem)
-		return -EINVAL;
-
-	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
-	if (!probability || nla_len(probability) != sizeof(u32))
-		return -EINVAL;
 
-	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
-	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
-		return -EINVAL;
-
-	/* validation done, copy sample action. */
-	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
-	if (start < 0)
-		return start;
-	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
-	if (err)
-		return err;
-	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
-	if (st_acts < 0)
-		return st_acts;
+	/* Only reserve room for attribute header, packet data is added
+	 * in skb_zerocopy() */
+	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
+		err = -ENOBUFS;
+		goto out;
+	}
+	nla->nla_len = nla_attr_size(skb->len - cutlen);
 
-	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
+	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
 	if (err)
-		return err;
-
-	add_nested_action_end(*sfa, st_acts);
-	add_nested_action_end(*sfa, start);
-
-	return 0;
-}
-
-static int validate_tp_port(const struct sw_flow_key *flow_key)
-{
-	if (flow_key->eth.type == htons(ETH_P_IP)) {
-		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
-			return 0;
-	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
-		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
-			return 0;
-	}
+		goto out;
 
-	return -EINVAL;
-}
+	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+	pad_packet(dp, user_skb);
 
-static int validate_and_copy_set_tun(const struct nlattr *attr,
-				     struct sw_flow_actions **sfa)
-{
-	struct ovs_key_ipv4_tunnel tun_key;
-	int err, start;
+	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
 
-	err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
+	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+	user_skb = NULL;
+out:
 	if (err)
-		return err;
-
-	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
-	if (start < 0)
-		return start;
-
-	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
-	add_nested_action_end(*sfa, start);
+		skb_tx_error(skb);
+	consume_skb(user_skb);
+	consume_skb(nskb);
 
 	return err;
 }
 
-static int validate_set(const struct nlattr *a,
-			const struct sw_flow_key *flow_key,
-			struct sw_flow_actions **sfa,
-			bool *set_tun)
-{
-	const struct nlattr *ovs_key = nla_data(a);
-	int key_type = nla_type(ovs_key);
-
-	/* There can be only one key in a action */
-	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
-		return -EINVAL;
-
-	if (key_type > OVS_KEY_ATTR_MAX ||
-	   (ovs_key_lens[key_type] != nla_len(ovs_key) &&
-	    ovs_key_lens[key_type] != -1))
-		return -EINVAL;
-
-	switch (key_type) {
-	const struct ovs_key_ipv4 *ipv4_key;
-	const struct ovs_key_ipv6 *ipv6_key;
-	int err;
-
-	case OVS_KEY_ATTR_PRIORITY:
-	case OVS_KEY_ATTR_SKB_MARK:
-	case OVS_KEY_ATTR_ETHERNET:
-		break;
-
-	case OVS_KEY_ATTR_TUNNEL:
-		*set_tun = true;
-		err = validate_and_copy_set_tun(a, sfa);
-		if (err)
-			return err;
-		break;
-
-	case OVS_KEY_ATTR_IPV4:
-		if (flow_key->eth.type != htons(ETH_P_IP))
-			return -EINVAL;
-
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
-		ipv4_key = nla_data(ovs_key);
-		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-			return -EINVAL;
-
-		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-			return -EINVAL;
-
-		break;
-
-	case OVS_KEY_ATTR_IPV6:
-		if (flow_key->eth.type != htons(ETH_P_IPV6))
-			return -EINVAL;
-
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
-		ipv6_key = nla_data(ovs_key);
-		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-			return -EINVAL;
-
-		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-			return -EINVAL;
-
-		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
-			return -EINVAL;
-
-		break;
-
-	case OVS_KEY_ATTR_TCP:
-		if (flow_key->ip.proto != IPPROTO_TCP)
-			return -EINVAL;
-
-		return validate_tp_port(flow_key);
-
-	case OVS_KEY_ATTR_UDP:
-		if (flow_key->ip.proto != IPPROTO_UDP)
-			return -EINVAL;
-
-		return validate_tp_port(flow_key);
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int validate_userspace(const struct nlattr *attr)
-{
-	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
-		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
-		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
-	};
-	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
-	int error;
-
-	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
-				 attr, userspace_policy);
-	if (error)
-		return error;
-
-	if (!a[OVS_USERSPACE_ATTR_PID] ||
-	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int copy_action(const struct nlattr *from,
-		       struct sw_flow_actions **sfa)
-{
-	int totlen = NLA_ALIGN(from->nla_len);
-	struct nlattr *to;
-
-	to = reserve_sfa_size(sfa, from->nla_len);
-	if (IS_ERR(to))
-		return PTR_ERR(to);
-
-	memcpy(to, from, totlen);
-	return 0;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
-				     const struct sw_flow_key *key,
-				     int depth,
-				     struct sw_flow_actions **sfa)
-{
-	const struct nlattr *a;
-	int rem, err;
-
-	if (depth >= SAMPLE_ACTION_DEPTH)
-		return -EOVERFLOW;
-
-	nla_for_each_nested(a, attr, rem) {
-		/* Expected argument lengths, (u32)-1 for variable length. */
-		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
-			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
-			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
-			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
-			[OVS_ACTION_ATTR_POP_VLAN] = 0,
-			[OVS_ACTION_ATTR_SET] = (u32)-1,
-			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
-		};
-		const struct ovs_action_push_vlan *vlan;
-		int type = nla_type(a);
-		bool skip_copy;
-
-		if (type > OVS_ACTION_ATTR_MAX ||
-		    (action_lens[type] != nla_len(a) &&
-		     action_lens[type] != (u32)-1))
-			return -EINVAL;
-
-		skip_copy = false;
-		switch (type) {
-		case OVS_ACTION_ATTR_UNSPEC:
-			return -EINVAL;
-
-		case OVS_ACTION_ATTR_USERSPACE:
-			err = validate_userspace(a);
-			if (err)
-				return err;
-			break;
-
-		case OVS_ACTION_ATTR_OUTPUT:
-			if (nla_get_u32(a) >= DP_MAX_PORTS)
-				return -EINVAL;
-			break;
-
-
-		case OVS_ACTION_ATTR_POP_VLAN:
-			break;
-
-		case OVS_ACTION_ATTR_PUSH_VLAN:
-			vlan = nla_data(a);
-			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
-				return -EINVAL;
-			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
-				return -EINVAL;
-			break;
-
-		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa, &skip_copy);
-			if (err)
-				return err;
-			break;
-
-		case OVS_ACTION_ATTR_SAMPLE:
-			err = validate_and_copy_sample(a, key, depth, sfa);
-			if (err)
-				return err;
-			skip_copy = true;
-			break;
-
-		default:
-			return -EINVAL;
-		}
-		if (!skip_copy) {
-			err = copy_action(a, sfa);
-			if (err)
-				return err;
-		}
-	}
-
-	if (rem > 0)
-		return -EINVAL;
-
-	return 0;
-}
-
-static void clear_stats(struct sw_flow *flow)
-{
-	flow->used = 0;
-	flow->tcp_flags = 0;
-	flow->packet_count = 0;
-	flow->byte_count = 0;
-}
-
 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 {
-	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct net *net = sock_net(skb->sk);
 	struct nlattr **a = info->attrs;
 	struct sw_flow_actions *acts;
 	struct sk_buff *packet;
 	struct sw_flow *flow;
+	struct sw_flow_actions *sf_acts;
 	struct datapath *dp;
-	struct ethhdr *eth;
+	struct vport *input_vport;
+	u16 mru = 0;
+	u64 hash;
 	int len;
 	int err;
-	int key_len;
+	bool log = !a[OVS_PACKET_ATTR_PROBE];
 
 	err = -EINVAL;
 	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -873,16 +638,23 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
 
-	skb_reset_mac_header(packet);
-	eth = eth_hdr(packet);
+	/* Set packet's mru */
+	if (a[OVS_PACKET_ATTR_MRU]) {
+		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
+		packet->ignore_df = 1;
+	}
+	OVS_CB(packet)->mru = mru;
 
-	/* Normally, setting the skb 'protocol' field would be handled by a
-	 * call to eth_type_trans(), but it assumes there's a sending
-	 * device, which we may not have. */
-	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
-		packet->protocol = eth->h_proto;
-	else
-		packet->protocol = htons(ETH_P_802_2);
+	if (a[OVS_PACKET_ATTR_HASH]) {
+		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+			       !!(hash & OVS_PACKET_HASH_SW_BIT),
+			       !!(hash & OVS_PACKET_HASH_L4_BIT));
+	}
+
+	OVS_CB(packet)->upcall_pid =
+		nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0);
 
 	/* Build an sw_flow for sending this packet. */
 	flow = ovs_flow_alloc();
@@ -890,45 +662,55 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
 
-	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
+					     packet, &flow->key, log);
 	if (err)
 		goto err_flow_free;
 
-	err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
+	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
+				   &flow->key, &acts, log);
 	if (err)
 		goto err_flow_free;
-	acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
-	err = PTR_ERR(acts);
-	if (IS_ERR(acts))
-		goto err_flow_free;
 
-	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
 	rcu_assign_pointer(flow->sf_acts, acts);
-	if (err)
-		goto err_flow_free;
-
-	OVS_CB(packet)->flow = flow;
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
 
 	rcu_read_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
 		goto err_unlock;
 
+	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
+	if (!input_vport)
+		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
+
+	if (!input_vport)
+		goto err_unlock;
+
+	packet->dev = input_vport->dev;
+	OVS_CB(packet)->input_vport = input_vport;
+	sf_acts = rcu_dereference(flow->sf_acts);
+
 	local_bh_disable();
-	err = ovs_execute_actions(dp, packet);
+	local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_write(ovs_pcpu_storage->owner, current);
+	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_write(ovs_pcpu_storage->owner, NULL);
+	local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
 	local_bh_enable();
 	rcu_read_unlock();
 
-	ovs_flow_free(flow);
+	ovs_flow_free(flow, false);
 	return err;
 
 err_unlock:
 	rcu_read_unlock();
 err_flow_free:
-	ovs_flow_free(flow);
+	ovs_flow_free(flow, false);
 err_kfree_skb:
 	kfree_skb(packet);
 err:
@@ -939,24 +721,46 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
 	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
 	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
 	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
+	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
+	[OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 };
 
-static struct genl_ops dp_packet_genl_ops[] = {
+static const struct genl_small_ops dp_packet_genl_ops[] = {
 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = packet_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_packet_cmd_execute
 	}
 };
 
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+static struct genl_family dp_packet_genl_family __ro_after_init = {
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX,
+	.policy = packet_policy,
+	.netnsok = true,
+	.parallel_ops = true,
+	.small_ops = dp_packet_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
+	.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
+	.module = THIS_MODULE,
+};
+
+static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
+			 struct ovs_dp_megaflow_stats *mega_stats)
 {
 	int i;
-	struct flow_table *table = ovsl_dereference(dp->table);
 
-	stats->n_flows = ovs_flow_tbl_count(table);
+	memset(mega_stats, 0, sizeof(*mega_stats));
+
+	stats->n_flows = ovs_flow_tbl_count(&dp->table);
+	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
 
 	stats->n_hit = stats->n_missed = stats->n_lost = 0;
+
 	for_each_possible_cpu(i) {
 		const struct dp_stats_percpu *percpu_stats;
 		struct dp_stats_percpu local_stats;
@@ -965,189 +769,100 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 
 		do {
-			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			start = u64_stats_fetch_begin(&percpu_stats->syncp);
 			local_stats = *percpu_stats;
-		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+		} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));
 
 		stats->n_hit += local_stats.n_hit;
 		stats->n_missed += local_stats.n_missed;
 		stats->n_lost += local_stats.n_lost;
+		mega_stats->n_mask_hit += local_stats.n_mask_hit;
+		mega_stats->n_cache_hit += local_stats.n_cache_hit;
 	}
 }
 
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
-	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
-};
-
-static struct genl_family dp_flow_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_FLOW_FAMILY,
-	.version = OVS_FLOW_VERSION,
-	.maxattr = OVS_FLOW_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
-	.name = OVS_FLOW_MCGROUP
-};
-
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
 {
-	const struct nlattr *a;
-	struct nlattr *start;
-	int err = 0, rem;
-
-	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
-	if (!start)
-		return -EMSGSIZE;
-
-	nla_for_each_nested(a, attr, rem) {
-		int type = nla_type(a);
-		struct nlattr *st_sample;
-
-		switch (type) {
-		case OVS_SAMPLE_ATTR_PROBABILITY:
-			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
-				return -EMSGSIZE;
-			break;
-		case OVS_SAMPLE_ATTR_ACTIONS:
-			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
-			if (!st_sample)
-				return -EMSGSIZE;
-			err = actions_to_attr(nla_data(a), nla_len(a), skb);
-			if (err)
-				return err;
-			nla_nest_end(skb, st_sample);
-			break;
-		}
-	}
-
-	nla_nest_end(skb, start);
-	return err;
+	return ovs_identifier_is_ufid(sfid) &&
+	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
 }
 
-static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
+static bool should_fill_mask(uint32_t ufid_flags)
 {
-	const struct nlattr *ovs_key = nla_data(a);
-	int key_type = nla_type(ovs_key);
-	struct nlattr *start;
-	int err;
-
-	switch (key_type) {
-	case OVS_KEY_ATTR_IPV4_TUNNEL:
-		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
-		if (!start)
-			return -EMSGSIZE;
-
-		err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
-		if (err)
-			return err;
-		nla_nest_end(skb, start);
-		break;
-	default:
-		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
-			return -EMSGSIZE;
-		break;
-	}
+	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
+}
 
-	return 0;
+static bool should_fill_actions(uint32_t ufid_flags)
+{
+	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
 }
 
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
+				    const struct sw_flow_id *sfid,
+				    uint32_t ufid_flags)
 {
-	const struct nlattr *a;
-	int rem, err;
+	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
 
-	nla_for_each_attr(a, attr, len, rem) {
-		int type = nla_type(a);
+	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+	 * see ovs_nla_put_identifier()
+	 */
+	if (sfid && ovs_identifier_is_ufid(sfid))
+		len += nla_total_size(sfid->ufid_len);
+	else
+		len += nla_total_size(ovs_key_attr_size());
 
-		switch (type) {
-		case OVS_ACTION_ATTR_SET:
-			err = set_action_to_attr(a, skb);
-			if (err)
-				return err;
-			break;
+	/* OVS_FLOW_ATTR_KEY */
+	if (!sfid || should_fill_key(sfid, ufid_flags))
+		len += nla_total_size(ovs_key_attr_size());
 
-		case OVS_ACTION_ATTR_SAMPLE:
-			err = sample_action_to_attr(a, skb);
-			if (err)
-				return err;
-			break;
-		default:
-			if (nla_put(skb, type, nla_len(a), nla_data(a)))
-				return -EMSGSIZE;
-			break;
-		}
-	}
+	/* OVS_FLOW_ATTR_MASK */
+	if (should_fill_mask(ufid_flags))
+		len += nla_total_size(ovs_key_attr_size());
 
-	return 0;
-}
+	/* OVS_FLOW_ATTR_ACTIONS */
+	if (should_fill_actions(ufid_flags))
+		len += nla_total_size(acts->orig_len);
 
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
-{
-	return NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
-		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+	return len
+		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
-		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
-		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
 }
 
-/* Called with ovs_mutex. */
-static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
-				  struct sk_buff *skb, u32 portid,
-				  u32 seq, u32 flags, u8 cmd)
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
+				   struct sk_buff *skb)
 {
-	const int skb_orig_len = skb->len;
-	const struct sw_flow_actions *sf_acts;
-	struct nlattr *start;
 	struct ovs_flow_stats stats;
-	struct ovs_header *ovs_header;
-	struct nlattr *nla;
+	__be16 tcp_flags;
 	unsigned long used;
-	u8 tcp_flags;
-	int err;
 
-	sf_acts = ovsl_dereference(flow->sf_acts);
+	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
 
-	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
-	if (!ovs_header)
+	if (used &&
+	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
+			      OVS_FLOW_ATTR_PAD))
 		return -EMSGSIZE;
 
-	ovs_header->dp_ifindex = get_dpifindex(dp);
-
-	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
-	if (!nla)
-		goto nla_put_failure;
-	err = ovs_flow_to_nlattrs(&flow->key, skb);
-	if (err)
-		goto error;
-	nla_nest_end(skb, nla);
-
-	spin_lock_bh(&flow->lock);
-	used = flow->used;
-	stats.n_packets = flow->packet_count;
-	stats.n_bytes = flow->byte_count;
-	tcp_flags = flow->tcp_flags;
-	spin_unlock_bh(&flow->lock);
+	if (stats.n_packets &&
+	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
+			  sizeof(struct ovs_flow_stats), &stats,
+			  OVS_FLOW_ATTR_PAD))
+		return -EMSGSIZE;
 
-	if (used &&
-	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
-		goto nla_put_failure;
+	if ((u8)ntohs(tcp_flags) &&
+	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
+		return -EMSGSIZE;
 
-	if (stats.n_packets &&
-	    nla_put(skb, OVS_FLOW_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &stats))
-		goto nla_put_failure;
+	return 0;
+}
 
-	if (tcp_flags &&
-	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
-		goto nla_put_failure;
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
+				     struct sk_buff *skb, int skb_orig_len)
+{
+	struct nlattr *start;
+	int err;
 
 	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
 	 * this is the first flow to be dumped into 'skb'.  This is unusual for
@@ -1159,134 +874,229 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 	 * This can only fail for dump operations because the skb is always
 	 * properly sized for single flows.
 	 */
-	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
+	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
 	if (start) {
-		err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
+		const struct sw_flow_actions *sf_acts;
+
+		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
+		err = ovs_nla_put_actions(sf_acts->actions,
+					  sf_acts->actions_len, skb);
+
 		if (!err)
 			nla_nest_end(skb, start);
 		else {
 			if (skb_orig_len)
-				goto error;
+				return err;
 
 			nla_nest_cancel(skb, start);
 		}
-	} else if (skb_orig_len)
-		goto nla_put_failure;
+	} else if (skb_orig_len) {
+		return -EMSGSIZE;
+	}
 
-	return genlmsg_end(skb, ovs_header);
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+				  struct sk_buff *skb, u32 portid,
+				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
+{
+	const int skb_orig_len = skb->len;
+	struct ovs_header *ovs_header;
+	int err;
+
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
+				 flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = dp_ifindex;
+
+	err = ovs_nla_put_identifier(flow, skb);
+	if (err)
+		goto error;
+
+	if (should_fill_key(&flow->id, ufid_flags)) {
+		err = ovs_nla_put_masked_key(flow, skb);
+		if (err)
+			goto error;
+	}
+
+	if (should_fill_mask(ufid_flags)) {
+		err = ovs_nla_put_mask(flow, skb);
+		if (err)
+			goto error;
+	}
+
+	err = ovs_flow_cmd_fill_stats(flow, skb);
+	if (err)
+		goto error;
+
+	if (should_fill_actions(ufid_flags)) {
+		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+		if (err)
+			goto error;
+	}
+
+	genlmsg_end(skb, ovs_header);
+	return 0;
 
-nla_put_failure:
-	err = -EMSGSIZE;
 error:
 	genlmsg_cancel(skb, ovs_header);
 	return err;
 }
 
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+/* May not be called with RCU read lock. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+					       const struct sw_flow_id *sfid,
+					       struct genl_info *info,
+					       bool always,
+					       uint32_t ufid_flags)
 {
-	const struct sw_flow_actions *sf_acts;
+	struct sk_buff *skb;
+	size_t len;
+
+	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
+		return NULL;
 
-	sf_acts = ovsl_dereference(flow->sf_acts);
+	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
+	skb = genlmsg_new(len, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
 
-	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
+	return skb;
 }
 
-static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
-					       struct datapath *dp,
-					       u32 portid, u32 seq, u8 cmd)
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+					       int dp_ifindex,
+					       struct genl_info *info, u8 cmd,
+					       bool always, u32 ufid_flags)
 {
 	struct sk_buff *skb;
 	int retval;
 
-	skb = ovs_flow_cmd_alloc_info(flow);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
+				      &flow->id, info, always, ufid_flags);
+	if (IS_ERR_OR_NULL(skb))
+		return skb;
 
-	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
-	BUG_ON(retval < 0);
+	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+					info->snd_portid, info->snd_seq, 0,
+					cmd, ufid_flags);
+	if (WARN_ON_ONCE(retval < 0)) {
+		kfree_skb(skb);
+		skb = ERR_PTR(retval);
+	}
 	return skb;
 }
 
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
+	struct net *net = sock_net(skb->sk);
 	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
-	struct sw_flow_key key;
-	struct sw_flow *flow;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct sw_flow *flow = NULL, *new_flow;
+	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
-	struct flow_table *table;
-	struct sw_flow_actions *acts = NULL;
+	struct sw_flow_key *key;
+	struct sw_flow_actions *acts;
+	struct sw_flow_match match;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int error;
-	int key_len;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
-	/* Extract key. */
+	/* Must have key and actions. */
 	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY])
+	if (!a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR(log, "Flow key attr not present in new flow.");
 		goto error;
-	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
-	if (error)
+	}
+	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
+		OVS_NLERR(log, "Flow actions attr not present in new flow.");
 		goto error;
+	}
 
-	/* Validate actions. */
-	if (a[OVS_FLOW_ATTR_ACTIONS]) {
-		acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
-		error = PTR_ERR(acts);
-		if (IS_ERR(acts))
-			goto error;
-
-		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
-		if (error)
-			goto err_kfree;
-	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
-		error = -EINVAL;
+	/* Most of the time we need to allocate a new flow, do it before
+	 * locking.
+	 */
+	new_flow = ovs_flow_alloc();
+	if (IS_ERR(new_flow)) {
+		error = PTR_ERR(new_flow);
 		goto error;
 	}
 
-	ovs_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	error = -ENODEV;
-	if (!dp)
-		goto err_unlock_ovs;
+	/* Extract key. */
+	key = kzalloc(sizeof(*key), GFP_KERNEL);
+	if (!key) {
+		error = -ENOMEM;
+		goto err_kfree_flow;
+	}
 
-	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
-	if (!flow) {
-		/* Bail out if we're not allowed to create a new flow. */
-		error = -ENOENT;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
-			goto err_unlock_ovs;
+	ovs_match_init(&match, key, false, &mask);
+	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+				  a[OVS_FLOW_ATTR_MASK], log);
+	if (error)
+		goto err_kfree_key;
 
-		/* Expand table, if necessary, to make room. */
-		if (ovs_flow_tbl_need_to_expand(table)) {
-			struct flow_table *new_table;
+	ovs_flow_mask_key(&new_flow->key, key, true, &mask);
 
-			new_table = ovs_flow_tbl_expand(table);
-			if (!IS_ERR(new_table)) {
-				rcu_assign_pointer(dp->table, new_table);
-				ovs_flow_tbl_deferred_destroy(table);
-				table = ovsl_dereference(dp->table);
-			}
-		}
+	/* Extract flow identifier. */
+	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+				       key, log);
+	if (error)
+		goto err_kfree_key;
 
-		/* Allocate flow. */
-		flow = ovs_flow_alloc();
-		if (IS_ERR(flow)) {
-			error = PTR_ERR(flow);
-			goto err_unlock_ovs;
-		}
-		clear_stats(flow);
+	/* Validate actions. */
+	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
+				     &new_flow->key, &acts, log);
+	if (error) {
+		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
+		goto err_kfree_key;
+	}
 
-		rcu_assign_pointer(flow->sf_acts, acts);
+	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
+					ufid_flags);
+	if (IS_ERR(reply)) {
+		error = PTR_ERR(reply);
+		goto err_kfree_acts;
+	}
+
+	ovs_lock();
+	dp = get_dp(net, ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+
+	/* Check if this is a duplicate flow */
+	if (ovs_identifier_is_ufid(&new_flow->id))
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
+	if (!flow)
+		flow = ovs_flow_tbl_lookup(&dp->table, key);
+	if (likely(!flow)) {
+		rcu_assign_pointer(new_flow->sf_acts, acts);
 
 		/* Put flow in bucket. */
-		ovs_flow_tbl_insert(table, flow, &key, key_len);
+		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+		if (unlikely(error)) {
+			acts = NULL;
+			goto err_unlock_ovs;
+		}
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-						info->snd_seq,
-						OVS_FLOW_CMD_NEW);
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(new_flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW,
+						       ufid_flags);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
 	} else {
-		/* We found a matching flow. */
 		struct sw_flow_actions *old_acts;
 
 		/* Bail out if we're not allowed to modify an existing flow.
@@ -1295,39 +1105,238 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		 * request.  We also accept NLM_F_EXCL in case that bug ever
 		 * gets fixed.
 		 */
-		error = -EEXIST;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
-		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+							 | NLM_F_EXCL))) {
+			error = -EEXIST;
 			goto err_unlock_ovs;
-
+		}
+		/* The flow identifier has to be the same for flow updates.
+		 * Look for any overlapping flow.
+		 */
+		if (unlikely(!ovs_flow_cmp(flow, &match))) {
+			if (ovs_identifier_is_key(&flow->id))
+				flow = ovs_flow_tbl_lookup_exact(&dp->table,
+								 &match);
+			else /* UFID matches but key is different */
+				flow = NULL;
+			if (!flow) {
+				error = -ENOENT;
+				goto err_unlock_ovs;
+			}
+		}
 		/* Update actions. */
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
-		ovs_flow_deferred_free_acts(old_acts);
-
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-					       info->snd_seq, OVS_FLOW_CMD_NEW);
 
-		/* Clear stats. */
-		if (a[OVS_FLOW_ATTR_CLEAR]) {
-			spin_lock_bh(&flow->lock);
-			clear_stats(flow);
-			spin_unlock_bh(&flow->lock);
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW,
+						       ufid_flags);
+			BUG_ON(error < 0);
 		}
+		ovs_unlock();
+
+		ovs_nla_free_flow_actions_rcu(old_acts);
+		ovs_flow_free(new_flow, false);
 	}
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+
+	kfree(key);
+	return 0;
+
+err_unlock_ovs:
 	ovs_unlock();
+	kfree_skb(reply);
+err_kfree_acts:
+	ovs_nla_free_flow_actions(acts);
+err_kfree_key:
+	kfree(key);
+err_kfree_flow:
+	ovs_flow_free(new_flow, false);
+error:
+	return error;
+}
+
+/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
+static noinline_for_stack
+struct sw_flow_actions *get_flow_actions(struct net *net,
+					 const struct nlattr *a,
+					 const struct sw_flow_key *key,
+					 const struct sw_flow_mask *mask,
+					 bool log)
+{
+	struct sw_flow_actions *acts;
+	struct sw_flow_key masked_key;
+	int error;
 
-	if (!IS_ERR(reply))
-		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
+	ovs_flow_mask_key(&masked_key, key, true, mask);
+	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
+	if (error) {
+		OVS_NLERR(log,
+			  "Actions may not be safe on all matching packets");
+		return ERR_PTR(error);
+	}
+
+	return acts;
+}
+
+/* Factor out match-init and action-copy to avoid
+ * "Wframe-larger-than=1024" warning. Because mask is only
+ * used to get actions, we new a function to save some
+ * stack space.
+ *
+ * If there are not key and action attrs, we return 0
+ * directly. In the case, the caller will also not use the
+ * match as before. If there is action attr, we try to get
+ * actions and save them to *acts. Before returning from
+ * the function, we reset the match->mask pointer. Because
+ * we should not to return match object with dangling reference
+ * to mask.
+ * */
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+			      struct sw_flow_match *match,
+			      struct sw_flow_key *key,
+			      struct nlattr **a,
+			      struct sw_flow_actions **acts,
+			      bool log)
+{
+	struct sw_flow_mask mask;
+	int error = 0;
+
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(match, key, true, &mask);
+		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
+					  a[OVS_FLOW_ATTR_MASK], log);
+		if (error)
+			goto error;
+	}
+
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		if (!a[OVS_FLOW_ATTR_KEY]) {
+			OVS_NLERR(log,
+				  "Flow key attribute not present in set flow.");
+			error = -EINVAL;
+			goto error;
+		}
+
+		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
+					 &mask, log);
+		if (IS_ERR(*acts)) {
+			error = PTR_ERR(*acts);
+			goto error;
+		}
+	}
+
+	/* On success, error is 0. */
+error:
+	match->mask = NULL;
+	return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct sw_flow_key key;
+	struct sw_flow *flow;
+	struct sk_buff *reply = NULL;
+	struct datapath *dp;
+	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
+	struct sw_flow_match match;
+	struct sw_flow_id sfid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+	int error = 0;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
+
+	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
+	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
+		OVS_NLERR(log,
+			  "Flow set message rejected, Key attribute missing.");
+		return -EINVAL;
+	}
+
+	error = ovs_nla_init_match_and_action(net, &match, &key, a,
+					      &acts, log);
+	if (error)
+		goto error;
+
+	if (acts) {
+		/* Can allocate before locking if have acts. */
+		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
+						ufid_flags);
+		if (IS_ERR(reply)) {
+			error = PTR_ERR(reply);
+			goto err_kfree_acts;
+		}
+	}
+
+	ovs_lock();
+	dp = get_dp(net, ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+	/* Check that the flow exists. */
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
 	else
-		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
-				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
+		error = -ENOENT;
+		goto err_unlock_ovs;
+	}
+
+	/* Update actions, if present. */
+	if (likely(acts)) {
+		old_acts = ovsl_dereference(flow->sf_acts);
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_SET,
+						       ufid_flags);
+			BUG_ON(error < 0);
+		}
+	} else {
+		/* Could not alloc without acts before locking. */
+		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+						info, OVS_FLOW_CMD_SET, false,
+						ufid_flags);
+
+		if (IS_ERR(reply)) {
+			error = PTR_ERR(reply);
+			goto err_unlock_ovs;
+		}
+	}
+
+	/* Clear stats. */
+	if (a[OVS_FLOW_ATTR_CLEAR])
+		ovs_flow_stats_clear(flow);
+	ovs_unlock();
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	if (old_acts)
+		ovs_nla_free_flow_actions_rcu(old_acts);
+
 	return 0;
 
 err_unlock_ovs:
 	ovs_unlock();
-err_kfree:
-	kfree(acts);
+	kfree_skb(reply);
+err_kfree_acts:
+	ovs_nla_free_flow_actions(acts);
 error:
 	return error;
 }
@@ -1335,18 +1344,29 @@ error:
 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct net *net = sock_net(skb->sk);
 	struct sw_flow_key key;
 	struct sk_buff *reply;
 	struct sw_flow *flow;
 	struct datapath *dp;
-	struct flow_table *table;
-	int err;
-	int key_len;
-
-	if (!a[OVS_FLOW_ATTR_KEY])
-		return -EINVAL;
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	struct sw_flow_match match;
+	struct sw_flow_id ufid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+	int err = 0;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
+
+	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, true, NULL);
+		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
+					log);
+	} else if (!ufid_present) {
+		OVS_NLERR(log,
+			  "Flow get message rejected, Key attribute missing.");
+		err = -EINVAL;
+	}
 	if (err)
 		return err;
 
@@ -1357,15 +1377,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+	else
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 	if (!flow) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-					info->snd_seq, OVS_FLOW_CMD_NEW);
+	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+					OVS_FLOW_CMD_GET, true, ufid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1381,53 +1403,77 @@ unlock:
 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct net *net = sock_net(skb->sk);
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
-	struct flow_table *table;
+	struct sw_flow_match match;
+	struct sw_flow_id ufid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int err;
-	int key_len;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
+
+	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, true, NULL);
+		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+					NULL, log);
+		if (unlikely(err))
+			return err;
+	}
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp) {
+	if (unlikely(!dp)) {
 		err = -ENODEV;
 		goto unlock;
 	}
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		err = flush_flows(dp);
+	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
+		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
-	if (err)
-		goto unlock;
 
-	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
-	if (!flow) {
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+	else
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_alloc_info(flow);
-	if (!reply) {
-		err = -ENOMEM;
-		goto unlock;
-	}
-
-	ovs_flow_tbl_remove(table, flow);
+	ovs_flow_tbl_remove(&dp->table, flow);
+	ovs_unlock();
 
-	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
-				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
-	BUG_ON(err < 0);
+	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+					&flow->id, info, false, ufid_flags);
+	if (likely(reply)) {
+		if (!IS_ERR(reply)) {
+			rcu_read_lock();	/*To keep RCU checker happy. */
+			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+						     reply, info->snd_portid,
+						     info->snd_seq, 0,
+						     OVS_FLOW_CMD_DEL,
+						     ufid_flags);
+			rcu_read_unlock();
+			if (WARN_ON_ONCE(err < 0)) {
+				kfree_skb(reply);
+				goto out_free;
+			}
 
-	ovs_flow_deferred_free(flow);
-	ovs_unlock();
+			ovs_notify(&dp_flow_genl_family, reply, info);
+		} else {
+			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
+					PTR_ERR(reply));
+		}
+	}
 
-	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
+out_free:
+	ovs_flow_free(flow, true);
 	return 0;
 unlock:
 	ovs_unlock();
@@ -1436,83 +1482,98 @@ unlock:
 
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct table_instance *ti;
 	struct datapath *dp;
-	struct flow_table *table;
+	u32 ufid_flags;
+	int err;
 
-	ovs_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
+				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
+	if (err)
+		return err;
+	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+
+	rcu_read_lock();
+	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (!dp) {
-		ovs_unlock();
+		rcu_read_unlock();
 		return -ENODEV;
 	}
 
-	table = ovsl_dereference(dp->table);
-
+	ti = rcu_dereference(dp->table.ti);
 	for (;;) {
 		struct sw_flow *flow;
 		u32 bucket, obj;
 
 		bucket = cb->args[0];
 		obj = cb->args[1];
-		flow = ovs_flow_tbl_next(table, &bucket, &obj);
+		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
 		if (!flow)
 			break;
 
-		if (ovs_flow_cmd_fill_info(flow, dp, skb,
+		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW) < 0)
+					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
 		cb->args[1] = obj;
 	}
-	ovs_unlock();
+	rcu_read_unlock();
 	return skb->len;
 }
 
-static struct genl_ops dp_flow_genl_ops[] = {
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
+	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
+};
+
+static const struct genl_small_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_flow_cmd_del
 	},
 	{ .cmd = OVS_FLOW_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = flow_policy,
 	  .doit = ovs_flow_cmd_get,
 	  .dumpit = ovs_flow_cmd_dump
 	},
 	{ .cmd = OVS_FLOW_CMD_SET,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .doit = ovs_flow_cmd_set,
 	},
 };
 
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
-	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-};
-
-static struct genl_family dp_datapath_genl_family = {
-	.id = GENL_ID_GENERATE,
+static struct genl_family dp_flow_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_DATAPATH_FAMILY,
-	.version = OVS_DATAPATH_VERSION,
-	.maxattr = OVS_DP_ATTR_MAX,
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX,
+	.policy = flow_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
-	.name = OVS_DATAPATH_MCGROUP
+	.small_ops = dp_flow_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
+	.resv_start_op = OVS_FLOW_CMD_SET + 1,
+	.mcgrps = &ovs_dp_flow_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
 static size_t ovs_dp_cmd_msg_size(void)
@@ -1520,36 +1581,61 @@ static size_t ovs_dp_cmd_msg_size(void)
 	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
 
 	msgsize += nla_total_size(IFNAMSIZ);
-	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
+	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
+	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
+	msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
 
 	return msgsize;
 }
 
+/* Called with ovs_mutex. */
 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 				u32 portid, u32 seq, u32 flags, u8 cmd)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_dp_stats dp_stats;
-	int err;
+	struct ovs_dp_megaflow_stats dp_megaflow_stats;
+	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
+	int err, pids_len;
 
 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
-				   flags, cmd);
+				 flags, cmd);
 	if (!ovs_header)
 		goto error;
 
 	ovs_header->dp_ifindex = get_dpifindex(dp);
 
-	rcu_read_lock();
 	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
-	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
 
-	get_dp_stats(dp, &dp_stats);
-	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
+	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
+	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
+			  &dp_stats, OVS_DP_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
+			  sizeof(struct ovs_dp_megaflow_stats),
+			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
 		goto nla_put_failure;
 
-	return genlmsg_end(skb, ovs_header);
+	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+			ovs_flow_tbl_masks_cache_size(&dp->table)))
+		goto nla_put_failure;
+
+	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
+		pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
+		if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
+			goto nla_put_failure;
+	}
+
+	genlmsg_end(skb, ovs_header);
+	return 0;
 
 nla_put_failure:
 	genlmsg_cancel(skb, ovs_header);
@@ -1557,27 +1643,14 @@ error:
 	return -EMSGSIZE;
 }
 
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
-					     u32 seq, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_alloc_info(void)
 {
-	struct sk_buff *skb;
-	int retval;
-
-	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
-	if (retval < 0) {
-		kfree_skb(skb);
-		return ERR_PTR(retval);
-	}
-	return skb;
+	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
 }
 
-/* Called with ovs_mutex. */
+/* Called with rcu_read_lock or ovs_mutex. */
 static struct datapath *lookup_datapath(struct net *net,
-					struct ovs_header *ovs_header,
+					const struct ovs_header *ovs_header,
 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
 {
 	struct datapath *dp;
@@ -1587,14 +1660,155 @@ static struct datapath *lookup_datapath(struct net *net,
 	else {
 		struct vport *vport;
 
-		rcu_read_lock();
 		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
 		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
-		rcu_read_unlock();
 	}
 	return dp ? dp : ERR_PTR(-ENODEV);
 }
 
+static void ovs_dp_reset_user_features(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct datapath *dp;
+
+	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+			     info->attrs);
+	if (IS_ERR(dp))
+		return;
+
+	pr_warn("%s: Dropping previously announced user features\n",
+		ovs_dp_name(dp));
+	dp->user_features = 0;
+}
+
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+			      const struct nlattr *ids)
+{
+	struct dp_nlsk_pids *old, *dp_nlsk_pids;
+
+	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+		return -EINVAL;
+
+	old = ovsl_dereference(dp->upcall_portids);
+
+	dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
+			       GFP_KERNEL);
+	if (!dp_nlsk_pids)
+		return -ENOMEM;
+
+	dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
+	nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
+
+	rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
+
+	kfree_rcu(old, rcu);
+
+	return 0;
+}
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
+{
+	struct dp_nlsk_pids *dp_nlsk_pids;
+
+	dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
+
+	if (dp_nlsk_pids) {
+		if (cpu_id < dp_nlsk_pids->n_pids) {
+			return dp_nlsk_pids->pids[cpu_id];
+		} else if (dp_nlsk_pids->n_pids > 0 &&
+			   cpu_id >= dp_nlsk_pids->n_pids) {
+			/* If the number of netlink PIDs is mismatched with
+			 * the number of CPUs as seen by the kernel, log this
+			 * and send the upcall to an arbitrary socket (0) in
+			 * order to not drop packets
+			 */
+			pr_info_ratelimited("cpu_id mismatch with handler threads");
+			return dp_nlsk_pids->pids[cpu_id %
+						  dp_nlsk_pids->n_pids];
+		} else {
+			return 0;
+		}
+	} else {
+		return 0;
+	}
+}
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+{
+	u32 user_features = 0, old_features = dp->user_features;
+	int err;
+
+	if (a[OVS_DP_ATTR_USER_FEATURES]) {
+		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+				      OVS_DP_F_UNALIGNED |
+				      OVS_DP_F_TC_RECIRC_SHARING |
+				      OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
+			return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+			return -EOPNOTSUPP;
+#endif
+	}
+
+	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+		int err;
+		u32 cache_size;
+
+		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+		if (err)
+			return err;
+	}
+
+	dp->user_features = user_features;
+
+	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
+	    a[OVS_DP_ATTR_PER_CPU_PIDS]) {
+		/* Upcall Netlink Port IDs have been updated */
+		err = ovs_dp_set_upcall_portids(dp,
+						a[OVS_DP_ATTR_PER_CPU_PIDS]);
+		if (err)
+			return err;
+	}
+
+	if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+	    !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+		tc_skb_ext_tc_enable();
+	else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+		 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
+		tc_skb_ext_tc_disable();
+
+	return 0;
+}
+
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+	if (!dp->stats_percpu)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+	int i;
+
+	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+				  sizeof(struct hlist_head),
+				  GFP_KERNEL);
+	if (!dp->ports)
+		return -ENOMEM;
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dp->ports[i]);
+
+	return 0;
+}
+
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
@@ -1603,42 +1817,39 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct vport *vport;
 	struct ovs_net *ovs_net;
-	int err, i;
+	int err;
 
 	err = -EINVAL;
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
 		goto err;
 
-	ovs_lock();
+	reply = ovs_dp_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
 
 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_unlock_ovs;
+		goto err_destroy_reply;
 
-	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
+	ovs_dp_set_net(dp, sock_net(skb->sk));
 
 	/* Allocate table. */
-	err = -ENOMEM;
-	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
-	if (!dp->table)
-		goto err_free_dp;
+	err = ovs_flow_tbl_init(&dp->table);
+	if (err)
+		goto err_destroy_dp;
 
-	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
-	if (!dp->stats_percpu) {
-		err = -ENOMEM;
+	err = ovs_dp_stats_init(dp);
+	if (err)
 		goto err_destroy_table;
-	}
 
-	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
-			GFP_KERNEL);
-	if (!dp->ports) {
-		err = -ENOMEM;
-		goto err_destroy_percpu;
-	}
+	err = ovs_dp_vport_init(dp);
+	if (err)
+		goto err_destroy_stats;
 
-	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->ports[i]);
+	err = ovs_meters_init(dp);
+	if (err)
+		goto err_destroy_ports;
 
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1646,7 +1857,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.options = NULL;
 	parms.dp = dp;
 	parms.port_no = OVSP_LOCAL;
-	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
+	parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
+
+	/* So far only local changes have been made, now need the lock. */
+	ovs_lock();
+
+	err = ovs_dp_change(dp, a);
+	if (err)
+		goto err_unlock_and_destroy_meters;
 
 	vport = new_vport(&parms);
 	if (IS_ERR(vport)) {
@@ -1654,36 +1873,45 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		if (err == -EBUSY)
 			err = -EEXIST;
 
-		goto err_destroy_ports_array;
+		if (err == -EEXIST) {
+			/* An outdated user space instance that does not understand
+			 * the concept of user_features has attempted to create a new
+			 * datapath and is likely to reuse it. Drop all user features.
+			 */
+			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
+				ovs_dp_reset_user_features(skb, info);
+		}
+
+		goto err_destroy_portids;
 	}
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto err_destroy_local_port;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
 
 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
-	list_add_tail(&dp->list_node, &ovs_net->dps);
+	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
 
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_local_port:
-	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
-err_destroy_ports_array:
+err_destroy_portids:
+	kfree(rcu_dereference_raw(dp->upcall_portids));
+err_unlock_and_destroy_meters:
+	ovs_unlock();
+	ovs_meters_exit(dp);
+err_destroy_ports:
 	kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
-	ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
-err_free_dp:
-	release_net(ovs_dp_get_net(dp));
+	ovs_flow_tbl_destroy(&dp->table);
+err_destroy_dp:
 	kfree(dp);
-err_unlock_ovs:
-	ovs_unlock();
+err_destroy_reply:
+	kfree_skb(reply);
 err:
 	return err;
 }
@@ -1691,8 +1919,12 @@ err:
 /* Called with ovs_mutex. */
 static void __dp_destroy(struct datapath *dp)
 {
+	struct flow_table *table = &dp->table;
 	int i;
 
+	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+		tc_skb_ext_tc_disable();
+
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
 		struct hlist_node *n;
@@ -1702,13 +1934,21 @@ static void __dp_destroy(struct datapath *dp)
 				ovs_dp_detach_port(vport);
 	}
 
-	list_del(&dp->list_node);
+	list_del_rcu(&dp->list_node);
 
 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
-	 * all port in datapath are destroyed first before freeing datapath.
+	 * all ports in datapath are destroyed first before freeing datapath.
 	 */
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
+	/* Flush sw_flow in the tables. RCU cb only releases resource
+	 * such as dp, ports and tables. That may avoid some issues
+	 * such as RCU usage warning.
+	 */
+	table_instance_flow_flush(table, ovsl_dereference(table->ti),
+				  ovsl_dereference(table->ufid_ti));
+
+	/* RCU destroy the ports, meters and flow tables. */
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
@@ -1718,26 +1958,31 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
-	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+			     info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_DEL);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto unlock;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_DEL);
+	BUG_ON(err < 0);
 
 	__dp_destroy(dp);
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1747,28 +1992,33 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
-	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+			     info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
-				ovs_dp_datapath_multicast_group.id, err);
-		err = 0;
-		goto unlock;
-	}
+	err = ovs_dp_change(dp, info->attrs);
+	if (err)
+		goto err_unlock_free;
+
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_SET);
+	BUG_ON(err < 0);
 
 	ovs_unlock();
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1778,25 +2028,27 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
-	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+			     info->attrs);
 	if (IS_ERR(dp)) {
 		err = PTR_ERR(dp);
-		goto unlock;
-	}
-
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		goto unlock;
+		goto err_unlock_free;
 	}
-
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_GET);
+	BUG_ON(err < 0);
 	ovs_unlock();
+
 	return genlmsg_reply(reply, info);
 
-unlock:
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1812,7 +2064,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (i >= skip &&
 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 OVS_DP_CMD_NEW) < 0)
+					 OVS_DP_CMD_GET) < 0)
 			break;
 		i++;
 	}
@@ -1823,59 +2075,63 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static struct genl_ops dp_datapath_genl_ops[] = {
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
+		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
+	[OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
+};
+
+static const struct genl_small_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_dp_cmd_new
 	},
 	{ .cmd = OVS_DP_CMD_DEL,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_dp_cmd_del
 	},
 	{ .cmd = OVS_DP_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = datapath_policy,
 	  .doit = ovs_dp_cmd_get,
 	  .dumpit = ovs_dp_cmd_dump
 	},
 	{ .cmd = OVS_DP_CMD_SET,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = datapath_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_dp_cmd_set,
 	},
 };
 
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
-	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
-	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-};
-
-static struct genl_family dp_vport_genl_family = {
-	.id = GENL_ID_GENERATE,
+static struct genl_family dp_datapath_genl_family __ro_after_init = {
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_VPORT_FAMILY,
-	.version = OVS_VPORT_VERSION,
-	.maxattr = OVS_VPORT_ATTR_MAX,
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX,
+	.policy = datapath_policy,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-struct genl_multicast_group ovs_dp_vport_multicast_group = {
-	.name = OVS_VPORT_MCGROUP
+	.small_ops = dp_datapath_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+	.resv_start_op = OVS_DP_CMD_SET + 1,
+	.mcgrps = &ovs_dp_datapath_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
-				   u32 portid, u32 seq, u32 flags, u8 cmd)
+				   struct net *net, u32 portid, u32 seq,
+				   u32 flags, u8 cmd, gfp_t gfp)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_vport_stats vport_stats;
+	struct net *net_vport;
 	int err;
 
 	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
@@ -1887,21 +2143,42 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 
 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
-	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
-	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
+	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
+			   ovs_vport_name(vport)) ||
+	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
 		goto nla_put_failure;
 
+	rcu_read_lock();
+	net_vport = dev_net_rcu(vport->dev);
+	if (!net_eq(net, net_vport)) {
+		int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
+
+		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+			goto nla_put_failure_unlock;
+	}
+	rcu_read_unlock();
+
 	ovs_vport_get_stats(vport, &vport_stats);
-	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
-		    &vport_stats))
+	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
+			  sizeof(struct ovs_vport_stats), &vport_stats,
+			  OVS_VPORT_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (ovs_vport_get_upcall_stats(vport, skb))
+		goto nla_put_failure;
+
+	if (ovs_vport_get_upcall_portids(vport, skb))
 		goto nla_put_failure;
 
 	err = ovs_vport_get_options(vport, skb);
 	if (err == -EMSGSIZE)
 		goto error;
 
-	return genlmsg_end(skb, ovs_header);
+	genlmsg_end(skb, ovs_header);
+	return 0;
 
+nla_put_failure_unlock:
+	rcu_read_unlock();
 nla_put_failure:
 	err = -EMSGSIZE;
 error:
@@ -1909,18 +2186,24 @@ error:
 	return err;
 }
 
-/* Called with ovs_mutex or RCU read lock. */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
-					 u32 seq, u8 cmd)
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+					 u32 portid, u32 seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int retval;
 
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
+	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+					 GFP_KERNEL);
 	BUG_ON(retval < 0);
 
 	return skb;
@@ -1928,12 +2211,14 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 
 /* Called with ovs_mutex or RCU read lock. */
 static struct vport *lookup_vport(struct net *net,
-				  struct ovs_header *ovs_header,
+				  const struct ovs_header *ovs_header,
 				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
 {
 	struct datapath *dp;
 	struct vport *vport;
 
+	if (a[OVS_VPORT_ATTR_IFINDEX])
+		return ERR_PTR(-EOPNOTSUPP);
 	if (a[OVS_VPORT_ATTR_NAME]) {
 		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
 		if (!vport)
@@ -1958,46 +2243,89 @@ static struct vport *lookup_vport(struct net *net,
 		return vport;
 	} else
 		return ERR_PTR(-EINVAL);
+
+}
+
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
+{
+	unsigned int dev_headroom, max_headroom = 0;
+	struct net_device *dev;
+	struct vport *vport;
+	int i;
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+					 lockdep_ovsl_is_held()) {
+			dev = vport->dev;
+			dev_headroom = netdev_get_fwd_headroom(dev);
+			if (dev_headroom > max_headroom)
+				max_headroom = dev_headroom;
+		}
+	}
+
+	return max_headroom;
+}
+
+/* Called with ovs_mutex */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+	struct vport *vport;
+	int i;
+
+	dp->max_headroom = new_headroom;
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
+					 lockdep_ovsl_is_held())
+			netdev_set_rx_headroom(vport->dev, new_headroom);
+	}
 }
 
 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
 	struct vport_parms parms;
 	struct sk_buff *reply;
 	struct vport *vport;
 	struct datapath *dp;
+	unsigned int new_headroom;
 	u32 port_no;
 	int err;
 
-	err = -EINVAL;
 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
-		goto exit;
+		return -EINVAL;
+
+	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+
+	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
+		return -EOPNOTSUPP;
+
+	port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
+	if (port_no >= DP_MAX_PORTS)
+		return -EFBIG;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
 
 	ovs_lock();
+restart:
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
-		goto exit_unlock;
-
-	if (a[OVS_VPORT_ATTR_PORT_NO]) {
-		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-
-		err = -EFBIG;
-		if (port_no >= DP_MAX_PORTS)
-			goto exit_unlock;
+		goto exit_unlock_free;
 
+	if (port_no) {
 		vport = ovs_vport_ovsl(dp, port_no);
 		err = -EBUSY;
 		if (vport)
-			goto exit_unlock;
+			goto exit_unlock_free;
 	} else {
 		for (port_no = 1; ; port_no++) {
 			if (port_no >= DP_MAX_PORTS) {
 				err = -EFBIG;
-				goto exit_unlock;
+				goto exit_unlock_free;
 			}
 			vport = ovs_vport_ovsl(dp, port_no);
 			if (!vport)
@@ -2006,31 +2334,41 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
-	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
 	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
 	parms.dp = dp;
 	parms.port_no = port_no;
-	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
+	parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
+						    0);
 
 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
-	if (IS_ERR(vport))
-		goto exit_unlock;
-
-	err = 0;
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
-					 OVS_VPORT_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		ovs_dp_detach_port(vport);
-		goto exit_unlock;
+	if (IS_ERR(vport)) {
+		if (err == -EAGAIN)
+			goto restart;
+		goto exit_unlock_free;
 	}
 
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
+
+	new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+	if (new_headroom > dp->max_headroom)
+		ovs_update_headroom(dp, new_headroom);
+	else
+		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
+
+	BUG_ON(err < 0);
+	ovs_unlock();
+
+	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
 
-exit_unlock:
+exit_unlock_free:
 	ovs_unlock();
-exit:
+	kfree_skb(reply);
 	return err;
 }
 
@@ -2041,111 +2379,135 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
-	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
+		goto exit_unlock_free;
 
 	if (a[OVS_VPORT_ATTR_TYPE] &&
 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
 		err = -EINVAL;
-		goto exit_unlock;
-	}
-
-	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!reply) {
-		err = -ENOMEM;
-		goto exit_unlock;
+		goto exit_unlock_free;
 	}
 
 	if (a[OVS_VPORT_ATTR_OPTIONS]) {
 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
 		if (err)
-			goto exit_free;
+			goto exit_unlock_free;
 	}
 
-	if (a[OVS_VPORT_ATTR_UPCALL_PID])
-		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
-	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
-				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
+		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
+
+		err = ovs_vport_set_upcall_portids(vport, ids);
+		if (err)
+			goto exit_unlock_free;
+	}
+
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_SET, GFP_KERNEL);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
-	return 0;
-
-	rtnl_unlock();
+	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_free:
-	kfree_skb(reply);
-exit_unlock:
+exit_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
+	bool update_headroom = false;
 	struct nlattr **a = info->attrs;
 	struct sk_buff *reply;
+	struct datapath *dp;
 	struct vport *vport;
+	unsigned int new_headroom;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
-	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
+		goto exit_unlock_free;
 
 	if (vport->port_no == OVSP_LOCAL) {
 		err = -EINVAL;
-		goto exit_unlock;
+		goto exit_unlock_free;
 	}
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
-					 info->snd_seq, OVS_VPORT_CMD_DEL);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto exit_unlock;
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
+	BUG_ON(err < 0);
+
+	/* the vport deletion may trigger dp headroom update */
+	dp = vport->dp;
+	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
+		update_headroom = true;
 
-	err = 0;
+	netdev_reset_rx_headroom(vport->dev);
 	ovs_dp_detach_port(vport);
 
-	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	if (update_headroom) {
+		new_headroom = ovs_get_max_headroom(dp);
 
-exit_unlock:
+		if (new_headroom < dp->max_headroom)
+			ovs_update_headroom(dp, new_headroom);
+	}
 	ovs_unlock();
+
+	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
+
+exit_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
 	struct sk_buff *reply;
 	struct vport *vport;
 	int err;
 
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
 	rcu_read_lock();
 	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock;
-
-	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
-					 info->snd_seq, OVS_VPORT_CMD_NEW);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto exit_unlock;
-
+		goto exit_unlock_free;
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
+	BUG_ON(err < 0);
 	rcu_read_unlock();
 
 	return genlmsg_reply(reply, info);
 
-exit_unlock:
+exit_unlock_free:
 	rcu_read_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -2156,11 +2518,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int bucket = cb->args[0], skip = cb->args[1];
 	int i, j = 0;
 
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp)
-		return -ENODEV;
-
 	rcu_read_lock();
+	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
 	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
 
@@ -2168,10 +2531,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
 			if (j >= skip &&
 			    ovs_vport_cmd_fill_info(vport, skb,
+						    sock_net(skb->sk),
 						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    NLM_F_MULTI,
-						    OVS_VPORT_CMD_NEW) < 0)
+						    OVS_VPORT_CMD_GET,
+						    GFP_ATOMIC) < 0)
 				goto out;
 
 			j++;
@@ -2187,50 +2552,84 @@ out:
 	return skb->len;
 }
 
-static struct genl_ops dp_vport_genl_ops[] = {
+static void ovs_dp_masks_rebalance(struct work_struct *work)
+{
+	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
+					       masks_rebalance.work);
+	struct datapath *dp;
+
+	ovs_lock();
+
+	list_for_each_entry(dp, &ovs_net->dps, list_node)
+		ovs_flow_masks_rebalance(&dp->table);
+
+	ovs_unlock();
+
+	schedule_delayed_work(&ovs_net->masks_rebalance,
+			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+}
+
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
+	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+	[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
+	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
+	[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
+};
+
+static const struct genl_small_ops dp_vport_genl_ops[] = {
 	{ .cmd = OVS_VPORT_CMD_NEW,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_vport_cmd_new
 	},
 	{ .cmd = OVS_VPORT_CMD_DEL,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_vport_cmd_del
 	},
 	{ .cmd = OVS_VPORT_CMD_GET,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 	  .flags = 0,		    /* OK for unprivileged users. */
-	  .policy = vport_policy,
 	  .doit = ovs_vport_cmd_get,
 	  .dumpit = ovs_vport_cmd_dump
 	},
 	{ .cmd = OVS_VPORT_CMD_SET,
-	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-	  .policy = vport_policy,
+	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .doit = ovs_vport_cmd_set,
 	},
 };
 
-struct genl_family_and_ops {
-	struct genl_family *family;
-	struct genl_ops *ops;
-	int n_ops;
-	struct genl_multicast_group *group;
+struct genl_family dp_vport_genl_family __ro_after_init = {
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_VPORT_FAMILY,
+	.version = OVS_VPORT_VERSION,
+	.maxattr = OVS_VPORT_ATTR_MAX,
+	.policy = vport_policy,
+	.netnsok = true,
+	.parallel_ops = true,
+	.small_ops = dp_vport_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
+	.resv_start_op = OVS_VPORT_CMD_SET + 1,
+	.mcgrps = &ovs_dp_vport_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
 };
 
-static const struct genl_family_and_ops dp_genl_families[] = {
-	{ &dp_datapath_genl_family,
-	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
-	  &ovs_dp_datapath_multicast_group },
-	{ &dp_vport_genl_family,
-	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
-	  &ovs_dp_vport_multicast_group },
-	{ &dp_flow_genl_family,
-	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
-	  &ovs_dp_flow_multicast_group },
-	{ &dp_packet_genl_family,
-	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
-	  NULL },
+static struct genl_family * const dp_genl_families[] = {
+	&dp_datapath_genl_family,
+	&dp_vport_genl_family,
+	&dp_flow_genl_family,
+	&dp_packet_genl_family,
+	&dp_meter_genl_family,
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+	&dp_ct_limit_genl_family,
+#endif
 };
 
 static void dp_unregister_genl(int n_families)
@@ -2238,84 +2637,98 @@ static void dp_unregister_genl(int n_families)
 	int i;
 
 	for (i = 0; i < n_families; i++)
-		genl_unregister_family(dp_genl_families[i].family);
+		genl_unregister_family(dp_genl_families[i]);
 }
 
-static int dp_register_genl(void)
+static int __init dp_register_genl(void)
 {
-	int n_registered;
 	int err;
 	int i;
 
-	n_registered = 0;
 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
-		const struct genl_family_and_ops *f = &dp_genl_families[i];
 
-		err = genl_register_family_with_ops(f->family, f->ops,
-						    f->n_ops);
+		err = genl_register_family(dp_genl_families[i]);
 		if (err)
 			goto error;
-		n_registered++;
-
-		if (f->group) {
-			err = genl_register_mc_group(f->family, f->group);
-			if (err)
-				goto error;
-		}
 	}
 
 	return 0;
 
 error:
-	dp_unregister_genl(n_registered);
+	dp_unregister_genl(i);
 	return err;
 }
 
-static void rehash_flow_table(struct work_struct *work)
-{
-	struct datapath *dp;
-	struct net *net;
-
-	ovs_lock();
-	rtnl_lock();
-	for_each_net(net) {
-		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
-		list_for_each_entry(dp, &ovs_net->dps, list_node) {
-			struct flow_table *old_table = ovsl_dereference(dp->table);
-			struct flow_table *new_table;
-
-			new_table = ovs_flow_tbl_rehash(old_table);
-			if (!IS_ERR(new_table)) {
-				rcu_assign_pointer(dp->table, new_table);
-				ovs_flow_tbl_deferred_destroy(old_table);
-			}
-		}
-	}
-	rtnl_unlock();
-	ovs_unlock();
-	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
-}
-
 static int __net_init ovs_init_net(struct net *net)
 {
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	int err;
 
 	INIT_LIST_HEAD(&ovs_net->dps);
 	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+
+	err = ovs_ct_init(net);
+	if (err)
+		return err;
+
+	schedule_delayed_work(&ovs_net->masks_rebalance,
+			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
 	return 0;
 }
 
-static void __net_exit ovs_exit_net(struct net *net)
+static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
+					    struct list_head *head)
 {
-	struct datapath *dp, *dp_next;
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	struct datapath *dp;
+
+	list_for_each_entry(dp, &ovs_net->dps, list_node) {
+		int i;
+
+		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+			struct vport *vport;
+
+			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
+				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
+					continue;
+
+				if (dev_net(vport->dev) == dnet)
+					list_add(&vport->detach_list, head);
+			}
+		}
+	}
+}
+
+static void __net_exit ovs_exit_net(struct net *dnet)
+{
+	struct datapath *dp, *dp_next;
+	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
+	struct vport *vport, *vport_next;
+	struct net *net;
+	LIST_HEAD(head);
 
 	ovs_lock();
+
+	ovs_ct_exit(dnet);
+
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
+
+	down_read(&net_rwsem);
+	for_each_net(net)
+		list_vports_from_net(net, dnet, &head);
+	up_read(&net_rwsem);
+
+	/* Detach all vports from given namespace. */
+	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
+		list_del(&vport->detach_list);
+		ovs_dp_detach_port(vport);
+	}
+
 	ovs_unlock();
 
+	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
 	cancel_work_sync(&ovs_net->dp_notify_work);
 }
 
@@ -2326,18 +2739,60 @@ static struct pernet_operations ovs_net_ops = {
 	.size = sizeof(struct ovs_net),
 };
 
+static const char * const ovs_drop_reasons[] = {
+#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
+	OVS_DROP_REASONS(S)
+#undef S
+};
+
+static struct drop_reason_list drop_reason_list_ovs = {
+	.reasons = ovs_drop_reasons,
+	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
+};
+
+static int __init ovs_alloc_percpu_storage(void)
+{
+	unsigned int cpu;
+
+	ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+	if (!ovs_pcpu_storage)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct ovs_pcpu_storage *ovs_pcpu;
+
+		ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+		local_lock_init(&ovs_pcpu->bh_lock);
+	}
+	return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+	free_percpu(ovs_pcpu_storage);
+}
+
 static int __init dp_init(void)
 {
 	int err;
 
-	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
+		     sizeof_field(struct sk_buff, cb));
 
 	pr_info("Open vSwitch switching datapath\n");
 
-	err = ovs_flow_init();
+	err = ovs_alloc_percpu_storage();
+	if (err)
+		goto error;
+
+	err = ovs_internal_dev_rtnl_link_register();
 	if (err)
 		goto error;
 
+	err = ovs_flow_init();
+	if (err)
+		goto error_unreg_rtnl_link;
+
 	err = ovs_vport_init();
 	if (err)
 		goto error_flow_exit;
@@ -2350,14 +2805,21 @@ static int __init dp_init(void)
 	if (err)
 		goto error_netns_exit;
 
+	err = ovs_netdev_init();
+	if (err)
+		goto error_unreg_notifier;
+
 	err = dp_register_genl();
 	if (err < 0)
-		goto error_unreg_notifier;
+		goto error_unreg_netdev;
 
-	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+				     &drop_reason_list_ovs);
 
 	return 0;
 
+error_unreg_netdev:
+	ovs_netdev_exit();
 error_unreg_notifier:
 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
 error_netns_exit:
@@ -2366,19 +2828,25 @@ error_vport_exit:
 	ovs_vport_exit();
 error_flow_exit:
 	ovs_flow_exit();
+error_unreg_rtnl_link:
+	ovs_internal_dev_rtnl_link_unregister();
 error:
+	ovs_free_percpu_storage();
 	return err;
 }
 
 static void dp_cleanup(void)
 {
-	cancel_delayed_work_sync(&rehash_flow_wq);
 	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+	ovs_netdev_exit();
 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
 	unregister_pernet_device(&ovs_net_ops);
+	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
 	rcu_barrier();
 	ovs_vport_exit();
 	ovs_flow_exit();
+	ovs_internal_dev_rtnl_link_unregister();
+	ovs_free_percpu_storage();
 }
 
 module_init(dp_init);
@@ -2386,3 +2854,9 @@ module_exit(dp_cleanup);
 
 MODULE_DESCRIPTION("Open vSwitch switching datapath");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a91486484916..db0c3e69d66c 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2014 Nicira, Inc.
  */
 
 #ifndef DATAPATH_H
@@ -25,43 +12,75 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/u64_stats_sync.h>
+#include <net/ip_tunnels.h>
+#include <net/mpls.h>
 
+#include "conntrack.h"
 #include "flow.h"
-#include "vport.h"
-
-#define DP_MAX_PORTS           USHRT_MAX
-#define DP_VPORT_HASH_BUCKETS  1024
+#include "flow_table.h"
+#include "meter.h"
+#include "vport-internal_dev.h"
 
-#define SAMPLE_ACTION_DEPTH 3
+#define DP_MAX_PORTS                USHRT_MAX
+#define DP_VPORT_HASH_BUCKETS       1024
+#define DP_MASKS_REBALANCE_INTERVAL 4000
 
 /**
  * struct dp_stats_percpu - per-cpu packet processing statistics for a given
  * datapath.
  * @n_hit: Number of received packets for which a matching flow was found in
  * the flow table.
- * @n_miss: Number of received packets that had no matching flow in the flow
- * table.  The sum of @n_hit and @n_miss is the number of packets that have
+ * @n_missed: Number of received packets that had no matching flow in the flow
+ * table.  The sum of @n_hit and @n_missed is the number of packets that have
  * been received by the datapath.
  * @n_lost: Number of received packets that had no matching flow in the flow
  * table that could not be sent to userspace (normally due to an overflow in
  * one of the datapath's queues).
+ * @n_mask_hit: Number of masks looked up for flow match.
+ *   @n_mask_hit / (@n_hit + @n_missed)  will be the average masks looked
+ *   up per packet.
+ * @n_cache_hit: The number of received packets that had their mask found using
+ * the mask cache.
+ * @syncp: Synchronization point for 64bit counters.
  */
 struct dp_stats_percpu {
 	u64 n_hit;
 	u64 n_missed;
 	u64 n_lost;
-	struct u64_stats_sync sync;
+	u64 n_mask_hit;
+	u64 n_cache_hit;
+	struct u64_stats_sync syncp;
+};
+
+/**
+ * struct dp_nlsk_pids - array of netlink portids of for a datapath.
+ *                       This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ *                       is enabled and must be protected by rcu.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_pids: Size of @pids array.
+ * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
+ *       that miss the flow table.
+ */
+struct dp_nlsk_pids {
+	struct rcu_head rcu;
+	u32 n_pids;
+	u32 pids[];
 };
 
 /**
  * struct datapath - datapath for flow-based packet switching
  * @rcu: RCU callback head for deferred destruction.
  * @list_node: Element in global 'dps' list.
- * @table: Current flow table.  Protected by ovs_mutex and RCU.
+ * @table: flow table.
  * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
  * ovs_mutex and RCU.
  * @stats_percpu: Per-CPU datapath statistics.
  * @net: Reference to net namespace.
+ * @user_features: Bitmap of enabled %OVS_DP_F_* features.
+ * @max_headroom: The maximum headroom of all vports in this datapath; it will
+ * be used by all the internal vports in this dp.
+ * @meter_tbl: Meter table.
+ * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
  *
  * Context: See the comment on locking at the top of datapath.c for additional
  * locking information.
@@ -71,7 +90,7 @@ struct datapath {
 	struct list_head list_node;
 
 	/* Flow table. */
-	struct flow_table __rcu *table;
+	struct flow_table table;
 
 	/* Switch ports. */
 	struct hlist_head *ports;
@@ -79,53 +98,147 @@ struct datapath {
 	/* Stats. */
 	struct dp_stats_percpu __percpu *stats_percpu;
 
-#ifdef CONFIG_NET_NS
 	/* Network namespace ref. */
-	struct net *net;
-#endif
+	possible_net_t net;
+
+	u32 user_features;
+
+	u32 max_headroom;
+
+	/* Switch meters. */
+	struct dp_meter_table meter_tbl;
+
+	struct dp_nlsk_pids __rcu *upcall_portids;
 };
 
 /**
  * struct ovs_skb_cb - OVS data in skb CB
- * @flow: The flow associated with this packet.  May be %NULL if no flow.
- * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
- * packet is not being tunneled.
+ * @input_vport: The original vport packet came in on. This value is cached
+ * when a packet is received by OVS.
+ * @mru: The maximum received fragement size; 0 if the packet is not
+ * fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
+ * @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
+ * @upcall_pid: Netlink socket PID to use for sending this packet to userspace;
+ * 0 means "not set" and default per-CPU or per-vport dispatch should be used.
  */
 struct ovs_skb_cb {
-	struct sw_flow		*flow;
-	struct ovs_key_ipv4_tunnel  *tun_key;
+	struct vport		*input_vport;
+	u16			mru;
+	u16			acts_origlen;
+	u32			cutlen;
+	u32			probability;
+	u32			upcall_pid;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
 /**
- * struct dp_upcall - metadata to include with a packet to send to userspace
+ * struct dp_upcall_info - metadata to include with a packet sent to userspace
  * @cmd: One of %OVS_PACKET_CMD_*.
- * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
  * @userdata: If nonnull, its variable-length value is passed to userspace as
  * %OVS_PACKET_ATTR_USERDATA.
- * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
- * packet is sent and the packet is accounted in the datapath's @n_lost
+ * @actions: If nonnull, its variable-length value is passed to userspace as
+ * %OVS_PACKET_ATTR_ACTIONS.
+ * @actions_len: The length of the @actions.
+ * @portid: Netlink portid to which packet should be sent.  If @portid is 0
+ * then no packet is sent and the packet is accounted in the datapath's @n_lost
  * counter.
+ * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
+ * @mru: If not zero, Maximum received IP fragment size.
  */
 struct dp_upcall_info {
-	u8 cmd;
-	const struct sw_flow_key *key;
+	struct ip_tunnel_info *egress_tun_info;
 	const struct nlattr *userdata;
+	const struct nlattr *actions;
+	int actions_len;
 	u32 portid;
+	u8 cmd;
+	u16 mru;
 };
 
 /**
  * struct ovs_net - Per net-namespace data for ovs.
  * @dps: List of datapaths to enable dumping them all out.
  * Protected by genl_mutex.
+ * @dp_notify_work: A work notifier to handle port unregistering.
+ * @masks_rebalance: A work to periodically optimize flow table caches.
+ * @ct_limit_info: A hash table of conntrack zone connection limits.
+ * @xt_label: Whether connlables are configured for the network or not.
  */
 struct ovs_net {
 	struct list_head dps;
 	struct work_struct dp_notify_work;
-	struct vport_net vport_net;
+	struct delayed_work masks_rebalance;
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+	struct ovs_ct_limit_info *ct_limit_info;
+#endif
+	bool xt_label;
+};
+
+#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+	unsigned long dst;
+	struct vport *vport;
+	struct ovs_skb_cb cb;
+	__be16 inner_protocol;
+	u16 network_offset;	/* valid only for MPLS */
+	u16 vlan_tci;
+	__be16 vlan_proto;
+	unsigned int l2_len;
+	u8 mac_proto;
+	u8 l2_data[MAX_L2_LEN];
+};
+
+struct deferred_action {
+	struct sk_buff *skb;
+	const struct nlattr *actions;
+	int actions_len;
+
+	/* Store pkt_key clone when creating deferred action. */
+	struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
+
+struct action_fifo {
+	int head;
+	int tail;
+	/* Deferred action fifo queue storage. */
+	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
 };
 
-extern int ovs_net_id;
+struct action_flow_keys {
+	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
+struct ovs_pcpu_storage {
+	struct action_fifo action_fifos;
+	struct action_flow_keys flow_keys;
+	struct ovs_frag_data frag_data;
+	int exec_level;
+	struct task_struct *owner;
+	local_lock_t bh_lock;
+};
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
+
+/**
+ * enum ovs_pkt_hash_types - hash info to include with a packet
+ * to send to userspace.
+ * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
+ * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
+ * over transport ports.
+ */
+enum ovs_pkt_hash_types {
+	OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
+	OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
+};
+
+extern unsigned int ovs_net_id;
 void ovs_lock(void);
 void ovs_unlock(void);
 
@@ -135,11 +248,13 @@ int lockdep_ovsl_is_held(void);
 #define lockdep_ovsl_is_held()	1
 #endif
 
-#define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ASSERT_OVSL()		WARN_ON(!lockdep_ovsl_is_held())
 #define ovsl_dereference(p)					\
 	rcu_dereference_protected(p, lockdep_ovsl_is_held())
+#define rcu_dereference_ovsl(p)					\
+	rcu_dereference_check(p, lockdep_ovsl_is_held())
 
-static inline struct net *ovs_dp_get_net(struct datapath *dp)
+static inline struct net *ovs_dp_get_net(const struct datapath *dp)
 {
 	return read_pnet(&dp->net);
 }
@@ -169,18 +284,63 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
 	return ovs_lookup_vport(dp, port_no);
 }
 
+/* Must be called with rcu_read_lock. */
+static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
+{
+	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
+
+	if (dev) {
+		struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+		if (vport)
+			return vport->dp;
+	}
+
+	return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+	struct datapath *dp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+	rcu_read_lock();
+	dp = get_dp_rcu(net, dp_ifindex);
+	rcu_read_unlock();
+
+	return dp;
+}
+
 extern struct notifier_block ovs_dp_device_notifier;
-extern struct genl_multicast_group ovs_dp_vport_multicast_group;
+extern struct genl_family dp_vport_genl_family;
 
-void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
-		  const struct dp_upcall_info *);
+		  const struct sw_flow_key *, const struct dp_upcall_info *,
+		  uint32_t cutlen);
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
 
 const char *ovs_dp_name(const struct datapath *dp);
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
-					 u8 cmd);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+					 u32 portid, u32 seq, u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct sw_flow_actions *, struct sw_flow_key *);
 
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
 void ovs_dp_notify_wq(struct work_struct *work);
+
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
+
+#define OVS_NLERR(logging_allowed, fmt, ...)			\
+do {								\
+	if (logging_allowed && net_ratelimit())			\
+		pr_info("netlink: " fmt "\n", ##__VA_ARGS__);	\
+} while (0)
 #endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index c3235675f359..a2af90ee99af 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #include <linux/netdevice.h>
@@ -30,19 +17,18 @@ static void dp_detach_port_notify(struct vport *vport)
 	struct datapath *dp;
 
 	dp = vport->dp;
-	notify = ovs_vport_cmd_build_info(vport, 0, 0,
-					  OVS_VPORT_CMD_DEL);
+	notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
+					  0, 0, OVS_VPORT_CMD_DEL);
 	ovs_dp_detach_port(vport);
 	if (IS_ERR(notify)) {
-		netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
-				ovs_dp_vport_multicast_group.id,
-				PTR_ERR(notify));
+		genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
+			     0, PTR_ERR(notify));
 		return;
 	}
 
-	genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
-				ovs_dp_vport_multicast_group.id,
-				GFP_KERNEL);
+	genlmsg_multicast_netns(&dp_vport_genl_family,
+				ovs_dp_get_net(dp), notify, 0,
+				0, GFP_KERNEL);
 }
 
 void ovs_dp_notify_wq(struct work_struct *work)
@@ -59,14 +45,10 @@ void ovs_dp_notify_wq(struct work_struct *work)
 			struct hlist_node *n;
 
 			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
-				struct netdev_vport *netdev_vport;
-
-				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+				if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
 					continue;
 
-				netdev_vport = netdev_vport_priv(vport);
-				if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
-				    netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+				if (!(netif_is_ovs_port(vport->dev)))
 					dp_detach_port_notify(vport);
 			}
 		}
@@ -88,8 +70,12 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_UNREGISTER) {
+		/* upper_dev_unlink and decrement promisc immediately */
+		ovs_netdev_detach_dev(vport);
+
+		/* schedule vport destroy, dev_put and genl notification */
 		ovs_net = net_generic(dev_net(dev), ovs_net_id);
-		queue_work(system_wq, &ovs_net->dp_notify_work);
+		queue_work(system_percpu_wq, &ovs_net->dp_notify_work);
 	}
 
 	return NOTIFY_DONE;
diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h
new file mode 100644
index 000000000000..cedf9b7b5796
--- /dev/null
+++ b/net/openvswitch/drop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * OpenvSwitch drop reason list.
+ */
+
+#ifndef OPENVSWITCH_DROP_H
+#define OPENVSWITCH_DROP_H
+#include <linux/skbuff.h>
+#include <net/dropreason.h>
+
+#define OVS_DROP_REASONS(R)			\
+	R(OVS_DROP_LAST_ACTION)		        \
+	R(OVS_DROP_ACTION_ERROR)		\
+	R(OVS_DROP_EXPLICIT)			\
+	R(OVS_DROP_EXPLICIT_WITH_ERROR)		\
+	R(OVS_DROP_METER)			\
+	R(OVS_DROP_RECURSION_LIMIT)		\
+	R(OVS_DROP_DEFERRED_LIMIT)		\
+	R(OVS_DROP_FRAG_L2_TOO_LONG)		\
+	R(OVS_DROP_FRAG_INVALID_PROTO)		\
+	R(OVS_DROP_CONNTRACK)			\
+	R(OVS_DROP_IP_TTL)			\
+	/* deliberate comment for trailing \ */
+
+enum ovs_drop_reason {
+	__OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+				SKB_DROP_REASON_SUBSYS_SHIFT,
+#define ENUM(x) x,
+	OVS_DROP_REASONS(ENUM)
+#undef ENUM
+
+	OVS_DROP_MAX,
+};
+
+static inline void
+ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason)
+{
+	kfree_skb_reason(skb, (u32)reason);
+}
+
+#endif /* OPENVSWITCH_DROP_H */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 5c519b121e1b..66366982f604 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,23 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2014 Nicira, Inc.
  */
 
-#include "flow.h"
-#include "datapath.h"
 #include <linux/uaccess.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -31,9 +16,13 @@
 #include <linux/module.h>
 #include <linux/in.h>
 #include <linux/rcupdate.h>
+#include <linux/cpumask.h>
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/mpls.h>
+#include <linux/sctp.h>
+#include <linux/smp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -42,9 +31,146 @@
 #include <net/ip.h>
 #include <net/ip_tunnels.h>
 #include <net/ipv6.h>
+#include <net/mpls.h>
 #include <net/ndisc.h>
+#include <net/nsh.h>
+#include <net/pkt_cls.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#include "conntrack.h"
+#include "datapath.h"
+#include "flow.h"
+#include "flow_netlink.h"
+#include "vport.h"
+
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
+{
+	struct timespec64 cur_ts;
+	u64 cur_ms, idle_ms;
+
+	ktime_get_ts64(&cur_ts);
+	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+	cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
+		 cur_ts.tv_nsec / NSEC_PER_MSEC;
 
-static struct kmem_cache *flow_cache;
+	return cur_ms - idle_ms;
+}
+
+#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
+
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+			   const struct sk_buff *skb)
+{
+	struct sw_flow_stats *stats;
+	unsigned int cpu = smp_processor_id();
+	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+
+	stats = rcu_dereference(flow->stats[cpu]);
+
+	/* Check if already have CPU-specific stats. */
+	if (likely(stats)) {
+		spin_lock(&stats->lock);
+		/* Mark if we write on the pre-allocated stats. */
+		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
+			flow->stats_last_writer = cpu;
+	} else {
+		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+		spin_lock(&stats->lock);
+
+		/* If the current CPU is the only writer on the
+		 * pre-allocated stats keep using them.
+		 */
+		if (unlikely(flow->stats_last_writer != cpu)) {
+			/* A previous locker may have already allocated the
+			 * stats, so we need to check again.  If CPU-specific
+			 * stats were already allocated, we update the pre-
+			 * allocated stats as we have already locked them.
+			 */
+			if (likely(flow->stats_last_writer != -1) &&
+			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
+				/* Try to allocate CPU-specific stats. */
+				struct sw_flow_stats *new_stats;
+
+				new_stats =
+					kmem_cache_alloc_node(flow_stats_cache,
+							      GFP_NOWAIT |
+							      __GFP_THISNODE |
+							      __GFP_NOWARN |
+							      __GFP_NOMEMALLOC,
+							      numa_node_id());
+				if (likely(new_stats)) {
+					new_stats->used = jiffies;
+					new_stats->packet_count = 1;
+					new_stats->byte_count = len;
+					new_stats->tcp_flags = tcp_flags;
+					spin_lock_init(&new_stats->lock);
+
+					rcu_assign_pointer(flow->stats[cpu],
+							   new_stats);
+					cpumask_set_cpu(cpu,
+							flow->cpu_used_mask);
+					goto unlock;
+				}
+			}
+			flow->stats_last_writer = cpu;
+		}
+	}
+
+	stats->used = jiffies;
+	stats->packet_count++;
+	stats->byte_count += len;
+	stats->tcp_flags |= tcp_flags;
+unlock:
+	spin_unlock(&stats->lock);
+}
+
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+			struct ovs_flow_stats *ovs_stats,
+			unsigned long *used, __be16 *tcp_flags)
+{
+	unsigned int cpu;
+
+	*used = 0;
+	*tcp_flags = 0;
+	memset(ovs_stats, 0, sizeof(*ovs_stats));
+
+	for_each_cpu(cpu, flow->cpu_used_mask) {
+		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+
+		if (stats) {
+			/* Local CPU may write on non-local stats, so we must
+			 * block bottom-halves here.
+			 */
+			spin_lock_bh(&stats->lock);
+			if (!*used || time_after(stats->used, *used))
+				*used = stats->used;
+			*tcp_flags |= stats->tcp_flags;
+			ovs_stats->n_packets += stats->packet_count;
+			ovs_stats->n_bytes += stats->byte_count;
+			spin_unlock_bh(&stats->lock);
+		}
+	}
+}
+
+/* Called with ovs_mutex. */
+void ovs_flow_stats_clear(struct sw_flow *flow)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, flow->cpu_used_mask) {
+		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+
+		if (stats) {
+			spin_lock_bh(&stats->lock);
+			stats->used = 0;
+			stats->packet_count = 0;
+			stats->byte_count = 0;
+			stats->tcp_flags = 0;
+			spin_unlock_bh(&stats->lock);
+		}
+	}
+}
 
 static int check_header(struct sk_buff *skb, int len)
 {
@@ -102,49 +228,172 @@ static bool udphdr_ok(struct sk_buff *skb)
 				  sizeof(struct udphdr));
 }
 
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct sctphdr));
+}
+
 static bool icmphdr_ok(struct sk_buff *skb)
 {
 	return pskb_may_pull(skb, skb_transport_offset(skb) +
 				  sizeof(struct icmphdr));
 }
 
-u64 ovs_flow_used_time(unsigned long flow_jiffies)
+/**
+ * get_ipv6_ext_hdrs() - Parses packet and sets IPv6 extension header flags.
+ *
+ * @skb: buffer where extension header data starts in packet
+ * @nh: ipv6 header
+ * @ext_hdrs: flags are stored here
+ *
+ * OFPIEH12_UNREP is set if more than one of a given IPv6 extension header
+ * is unexpectedly encountered. (Two destination options headers may be
+ * expected and would not cause this bit to be set.)
+ *
+ * OFPIEH12_UNSEQ is set if IPv6 extension headers were not in the order
+ * preferred (but not required) by RFC 2460:
+ *
+ * When more than one extension header is used in the same packet, it is
+ * recommended that those headers appear in the following order:
+ *      IPv6 header
+ *      Hop-by-Hop Options header
+ *      Destination Options header
+ *      Routing header
+ *      Fragment header
+ *      Authentication header
+ *      Encapsulating Security Payload header
+ *      Destination Options header
+ *      upper-layer header
+ */
+static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh,
+			      u16 *ext_hdrs)
 {
-	struct timespec cur_ts;
-	u64 cur_ms, idle_ms;
+	u8 next_type = nh->nexthdr;
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	int dest_options_header_count = 0;
+
+	*ext_hdrs = 0;
+
+	while (ipv6_ext_hdr(next_type)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+
+		switch (next_type) {
+		case IPPROTO_NONE:
+			*ext_hdrs |= OFPIEH12_NONEXT;
+			/* stop parsing */
+			return;
+
+		case IPPROTO_ESP:
+			if (*ext_hdrs & OFPIEH12_ESP)
+				*ext_hdrs |= OFPIEH12_UNREP;
+			if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST |
+					   OFPIEH12_ROUTER | IPPROTO_FRAGMENT |
+					   OFPIEH12_AUTH | OFPIEH12_UNREP)) ||
+			    dest_options_header_count >= 2) {
+				*ext_hdrs |= OFPIEH12_UNSEQ;
+			}
+			*ext_hdrs |= OFPIEH12_ESP;
+			break;
 
-	ktime_get_ts(&cur_ts);
-	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
-	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
-		 cur_ts.tv_nsec / NSEC_PER_MSEC;
+		case IPPROTO_AH:
+			if (*ext_hdrs & OFPIEH12_AUTH)
+				*ext_hdrs |= OFPIEH12_UNREP;
+			if ((*ext_hdrs &
+			     ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER |
+			       IPPROTO_FRAGMENT | OFPIEH12_UNREP)) ||
+			    dest_options_header_count >= 2) {
+				*ext_hdrs |= OFPIEH12_UNSEQ;
+			}
+			*ext_hdrs |= OFPIEH12_AUTH;
+			break;
 
-	return cur_ms - idle_ms;
-}
+		case IPPROTO_DSTOPTS:
+			if (dest_options_header_count == 0) {
+				if (*ext_hdrs &
+				    ~(OFPIEH12_HOP | OFPIEH12_UNREP))
+					*ext_hdrs |= OFPIEH12_UNSEQ;
+				*ext_hdrs |= OFPIEH12_DEST;
+			} else if (dest_options_header_count == 1) {
+				if (*ext_hdrs &
+				    ~(OFPIEH12_HOP | OFPIEH12_DEST |
+				      OFPIEH12_ROUTER | OFPIEH12_FRAG |
+				      OFPIEH12_AUTH | OFPIEH12_ESP |
+				      OFPIEH12_UNREP)) {
+					*ext_hdrs |= OFPIEH12_UNSEQ;
+				}
+			} else {
+				*ext_hdrs |= OFPIEH12_UNREP;
+			}
+			dest_options_header_count++;
+			break;
+
+		case IPPROTO_FRAGMENT:
+			if (*ext_hdrs & OFPIEH12_FRAG)
+				*ext_hdrs |= OFPIEH12_UNREP;
+			if ((*ext_hdrs & ~(OFPIEH12_HOP |
+					   OFPIEH12_DEST |
+					   OFPIEH12_ROUTER |
+					   OFPIEH12_UNREP)) ||
+			    dest_options_header_count >= 2) {
+				*ext_hdrs |= OFPIEH12_UNSEQ;
+			}
+			*ext_hdrs |= OFPIEH12_FRAG;
+			break;
+
+		case IPPROTO_ROUTING:
+			if (*ext_hdrs & OFPIEH12_ROUTER)
+				*ext_hdrs |= OFPIEH12_UNREP;
+			if ((*ext_hdrs & ~(OFPIEH12_HOP |
+					   OFPIEH12_DEST |
+					   OFPIEH12_UNREP)) ||
+			    dest_options_header_count >= 2) {
+				*ext_hdrs |= OFPIEH12_UNSEQ;
+			}
+			*ext_hdrs |= OFPIEH12_ROUTER;
+			break;
+
+		case IPPROTO_HOPOPTS:
+			if (*ext_hdrs & OFPIEH12_HOP)
+				*ext_hdrs |= OFPIEH12_UNREP;
+			/* OFPIEH12_HOP is set to 1 if a hop-by-hop IPv6
+			 * extension header is present as the first
+			 * extension header in the packet.
+			 */
+			if (*ext_hdrs == 0)
+				*ext_hdrs |= OFPIEH12_HOP;
+			else
+				*ext_hdrs |= OFPIEH12_UNSEQ;
+			break;
+
+		default:
+			return;
+		}
 
-#define SW_FLOW_KEY_OFFSET(field)		\
-	(offsetof(struct sw_flow_key, field) +	\
-	 FIELD_SIZEOF(struct sw_flow_key, field))
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (!hp)
+			break;
+		next_type = hp->nexthdr;
+		start += ipv6_optlen(hp);
+	}
+}
 
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
-			 int *key_lenp)
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 {
+	unsigned short frag_off;
+	unsigned int payload_ofs = 0;
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
-	int payload_ofs;
 	struct ipv6hdr *nh;
-	uint8_t nexthdr;
-	__be16 frag_off;
-	int err;
-
-	*key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
+	int err, nexthdr, flags = 0;
 
 	err = check_header(skb, nh_ofs + sizeof(*nh));
 	if (unlikely(err))
 		return err;
 
 	nh = ipv6_hdr(skb);
-	nexthdr = nh->nexthdr;
-	payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+	get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs);
 
 	key->ip.proto = NEXTHDR_NONE;
 	key->ip.tos = ipv6_get_dsfield(nh);
@@ -153,17 +402,25 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
 	key->ipv6.addr.src = nh->saddr;
 	key->ipv6.addr.dst = nh->daddr;
 
-	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
-	if (unlikely(payload_ofs < 0))
-		return -EINVAL;
-
-	if (frag_off) {
-		if (frag_off & htons(~0x7))
+	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
+	if (flags & IP6_FH_F_FRAG) {
+		if (frag_off) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-		else
-			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+			key->ip.proto = NEXTHDR_FRAGMENT;
+			return 0;
+		}
+		key->ip.frag = OVS_FRAG_TYPE_FIRST;
+	} else {
+		key->ip.frag = OVS_FRAG_TYPE_NONE;
 	}
 
+	/* Delayed handling of error in ipv6_find_hdr() as it
+	 * always sets flags and frag_off to a valid value which may be
+	 * used to set key->ip.frag above.
+	 */
+	if (unlikely(nexthdr < 0))
+		return -EPROTO;
+
 	nh_len = payload_ofs - nh_ofs;
 	skb_set_transport_header(skb, nh_ofs + nh_len);
 	key->ip.proto = nexthdr;
@@ -176,284 +433,78 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
 				  sizeof(struct icmp6hdr));
 }
 
-#define TCP_FLAGS_OFFSET 13
-#define TCP_FLAG_MASK 0x3f
-
-void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
-{
-	u8 tcp_flags = 0;
-
-	if ((flow->key.eth.type == htons(ETH_P_IP) ||
-	     flow->key.eth.type == htons(ETH_P_IPV6)) &&
-	    flow->key.ip.proto == IPPROTO_TCP &&
-	    likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
-		u8 *tcp = (u8 *)tcp_hdr(skb);
-		tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
-	}
-
-	spin_lock(&flow->lock);
-	flow->used = jiffies;
-	flow->packet_count++;
-	flow->byte_count += skb->len;
-	flow->tcp_flags |= tcp_flags;
-	spin_unlock(&flow->lock);
-}
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int size)
-{
-	struct sw_flow_actions *sfa;
-
-	if (size > MAX_ACTIONS_BUFSIZE)
-		return ERR_PTR(-EINVAL);
-
-	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
-	if (!sfa)
-		return ERR_PTR(-ENOMEM);
-
-	sfa->actions_len = 0;
-	return sfa;
-}
-
-struct sw_flow *ovs_flow_alloc(void)
-{
-	struct sw_flow *flow;
-
-	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
-	if (!flow)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock_init(&flow->lock);
-	flow->sf_acts = NULL;
-
-	return flow;
-}
-
-static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
-{
-	hash = jhash_1word(hash, table->hash_seed);
-	return flex_array_get(table->buckets,
-				(hash & (table->n_buckets - 1)));
-}
-
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
-	struct flex_array *buckets;
-	int i, err;
-
-	buckets = flex_array_alloc(sizeof(struct hlist_head *),
-				   n_buckets, GFP_KERNEL);
-	if (!buckets)
-		return NULL;
-
-	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
-	if (err) {
-		flex_array_free(buckets);
-		return NULL;
-	}
-
-	for (i = 0; i < n_buckets; i++)
-		INIT_HLIST_HEAD((struct hlist_head *)
-					flex_array_get(buckets, i));
-
-	return buckets;
-}
-
-static void free_buckets(struct flex_array *buckets)
-{
-	flex_array_free(buckets);
-}
-
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
-{
-	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
-
-	if (!table)
-		return NULL;
-
-	table->buckets = alloc_buckets(new_size);
-
-	if (!table->buckets) {
-		kfree(table);
-		return NULL;
-	}
-	table->n_buckets = new_size;
-	table->count = 0;
-	table->node_ver = 0;
-	table->keep_flows = false;
-	get_random_bytes(&table->hash_seed, sizeof(u32));
-
-	return table;
-}
-
-void ovs_flow_tbl_destroy(struct flow_table *table)
-{
-	int i;
-
-	if (!table)
-		return;
-
-	if (table->keep_flows)
-		goto skip_flows;
-
-	for (i = 0; i < table->n_buckets; i++) {
-		struct sw_flow *flow;
-		struct hlist_head *head = flex_array_get(table->buckets, i);
-		struct hlist_node *n;
-		int ver = table->node_ver;
-
-		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del_rcu(&flow->hash_node[ver]);
-			ovs_flow_free(flow);
-		}
-	}
-
-skip_flows:
-	free_buckets(table->buckets);
-	kfree(table);
-}
-
-static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
-{
-	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
-
-	ovs_flow_tbl_destroy(table);
-}
-
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
-{
-	if (!table)
-		return;
-
-	call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
-}
-
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+/**
+ * parse_vlan_tag - Parse vlan tag from vlan header.
+ * @skb: skb containing frame to parse
+ * @key_vh: pointer to parsed vlan tag
+ * @untag_vlan: should the vlan header be removed from the frame
+ *
+ * Return: ERROR on memory error.
+ * %0 if it encounters a non-vlan or incomplete packet.
+ * %1 after successfully parsing vlan tag.
+ */
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
+			  bool untag_vlan)
 {
-	struct sw_flow *flow;
-	struct hlist_head *head;
-	int ver;
-	int i;
-
-	ver = table->node_ver;
-	while (*bucket < table->n_buckets) {
-		i = 0;
-		head = flex_array_get(table->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
-			if (i < *last) {
-				i++;
-				continue;
-			}
-			*last = i + 1;
-			return flow;
-		}
-		(*bucket)++;
-		*last = 0;
-	}
+	struct vlan_head *vh = (struct vlan_head *)skb->data;
 
-	return NULL;
-}
-
-static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
-{
-	struct hlist_head *head;
-	head = find_bucket(table, flow->hash);
-	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
-	table->count++;
-}
+	if (likely(!eth_type_vlan(vh->tpid)))
+		return 0;
 
-static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
-{
-	int old_ver;
-	int i;
+	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
+		return 0;
 
-	old_ver = old->node_ver;
-	new->node_ver = !old_ver;
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
+				 sizeof(__be16))))
+		return -ENOMEM;
 
-	/* Insert in new table. */
-	for (i = 0; i < old->n_buckets; i++) {
-		struct sw_flow *flow;
-		struct hlist_head *head;
+	vh = (struct vlan_head *)skb->data;
+	key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
+	key_vh->tpid = vh->tpid;
 
-		head = flex_array_get(old->buckets, i);
+	if (unlikely(untag_vlan)) {
+		int offset = skb->data - skb_mac_header(skb);
+		u16 tci;
+		int err;
 
-		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			__flow_tbl_insert(new, flow);
+		__skb_push(skb, offset);
+		err = __skb_vlan_pop(skb, &tci);
+		__skb_pull(skb, offset);
+		if (err)
+			return err;
+		__vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
+	} else {
+		__skb_pull(skb, sizeof(struct vlan_head));
 	}
-	old->keep_flows = true;
-}
-
-static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
-{
-	struct flow_table *new_table;
-
-	new_table = ovs_flow_tbl_alloc(n_buckets);
-	if (!new_table)
-		return ERR_PTR(-ENOMEM);
-
-	flow_table_copy_flows(table, new_table);
-
-	return new_table;
-}
-
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
-{
-	return __flow_tbl_rehash(table, table->n_buckets);
-}
-
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
-{
-	return __flow_tbl_rehash(table, table->n_buckets * 2);
-}
-
-void ovs_flow_free(struct sw_flow *flow)
-{
-	if (unlikely(!flow))
-		return;
-
-	kfree((struct sf_flow_acts __force *)flow->sf_acts);
-	kmem_cache_free(flow_cache, flow);
+	return 1;
 }
 
-/* RCU callback used by ovs_flow_deferred_free. */
-static void rcu_free_flow_callback(struct rcu_head *rcu)
+static void clear_vlan(struct sw_flow_key *key)
 {
-	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-
-	ovs_flow_free(flow);
-}
-
-/* Schedules 'flow' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free(struct sw_flow *flow)
-{
-	call_rcu(&flow->rcu, rcu_free_flow_callback);
-}
-
-/* Schedules 'sf_acts' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
-{
-	kfree_rcu(sf_acts, rcu);
+	key->eth.vlan.tci = 0;
+	key->eth.vlan.tpid = 0;
+	key->eth.cvlan.tci = 0;
+	key->eth.cvlan.tpid = 0;
 }
 
 static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	struct qtag_prefix {
-		__be16 eth_type; /* ETH_P_8021Q */
-		__be16 tci;
-	};
-	struct qtag_prefix *qp;
-
-	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
-		return 0;
+	int res;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
-					 sizeof(__be16))))
-		return -ENOMEM;
+	if (skb_vlan_tag_present(skb)) {
+		key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
+		key->eth.vlan.tpid = skb->vlan_proto;
+	} else {
+		/* Parse outer vlan tag in the non-accelerated case. */
+		res = parse_vlan_tag(skb, &key->eth.vlan, true);
+		if (res <= 0)
+			return res;
+	}
 
-	qp = (struct qtag_prefix *) skb->data;
-	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
-	__skb_pull(skb, sizeof(struct qtag_prefix));
+	/* Parse inner vlan tag. */
+	res = parse_vlan_tag(skb, &key->eth.cvlan, false);
+	if (res <= 0)
+		return res;
 
 	return 0;
 }
@@ -473,7 +524,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 	proto = *(__be16 *) skb->data;
 	__skb_pull(skb, sizeof(__be16));
 
-	if (ntohs(proto) >= ETH_P_802_3_MIN)
+	if (eth_proto_is_802_3(proto))
 		return proto;
 
 	if (skb->len < sizeof(struct llc_snap_hdr))
@@ -490,25 +541,22 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 
 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
 
-	if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
+	if (eth_proto_is_802_3(llc->ethertype))
 		return llc->ethertype;
 
 	return htons(ETH_P_802_2);
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-			int *key_lenp, int nh_len)
+			int nh_len)
 {
 	struct icmp6hdr *icmp = icmp6_hdr(skb);
-	int error = 0;
-	int key_len;
 
 	/* The ICMPv6 type and code fields use the 16-bit transport port
 	 * fields, so we need to store them in 16-bit network byte order.
 	 */
-	key->ipv6.tp.src = htons(icmp->icmp6_type);
-	key->ipv6.tp.dst = htons(icmp->icmp6_code);
-	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+	key->tp.src = htons(icmp->icmp6_type);
+	key->tp.dst = htons(icmp->icmp6_code);
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +565,19 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		struct nd_msg *nd;
 		int offset;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+		memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
 
 		/* In order to process neighbor discovery options, we need the
 		 * entire packet.
 		 */
 		if (unlikely(icmp_len < sizeof(*nd)))
-			goto out;
-		if (unlikely(skb_linearize(skb))) {
-			error = -ENOMEM;
-			goto out;
-		}
+			return 0;
+
+		if (unlikely(skb_linearize(skb)))
+			return -ENOMEM;
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
 		key->ipv6.nd.target = nd->target;
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
 
 		icmp_len -= sizeof(*nd);
 		offset = 0;
@@ -541,7 +587,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			int opt_len = nd_opt->nd_opt_len * 8;
 
 			if (unlikely(!opt_len || opt_len > icmp_len))
-				goto invalid;
+				return 0;
 
 			/* Store the link layer address if the appropriate
 			 * option is provided.  It is considered an error if
@@ -551,14 +597,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			    && opt_len == 8) {
 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
 					goto invalid;
-				memcpy(key->ipv6.nd.sll,
-				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				ether_addr_copy(key->ipv6.nd.sll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 				   && opt_len == 8) {
 				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
 					goto invalid;
-				memcpy(key->ipv6.nd.tll,
-				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				ether_addr_copy(key->ipv6.nd.tll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
 			}
 
 			icmp_len -= opt_len;
@@ -566,98 +612,88 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		}
 	}
 
-	goto out;
+	return 0;
 
 invalid:
 	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
 	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
 	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
 
-out:
-	*key_lenp = key_len;
-	return error;
+	return 0;
 }
 
-/**
- * ovs_flow_extract - extracts a flow key from an Ethernet frame.
- * @skb: sk_buff that contains the frame, with skb->data pointing to the
- * Ethernet header
- * @in_port: port number on which @skb was received.
- * @key: output flow key
- * @key_lenp: length of output flow key
- *
- * The caller must ensure that skb->len >= ETH_HLEN.
- *
- * Returns 0 if successful, otherwise a negative errno value.
- *
- * Initializes @skb header pointers as follows:
- *
- *    - skb->mac_header: the Ethernet header.
- *
- *    - skb->network_header: just past the Ethernet header, or just past the
- *      VLAN header, to the first byte of the Ethernet payload.
- *
- *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
- *      on output, then just past the IP header, if one is present and
- *      of a correct length, otherwise the same as skb->network_header.
- *      For other key->eth.type values it is left untouched.
- */
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-		 int *key_lenp)
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	int error = 0;
-	int key_len = SW_FLOW_KEY_OFFSET(eth);
-	struct ethhdr *eth;
-
-	memset(key, 0, sizeof(*key));
+	struct nshhdr *nh;
+	unsigned int nh_ofs = skb_network_offset(skb);
+	u8 version, length;
+	int err;
 
-	key->phy.priority = skb->priority;
-	if (OVS_CB(skb)->tun_key)
-		memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
-	key->phy.in_port = in_port;
-	key->phy.skb_mark = skb->mark;
+	err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+	if (unlikely(err))
+		return err;
 
-	skb_reset_mac_header(skb);
+	nh = nsh_hdr(skb);
+	version = nsh_get_ver(nh);
+	length = nsh_hdr_len(nh);
 
-	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-	 * header in the linear data area.
-	 */
-	eth = eth_hdr(skb);
-	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
-	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+	if (version != 0)
+		return -EINVAL;
 
-	__skb_pull(skb, 2 * ETH_ALEN);
-	/* We are going to push all headers that we pull, so no need to
-	 * update skb->csum here.
-	 */
+	err = check_header(skb, nh_ofs + length);
+	if (unlikely(err))
+		return err;
 
-	if (vlan_tx_tag_present(skb))
-		key->eth.tci = htons(skb->vlan_tci);
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
+	nh = nsh_hdr(skb);
+	key->nsh.base.flags = nsh_get_flags(nh);
+	key->nsh.base.ttl = nsh_get_ttl(nh);
+	key->nsh.base.mdtype = nh->mdtype;
+	key->nsh.base.np = nh->np;
+	key->nsh.base.path_hdr = nh->path_hdr;
+	switch (key->nsh.base.mdtype) {
+	case NSH_M_TYPE1:
+		if (length != NSH_M_TYPE1_LEN)
+			return -EINVAL;
+		memcpy(key->nsh.context, nh->md1.context,
+		       sizeof(nh->md1));
+		break;
+	case NSH_M_TYPE2:
+		memset(key->nsh.context, 0,
+		       sizeof(nh->md1));
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	key->eth.type = parse_ethertype(skb);
-	if (unlikely(key->eth.type == htons(0)))
-		return -ENOMEM;
+	return 0;
+}
 
-	skb_reset_network_header(skb);
-	__skb_push(skb, skb->data - skb_mac_header(skb));
+/**
+ * key_extract_l3l4 - extracts L3/L4 header information.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ *       L3 header
+ * @key: output flow key
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
+ */
+static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int error;
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
 		struct iphdr *nh;
 		__be16 offset;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-
 		error = check_iphdr(skb);
 		if (unlikely(error)) {
+			memset(&key->ip, 0, sizeof(key->ip));
+			memset(&key->ipv4, 0, sizeof(key->ipv4));
 			if (error == -EINVAL) {
 				skb->transport_header = skb->network_header;
 				error = 0;
 			}
-			goto out;
+			return error;
 		}
 
 		nh = ip_hdr(skb);
@@ -671,836 +707,410 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-			goto out;
+			memset(&key->tp, 0, sizeof(key->tp));
+			return 0;
 		}
 		if (nh->frag_off & htons(IP_MF) ||
-			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+		else
+			key->ip.frag = OVS_FRAG_TYPE_NONE;
 
 		/* Transport layer. */
 		if (key->ip.proto == IPPROTO_TCP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
-				key->ipv4.tp.src = tcp->source;
-				key->ipv4.tp.dst = tcp->dest;
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
+
 		} else if (key->ip.proto == IPPROTO_UDP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
-				key->ipv4.tp.src = udp->source;
-				key->ipv4.tp.dst = udp->dest;
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
+			}
+		} else if (key->ip.proto == IPPROTO_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (icmphdr_ok(skb)) {
 				struct icmphdr *icmp = icmp_hdr(skb);
 				/* The ICMP type and code fields use the 16-bit
 				 * transport port fields, so we need to store
 				 * them in 16-bit network byte order. */
-				key->ipv4.tp.src = htons(icmp->type);
-				key->ipv4.tp.dst = htons(icmp->code);
+				key->tp.src = htons(icmp->type);
+				key->tp.dst = htons(icmp->code);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
 
-	} else if ((key->eth.type == htons(ETH_P_ARP) ||
-		   key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
+	} else if (key->eth.type == htons(ETH_P_ARP) ||
+		   key->eth.type == htons(ETH_P_RARP)) {
 		struct arp_eth_header *arp;
+		bool arp_available = arphdr_ok(skb);
 
 		arp = (struct arp_eth_header *)skb_network_header(skb);
 
-		if (arp->ar_hrd == htons(ARPHRD_ETHER)
-				&& arp->ar_pro == htons(ETH_P_IP)
-				&& arp->ar_hln == ETH_ALEN
-				&& arp->ar_pln == 4) {
+		if (arp_available &&
+		    arp->ar_hrd == htons(ARPHRD_ETHER) &&
+		    arp->ar_pro == htons(ETH_P_IP) &&
+		    arp->ar_hln == ETH_ALEN &&
+		    arp->ar_pln == 4) {
 
 			/* We only match on the lower 8 bits of the opcode. */
 			if (ntohs(arp->ar_op) <= 0xff)
 				key->ip.proto = ntohs(arp->ar_op);
+			else
+				key->ip.proto = 0;
+
 			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
 			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
-			memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
-			memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
+		} else {
+			memset(&key->ip, 0, sizeof(key->ip));
+			memset(&key->ipv4, 0, sizeof(key->ipv4));
 		}
+	} else if (eth_p_mpls(key->eth.type)) {
+		size_t label_count = 1;
+
+		memset(&key->mpls, 0, sizeof(key->mpls));
+		skb_set_inner_network_header(skb, skb->mac_len);
+		while (1) {
+			__be32 lse;
+
+			error = check_header(skb, skb->mac_len +
+					     label_count * MPLS_HLEN);
+			if (unlikely(error))
+				return 0;
+
+			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
+
+			if (label_count <= MPLS_LABEL_DEPTH)
+				memcpy(&key->mpls.lse[label_count - 1], &lse,
+				       MPLS_HLEN);
+
+			skb_set_inner_network_header(skb, skb->mac_len +
+						     label_count * MPLS_HLEN);
+			if (lse & htonl(MPLS_LS_S_MASK))
+				break;
+
+			label_count++;
+		}
+		if (label_count > MPLS_LABEL_DEPTH)
+			label_count = MPLS_LABEL_DEPTH;
+
+		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
-		nh_len = parse_ipv6hdr(skb, key, &key_len);
+		nh_len = parse_ipv6hdr(skb, key);
 		if (unlikely(nh_len < 0)) {
-			if (nh_len == -EINVAL)
+			switch (nh_len) {
+			case -EINVAL:
+				memset(&key->ip, 0, sizeof(key->ip));
+				memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
+				fallthrough;
+			case -EPROTO:
 				skb->transport_header = skb->network_header;
-			else
+				error = 0;
+				break;
+			default:
 				error = nh_len;
-			goto out;
+			}
+			return error;
 		}
 
-		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
-			goto out;
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
+			memset(&key->tp, 0, sizeof(key->tp));
+			return 0;
+		}
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
-				key->ipv6.tp.src = tcp->source;
-				key->ipv6.tp.dst = tcp->dest;
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == NEXTHDR_UDP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
-				key->ipv6.tp.src = udp->source;
-				key->ipv6.tp.dst = udp->dest;
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
+			}
+		} else if (key->ip.proto == NEXTHDR_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (icmp6hdr_ok(skb)) {
-				error = parse_icmpv6(skb, key, &key_len, nh_len);
-				if (error < 0)
-					goto out;
+				error = parse_icmpv6(skb, key, nh_len);
+				if (error)
+					return error;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
+	} else if (key->eth.type == htons(ETH_P_NSH)) {
+		error = parse_nsh(skb, key);
+		if (error)
+			return error;
 	}
-
-out:
-	*key_lenp = key_len;
-	return error;
-}
-
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
-{
-	return jhash2((u32 *)((u8 *)key + key_start),
-		      DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
-}
-
-static int flow_key_start(struct sw_flow_key *key)
-{
-	if (key->tun_key.ipv4_dst)
-		return 0;
-	else
-		return offsetof(struct sw_flow_key, phy);
-}
-
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				struct sw_flow_key *key, int key_len)
-{
-	struct sw_flow *flow;
-	struct hlist_head *head;
-	u8 *_key;
-	int key_start;
-	u32 hash;
-
-	key_start = flow_key_start(key);
-	hash = ovs_flow_hash(key, key_start, key_len);
-
-	_key = (u8 *) key + key_start;
-	head = find_bucket(table, hash);
-	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-
-		if (flow->hash == hash &&
-		    !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
-			return flow;
-		}
-	}
-	return NULL;
-}
-
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-			 struct sw_flow_key *key, int key_len)
-{
-	flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
-	memcpy(&flow->key, key, sizeof(flow->key));
-	__flow_tbl_insert(table, flow);
-}
-
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
-{
-	BUG_ON(table->count == 0);
-	hlist_del_rcu(&flow->hash_node[table->node_ver]);
-	table->count--;
+	return 0;
 }
 
-/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
-const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
-	[OVS_KEY_ATTR_ENCAP] = -1,
-	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
-	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
-	[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
-	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
-	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
-	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
-	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
-	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
-	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
-	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
-	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
-	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
-	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
-	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
-	[OVS_KEY_ATTR_TUNNEL] = -1,
-};
-
-static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u32 *attrs)
+/**
+ * key_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @key: output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Initializes @skb header fields as follows:
+ *
+ *    - skb->mac_header: the L2 header.
+ *
+ *    - skb->network_header: just past the L2 header, or just past the
+ *      VLAN header, to the first byte of the L2 payload.
+ *
+ *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->eth.type values it is left untouched.
+ *
+ *    - skb->protocol: the type of the data starting at skb->network_header.
+ *      Equals to key->eth.type.
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
+ */
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	const struct ovs_key_icmp *icmp_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
-
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv4.tp.src = tcp_key->tcp_src;
-		swkey->ipv4.tp.dst = tcp_key->tcp_dst;
-		break;
+	struct ethhdr *eth;
 
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+	/* Flags are always used as part of stats */
+	key->tp.flags = 0;
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv4.tp.src = udp_key->udp_src;
-		swkey->ipv4.tp.dst = udp_key->udp_dst;
-		break;
+	skb_reset_mac_header(skb);
 
-	case IPPROTO_ICMP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
+	/* Link layer. */
+	clear_vlan(key);
+	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+		if (unlikely(eth_type_vlan(skb->protocol)))
 			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
-		swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
-		swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
-		break;
-	}
-
-	return 0;
-}
-
-static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u32 *attrs)
-{
-	const struct ovs_key_icmpv6 *icmpv6_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
+		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
 
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		 * update skb->csum here.
+		 */
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv6.tp.src = tcp_key->tcp_src;
-		swkey->ipv6.tp.dst = tcp_key->tcp_dst;
-		break;
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
 
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
+			return -ENOMEM;
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv6.tp.src = udp_key->udp_src;
-		swkey->ipv6.tp.dst = udp_key->udp_dst;
-		break;
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
 
-	case IPPROTO_ICMPV6:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
-
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
-		swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
-		swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
-
-		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
-			const struct ovs_key_nd *nd_key;
-
-			if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
-				return -EINVAL;
-			*attrs &= ~(1 << OVS_KEY_ATTR_ND);
-
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-			nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
-			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
-			       sizeof(swkey->ipv6.nd.target));
-			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
-			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
-		}
-		break;
+		skb_reset_network_header(skb);
+		__skb_push(skb, skb->data - skb_mac_header(skb));
 	}
 
-	return 0;
+	skb_reset_mac_len(skb);
+
+	/* Fill out L3/L4 key info, if any */
+	return key_extract_l3l4(skb, key);
 }
 
-static int parse_flow_nlattrs(const struct nlattr *attr,
-			      const struct nlattr *a[], u32 *attrsp)
+/* In the case of conntrack fragment handling it expects L3 headers,
+ * add a helper.
+ */
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	const struct nlattr *nla;
-	u32 attrs;
-	int rem;
-
-	attrs = 0;
-	nla_for_each_nested(nla, attr, rem) {
-		u16 type = nla_type(nla);
-		int expected_len;
-
-		if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
-			return -EINVAL;
-
-		expected_len = ovs_key_lens[type];
-		if (nla_len(nla) != expected_len && expected_len != -1)
-			return -EINVAL;
-
-		attrs |= 1 << type;
-		a[type] = nla;
-	}
-	if (rem)
-		return -EINVAL;
-
-	*attrsp = attrs;
-	return 0;
+	return key_extract_l3l4(skb, key);
 }
 
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
-			     struct ovs_key_ipv4_tunnel *tun_key)
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	struct nlattr *a;
-	int rem;
-	bool ttl = false;
-
-	memset(tun_key, 0, sizeof(*tun_key));
-
-	nla_for_each_nested(a, attr, rem) {
-		int type = nla_type(a);
-		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
-			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
-			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
-			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
-			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
-			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
-			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
-			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
-		};
-
-		if (type > OVS_TUNNEL_KEY_ATTR_MAX ||
-			ovs_tunnel_key_lens[type] != nla_len(a))
-			return -EINVAL;
-
-		switch (type) {
-		case OVS_TUNNEL_KEY_ATTR_ID:
-			tun_key->tun_id = nla_get_be64(a);
-			tun_key->tun_flags |= TUNNEL_KEY;
-			break;
-		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
-			tun_key->ipv4_src = nla_get_be32(a);
-			break;
-		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
-			tun_key->ipv4_dst = nla_get_be32(a);
-			break;
-		case OVS_TUNNEL_KEY_ATTR_TOS:
-			tun_key->ipv4_tos = nla_get_u8(a);
-			break;
-		case OVS_TUNNEL_KEY_ATTR_TTL:
-			tun_key->ipv4_ttl = nla_get_u8(a);
-			ttl = true;
-			break;
-		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
-			tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
-			break;
-		case OVS_TUNNEL_KEY_ATTR_CSUM:
-			tun_key->tun_flags |= TUNNEL_CSUM;
-			break;
-		default:
-			return -EINVAL;
+	int res;
 
-		}
-	}
-	if (rem > 0)
-		return -EINVAL;
-
-	if (!tun_key->ipv4_dst)
-		return -EINVAL;
+	res = key_extract(skb, key);
+	if (!res)
+		key->mac_proto &= ~SW_FLOW_KEY_INVALID;
 
-	if (!ttl)
-		return -EINVAL;
-
-	return 0;
+	return res;
 }
 
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
-			   const struct ovs_key_ipv4_tunnel *tun_key)
+static int key_extract_mac_proto(struct sk_buff *skb)
 {
-	struct nlattr *nla;
-
-	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
-	if (!nla)
-		return -EMSGSIZE;
-
-	if (tun_key->tun_flags & TUNNEL_KEY &&
-	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
-		return -EMSGSIZE;
-	if (tun_key->ipv4_src &&
-	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src))
-		return -EMSGSIZE;
-	if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst))
-		return -EMSGSIZE;
-	if (tun_key->ipv4_tos &&
-	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos))
-		return -EMSGSIZE;
-	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
-		return -EMSGSIZE;
-	if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
-		return -EMSGSIZE;
-	if ((tun_key->tun_flags & TUNNEL_CSUM) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
-		return -EMSGSIZE;
-
-	nla_nest_end(skb, nla);
-	return 0;
+	switch (skb->dev->type) {
+	case ARPHRD_ETHER:
+		return MAC_PROTO_ETHERNET;
+	case ARPHRD_NONE:
+		if (skb->protocol == htons(ETH_P_TEB))
+			return MAC_PROTO_ETHERNET;
+		return MAC_PROTO_NONE;
+	}
+	WARN_ON_ONCE(1);
+	return -EINVAL;
 }
 
-/**
- * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
- * @swkey: receives the extracted flow key.
- * @key_lenp: number of bytes used in @swkey.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- */
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
-		      const struct nlattr *attr)
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+			 struct sk_buff *skb, struct sw_flow_key *key)
 {
-	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-	const struct ovs_key_ethernet *eth_key;
-	int key_len;
-	u32 attrs;
-	int err;
-
-	memset(swkey, 0, sizeof(struct sw_flow_key));
-	key_len = SW_FLOW_KEY_OFFSET(eth);
-
-	err = parse_flow_nlattrs(attr, a, &attrs);
-	if (err)
-		return err;
-
-	/* Metadata attributes. */
-	if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
-		swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
-		attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
-	}
-	if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
-		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
-		if (in_port >= DP_MAX_PORTS)
-			return -EINVAL;
-		swkey->phy.in_port = in_port;
-		attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
-	} else {
-		swkey->phy.in_port = DP_MAX_PORTS;
-	}
-	if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
-		swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
-		attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
-	}
-
-	if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
-		err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
-		if (err)
-			return err;
-
-		attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
-	}
-
-	/* Data attributes. */
-	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
-		return -EINVAL;
-	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
-
-	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
-	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
-	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
-
-	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
-	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
-		const struct nlattr *encap;
-		__be16 tci;
-
-		if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
-			      (1 << OVS_KEY_ATTR_ETHERTYPE) |
-			      (1 << OVS_KEY_ATTR_ENCAP)))
-			return -EINVAL;
-
-		encap = a[OVS_KEY_ATTR_ENCAP];
-		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-		if (tci & htons(VLAN_TAG_PRESENT)) {
-			swkey->eth.tci = tci;
-
-			err = parse_flow_nlattrs(encap, a, &attrs);
-			if (err)
-				return err;
-		} else if (!tci) {
-			/* Corner case for truncated 802.1Q header. */
-			if (nla_len(encap))
-				return -EINVAL;
-
-			swkey->eth.type = htons(ETH_P_8021Q);
-			*key_lenp = key_len;
-			return 0;
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	struct tc_skb_ext *tc_ext;
+#endif
+	bool post_ct = false, post_ct_snat = false, post_ct_dnat = false;
+	int res, err;
+	u16 zone = 0;
+
+	/* Extract metadata from packet. */
+	if (tun_info) {
+		key->tun_proto = ip_tunnel_info_af(tun_info);
+		memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
+
+		if (tun_info->options_len) {
+			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
+						   8)) - 1
+					> sizeof(key->tun_opts));
+
+			ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
+						tun_info);
+			key->tun_opts_len = tun_info->options_len;
 		} else {
-			return -EINVAL;
+			key->tun_opts_len = 0;
 		}
+	} else  {
+		key->tun_proto = 0;
+		key->tun_opts_len = 0;
+		memset(&key->tun_key, 0, sizeof(key->tun_key));
 	}
 
-	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
-		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-		if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+	key->phy.priority = skb->priority;
+	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
+	key->phy.skb_mark = skb->mark;
+	key->ovs_flow_hash = 0;
+	res = key_extract_mac_proto(skb);
+	if (res < 0)
+		return res;
+	key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	if (tc_skb_ext_tc_enabled()) {
+		tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+		key->recirc_id = tc_ext && !tc_ext->act_miss ?
+				 tc_ext->chain : 0;
+		OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
+		post_ct = tc_ext ? tc_ext->post_ct : false;
+		post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
+		post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false;
+		zone = post_ct ? tc_ext->zone : 0;
 	} else {
-		swkey->eth.type = htons(ETH_P_802_2);
-	}
-
-	if (swkey->eth.type == htons(ETH_P_IP)) {
-		const struct ovs_key_ipv4 *ipv4_key;
-
-		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
-		if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
-			return -EINVAL;
-		swkey->ip.proto = ipv4_key->ipv4_proto;
-		swkey->ip.tos = ipv4_key->ipv4_tos;
-		swkey->ip.ttl = ipv4_key->ipv4_ttl;
-		swkey->ip.frag = ipv4_key->ipv4_frag;
-		swkey->ipv4.addr.src = ipv4_key->ipv4_src;
-		swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
-
-		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-			err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
-			if (err)
-				return err;
-		}
-	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-		const struct ovs_key_ipv6 *ipv6_key;
-
-		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
-
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
-		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
-		if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
-			return -EINVAL;
-		swkey->ipv6.label = ipv6_key->ipv6_label;
-		swkey->ip.proto = ipv6_key->ipv6_proto;
-		swkey->ip.tos = ipv6_key->ipv6_tclass;
-		swkey->ip.ttl = ipv6_key->ipv6_hlimit;
-		swkey->ip.frag = ipv6_key->ipv6_frag;
-		memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
-		       sizeof(swkey->ipv6.addr.src));
-		memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
-		       sizeof(swkey->ipv6.addr.dst));
-
-		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-			err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
-			if (err)
-				return err;
-		}
-	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
-		   swkey->eth.type == htons(ETH_P_RARP)) {
-		const struct ovs_key_arp *arp_key;
-
-		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
-
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
-		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
-		swkey->ipv4.addr.src = arp_key->arp_sip;
-		swkey->ipv4.addr.dst = arp_key->arp_tip;
-		if (arp_key->arp_op & htons(0xff00))
-			return -EINVAL;
-		swkey->ip.proto = ntohs(arp_key->arp_op);
-		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
-		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+		key->recirc_id = 0;
 	}
-
-	if (attrs)
-		return -EINVAL;
-	*key_lenp = key_len;
-
-	return 0;
-}
-
-/**
- * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
- * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
- * @key_len: Length of key in @flow.  Used for calculating flow hash.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- *
- * This parses a series of Netlink attributes that form a flow key, which must
- * take the same form accepted by flow_from_nlattrs(), but only enough of it to
- * get the metadata, that is, the parts of the flow key that cannot be
- * extracted from the packet itself.
- */
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
-				   const struct nlattr *attr)
-{
-	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
-	const struct nlattr *nla;
-	int rem;
-
-	flow->key.phy.in_port = DP_MAX_PORTS;
-	flow->key.phy.priority = 0;
-	flow->key.phy.skb_mark = 0;
-	memset(tun_key, 0, sizeof(flow->key.tun_key));
-
-	nla_for_each_nested(nla, attr, rem) {
-		int type = nla_type(nla);
-
-		if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
-			int err;
-
-			if (nla_len(nla) != ovs_key_lens[type])
-				return -EINVAL;
-
-			switch (type) {
-			case OVS_KEY_ATTR_PRIORITY:
-				flow->key.phy.priority = nla_get_u32(nla);
-				break;
-
-			case OVS_KEY_ATTR_TUNNEL:
-				err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
-				if (err)
-					return err;
-				break;
-
-			case OVS_KEY_ATTR_IN_PORT:
-				if (nla_get_u32(nla) >= DP_MAX_PORTS)
-					return -EINVAL;
-				flow->key.phy.in_port = nla_get_u32(nla);
-				break;
-
-			case OVS_KEY_ATTR_SKB_MARK:
-				flow->key.phy.skb_mark = nla_get_u32(nla);
-				break;
+#else
+	key->recirc_id = 0;
+#endif
+
+	err = key_extract(skb, key);
+	if (!err) {
+		ovs_ct_fill_key(skb, key, post_ct);   /* Must be after key_extract(). */
+		if (post_ct) {
+			if (!skb_get_nfct(skb)) {
+				key->ct_zone = zone;
+			} else {
+				if (!post_ct_dnat)
+					key->ct_state &= ~OVS_CS_F_DST_NAT;
+				if (!post_ct_snat)
+					key->ct_state &= ~OVS_CS_F_SRC_NAT;
 			}
 		}
 	}
-	if (rem)
-		return -EINVAL;
-
-	flow->hash = ovs_flow_hash(&flow->key,
-				   flow_key_start(&flow->key), key_len);
-
-	return 0;
+	return err;
 }
 
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
+				   struct sk_buff *skb,
+				   struct sw_flow_key *key, bool log)
 {
-	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
-
-	if (swkey->phy.priority &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
-		goto nla_put_failure;
-
-	if (swkey->tun_key.ipv4_dst &&
-	    ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key))
-		goto nla_put_failure;
-
-	if (swkey->phy.in_port != DP_MAX_PORTS &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
-		goto nla_put_failure;
-
-	if (swkey->phy.skb_mark &&
-	    nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
-		goto nla_put_failure;
-
-	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
-	if (!nla)
-		goto nla_put_failure;
-	eth_key = nla_data(nla);
-	memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
-	memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
-
-	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
-		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) ||
-		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci))
-			goto nla_put_failure;
-		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-		if (!swkey->eth.tci)
-			goto unencap;
-	} else {
-		encap = NULL;
-	}
-
-	if (swkey->eth.type == htons(ETH_P_802_2))
-		goto unencap;
-
-	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
-		goto nla_put_failure;
-
-	if (swkey->eth.type == htons(ETH_P_IP)) {
-		struct ovs_key_ipv4 *ipv4_key;
-
-		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
-		if (!nla)
-			goto nla_put_failure;
-		ipv4_key = nla_data(nla);
-		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
-		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
-		ipv4_key->ipv4_proto = swkey->ip.proto;
-		ipv4_key->ipv4_tos = swkey->ip.tos;
-		ipv4_key->ipv4_ttl = swkey->ip.ttl;
-		ipv4_key->ipv4_frag = swkey->ip.frag;
-	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-		struct ovs_key_ipv6 *ipv6_key;
-
-		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
-		if (!nla)
-			goto nla_put_failure;
-		ipv6_key = nla_data(nla);
-		memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
-				sizeof(ipv6_key->ipv6_src));
-		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
-				sizeof(ipv6_key->ipv6_dst));
-		ipv6_key->ipv6_label = swkey->ipv6.label;
-		ipv6_key->ipv6_proto = swkey->ip.proto;
-		ipv6_key->ipv6_tclass = swkey->ip.tos;
-		ipv6_key->ipv6_hlimit = swkey->ip.ttl;
-		ipv6_key->ipv6_frag = swkey->ip.frag;
-	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
-		   swkey->eth.type == htons(ETH_P_RARP)) {
-		struct ovs_key_arp *arp_key;
-
-		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
-		if (!nla)
-			goto nla_put_failure;
-		arp_key = nla_data(nla);
-		memset(arp_key, 0, sizeof(struct ovs_key_arp));
-		arp_key->arp_sip = swkey->ipv4.addr.src;
-		arp_key->arp_tip = swkey->ipv4.addr.dst;
-		arp_key->arp_op = htons(swkey->ip.proto);
-		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
-		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
-	}
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	u64 attrs = 0;
+	int err;
 
-	if ((swkey->eth.type == htons(ETH_P_IP) ||
-	     swkey->eth.type == htons(ETH_P_IPV6)) &&
-	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-
-		if (swkey->ip.proto == IPPROTO_TCP) {
-			struct ovs_key_tcp *tcp_key;
-
-			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
-			if (!nla)
-				goto nla_put_failure;
-			tcp_key = nla_data(nla);
-			if (swkey->eth.type == htons(ETH_P_IP)) {
-				tcp_key->tcp_src = swkey->ipv4.tp.src;
-				tcp_key->tcp_dst = swkey->ipv4.tp.dst;
-			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				tcp_key->tcp_src = swkey->ipv6.tp.src;
-				tcp_key->tcp_dst = swkey->ipv6.tp.dst;
-			}
-		} else if (swkey->ip.proto == IPPROTO_UDP) {
-			struct ovs_key_udp *udp_key;
-
-			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
-			if (!nla)
-				goto nla_put_failure;
-			udp_key = nla_data(nla);
-			if (swkey->eth.type == htons(ETH_P_IP)) {
-				udp_key->udp_src = swkey->ipv4.tp.src;
-				udp_key->udp_dst = swkey->ipv4.tp.dst;
-			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-				udp_key->udp_src = swkey->ipv6.tp.src;
-				udp_key->udp_dst = swkey->ipv6.tp.dst;
-			}
-		} else if (swkey->eth.type == htons(ETH_P_IP) &&
-			   swkey->ip.proto == IPPROTO_ICMP) {
-			struct ovs_key_icmp *icmp_key;
-
-			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
-			if (!nla)
-				goto nla_put_failure;
-			icmp_key = nla_data(nla);
-			icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
-			icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
-		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
-			   swkey->ip.proto == IPPROTO_ICMPV6) {
-			struct ovs_key_icmpv6 *icmpv6_key;
-
-			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
-						sizeof(*icmpv6_key));
-			if (!nla)
-				goto nla_put_failure;
-			icmpv6_key = nla_data(nla);
-			icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
-			icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
-
-			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
-			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
-				struct ovs_key_nd *nd_key;
-
-				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
-				if (!nla)
-					goto nla_put_failure;
-				nd_key = nla_data(nla);
-				memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
-							sizeof(nd_key->nd_target));
-				memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
-				memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
-			}
-		}
-	}
+	err = parse_flow_nlattrs(attr, a, &attrs, log);
+	if (err)
+		return -EINVAL;
 
-unencap:
-	if (encap)
-		nla_nest_end(skb, encap);
+	/* Extract metadata from netlink attributes. */
+	err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
+	if (err)
+		return err;
 
-	return 0;
+	/* key_extract assumes that skb->protocol is set-up for
+	 * layer 3 packets which is the case for other callers,
+	 * in particular packets received from the network stack.
+	 * Here the correct value can be set from the metadata
+	 * extracted above.
+	 * For L2 packet key eth type would be zero. skb protocol
+	 * would be set to correct value later during key-extact.
+	 */
 
-nla_put_failure:
-	return -EMSGSIZE;
-}
+	skb->protocol = key->eth.type;
+	err = key_extract(skb, key);
+	if (err)
+		return err;
 
-/* Initializes the flow module.
- * Returns zero if successful or a negative error code. */
-int ovs_flow_init(void)
-{
-	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
-					0, NULL);
-	if (flow_cache == NULL)
-		return -ENOMEM;
+	/* Check that we have conntrack original direction tuple metadata only
+	 * for packets for which it makes sense.  Otherwise the key may be
+	 * corrupted due to overlapping key fields.
+	 */
+	if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
+	    key->eth.type != htons(ETH_P_IP))
+		return -EINVAL;
+	if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
+	    (key->eth.type != htons(ETH_P_IPV6) ||
+	     sw_flow_key_is_nd(key)))
+		return -EINVAL;
 
 	return 0;
 }
-
-/* Uninitializes the flow module. */
-void ovs_flow_exit(void)
-{
-	kmem_cache_destroy(flow_cache);
-}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 66ef7220293e..b5711aff6e76 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,24 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2017 Nicira, Inc.
  */
 
 #ifndef FLOW_H
 #define FLOW_H 1
 
+#include <linux/cache.h>
 #include <linux/kernel.h>
 #include <linux/netlink.h>
 #include <linux/openvswitch.h>
@@ -29,66 +17,100 @@
 #include <linux/in6.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
-#include <linux/flex_array.h>
+#include <linux/cpumask.h>
 #include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+#include <net/dst_metadata.h>
+#include <net/nsh.h>
 
 struct sk_buff;
 
-struct sw_flow_actions {
-	struct rcu_head rcu;
-	u32 actions_len;
-	struct nlattr actions[];
+enum sw_flow_mac_proto {
+	MAC_PROTO_NONE = 0,
+	MAC_PROTO_ETHERNET,
 };
+#define SW_FLOW_KEY_INVALID	0x80
+#define MPLS_LABEL_DEPTH       3
 
-/* Used to memset ovs_key_ipv4_tunnel padding. */
-#define OVS_TUNNEL_KEY_SIZE					\
-	(offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) +	\
-	FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
-
-struct ovs_key_ipv4_tunnel {
-	__be64 tun_id;
-	__be32 ipv4_src;
-	__be32 ipv4_dst;
-	__be16 tun_flags;
-	u8   ipv4_tos;
-	u8   ipv4_ttl;
+/* Bit definitions for IPv6 Extension Header pseudo-field. */
+enum ofp12_ipv6exthdr_flags {
+	OFPIEH12_NONEXT = 1 << 0,   /* "No next header" encountered. */
+	OFPIEH12_ESP    = 1 << 1,   /* Encrypted Sec Payload header present. */
+	OFPIEH12_AUTH   = 1 << 2,   /* Authentication header present. */
+	OFPIEH12_DEST   = 1 << 3,   /* 1 or 2 dest headers present. */
+	OFPIEH12_FRAG   = 1 << 4,   /* Fragment header present. */
+	OFPIEH12_ROUTER = 1 << 5,   /* Router header present. */
+	OFPIEH12_HOP    = 1 << 6,   /* Hop-by-hop header present. */
+	OFPIEH12_UNREP  = 1 << 7,   /* Unexpected repeats encountered. */
+	OFPIEH12_UNSEQ  = 1 << 8    /* Unexpected sequencing encountered. */
 };
 
-static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
-					 const struct iphdr *iph, __be64 tun_id,
-					 __be16 tun_flags)
-{
-	tun_key->tun_id = tun_id;
-	tun_key->ipv4_src = iph->saddr;
-	tun_key->ipv4_dst = iph->daddr;
-	tun_key->ipv4_tos = iph->tos;
-	tun_key->ipv4_ttl = iph->ttl;
-	tun_key->tun_flags = tun_flags;
-
-	/* clear struct padding. */
-	memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
-	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
-}
+/* Store options at the end of the array if they are less than the
+ * maximum size. This allows us to get the benefits of variable length
+ * matching for small options.
+ */
+#define TUN_METADATA_OFFSET(opt_len) \
+	(sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
+#define TUN_METADATA_OPTS(flow_key, opt_len) \
+	((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
+
+struct ovs_tunnel_info {
+	struct metadata_dst	*tun_dst;
+};
+
+struct vlan_head {
+	__be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
+	__be16 tci;  /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */
+};
+
+#define OVS_SW_FLOW_KEY_METADATA_SIZE			\
+	(offsetof(struct sw_flow_key, recirc_id) +	\
+	sizeof_field(struct sw_flow_key, recirc_id))
+
+struct ovs_key_nsh {
+	struct ovs_nsh_key_base base;
+	__be32 context[NSH_MD1_CONTEXT_SIZE];
+};
 
 struct sw_flow_key {
-	struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+	u8 tun_opts[IP_TUNNEL_OPTS_MAX];
+	u8 tun_opts_len;
+	struct ip_tunnel_key tun_key;	/* Encapsulating tunnel key. */
 	struct {
 		u32	priority;	/* Packet QoS priority. */
 		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
-	} phy;
+	} __packed phy; /* Safe when right after 'tun_key'. */
+	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
+	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
+	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
+	u32 recirc_id;			/* Recirculation ID.  */
 	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
-		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		struct vlan_head vlan;
+		struct vlan_head cvlan;
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
+	/* Filling a hole of two bytes. */
+	u8 ct_state;
+	u8 ct_orig_proto;		/* CT original direction tuple IP
+					 * protocol.
+					 */
+	union {
+		struct {
+			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
+			u8     tos;	    /* IP ToS. */
+			u8     ttl;	    /* IP TTL/hop limit. */
+			u8     frag;	/* One of OVS_FRAG_TYPE_*. */
+		} ip;
+	};
+	u16 ct_zone;			/* Conntrack zone. */
 	struct {
-		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
-		u8     tos;		/* IP ToS. */
-		u8     ttl;		/* IP TTL/hop limit. */
-		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
-	} ip;
+		__be16 src;		/* TCP/UDP/SCTP source port. */
+		__be16 dst;		/* TCP/UDP/SCTP destination port. */
+		__be16 flags;		/* TCP flags. */
+	} tp;
 	union {
 		struct {
 			struct {
@@ -97,9 +119,9 @@ struct sw_flow_key {
 			} addr;
 			union {
 				struct {
-					__be16 src;		/* TCP/UDP source port. */
-					__be16 dst;		/* TCP/UDP destination port. */
-				} tp;
+					__be32 src;
+					__be32 dst;
+				} ct_orig;	/* Conntrack original direction fields. */
 				struct {
 					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
 					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
@@ -112,32 +134,109 @@ struct sw_flow_key {
 				struct in6_addr dst;	/* IPv6 destination address. */
 			} addr;
 			__be32 label;			/* IPv6 flow label. */
-			struct {
-				__be16 src;		/* TCP/UDP source port. */
-				__be16 dst;		/* TCP/UDP destination port. */
-			} tp;
-			struct {
-				struct in6_addr target;	/* ND target address. */
-				u8 sll[ETH_ALEN];	/* ND source link layer address. */
-				u8 tll[ETH_ALEN];	/* ND target link layer address. */
-			} nd;
+			u16 exthdrs;	/* IPv6 extension header flags */
+			union {
+				struct {
+					struct in6_addr src;
+					struct in6_addr dst;
+				} ct_orig;	/* Conntrack original direction fields. */
+				struct {
+					struct in6_addr target;	/* ND target address. */
+					u8 sll[ETH_ALEN];	/* ND source link layer address. */
+					u8 tll[ETH_ALEN];	/* ND target link layer address. */
+				} nd;
+			};
 		} ipv6;
+		struct {
+			u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+			__be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
+		} mpls;
+
+		struct ovs_key_nsh nsh;         /* network service header */
 	};
+	struct {
+		/* Connection tracking fields not packed above. */
+		struct {
+			__be16 src;	/* CT orig tuple tp src port. */
+			__be16 dst;	/* CT orig tuple tp dst port. */
+		} orig_tp;
+		u32 mark;
+		struct ovs_key_ct_labels labels;
+	} ct;
+
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
+
+static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
+{
+	return key->eth.type == htons(ETH_P_IPV6) &&
+		key->ip.proto == NEXTHDR_ICMP &&
+		key->tp.dst == 0 &&
+		(key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+		 key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
+}
+
+struct sw_flow_key_range {
+	unsigned short int start;
+	unsigned short int end;
 };
 
-struct sw_flow {
+struct sw_flow_mask {
+	int ref_count;
 	struct rcu_head rcu;
-	struct hlist_node hash_node[2];
-	u32 hash;
+	struct sw_flow_key_range range;
+	struct sw_flow_key key;
+};
 
+struct sw_flow_match {
+	struct sw_flow_key *key;
+	struct sw_flow_key_range range;
+	struct sw_flow_mask *mask;
+};
+
+#define MAX_UFID_LENGTH 16 /* 128 bits */
+
+struct sw_flow_id {
+	u32 ufid_len;
+	union {
+		u32 ufid[MAX_UFID_LENGTH / 4];
+		struct sw_flow_key *unmasked_key;
+	};
+};
+
+struct sw_flow_actions {
+	struct rcu_head rcu;
+	size_t orig_len;	/* From flow_cmd_new netlink actions size */
+	u32 actions_len;
+	struct nlattr actions[];
+};
+
+struct sw_flow_stats {
+	u64 packet_count;		/* Number of packets matched. */
+	u64 byte_count;			/* Number of bytes matched. */
+	unsigned long used;		/* Last used time (in jiffies). */
+	spinlock_t lock;		/* Lock for atomic stats update. */
+	__be16 tcp_flags;		/* Union of seen TCP flags. */
+};
+
+struct sw_flow {
+	struct rcu_head rcu;
+	struct {
+		struct hlist_node node[2];
+		u32 hash;
+	} flow_table, ufid_table;
+	int stats_last_writer;		/* CPU id of the last writer on
+					 * 'stats[0]'.
+					 */
 	struct sw_flow_key key;
+	struct sw_flow_id id;
+	struct cpumask *cpu_used_mask;
+	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-
-	spinlock_t lock;	/* Lock for values below. */
-	unsigned long used;	/* Last used time (in jiffies). */
-	u64 packet_count;	/* Number of packets matched. */
-	u64 byte_count;		/* Number of bytes matched. */
-	u8 tcp_flags;		/* Union of seen TCP flags. */
+	struct sw_flow_stats __rcu *stats[]; /* One for each CPU.  First one
+					   * is allocated at flow creation time,
+					   * the rest are allocated on demand
+					   * while holding the 'stats[0].lock'.
+					   */
 };
 
 struct arp_eth_header {
@@ -154,65 +253,46 @@ struct arp_eth_header {
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
-int ovs_flow_init(void);
-void ovs_flow_exit(void);
-
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *flow);
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
-		     int *key_lenp);
-void ovs_flow_used(struct sw_flow *, struct sk_buff *);
-u64 ovs_flow_used_time(unsigned long flow_jiffies);
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
-		      const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
-				  const struct nlattr *attr);
+static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
+{
+	return key->mac_proto & ~SW_FLOW_KEY_INVALID;
+}
 
-#define MAX_ACTIONS_BUFSIZE    (32 * 1024)
-#define TBL_MIN_BUCKETS		1024
+static inline u16 __ovs_mac_header_len(u8 mac_proto)
+{
+	return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
+}
 
-struct flow_table {
-	struct flex_array *buckets;
-	unsigned int count, n_buckets;
-	struct rcu_head rcu;
-	int node_ver;
-	u32 hash_seed;
-	bool keep_flows;
-};
+static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
+{
+	return __ovs_mac_header_len(ovs_key_mac_proto(key));
+}
 
-static inline int ovs_flow_tbl_count(struct flow_table *table)
+static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
 {
-	return table->count;
+	return sfid->ufid_len;
 }
 
-static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
+static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
 {
-	return (table->count > table->n_buckets);
+	return !ovs_identifier_is_ufid(sfid);
 }
 
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				    struct sw_flow_key *key, int len);
-void ovs_flow_tbl_destroy(struct flow_table *table);
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
-struct flow_table *ovs_flow_tbl_alloc(int new_size);
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-			 struct sw_flow_key *key, int key_len);
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
-
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
-extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
-			 struct ovs_key_ipv4_tunnel *tun_key);
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
-			const struct ovs_key_ipv4_tunnel *tun_key);
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+			   const struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
+			unsigned long *used, __be16 *tcp_flags);
+void ovs_flow_stats_clear(struct sw_flow *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+			 struct sk_buff *skb,
+			 struct sw_flow_key *key);
+/* Extract key from packet coming from userspace. */
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
+				   struct sk_buff *skb,
+				   struct sw_flow_key *key, bool log);
 
 #endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
new file mode 100644
index 000000000000..1cb4f97335d8
--- /dev/null
+++ b/net/openvswitch/flow_netlink.c
@@ -0,0 +1,3794 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2007-2017 Nicira, Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/geneve.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/mpls.h>
+#include <net/vxlan.h>
+#include <net/tun_proto.h>
+#include <net/erspan.h>
+
+#include "drop.h"
+#include "flow_netlink.h"
+
+struct ovs_len_tbl {
+	int len;
+	const struct ovs_len_tbl *next;
+};
+
+#define OVS_ATTR_NESTED -1
+#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
+
+static bool actions_may_change_flow(const struct nlattr *actions)
+{
+	struct nlattr *nla;
+	int rem;
+
+	nla_for_each_nested(nla, actions, rem) {
+		u16 action = nla_type(nla);
+
+		switch (action) {
+		case OVS_ACTION_ATTR_OUTPUT:
+		case OVS_ACTION_ATTR_RECIRC:
+		case OVS_ACTION_ATTR_TRUNC:
+		case OVS_ACTION_ATTR_USERSPACE:
+		case OVS_ACTION_ATTR_DROP:
+		case OVS_ACTION_ATTR_PSAMPLE:
+			break;
+
+		case OVS_ACTION_ATTR_CT:
+		case OVS_ACTION_ATTR_CT_CLEAR:
+		case OVS_ACTION_ATTR_HASH:
+		case OVS_ACTION_ATTR_POP_ETH:
+		case OVS_ACTION_ATTR_POP_MPLS:
+		case OVS_ACTION_ATTR_POP_NSH:
+		case OVS_ACTION_ATTR_POP_VLAN:
+		case OVS_ACTION_ATTR_PUSH_ETH:
+		case OVS_ACTION_ATTR_PUSH_MPLS:
+		case OVS_ACTION_ATTR_PUSH_NSH:
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+		case OVS_ACTION_ATTR_SAMPLE:
+		case OVS_ACTION_ATTR_SET:
+		case OVS_ACTION_ATTR_SET_MASKED:
+		case OVS_ACTION_ATTR_METER:
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+		case OVS_ACTION_ATTR_ADD_MPLS:
+		case OVS_ACTION_ATTR_DEC_TTL:
+		default:
+			return true;
+		}
+	}
+	return false;
+}
+
+static void update_range(struct sw_flow_match *match,
+			 size_t offset, size_t size, bool is_mask)
+{
+	struct sw_flow_key_range *range;
+	size_t start = rounddown(offset, sizeof(long));
+	size_t end = roundup(offset + size, sizeof(long));
+
+	if (!is_mask)
+		range = &match->range;
+	else
+		range = &match->mask->range;
+
+	if (range->start == range->end) {
+		range->start = start;
+		range->end = end;
+		return;
+	}
+
+	if (range->start > start)
+		range->start = start;
+
+	if (range->end < end)
+		range->end = end;
+}
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+	do { \
+		update_range(match, offsetof(struct sw_flow_key, field),    \
+			     sizeof((match)->key->field), is_mask);	    \
+		if (is_mask)						    \
+			(match)->mask->key.field = value;		    \
+		else							    \
+			(match)->key->field = value;		            \
+	} while (0)
+
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
+	do {								    \
+		update_range(match, offset, len, is_mask);		    \
+		if (is_mask)						    \
+			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
+			       len);					   \
+		else							    \
+			memcpy((u8 *)(match)->key + offset, value_p, len);  \
+	} while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+				  value_p, len, is_mask)
+
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)		    \
+	do {								    \
+		update_range(match, offsetof(struct sw_flow_key, field),    \
+			     sizeof((match)->key->field), is_mask);	    \
+		if (is_mask)						    \
+			memset((u8 *)&(match)->mask->key.field, value,      \
+			       sizeof((match)->mask->key.field));	    \
+		else							    \
+			memset((u8 *)&(match)->key->field, value,           \
+			       sizeof((match)->key->field));                \
+	} while (0)
+
+#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({     \
+	update_range(match, offsetof(struct sw_flow_key, field),	      \
+		     bitmap_size(nbits), is_mask);			      \
+	bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \
+		    value_p, nbits);					      \
+})
+
+static bool match_validate(const struct sw_flow_match *match,
+			   u64 key_attrs, u64 mask_attrs, bool log)
+{
+	u64 key_expected = 0;
+	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
+
+	/* The following mask attributes allowed only if they
+	 * pass the validation tests. */
+	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
+			| (1 << OVS_KEY_ATTR_IPV6)
+			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
+			| (1 << OVS_KEY_ATTR_TCP)
+			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
+			| (1 << OVS_KEY_ATTR_UDP)
+			| (1 << OVS_KEY_ATTR_SCTP)
+			| (1 << OVS_KEY_ATTR_ICMP)
+			| (1 << OVS_KEY_ATTR_ICMPV6)
+			| (1 << OVS_KEY_ATTR_ARP)
+			| (1 << OVS_KEY_ATTR_ND)
+			| (1 << OVS_KEY_ATTR_MPLS)
+			| (1 << OVS_KEY_ATTR_NSH));
+
+	/* Always allowed mask fields. */
+	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+		       | (1 << OVS_KEY_ATTR_IN_PORT)
+		       | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+	/* Check key attributes. */
+	if (match->key->eth.type == htons(ETH_P_ARP)
+			|| match->key->eth.type == htons(ETH_P_RARP)) {
+		key_expected |= 1 << OVS_KEY_ATTR_ARP;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+	}
+
+	if (eth_p_mpls(match->key->eth.type)) {
+		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
+	}
+
+	if (match->key->eth.type == htons(ETH_P_IP)) {
+		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
+			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
+		}
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP) {
+				key_expected |= 1 << OVS_KEY_ATTR_UDP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_SCTP) {
+				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_TCP) {
+				key_expected |= 1 << OVS_KEY_ATTR_TCP;
+				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				}
+			}
+
+			if (match->key->ip.proto == IPPROTO_ICMP) {
+				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+			}
+		}
+	}
+
+	if (match->key->eth.type == htons(ETH_P_IPV6)) {
+		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
+			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
+		}
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP) {
+				key_expected |= 1 << OVS_KEY_ATTR_UDP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_SCTP) {
+				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_TCP) {
+				key_expected |= 1 << OVS_KEY_ATTR_TCP;
+				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				}
+			}
+
+			if (match->key->ip.proto == IPPROTO_ICMPV6) {
+				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+				if (match->key->tp.src ==
+						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+					key_expected |= 1 << OVS_KEY_ATTR_ND;
+					/* Original direction conntrack tuple
+					 * uses the same space as the ND fields
+					 * in the key, so both are not allowed
+					 * at the same time.
+					 */
+					mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
+					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
+						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+				}
+			}
+		}
+	}
+
+	if (match->key->eth.type == htons(ETH_P_NSH)) {
+		key_expected |= 1 << OVS_KEY_ATTR_NSH;
+		if (match->mask &&
+		    match->mask->key.eth.type == htons(0xffff)) {
+			mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
+		}
+	}
+
+	if ((key_attrs & key_expected) != key_expected) {
+		/* Key attributes check failed. */
+		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
+			  (unsigned long long)key_attrs,
+			  (unsigned long long)key_expected);
+		return false;
+	}
+
+	if ((mask_attrs & mask_allowed) != mask_attrs) {
+		/* Mask attributes check failed. */
+		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
+			  (unsigned long long)mask_attrs,
+			  (unsigned long long)mask_allowed);
+		return false;
+	}
+
+	return true;
+}
+
+size_t ovs_tun_key_attr_size(void)
+{
+	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
+	 * updating this function.
+	 */
+	return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
+		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
+		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
+		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
+		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
+		 * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
+		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
+		 */
+		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+}
+
+static size_t ovs_nsh_key_attr_size(void)
+{
+	/* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
+	 * updating this function.
+	 */
+	return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
+		/* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
+		 * mutually exclusive, so the bigger one can cover
+		 * the small one.
+		 */
+		+ nla_total_size(NSH_CTX_HDRS_MAX_LEN);
+}
+
+size_t ovs_key_attr_size(void)
+{
+	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
+	 * updating this function.
+	 */
+	BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 32);
+
+	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
+		  + ovs_tun_key_attr_size()
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
+		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
+		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
+		  + ovs_nsh_key_attr_size()
+		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
+		+ nla_total_size(28)  /* OVS_KEY_ATTR_ND */
+		+ nla_total_size(2);  /* OVS_KEY_ATTR_IPV6_EXTHDRS */
+}
+
+static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
+	[OVS_VXLAN_EXT_GBP]	    = { .len = sizeof(u32) },
+};
+
+static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
+	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
+	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
+	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
+	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
+	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
+	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
+	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
+						.next = ovs_vxlan_ext_key_lens },
+	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
+	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
+	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
+	[OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE]   = { .len = 0 },
+};
+
+static const struct ovs_len_tbl
+ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+	[OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
+	[OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
+	[OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
+};
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
+static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
+	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
+	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
+	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
+	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
+	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
+	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
+	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
+	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
+	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
+	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
+	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
+	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
+				     .next = ovs_tunnel_key_lens, },
+	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
+	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
+	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
+	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
+	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
+		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
+	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
+		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+	[OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
+				     .next = ovs_nsh_key_attr_lens, },
+	[OVS_KEY_ATTR_IPV6_EXTHDRS] = {
+		.len = sizeof(struct ovs_key_ipv6_exthdrs) },
+};
+
+static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
+{
+	return expected_len == attr_len ||
+	       expected_len == OVS_ATTR_NESTED ||
+	       expected_len == OVS_ATTR_VARIABLE;
+}
+
+static bool is_all_zero(const u8 *fp, size_t size)
+{
+	int i;
+
+	if (!fp)
+		return false;
+
+	for (i = 0; i < size; i++)
+		if (fp[i])
+			return false;
+
+	return true;
+}
+
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+				const struct nlattr *a[],
+				u64 *attrsp, bool log, bool nz)
+{
+	const struct nlattr *nla;
+	u64 attrs;
+	int rem;
+
+	attrs = *attrsp;
+	nla_for_each_nested(nla, attr, rem) {
+		u16 type = nla_type(nla);
+		int expected_len;
+
+		if (type > OVS_KEY_ATTR_MAX) {
+			OVS_NLERR(log, "Key type %d is out of range max %d",
+				  type, OVS_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (type == OVS_KEY_ATTR_PACKET_TYPE ||
+		    type == OVS_KEY_ATTR_ND_EXTENSIONS ||
+		    type == OVS_KEY_ATTR_TUNNEL_INFO) {
+			OVS_NLERR(log, "Key type %d is not supported", type);
+			return -EINVAL;
+		}
+
+		if (attrs & (1ULL << type)) {
+			OVS_NLERR(log, "Duplicate key (type %d).", type);
+			return -EINVAL;
+		}
+
+		expected_len = ovs_key_lens[type].len;
+		if (!check_attr_len(nla_len(nla), expected_len)) {
+			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
+				  type, nla_len(nla), expected_len);
+			return -EINVAL;
+		}
+
+		if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
+			attrs |= 1ULL << type;
+			a[type] = nla;
+		}
+	}
+	if (rem) {
+		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	*attrsp = attrs;
+	return 0;
+}
+
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+				   const struct nlattr *a[], u64 *attrsp,
+				   bool log)
+{
+	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
+}
+
+int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
+		       u64 *attrsp, bool log)
+{
+	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
+}
+
+static int genev_tun_opt_from_nlattr(const struct nlattr *a,
+				     struct sw_flow_match *match, bool is_mask,
+				     bool log)
+{
+	unsigned long opt_key_offset;
+
+	if (nla_len(a) > sizeof(match->key->tun_opts)) {
+		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
+			  nla_len(a), sizeof(match->key->tun_opts));
+		return -EINVAL;
+	}
+
+	if (nla_len(a) % 4 != 0) {
+		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
+			  nla_len(a));
+		return -EINVAL;
+	}
+
+	/* We need to record the length of the options passed
+	 * down, otherwise packets with the same format but
+	 * additional options will be silently matched.
+	 */
+	if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+				false);
+	} else {
+		/* This is somewhat unusual because it looks at
+		 * both the key and mask while parsing the
+		 * attributes (and by extension assumes the key
+		 * is parsed first). Normally, we would verify
+		 * that each is the correct length and that the
+		 * attributes line up in the validate function.
+		 * However, that is difficult because this is
+		 * variable length and we won't have the
+		 * information later.
+		 */
+		if (match->key->tun_opts_len != nla_len(a)) {
+			OVS_NLERR(log, "Geneve option len %d != mask len %d",
+				  match->key->tun_opts_len, nla_len(a));
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+	}
+
+	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+				  nla_len(a), is_mask);
+	return 0;
+}
+
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
+				     struct sw_flow_match *match, bool is_mask,
+				     bool log)
+{
+	struct nlattr *a;
+	int rem;
+	unsigned long opt_key_offset;
+	struct vxlan_metadata opts;
+
+	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+	memset(&opts, 0, sizeof(opts));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		if (type > OVS_VXLAN_EXT_MAX) {
+			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
+				  type, OVS_VXLAN_EXT_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_vxlan_ext_key_lens[type].len)) {
+			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
+				  type, nla_len(a),
+				  ovs_vxlan_ext_key_lens[type].len);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_VXLAN_EXT_GBP:
+			opts.gbp = nla_get_u32(a);
+			break;
+		default:
+			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+	if (rem) {
+		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
+			  rem);
+		return -EINVAL;
+	}
+
+	if (!is_mask)
+		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
+	else
+		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+				  is_mask);
+	return 0;
+}
+
+static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
+				      struct sw_flow_match *match, bool is_mask,
+				      bool log)
+{
+	unsigned long opt_key_offset;
+
+	BUILD_BUG_ON(sizeof(struct erspan_metadata) >
+		     sizeof(match->key->tun_opts));
+
+	if (nla_len(a) > sizeof(match->key->tun_opts)) {
+		OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
+			  nla_len(a), sizeof(match->key->tun_opts));
+		return -EINVAL;
+	}
+
+	if (!is_mask)
+		SW_FLOW_KEY_PUT(match, tun_opts_len,
+				sizeof(struct erspan_metadata), false);
+	else
+		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+				  nla_len(a), is_mask);
+	return 0;
+}
+
+static int ip_tun_from_nlattr(const struct nlattr *attr,
+			      struct sw_flow_match *match, bool is_mask,
+			      bool log)
+{
+	bool ttl = false, ipv4 = false, ipv6 = false;
+	IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
+	bool info_bridge_mode = false;
+	int opts_type = 0;
+	struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int err;
+
+		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
+				  type, OVS_TUNNEL_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_tunnel_key_lens[type].len)) {
+			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
+				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_TUNNEL_KEY_ATTR_ID:
+			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+					nla_get_be64(a), is_mask);
+			__set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
+					nla_get_in_addr(a), is_mask);
+			ipv4 = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
+					nla_get_in_addr(a), is_mask);
+			ipv4 = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
+					nla_get_in6_addr(a), is_mask);
+			ipv6 = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+					nla_get_in6_addr(a), is_mask);
+			ipv6 = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TOS:
+			SW_FLOW_KEY_PUT(match, tun_key.tos,
+					nla_get_u8(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TTL:
+			SW_FLOW_KEY_PUT(match, tun_key.ttl,
+					nla_get_u8(a), is_mask);
+			ttl = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+			__set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_CSUM:
+			__set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
+					nla_get_be16(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TP_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
+					nla_get_be16(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_OAM:
+			__set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
+			if (err)
+				return err;
+
+			__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
+			opts_type = type;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
+			if (err)
+				return err;
+
+			__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
+			opts_type = type;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_PAD:
+			break;
+		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = erspan_tun_opt_from_nlattr(a, match, is_mask,
+							 log);
+			if (err)
+				return err;
+
+			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
+			opts_type = type;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
+			info_bridge_mode = true;
+			ipv4 = true;
+			break;
+		default:
+			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
+				__IP_TUNNEL_FLAG_NUM, is_mask);
+	if (is_mask)
+		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
+	else
+		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
+				false);
+
+	if (rem > 0) {
+		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
+			  rem);
+		return -EINVAL;
+	}
+
+	if (ipv4 && ipv6) {
+		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
+		return -EINVAL;
+	}
+
+	if (!is_mask) {
+		if (!ipv4 && !ipv6) {
+			OVS_NLERR(log, "IP tunnel dst address not specified");
+			return -EINVAL;
+		}
+		if (ipv4) {
+			if (info_bridge_mode) {
+				__clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+
+				if (match->key->tun_key.u.ipv4.src ||
+				    match->key->tun_key.u.ipv4.dst ||
+				    match->key->tun_key.tp_src ||
+				    match->key->tun_key.tp_dst ||
+				    match->key->tun_key.ttl ||
+				    match->key->tun_key.tos ||
+				    !ip_tunnel_flags_empty(tun_flags)) {
+					OVS_NLERR(log, "IPv4 tun info is not correct");
+					return -EINVAL;
+				}
+			} else if (!match->key->tun_key.u.ipv4.dst) {
+				OVS_NLERR(log, "IPv4 tunnel dst address is zero");
+				return -EINVAL;
+			}
+		}
+		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
+			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
+			return -EINVAL;
+		}
+
+		if (!ttl && !info_bridge_mode) {
+			OVS_NLERR(log, "IP tunnel TTL not specified.");
+			return -EINVAL;
+		}
+	}
+
+	return opts_type;
+}
+
+static int vxlan_opt_to_nlattr(struct sk_buff *skb,
+			       const void *tun_opts, int swkey_tun_opts_len)
+{
+	const struct vxlan_metadata *opts = tun_opts;
+	struct nlattr *nla;
+
+	nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+static int __ip_tun_to_nlattr(struct sk_buff *skb,
+			      const struct ip_tunnel_key *output,
+			      const void *tun_opts, int swkey_tun_opts_len,
+			      unsigned short tun_proto, u8 mode)
+{
+	if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
+	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
+			 OVS_TUNNEL_KEY_ATTR_PAD))
+		return -EMSGSIZE;
+
+	if (mode & IP_TUNNEL_INFO_BRIDGE)
+		return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)
+		       ? -EMSGSIZE : 0;
+
+	switch (tun_proto) {
+	case AF_INET:
+		if (output->u.ipv4.src &&
+		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+				    output->u.ipv4.src))
+			return -EMSGSIZE;
+		if (output->u.ipv4.dst &&
+		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+				    output->u.ipv4.dst))
+			return -EMSGSIZE;
+		break;
+	case AF_INET6:
+		if (!ipv6_addr_any(&output->u.ipv6.src) &&
+		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
+				     &output->u.ipv6.src))
+			return -EMSGSIZE;
+		if (!ipv6_addr_any(&output->u.ipv6.dst) &&
+		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
+				     &output->u.ipv6.dst))
+			return -EMSGSIZE;
+		break;
+	}
+	if (output->tos &&
+	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
+		return -EMSGSIZE;
+	if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+		return -EMSGSIZE;
+	if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+		return -EMSGSIZE;
+	if (output->tp_src &&
+	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
+		return -EMSGSIZE;
+	if (output->tp_dst &&
+	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
+		return -EMSGSIZE;
+	if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
+		return -EMSGSIZE;
+	if (swkey_tun_opts_len) {
+		if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
+		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+			    swkey_tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+		else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
+				  output->tun_flags) &&
+			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+			return -EMSGSIZE;
+		else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+				  output->tun_flags) &&
+			 nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+				 swkey_tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+static int ip_tun_to_nlattr(struct sk_buff *skb,
+			    const struct ip_tunnel_key *output,
+			    const void *tun_opts, int swkey_tun_opts_len,
+			    unsigned short tun_proto, u8 mode)
+{
+	struct nlattr *nla;
+	int err;
+
+	nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
+	if (!nla)
+		return -EMSGSIZE;
+
+	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
+				 tun_proto, mode);
+	if (err)
+		return err;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+			    struct ip_tunnel_info *tun_info)
+{
+	return __ip_tun_to_nlattr(skb, &tun_info->key,
+				  ip_tunnel_info_opts(tun_info),
+				  tun_info->options_len,
+				  ip_tunnel_info_af(tun_info), tun_info->mode);
+}
+
+static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
+				    const struct nlattr *a[],
+				    bool is_mask, bool inner)
+{
+	__be16 tci = 0;
+	__be16 tpid = 0;
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (a[OVS_KEY_ATTR_ETHERTYPE])
+		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+	if (likely(!inner)) {
+		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
+	} else {
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
+	}
+	return 0;
+}
+
+static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
+				      u64 key_attrs, bool inner,
+				      const struct nlattr **a, bool log)
+{
+	__be16 tci = 0;
+
+	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
+		/* Not a VLAN. */
+		return 0;
+	}
+
+	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (!(tci & htons(VLAN_CFI_MASK))) {
+		if (tci) {
+			OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
+				  (inner) ? "C-VLAN" : "VLAN");
+			return -EINVAL;
+		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
+			/* Corner case for truncated VLAN header. */
+			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
+				  (inner) ? "C-VLAN" : "VLAN");
+			return -EINVAL;
+		}
+	}
+
+	return 1;
+}
+
+static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
+					   u64 key_attrs, bool inner,
+					   const struct nlattr **a, bool log)
+{
+	__be16 tci = 0;
+	__be16 tpid = 0;
+	bool encap_valid = !!(match->key->eth.vlan.tci &
+			      htons(VLAN_CFI_MASK));
+	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
+				htons(VLAN_CFI_MASK));
+
+	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
+		/* Not a VLAN. */
+		return 0;
+	}
+
+	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
+		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
+			  (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (a[OVS_KEY_ATTR_ETHERTYPE])
+		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+	if (tpid != htons(0xffff)) {
+		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
+			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
+		return -EINVAL;
+	}
+	if (!(tci & htons(VLAN_CFI_MASK))) {
+		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
+			  (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				     u64 *key_attrs, bool inner,
+				     const struct nlattr **a, bool is_mask,
+				     bool log)
+{
+	int err;
+	const struct nlattr *encap;
+
+	if (!is_mask)
+		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
+						 a, log);
+	else
+		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
+						      a, log);
+	if (err <= 0)
+		return err;
+
+	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
+	if (err)
+		return err;
+
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+
+	encap = a[OVS_KEY_ATTR_ENCAP];
+
+	if (!is_mask)
+		err = parse_flow_nlattrs(encap, a, key_attrs, log);
+	else
+		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
+
+	return err;
+}
+
+static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				   u64 *key_attrs, const struct nlattr **a,
+				   bool is_mask, bool log)
+{
+	int err;
+	bool encap_valid = false;
+
+	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
+					is_mask, log);
+	if (err)
+		return err;
+
+	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
+	if (encap_valid) {
+		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
+						is_mask, log);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
+				       u64 *attrs, const struct nlattr **a,
+				       bool is_mask, bool log)
+{
+	__be16 eth_type;
+
+	eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+	if (is_mask) {
+		/* Always exact match EtherType. */
+		eth_type = htons(0xffff);
+	} else if (!eth_proto_is_802_3(eth_type)) {
+		OVS_NLERR(log, "EtherType %x is less than min %x",
+				ntohs(eth_type), ETH_P_802_3_MIN);
+		return -EINVAL;
+	}
+
+	SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+	*attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+	return 0;
+}
+
+static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
+				 u64 *attrs, const struct nlattr **a,
+				 bool is_mask, bool log)
+{
+	u8 mac_proto = MAC_PROTO_ETHERNET;
+
+	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
+		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
+
+		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
+		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
+
+		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+		SW_FLOW_KEY_PUT(match, phy.priority,
+			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+		if (is_mask) {
+			in_port = 0xffffffff; /* Always exact match in_port. */
+		} else if (in_port >= DP_MAX_PORTS) {
+			OVS_NLERR(log, "Port %d exceeds max allowable %d",
+				  in_port, DP_MAX_PORTS);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+	} else if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+				       is_mask, log) < 0)
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
+	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
+		u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
+
+		if (ct_state & ~CT_SUPPORTED_MASK) {
+			OVS_NLERR(log, "ct_state flags %08x unsupported",
+				  ct_state);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
+	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
+		u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
+
+		SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
+	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
+		u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
+
+		SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
+	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
+		const struct ovs_key_ct_labels *cl;
+
+		cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
+		SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
+				   sizeof(*cl), is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
+	}
+	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
+		const struct ovs_key_ct_tuple_ipv4 *ct;
+
+		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
+
+		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
+		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
+		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
+		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
+	}
+	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
+		const struct ovs_key_ct_tuple_ipv6 *ct;
+
+		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
+
+		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
+				   sizeof(match->key->ipv6.ct_orig.src),
+				   is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
+				   sizeof(match->key->ipv6.ct_orig.dst),
+				   is_mask);
+		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
+		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
+		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
+		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
+	}
+
+	/* For layer 3 packets the Ethernet type is provided
+	 * and treated as metadata but no MAC addresses are provided.
+	 */
+	if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+	    (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
+		mac_proto = MAC_PROTO_NONE;
+
+	/* Always exact match mac_proto */
+	SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
+
+	if (mac_proto == MAC_PROTO_NONE)
+		return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
+						   log);
+
+	return 0;
+}
+
+/*
+ * Constructs NSH header 'nh' from attributes of OVS_ACTION_ATTR_PUSH_NSH,
+ * where 'nh' points to a memory block of 'size' bytes.  It's assumed that
+ * attributes were previously validated with validate_push_nsh().
+ */
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+			struct nshhdr *nh, size_t size)
+{
+	struct nlattr *a;
+	int rem;
+	u8 flags = 0;
+	u8 ttl = 0;
+	int mdlen = 0;
+
+	if (size < NSH_BASE_HDR_LEN)
+		return -ENOBUFS;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			flags = base->flags;
+			ttl = base->ttl;
+			nh->np = base->np;
+			nh->mdtype = base->mdtype;
+			nh->path_hdr = base->path_hdr;
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1:
+			mdlen = nla_len(a);
+			if (mdlen > size - NSH_BASE_HDR_LEN)
+				return -ENOBUFS;
+			memcpy(&nh->md1, nla_data(a), mdlen);
+			break;
+
+		case OVS_NSH_KEY_ATTR_MD2:
+			mdlen = nla_len(a);
+			if (mdlen > size - NSH_BASE_HDR_LEN)
+				return -ENOBUFS;
+			memcpy(&nh->md2, nla_data(a), mdlen);
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
+	nh->ver_flags_ttl_len = 0;
+	nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
+
+	return 0;
+}
+
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+				   struct sw_flow_match *match, bool is_mask,
+				   bool is_push_nsh, bool log)
+{
+	struct nlattr *a;
+	int rem;
+	bool has_base = false;
+	bool has_md1 = false;
+	bool has_md2 = false;
+	u8 mdtype = 0;
+	int mdlen = 0;
+
+	if (WARN_ON(is_push_nsh && is_mask))
+		return -EINVAL;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int i;
+
+		if (type > OVS_NSH_KEY_ATTR_MAX) {
+			OVS_NLERR(log, "nsh attr %d is out of range max %d",
+				  type, OVS_NSH_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_nsh_key_attr_lens[type].len)) {
+			OVS_NLERR(
+			    log,
+			    "nsh attr %d has unexpected len %d expected %d",
+			    type,
+			    nla_len(a),
+			    ovs_nsh_key_attr_lens[type].len
+			);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			has_base = true;
+			mdtype = base->mdtype;
+			SW_FLOW_KEY_PUT(match, nsh.base.flags,
+					base->flags, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.ttl,
+					base->ttl, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
+					base->mdtype, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.np,
+					base->np, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
+					base->path_hdr, is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+			has_md1 = true;
+			for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+				SW_FLOW_KEY_PUT(match, nsh.context[i],
+						md1->context[i], is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2:
+			if (!is_push_nsh) /* Not supported MD type 2 yet */
+				return -ENOTSUPP;
+
+			has_md2 = true;
+			mdlen = nla_len(a);
+			if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
+				OVS_NLERR(
+				    log,
+				    "Invalid MD length %d for MD type %d",
+				    mdlen,
+				    mdtype
+				);
+				return -EINVAL;
+			}
+			break;
+		default:
+			OVS_NLERR(log, "Unknown nsh attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	if (has_md1 && has_md2) {
+		OVS_NLERR(
+		    1,
+		    "invalid nsh attribute: md1 and md2 are exclusive."
+		);
+		return -EINVAL;
+	}
+
+	if (!is_mask) {
+		if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+		    (has_md2 && mdtype != NSH_M_TYPE2)) {
+			OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+				  mdtype);
+			return -EINVAL;
+		}
+
+		if (is_push_nsh &&
+		    (!has_base || (!has_md1 && !has_md2))) {
+			OVS_NLERR(
+			    1,
+			    "push_nsh: missing base or metadata attributes"
+			);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
+				u64 attrs, const struct nlattr **a,
+				bool is_mask, bool log)
+{
+	int err;
+
+	err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
+	if (err)
+		return err;
+
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+		const struct ovs_key_ethernet *eth_key;
+
+		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+		SW_FLOW_KEY_MEMCPY(match, eth.src,
+				eth_key->eth_src, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, eth.dst,
+				eth_key->eth_dst, ETH_ALEN, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+
+		if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
+			/* VLAN attribute is always parsed before getting here since it
+			 * may occur multiple times.
+			 */
+			OVS_NLERR(log, "VLAN attribute unexpected.");
+			return -EINVAL;
+		}
+
+		if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+			err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
+							  log);
+			if (err)
+				return err;
+		} else if (!is_mask) {
+			SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+		}
+	} else if (!match->key->eth.type) {
+		OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
+		return -EINVAL;
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+		const struct ovs_key_ipv4 *ipv4_key;
+
+		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
+				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+			return -EINVAL;
+		}
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv4_key->ipv4_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv4_key->ipv4_tos, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv4_key->ipv4_ttl, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv4_key->ipv4_frag, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				ipv4_key->ipv4_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+				ipv4_key->ipv4_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
+		const struct ovs_key_ipv6 *ipv6_key;
+
+		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
+				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+			return -EINVAL;
+		}
+
+		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
+			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
+				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, ipv6.label,
+				ipv6_key->ipv6_label, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv6_key->ipv6_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv6_key->ipv6_tclass, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv6_key->ipv6_hlimit, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv6_key->ipv6_frag, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+				ipv6_key->ipv6_src,
+				sizeof(match->key->ipv6.addr.src),
+				is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+				ipv6_key->ipv6_dst,
+				sizeof(match->key->ipv6.addr.dst),
+				is_mask);
+
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+	}
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) {
+		const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
+
+		ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]);
+
+		SW_FLOW_KEY_PUT(match, ipv6.exthdrs,
+				ipv6_exthdrs_key->hdrs, is_mask);
+
+		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
+		const struct ovs_key_arp *arp_key;
+
+		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
+				  arp_key->arp_op);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				arp_key->arp_sip, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+			arp_key->arp_tip, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ntohs(arp_key->arp_op), is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+				arp_key->arp_sha, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+				arp_key->arp_tha, ETH_ALEN, is_mask);
+
+		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+		if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+					    is_mask, false, log) < 0)
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
+		const struct ovs_key_mpls *mpls_key;
+		u32 hdr_len;
+		u32 label_count, label_count_mask, i;
+
+		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+		label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+		    hdr_len % sizeof(struct ovs_key_mpls))
+			return -EINVAL;
+
+		label_count_mask =  GENMASK(label_count - 1, 0);
+
+		for (i = 0 ; i < label_count; i++)
+			SW_FLOW_KEY_PUT(match, mpls.lse[i],
+					mpls_key[i].mpls_lse, is_mask);
+
+		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+				label_count_mask, is_mask);
+
+		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+	 }
+
+	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
+		const struct ovs_key_tcp *tcp_key;
+
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
+		SW_FLOW_KEY_PUT(match, tp.flags,
+				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+				is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
+		const struct ovs_key_udp *udp_key;
+
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
+		const struct ovs_key_sctp *sctp_key;
+
+		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
+		const struct ovs_key_icmp *icmp_key;
+
+		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+		SW_FLOW_KEY_PUT(match, tp.src,
+				htons(icmp_key->icmp_type), is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst,
+				htons(icmp_key->icmp_code), is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
+		const struct ovs_key_icmpv6 *icmpv6_key;
+
+		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+		SW_FLOW_KEY_PUT(match, tp.src,
+				htons(icmpv6_key->icmpv6_type), is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst,
+				htons(icmpv6_key->icmpv6_code), is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
+		const struct ovs_key_nd *nd_key;
+
+		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+			nd_key->nd_target,
+			sizeof(match->key->ipv6.nd.target),
+			is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+			nd_key->nd_sll, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+				nd_key->nd_tll, ETH_ALEN, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ND);
+	}
+
+	if (attrs != 0) {
+		OVS_NLERR(log, "Unknown key attributes %llx",
+			  (unsigned long long)attrs);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void nlattr_set(struct nlattr *attr, u8 val,
+		       const struct ovs_len_tbl *tbl)
+{
+	struct nlattr *nla;
+	int rem;
+
+	/* The nlattr stream should already have been validated */
+	nla_for_each_nested(nla, attr, rem) {
+		if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+			nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
+		else
+			memset(nla_data(nla), val, nla_len(nla));
+
+		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
+	}
+}
+
+static void mask_set_nlattr(struct nlattr *attr, u8 val)
+{
+	nlattr_set(attr, val, ovs_key_lens);
+}
+
+/**
+ * ovs_nla_get_match - parses Netlink attributes into a flow key and
+ * mask. In case the 'mask' is NULL, the flow is treated as exact match
+ * flow. Otherwise, it is treated as a wildcarded flow, except the mask
+ * does not include any don't care bit.
+ * @net: Used to determine per-namespace field support.
+ * @match: receives the extracted flow match information.
+ * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence. The fields should of the packet that triggered the creation
+ * of this flow.
+ * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
+ * Netlink attribute specifies the mask field of the wildcarded flow.
+ * @log: Boolean to allow kernel error logging.  Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
+ */
+int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
+		      const struct nlattr *nla_key,
+		      const struct nlattr *nla_mask,
+		      bool log)
+{
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	struct nlattr *newmask = NULL;
+	u64 key_attrs = 0;
+	u64 mask_attrs = 0;
+	int err;
+
+	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
+	if (err)
+		return err;
+
+	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
+	if (err)
+		return err;
+
+	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
+	if (err)
+		return err;
+
+	if (match->mask) {
+		if (!nla_mask) {
+			/* Create an exact match mask. We need to set to 0xff
+			 * all the 'match->mask' fields that have been touched
+			 * in 'match->key'. We cannot simply memset
+			 * 'match->mask', because padding bytes and fields not
+			 * specified in 'match->key' should be left to 0.
+			 * Instead, we use a stream of netlink attributes,
+			 * copied from 'key' and set to 0xff.
+			 * ovs_key_from_nlattrs() will take care of filling
+			 * 'match->mask' appropriately.
+			 */
+			newmask = kmemdup(nla_key,
+					  nla_total_size(nla_len(nla_key)),
+					  GFP_KERNEL);
+			if (!newmask)
+				return -ENOMEM;
+
+			mask_set_nlattr(newmask, 0xff);
+
+			/* The userspace does not send tunnel attributes that
+			 * are 0, but we should not wildcard them nonetheless.
+			 */
+			if (match->key->tun_proto)
+				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
+							 0xff, true);
+
+			nla_mask = newmask;
+		}
+
+		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
+		if (err)
+			goto free_newmask;
+
+		/* Always match on tci. */
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
+
+		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
+		if (err)
+			goto free_newmask;
+
+		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
+					   log);
+		if (err)
+			goto free_newmask;
+	}
+
+	if (!match_validate(match, key_attrs, mask_attrs, log))
+		err = -EINVAL;
+
+free_newmask:
+	kfree(newmask);
+	return err;
+}
+
+static size_t get_ufid_len(const struct nlattr *attr, bool log)
+{
+	size_t len;
+
+	if (!attr)
+		return 0;
+
+	len = nla_len(attr);
+	if (len < 1 || len > MAX_UFID_LENGTH) {
+		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
+			  nla_len(attr), MAX_UFID_LENGTH);
+		return 0;
+	}
+
+	return len;
+}
+
+/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
+ * or false otherwise.
+ */
+bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
+		      bool log)
+{
+	sfid->ufid_len = get_ufid_len(attr, log);
+	if (sfid->ufid_len)
+		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
+
+	return sfid->ufid_len;
+}
+
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+			   const struct sw_flow_key *key, bool log)
+{
+	struct sw_flow_key *new_key;
+
+	if (ovs_nla_get_ufid(sfid, ufid, log))
+		return 0;
+
+	/* If UFID was not provided, use unmasked key. */
+	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
+	if (!new_key)
+		return -ENOMEM;
+	memcpy(new_key, key, sizeof(*key));
+	sfid->unmasked_key = new_key;
+
+	return 0;
+}
+
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
+{
+	return nla_get_u32_default(attr, 0);
+}
+
+/**
+ * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
+ * @net: Network namespace.
+ * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
+ * metadata.
+ * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
+ * attributes.
+ * @attrs: Bit mask for the netlink attributes included in @a.
+ * @log: Boolean to allow kernel error logging.  Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ *
+ * This must be called before the packet key fields are filled in 'key'.
+ */
+
+int ovs_nla_get_flow_metadata(struct net *net,
+			      const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
+			      u64 attrs, struct sw_flow_key *key, bool log)
+{
+	struct sw_flow_match match;
+
+	memset(&match, 0, sizeof(match));
+	match.key = key;
+
+	key->ct_state = 0;
+	key->ct_zone = 0;
+	key->ct_orig_proto = 0;
+	memset(&key->ct, 0, sizeof(key->ct));
+	memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
+	memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
+
+	key->phy.in_port = DP_MAX_PORTS;
+
+	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
+}
+
+static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
+			    bool is_mask)
+{
+	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
+
+	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+			     struct sk_buff *skb)
+{
+	struct nlattr *start;
+
+	start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
+		goto nla_put_failure;
+
+	if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
+		if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
+			    sizeof(nsh->context), nsh->context))
+			goto nla_put_failure;
+	}
+
+	/* Don't support MD type 2 yet */
+
+	nla_nest_end(skb, start);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
+			     const struct sw_flow_key *output, bool is_mask,
+			     struct sk_buff *skb)
+{
+	struct ovs_key_ethernet *eth_key;
+	struct nlattr *nla;
+	struct nlattr *encap = NULL;
+	struct nlattr *in_encap = NULL;
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+		goto nla_put_failure;
+
+	if ((swkey->tun_proto || is_mask)) {
+		const void *opts = NULL;
+
+		if (ip_tunnel_is_options_present(output->tun_key.tun_flags))
+			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
+
+		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
+				     swkey->tun_opts_len, swkey->tun_proto, 0))
+			goto nla_put_failure;
+	}
+
+	if (swkey->phy.in_port == DP_MAX_PORTS) {
+		if (is_mask && (output->phy.in_port == 0xffff))
+			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+				goto nla_put_failure;
+	} else {
+		u16 upper_u16;
+		upper_u16 = !is_mask ? 0 : 0xffff;
+
+		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+				(upper_u16 << 16) | output->phy.in_port))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
+		goto nla_put_failure;
+
+	if (ovs_ct_put_key(swkey, output, skb))
+		goto nla_put_failure;
+
+	if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
+		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+		if (!nla)
+			goto nla_put_failure;
+
+		eth_key = nla_data(nla);
+		ether_addr_copy(eth_key->eth_src, output->eth.src);
+		ether_addr_copy(eth_key->eth_dst, output->eth.dst);
+
+		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
+				goto nla_put_failure;
+			encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP);
+			if (!swkey->eth.vlan.tci)
+				goto unencap;
+
+			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+					goto nla_put_failure;
+				in_encap = nla_nest_start_noflag(skb,
+								 OVS_KEY_ATTR_ENCAP);
+				if (!swkey->eth.cvlan.tci)
+					goto unencap;
+			}
+		}
+
+		if (swkey->eth.type == htons(ETH_P_802_2)) {
+			/*
+			* Ethertype 802.2 is represented in the netlink with omitted
+			* OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
+			* 0xffff in the mask attribute.  Ethertype can also
+			* be wildcarded.
+			*/
+			if (is_mask && output->eth.type)
+				if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+							output->eth.type))
+					goto nla_put_failure;
+			goto unencap;
+		}
+	}
+
+	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+		goto nla_put_failure;
+
+	if (eth_type_vlan(swkey->eth.type)) {
+		/* There are 3 VLAN tags, we don't know anything about the rest
+		 * of the packet, so truncate here.
+		 */
+		WARN_ON_ONCE(!(encap && in_encap));
+		goto unencap;
+	}
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		struct ovs_key_ipv4 *ipv4_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv4_key = nla_data(nla);
+		ipv4_key->ipv4_src = output->ipv4.addr.src;
+		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = output->ip.proto;
+		ipv4_key->ipv4_tos = output->ip.tos;
+		ipv4_key->ipv4_ttl = output->ip.ttl;
+		ipv4_key->ipv4_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		struct ovs_key_ipv6 *ipv6_key;
+		struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv6_key = nla_data(nla);
+		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
+				sizeof(ipv6_key->ipv6_src));
+		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
+				sizeof(ipv6_key->ipv6_dst));
+		ipv6_key->ipv6_label = output->ipv6.label;
+		ipv6_key->ipv6_proto = output->ip.proto;
+		ipv6_key->ipv6_tclass = output->ip.tos;
+		ipv6_key->ipv6_hlimit = output->ip.ttl;
+		ipv6_key->ipv6_frag = output->ip.frag;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS,
+				  sizeof(*ipv6_exthdrs_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv6_exthdrs_key = nla_data(nla);
+		ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs;
+	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
+		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+			goto nla_put_failure;
+	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
+		   swkey->eth.type == htons(ETH_P_RARP)) {
+		struct ovs_key_arp *arp_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+		if (!nla)
+			goto nla_put_failure;
+		arp_key = nla_data(nla);
+		memset(arp_key, 0, sizeof(struct ovs_key_arp));
+		arp_key->arp_sip = output->ipv4.addr.src;
+		arp_key->arp_tip = output->ipv4.addr.dst;
+		arp_key->arp_op = htons(output->ip.proto);
+		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
+		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+	} else if (eth_p_mpls(swkey->eth.type)) {
+		u8 i, num_labels;
+		struct ovs_key_mpls *mpls_key;
+
+		num_labels = hweight_long(output->mpls.num_labels_mask);
+		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+				  num_labels * sizeof(*mpls_key));
+		if (!nla)
+			goto nla_put_failure;
+
+		mpls_key = nla_data(nla);
+		for (i = 0; i < num_labels; i++)
+			mpls_key[i].mpls_lse = output->mpls.lse[i];
+	}
+
+	if ((swkey->eth.type == htons(ETH_P_IP) ||
+	     swkey->eth.type == htons(ETH_P_IPV6)) &&
+	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+		if (swkey->ip.proto == IPPROTO_TCP) {
+			struct ovs_key_tcp *tcp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+			if (!nla)
+				goto nla_put_failure;
+			tcp_key = nla_data(nla);
+			tcp_key->tcp_src = output->tp.src;
+			tcp_key->tcp_dst = output->tp.dst;
+			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+					 output->tp.flags))
+				goto nla_put_failure;
+		} else if (swkey->ip.proto == IPPROTO_UDP) {
+			struct ovs_key_udp *udp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+			if (!nla)
+				goto nla_put_failure;
+			udp_key = nla_data(nla);
+			udp_key->udp_src = output->tp.src;
+			udp_key->udp_dst = output->tp.dst;
+		} else if (swkey->ip.proto == IPPROTO_SCTP) {
+			struct ovs_key_sctp *sctp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+			if (!nla)
+				goto nla_put_failure;
+			sctp_key = nla_data(nla);
+			sctp_key->sctp_src = output->tp.src;
+			sctp_key->sctp_dst = output->tp.dst;
+		} else if (swkey->eth.type == htons(ETH_P_IP) &&
+			   swkey->ip.proto == IPPROTO_ICMP) {
+			struct ovs_key_icmp *icmp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmp_key = nla_data(nla);
+			icmp_key->icmp_type = ntohs(output->tp.src);
+			icmp_key->icmp_code = ntohs(output->tp.dst);
+		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+			   swkey->ip.proto == IPPROTO_ICMPV6) {
+			struct ovs_key_icmpv6 *icmpv6_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+						sizeof(*icmpv6_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmpv6_key = nla_data(nla);
+			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
+
+			if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+			    swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+				struct ovs_key_nd *nd_key;
+
+				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+				if (!nla)
+					goto nla_put_failure;
+				nd_key = nla_data(nla);
+				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
+							sizeof(nd_key->nd_target));
+				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
+				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
+			}
+		}
+	}
+
+unencap:
+	if (in_encap)
+		nla_nest_end(skb, in_encap);
+	if (encap)
+		nla_nest_end(skb, encap);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+int ovs_nla_put_key(const struct sw_flow_key *swkey,
+		    const struct sw_flow_key *output, int attr, bool is_mask,
+		    struct sk_buff *skb)
+{
+	int err;
+	struct nlattr *nla;
+
+	nla = nla_nest_start_noflag(skb, attr);
+	if (!nla)
+		return -EMSGSIZE;
+	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
+	if (err)
+		return err;
+	nla_nest_end(skb, nla);
+
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
+{
+	if (ovs_identifier_is_ufid(&flow->id))
+		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
+			       flow->id.ufid);
+
+	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
+			       OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
+{
+	return ovs_nla_put_key(&flow->key, &flow->key,
+				OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
+{
+	return ovs_nla_put_key(&flow->key, &flow->mask->key,
+				OVS_FLOW_ATTR_MASK, true, skb);
+}
+
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
+{
+	struct sw_flow_actions *sfa;
+
+	sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
+	if (!sfa)
+		return ERR_PTR(-ENOMEM);
+
+	sfa->actions_len = 0;
+	return sfa;
+}
+
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+	const struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, action, rem) {
+		switch (nla_type(a)) {
+		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+			ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+			break;
+		}
+	}
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+	int rem = nla_len(action);
+
+	switch (nla_type(a)) {
+	case OVS_CLONE_ATTR_EXEC:
+		/* The real list of actions follows this attribute. */
+		a = nla_next(a, &rem);
+		ovs_nla_free_nested_actions(a, rem);
+		break;
+	}
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+
+	switch (nla_type(a)) {
+	case OVS_DEC_TTL_ATTR_ACTION:
+		ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+		break;
+	}
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+	int rem = nla_len(action);
+
+	switch (nla_type(a)) {
+	case OVS_SAMPLE_ATTR_ARG:
+		/* The real list of actions follows this attribute. */
+		a = nla_next(a, &rem);
+		ovs_nla_free_nested_actions(a, rem);
+		break;
+	}
+}
+
+static void ovs_nla_free_set_action(const struct nlattr *a)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	struct ovs_tunnel_info *ovs_tun;
+
+	switch (nla_type(ovs_key)) {
+	case OVS_KEY_ATTR_TUNNEL_INFO:
+		ovs_tun = nla_data(ovs_key);
+		dst_release((struct dst_entry *)ovs_tun->tun_dst);
+		break;
+	}
+}
+
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
+{
+	const struct nlattr *a;
+	int rem;
+
+	/* Whenever new actions are added, the need to update this
+	 * function should be considered.
+	 */
+	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
+
+	if (!actions)
+		return;
+
+	nla_for_each_attr(a, actions, len, rem) {
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+			ovs_nla_free_check_pkt_len_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_CLONE:
+			ovs_nla_free_clone_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_CT:
+			ovs_ct_free_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			ovs_nla_free_dec_ttl_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			ovs_nla_free_sample_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			ovs_nla_free_set_action(a);
+			break;
+		}
+	}
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+	if (!sf_acts)
+		return;
+
+	ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
+	kfree(sf_acts);
+}
+
+static void __ovs_nla_free_flow_actions(struct rcu_head *head)
+{
+	ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
+{
+	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
+}
+
+static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+				       int attr_len, bool log)
+{
+
+	struct sw_flow_actions *acts;
+	int new_acts_size;
+	size_t req_size = NLA_ALIGN(attr_len);
+	int next_offset = offsetof(struct sw_flow_actions, actions) +
+					(*sfa)->actions_len;
+
+	if (req_size <= (ksize(*sfa) - next_offset))
+		goto out;
+
+	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
+
+	acts = nla_alloc_flow_actions(new_acts_size);
+	if (IS_ERR(acts))
+		return ERR_CAST(acts);
+
+	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
+	acts->actions_len = (*sfa)->actions_len;
+	acts->orig_len = (*sfa)->orig_len;
+	kfree(*sfa);
+	*sfa = acts;
+
+out:
+	(*sfa)->actions_len += req_size;
+	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
+}
+
+static struct nlattr *__add_action(struct sw_flow_actions **sfa,
+				   int attrtype, void *data, int len, bool log)
+{
+	struct nlattr *a;
+
+	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
+	if (IS_ERR(a))
+		return a;
+
+	a->nla_type = attrtype;
+	a->nla_len = nla_attr_size(len);
+
+	if (data)
+		memcpy(nla_data(a), data, len);
+	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+
+	return a;
+}
+
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
+		       int len, bool log)
+{
+	struct nlattr *a;
+
+	a = __add_action(sfa, attrtype, data, len, log);
+
+	return PTR_ERR_OR_ZERO(a);
+}
+
+static inline int add_nested_action_start(struct sw_flow_actions **sfa,
+					  int attrtype, bool log)
+{
+	int used = (*sfa)->actions_len;
+	int err;
+
+	err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
+	if (err)
+		return err;
+
+	return used;
+}
+
+static inline void add_nested_action_end(struct sw_flow_actions *sfa,
+					 int st_offset)
+{
+	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
+							       st_offset);
+
+	a->nla_len = sfa->actions_len - st_offset;
+}
+
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
+				  const struct sw_flow_key *key,
+				  struct sw_flow_actions **sfa,
+				  __be16 eth_type, __be16 vlan_tci,
+				  u32 mpls_label_count, bool log,
+				  u32 depth);
+
+static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
+				    const struct sw_flow_key *key,
+				    struct sw_flow_actions **sfa,
+				    __be16 eth_type, __be16 vlan_tci,
+				    u32 mpls_label_count, bool log, bool last,
+				    u32 depth)
+{
+	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+	const struct nlattr *probability, *actions;
+	const struct nlattr *a;
+	int rem, start, err;
+	struct sample_arg arg;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+			return -EINVAL;
+		attrs[type] = a;
+	}
+	if (rem)
+		return -EINVAL;
+
+	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+	if (!probability || nla_len(probability) != sizeof(u32))
+		return -EINVAL;
+
+	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+
+	/* validation done, copy sample action. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
+	if (start < 0)
+		return start;
+
+	/* When both skb and flow may be changed, put the sample
+	 * into a deferred fifo. On the other hand, if only skb
+	 * may be modified, the actions can be executed in place.
+	 *
+	 * Do this analysis at the flow installation time.
+	 * Set 'clone_action->exec' to true if the actions can be
+	 * executed without being deferred.
+	 *
+	 * If the sample is the last action, it can always be excuted
+	 * rather than deferred.
+	 */
+	arg.exec = last || !actions_may_change_flow(actions);
+	arg.probability = nla_get_u32(probability);
+
+	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
+				 log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, actions, key, sfa,
+				     eth_type, vlan_tci, mpls_label_count, log,
+				     depth + 1);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
+static int validate_and_copy_dec_ttl(struct net *net,
+				     const struct nlattr *attr,
+				     const struct sw_flow_key *key,
+				     struct sw_flow_actions **sfa,
+				     __be16 eth_type, __be16 vlan_tci,
+				     u32 mpls_label_count, bool log,
+				     u32 depth)
+{
+	const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
+	int start, action_start, err, rem;
+	const struct nlattr *a, *actions;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		/* Ignore unknown attributes to be future proof. */
+		if (type > OVS_DEC_TTL_ATTR_MAX)
+			continue;
+
+		if (!type || attrs[type]) {
+			OVS_NLERR(log, "Duplicate or invalid key (type %d).",
+				  type);
+			return -EINVAL;
+		}
+
+		attrs[type] = a;
+	}
+
+	if (rem) {
+		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
+	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) {
+		OVS_NLERR(log, "Missing valid actions attribute.");
+		return -EINVAL;
+	}
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
+	if (start < 0)
+		return start;
+
+	action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
+	if (action_start < 0)
+		return action_start;
+
+	err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
+				     vlan_tci, mpls_label_count, log,
+				     depth + 1);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, action_start);
+	add_nested_action_end(*sfa, start);
+	return 0;
+}
+
+static int validate_and_copy_clone(struct net *net,
+				   const struct nlattr *attr,
+				   const struct sw_flow_key *key,
+				   struct sw_flow_actions **sfa,
+				   __be16 eth_type, __be16 vlan_tci,
+				   u32 mpls_label_count, bool log, bool last,
+				   u32 depth)
+{
+	int start, err;
+	u32 exec;
+
+	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+		return -EINVAL;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+	if (start < 0)
+		return start;
+
+	exec = last || !actions_may_change_flow(attr);
+
+	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+				 sizeof(exec), log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, attr, key, sfa,
+				     eth_type, vlan_tci, mpls_label_count, log,
+				     depth + 1);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key,
+		    bool reset_key,
+		    struct sw_flow_mask *mask)
+{
+	memset(match, 0, sizeof(*match));
+	match->key = key;
+	match->mask = mask;
+
+	if (reset_key)
+		memset(key, 0, sizeof(*key));
+
+	if (mask) {
+		memset(&mask->key, 0, sizeof(mask->key));
+		mask->range.start = mask->range.end = 0;
+	}
+}
+
+static int validate_geneve_opts(struct sw_flow_key *key)
+{
+	struct geneve_opt *option;
+	int opts_len = key->tun_opts_len;
+	bool crit_opt = false;
+
+	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
+	while (opts_len > 0) {
+		int len;
+
+		if (opts_len < sizeof(*option))
+			return -EINVAL;
+
+		len = sizeof(*option) + option->length * 4;
+		if (len > opts_len)
+			return -EINVAL;
+
+		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+		option = (struct geneve_opt *)((u8 *)option + len);
+		opts_len -= len;
+	}
+
+	if (crit_opt)
+		__set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);
+
+	return 0;
+}
+
+static int validate_and_copy_set_tun(const struct nlattr *attr,
+				     struct sw_flow_actions **sfa, bool log)
+{
+	IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
+	struct sw_flow_match match;
+	struct sw_flow_key key;
+	struct metadata_dst *tun_dst;
+	struct ip_tunnel_info *tun_info;
+	struct ovs_tunnel_info *ovs_tun;
+	struct nlattr *a;
+	int err = 0, start, opts_type;
+
+	ovs_match_init(&match, &key, true, NULL);
+	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
+	if (opts_type < 0)
+		return opts_type;
+
+	if (key.tun_opts_len) {
+		switch (opts_type) {
+		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			err = validate_geneve_opts(&key);
+			if (err < 0)
+				return err;
+
+			__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
+			break;
+		}
+	}
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
+	if (start < 0)
+		return start;
+
+	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
+				     GFP_KERNEL);
+
+	if (!tun_dst)
+		return -ENOMEM;
+
+	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
+	if (err) {
+		dst_release((struct dst_entry *)tun_dst);
+		return err;
+	}
+
+	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
+			 sizeof(*ovs_tun), log);
+	if (IS_ERR(a)) {
+		dst_release((struct dst_entry *)tun_dst);
+		return PTR_ERR(a);
+	}
+
+	ovs_tun = nla_data(a);
+	ovs_tun->tun_dst = tun_dst;
+
+	tun_info = &tun_dst->u.tun_info;
+	tun_info->mode = IP_TUNNEL_INFO_TX;
+	if (key.tun_proto == AF_INET6)
+		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
+	else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
+		tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
+	tun_info->key = key.tun_key;
+
+	/* We need to store the options in the action itself since
+	 * everything else will go away after flow setup. We can append
+	 * it to tun_info and then point there.
+	 */
+	ip_tunnel_info_opts_set(tun_info,
+				TUN_METADATA_OPTS(&key, key.tun_opts_len),
+				key.tun_opts_len, dst_opt_type);
+	add_nested_action_end(*sfa, start);
+
+	return err;
+}
+
+static bool validate_push_nsh(const struct nlattr *attr, bool log)
+{
+	struct sw_flow_match match;
+	struct sw_flow_key key;
+
+	ovs_match_init(&match, &key, true, NULL);
+	return !nsh_key_put_from_nlattr(attr, &match, false, true, log);
+}
+
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding.
+ */
+static bool validate_masked(u8 *data, int len)
+{
+	u8 *mask = data + len;
+
+	while (len--)
+		if (*data++ & ~*mask++)
+			return false;
+
+	return true;
+}
+
+static int validate_set(const struct nlattr *a,
+			const struct sw_flow_key *flow_key,
+			struct sw_flow_actions **sfa, bool *skip_copy,
+			u8 mac_proto, __be16 eth_type, bool masked, bool log)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+	size_t key_len;
+
+	/* There can be only one key in a action */
+	if (!nla_ok(ovs_key, nla_len(a)) ||
+	    nla_total_size(nla_len(ovs_key)) != nla_len(a))
+		return -EINVAL;
+
+	key_len = nla_len(ovs_key);
+	if (masked)
+		key_len /= 2;
+
+	if (key_type > OVS_KEY_ATTR_MAX ||
+	    !check_attr_len(key_len, ovs_key_lens[key_type].len))
+		return -EINVAL;
+
+	if (masked && !validate_masked(nla_data(ovs_key), key_len))
+		return -EINVAL;
+
+	switch (key_type) {
+	case OVS_KEY_ATTR_PRIORITY:
+	case OVS_KEY_ATTR_SKB_MARK:
+	case OVS_KEY_ATTR_CT_MARK:
+	case OVS_KEY_ATTR_CT_LABELS:
+		break;
+
+	case OVS_KEY_ATTR_ETHERNET:
+		if (mac_proto != MAC_PROTO_ETHERNET)
+			return -EINVAL;
+		break;
+
+	case OVS_KEY_ATTR_TUNNEL: {
+		int err;
+
+		if (masked)
+			return -EINVAL; /* Masked tunnel set not supported. */
+
+		*skip_copy = true;
+		err = validate_and_copy_set_tun(a, sfa, log);
+		if (err)
+			return err;
+		break;
+	}
+	case OVS_KEY_ATTR_IPV4: {
+		const struct ovs_key_ipv4 *ipv4_key;
+
+		if (eth_type != htons(ETH_P_IP))
+			return -EINVAL;
+
+		ipv4_key = nla_data(ovs_key);
+
+		if (masked) {
+			const struct ovs_key_ipv4 *mask = ipv4_key + 1;
+
+			/* Non-writeable fields. */
+			if (mask->ipv4_proto || mask->ipv4_frag)
+				return -EINVAL;
+		} else {
+			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+				return -EINVAL;
+
+			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
+		break;
+	}
+	case OVS_KEY_ATTR_IPV6: {
+		const struct ovs_key_ipv6 *ipv6_key;
+
+		if (eth_type != htons(ETH_P_IPV6))
+			return -EINVAL;
+
+		ipv6_key = nla_data(ovs_key);
+
+		if (masked) {
+			const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+
+			/* Non-writeable fields. */
+			if (mask->ipv6_proto || mask->ipv6_frag)
+				return -EINVAL;
+
+			/* Invalid bits in the flow label mask? */
+			if (ntohl(mask->ipv6_label) & 0xFFF00000)
+				return -EINVAL;
+		} else {
+			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+				return -EINVAL;
+
+			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
+		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+			return -EINVAL;
+
+		break;
+	}
+	case OVS_KEY_ATTR_TCP:
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_TCP)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_UDP)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_MPLS:
+		if (!eth_p_mpls(eth_type))
+			return -EINVAL;
+		break;
+
+	case OVS_KEY_ATTR_SCTP:
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_SCTP)
+			return -EINVAL;
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Convert non-masked non-tunnel set actions to masked set actions. */
+	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+		int start, len = key_len * 2;
+		struct nlattr *at;
+
+		*skip_copy = true;
+
+		start = add_nested_action_start(sfa,
+						OVS_ACTION_ATTR_SET_TO_MASKED,
+						log);
+		if (start < 0)
+			return start;
+
+		at = __add_action(sfa, key_type, NULL, len, log);
+		if (IS_ERR(at))
+			return PTR_ERR(at);
+
+		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
+		/* Clear non-writeable bits from otherwise writeable fields. */
+		if (key_type == OVS_KEY_ATTR_IPV6) {
+			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+			mask->ipv6_label &= htonl(0x000FFFFF);
+		}
+		add_nested_action_end(*sfa, start);
+	}
+
+	return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
+		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
+	};
+	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+	int error;
+
+	error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX,
+					    nla_data(attr), nla_len(attr),
+					    userspace_policy, NULL);
+	if (error)
+		return error;
+
+	if (!a[OVS_USERSPACE_ATTR_PID] ||
+	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+		return -EINVAL;
+
+	return 0;
+}
+
+static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
+	[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
+};
+
+static int validate_and_copy_check_pkt_len(struct net *net,
+					   const struct nlattr *attr,
+					   const struct sw_flow_key *key,
+					   struct sw_flow_actions **sfa,
+					   __be16 eth_type, __be16 vlan_tci,
+					   u32 mpls_label_count,
+					   bool log, bool last, u32 depth)
+{
+	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
+	struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
+	struct check_pkt_len_arg arg;
+	int nested_acts_start;
+	int start, err;
+
+	err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
+					  nla_data(attr), nla_len(attr),
+					  cpl_policy, NULL);
+	if (err)
+		return err;
+
+	if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
+	    !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
+		return -EINVAL;
+
+	acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
+	acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
+
+	/* Both the nested action should be present. */
+	if (!acts_if_greater || !acts_if_lesser_eq)
+		return -EINVAL;
+
+	/* validation done, copy the nested actions. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
+					log);
+	if (start < 0)
+		return start;
+
+	arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
+	arg.exec_for_lesser_equal =
+		last || !actions_may_change_flow(acts_if_lesser_eq);
+	arg.exec_for_greater =
+		last || !actions_may_change_flow(acts_if_greater);
+
+	err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
+				 sizeof(arg), log);
+	if (err)
+		return err;
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
+				     eth_type, vlan_tci, mpls_label_count, log,
+				     depth + 1);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
+				     eth_type, vlan_tci, mpls_label_count, log,
+				     depth + 1);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+	add_nested_action_end(*sfa, start);
+	return 0;
+}
+
+static int validate_psample(const struct nlattr *attr)
+{
+	static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
+		[OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
+		[OVS_PSAMPLE_ATTR_COOKIE] = {
+			.type = NLA_BINARY,
+			.len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
+		},
+	};
+	struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
+	int err;
+
+	if (!IS_ENABLED(CONFIG_PSAMPLE))
+		return -EOPNOTSUPP;
+
+	err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
+	if (err)
+		return err;
+
+	return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
+}
+
+static int copy_action(const struct nlattr *from,
+		       struct sw_flow_actions **sfa, bool log)
+{
+	int totlen = NLA_ALIGN(from->nla_len);
+	struct nlattr *to;
+
+	to = reserve_sfa_size(sfa, from->nla_len, log);
+	if (IS_ERR(to))
+		return PTR_ERR(to);
+
+	memcpy(to, from, totlen);
+	return 0;
+}
+
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
+				  const struct sw_flow_key *key,
+				  struct sw_flow_actions **sfa,
+				  __be16 eth_type, __be16 vlan_tci,
+				  u32 mpls_label_count, bool log,
+				  u32 depth)
+{
+	u8 mac_proto = ovs_key_mac_proto(key);
+	const struct nlattr *a;
+	int rem, err;
+
+	if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+		return -EOVERFLOW;
+
+	nla_for_each_nested(a, attr, rem) {
+		/* Expected argument lengths, (u32)-1 for variable length. */
+		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
+			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
+			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+			[OVS_ACTION_ATTR_POP_VLAN] = 0,
+			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
+			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
+			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
+			[OVS_ACTION_ATTR_CT] = (u32)-1,
+			[OVS_ACTION_ATTR_CT_CLEAR] = 0,
+			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
+			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+			[OVS_ACTION_ATTR_POP_ETH] = 0,
+			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+			[OVS_ACTION_ATTR_POP_NSH] = 0,
+			[OVS_ACTION_ATTR_METER] = sizeof(u32),
+			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
+			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
+			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
+			[OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
+		};
+		const struct ovs_action_push_vlan *vlan;
+		int type = nla_type(a);
+		bool skip_copy;
+
+		if (type > OVS_ACTION_ATTR_MAX ||
+		    (action_lens[type] != nla_len(a) &&
+		     action_lens[type] != (u32)-1))
+			return -EINVAL;
+
+		skip_copy = false;
+		switch (type) {
+		case OVS_ACTION_ATTR_UNSPEC:
+			return -EINVAL;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			err = validate_userspace(a);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_OUTPUT:
+			if (nla_get_u32(a) >= DP_MAX_PORTS)
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_TRUNC: {
+			const struct ovs_action_trunc *trunc = nla_data(a);
+
+			if (trunc->max_len < ETH_HLEN)
+				return -EINVAL;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_HASH: {
+			const struct ovs_action_hash *act_hash = nla_data(a);
+
+			switch (act_hash->hash_alg) {
+			case OVS_HASH_ALG_L4:
+				fallthrough;
+			case OVS_HASH_ALG_SYM_L4:
+				break;
+			default:
+				return  -EINVAL;
+			}
+
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			if (mac_proto != MAC_PROTO_ETHERNET)
+				return -EINVAL;
+			vlan_tci = htons(0);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			if (mac_proto != MAC_PROTO_ETHERNET)
+				return -EINVAL;
+			vlan = nla_data(a);
+			if (!eth_type_vlan(vlan->vlan_tpid))
+				return -EINVAL;
+			if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
+				return -EINVAL;
+			vlan_tci = vlan->vlan_tci;
+			break;
+
+		case OVS_ACTION_ATTR_RECIRC:
+			break;
+
+		case OVS_ACTION_ATTR_ADD_MPLS: {
+			const struct ovs_action_add_mpls *mpls = nla_data(a);
+
+			if (!eth_p_mpls(mpls->mpls_ethertype))
+				return -EINVAL;
+
+			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
+				if (vlan_tci & htons(VLAN_CFI_MASK) ||
+				    (eth_type != htons(ETH_P_IP) &&
+				     eth_type != htons(ETH_P_IPV6) &&
+				     eth_type != htons(ETH_P_ARP) &&
+				     eth_type != htons(ETH_P_RARP) &&
+				     !eth_p_mpls(eth_type)))
+					return -EINVAL;
+				mpls_label_count++;
+			} else {
+				if (mac_proto == MAC_PROTO_ETHERNET) {
+					mpls_label_count = 1;
+					mac_proto = MAC_PROTO_NONE;
+				} else {
+					mpls_label_count++;
+				}
+			}
+			eth_type = mpls->mpls_ethertype;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_PUSH_MPLS: {
+			const struct ovs_action_push_mpls *mpls = nla_data(a);
+
+			if (!eth_p_mpls(mpls->mpls_ethertype))
+				return -EINVAL;
+			/* Prohibit push MPLS other than to a white list
+			 * for packets that have a known tag order.
+			 */
+			if (vlan_tci & htons(VLAN_CFI_MASK) ||
+			    (eth_type != htons(ETH_P_IP) &&
+			     eth_type != htons(ETH_P_IPV6) &&
+			     eth_type != htons(ETH_P_ARP) &&
+			     eth_type != htons(ETH_P_RARP) &&
+			     !eth_p_mpls(eth_type)))
+				return -EINVAL;
+			eth_type = mpls->mpls_ethertype;
+			mpls_label_count++;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_MPLS: {
+			__be16  proto;
+			if (vlan_tci & htons(VLAN_CFI_MASK) ||
+			    !eth_p_mpls(eth_type))
+				return -EINVAL;
+
+			/* Disallow subsequent L2.5+ set actions and mpls_pop
+			 * actions once the last MPLS label in the packet is
+			 * popped as there is no check here to ensure that
+			 * the new eth type is valid and thus set actions could
+			 * write off the end of the packet or otherwise corrupt
+			 * it.
+			 *
+			 * Support for these actions is planned using packet
+			 * recirculation.
+			 */
+			proto = nla_get_be16(a);
+
+			if (proto == htons(ETH_P_TEB) &&
+			    mac_proto != MAC_PROTO_NONE)
+				return -EINVAL;
+
+			mpls_label_count--;
+
+			if (!eth_p_mpls(proto) || !mpls_label_count)
+				eth_type = htons(0);
+			else
+				eth_type =  proto;
+
+			break;
+		}
+
+		case OVS_ACTION_ATTR_SET:
+			err = validate_set(a, key, sfa,
+					   &skip_copy, mac_proto, eth_type,
+					   false, log);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = validate_set(a, key, sfa,
+					   &skip_copy, mac_proto, eth_type,
+					   true, log);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_sample(net, a, key, sfa,
+						       eth_type, vlan_tci,
+						       mpls_label_count,
+						       log, last, depth);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_CT:
+			err = ovs_ct_copy_action(net, a, key, sfa, log);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
+		case OVS_ACTION_ATTR_CT_CLEAR:
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			/* Disallow pushing an Ethernet header if one
+			 * is already present */
+			if (mac_proto != MAC_PROTO_NONE)
+				return -EINVAL;
+			mac_proto = MAC_PROTO_ETHERNET;
+			break;
+
+		case OVS_ACTION_ATTR_POP_ETH:
+			if (mac_proto != MAC_PROTO_ETHERNET)
+				return -EINVAL;
+			if (vlan_tci & htons(VLAN_CFI_MASK))
+				return -EINVAL;
+			mac_proto = MAC_PROTO_NONE;
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			if (mac_proto != MAC_PROTO_ETHERNET) {
+				u8 next_proto;
+
+				next_proto = tun_p_from_eth_p(eth_type);
+				if (!next_proto)
+					return -EINVAL;
+			}
+			mac_proto = MAC_PROTO_NONE;
+			if (!validate_push_nsh(nla_data(a), log))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH: {
+			__be16 inner_proto;
+
+			if (eth_type != htons(ETH_P_NSH))
+				return -EINVAL;
+			inner_proto = tun_p_to_eth_p(key->nsh.base.np);
+			if (!inner_proto)
+				return -EINVAL;
+			if (key->nsh.base.np == TUN_P_ETHERNET)
+				mac_proto = MAC_PROTO_ETHERNET;
+			else
+				mac_proto = MAC_PROTO_NONE;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_METER:
+			/* Non-existent meters are simply ignored.  */
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_clone(net, a, key, sfa,
+						      eth_type, vlan_tci,
+						      mpls_label_count,
+						      log, last, depth);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_check_pkt_len(net, a, key, sfa,
+							      eth_type,
+							      vlan_tci,
+							      mpls_label_count,
+							      log, last,
+							      depth);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = validate_and_copy_dec_ttl(net, a, key, sfa,
+							eth_type, vlan_tci,
+							mpls_label_count, log,
+							depth);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
+		case OVS_ACTION_ATTR_DROP:
+			if (!nla_is_last(a, rem))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_PSAMPLE:
+			err = validate_psample(a);
+			if (err)
+				return err;
+			break;
+
+		default:
+			OVS_NLERR(log, "Unknown Action type %d", type);
+			return -EINVAL;
+		}
+		if (!skip_copy) {
+			err = copy_action(a, sfa, log);
+			if (err)
+				return err;
+		}
+	}
+
+	if (rem > 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* 'key' must be the masked key. */
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
+			 const struct sw_flow_key *key,
+			 struct sw_flow_actions **sfa, bool log)
+{
+	int err;
+	u32 mpls_label_count = 0;
+
+	*sfa = nla_alloc_flow_actions(nla_len(attr));
+	if (IS_ERR(*sfa))
+		return PTR_ERR(*sfa);
+
+	if (eth_p_mpls(key->eth.type))
+		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
+
+	(*sfa)->orig_len = nla_len(attr);
+	err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
+				     key->eth.vlan.tci, mpls_label_count, log,
+				     0);
+	if (err)
+		ovs_nla_free_flow_actions(*sfa);
+
+	return err;
+}
+
+static int sample_action_to_attr(const struct nlattr *attr,
+				 struct sk_buff *skb)
+{
+	struct nlattr *start, *ac_start = NULL, *sample_arg;
+	int err = 0, rem = nla_len(attr);
+	const struct sample_arg *arg;
+	struct nlattr *actions;
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
+	if (!start)
+		return -EMSGSIZE;
+
+	sample_arg = nla_data(attr);
+	arg = nla_data(sample_arg);
+	actions = nla_next(sample_arg, &rem);
+
+	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(actions, rem, skb);
+
+out:
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		nla_nest_cancel(skb, start);
+	} else {
+		nla_nest_end(skb, ac_start);
+		nla_nest_end(skb, start);
+	}
+
+	return err;
+}
+
+static int clone_action_to_attr(const struct nlattr *attr,
+				struct sk_buff *skb)
+{
+	struct nlattr *start;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE);
+	if (!start)
+		return -EMSGSIZE;
+
+	/* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+	attr = nla_next(nla_data(attr), &rem);
+	err = ovs_nla_put_actions(attr, rem, skb);
+
+	if (err)
+		nla_nest_cancel(skb, start);
+	else
+		nla_nest_end(skb, start);
+
+	return err;
+}
+
+static int check_pkt_len_action_to_attr(const struct nlattr *attr,
+					struct sk_buff *skb)
+{
+	struct nlattr *start, *ac_start = NULL;
+	const struct check_pkt_len_arg *arg;
+	const struct nlattr *a, *cpl_arg;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
+	if (!start)
+		return -EMSGSIZE;
+
+	/* The first nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	/* Second nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+	 */
+	a = nla_next(cpl_arg, &rem);
+	ac_start =  nla_nest_start_noflag(skb,
+					  OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	/* Third nested attribute in 'attr' is always
+	 * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
+	 */
+	a = nla_next(a, &rem);
+	ac_start =  nla_nest_start_noflag(skb,
+					  OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+
+out:
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
+static int dec_ttl_action_to_attr(const struct nlattr *attr,
+				  struct sk_buff *skb)
+{
+	struct nlattr *start, *action_start;
+	const struct nlattr *a;
+	int err = 0, rem;
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
+	if (!start)
+		return -EMSGSIZE;
+
+	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+		switch (nla_type(a)) {
+		case OVS_DEC_TTL_ATTR_ACTION:
+
+			action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
+			if (!action_start) {
+				err = -EMSGSIZE;
+				goto out;
+			}
+
+			err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+			if (err)
+				goto out;
+
+			nla_nest_end(skb, action_start);
+			break;
+
+		default:
+			/* Ignore all other option to be future compatible */
+			break;
+		}
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+
+out:
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
+static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+	struct nlattr *start;
+	int err;
+
+	switch (key_type) {
+	case OVS_KEY_ATTR_TUNNEL_INFO: {
+		struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
+		struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
+
+		start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
+		if (!start)
+			return -EMSGSIZE;
+
+		err =  ip_tun_to_nlattr(skb, &tun_info->key,
+					ip_tunnel_info_opts(tun_info),
+					tun_info->options_len,
+					ip_tunnel_info_af(tun_info), tun_info->mode);
+		if (err)
+			return err;
+		nla_nest_end(skb, start);
+		break;
+	}
+	default:
+		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
+			return -EMSGSIZE;
+		break;
+	}
+
+	return 0;
+}
+
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+						struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	struct nlattr *nla;
+	size_t key_len = nla_len(ovs_key) / 2;
+
+	/* Revert the conversion we did from a non-masked set action to
+	 * masked set action.
+	 */
+	nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
+{
+	const struct nlattr *a;
+	int rem, err;
+
+	nla_for_each_attr(a, attr, len, rem) {
+		int type = nla_type(a);
+
+		switch (type) {
+		case OVS_ACTION_ATTR_SET:
+			err = set_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = masked_set_action_to_set_action_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = sample_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_CT:
+			err = ovs_ct_action_to_attr(nla_data(a), skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_CLONE:
+			err = clone_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+			err = check_pkt_len_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = dec_ttl_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		default:
+			if (nla_put(skb, type, nla_len(a), nla_data(a)))
+				return -EMSGSIZE;
+			break;
+		}
+	}
+
+	return 0;
+}
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
new file mode 100644
index 000000000000..ff8cdecbe346
--- /dev/null
+++ b/net/openvswitch/flow_netlink.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ */
+
+
+#ifndef FLOW_NETLINK_H
+#define FLOW_NETLINK_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+size_t ovs_tun_key_attr_size(void);
+size_t ovs_key_attr_size(void);
+
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key, bool reset_key,
+		    struct sw_flow_mask *mask);
+
+int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
+		    int attr, bool is_mask, struct sk_buff *);
+int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
+		       u64 *attrsp, bool log);
+int ovs_nla_get_flow_metadata(struct net *net,
+			      const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
+			      u64 attrs, struct sw_flow_key *key, bool log);
+
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
+
+int ovs_nla_get_match(struct net *, struct sw_flow_match *,
+		      const struct nlattr *key, const struct nlattr *mask,
+		      bool log);
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+			    struct ip_tunnel_info *tun_info);
+
+bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+			   const struct sw_flow_key *key, bool log);
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
+
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
+			 const struct sw_flow_key *key,
+			 struct sw_flow_actions **sfa, bool log);
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
+		       void *data, int len, bool log);
+int ovs_nla_put_actions(const struct nlattr *attr,
+			int len, struct sk_buff *skb);
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
+
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
+			size_t size);
+
+#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
new file mode 100644
index 000000000000..ffc72a741a50
--- /dev/null
+++ b/net/openvswitch/flow_table.c
@@ -0,0 +1,1219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2007-2014 Nicira, Inc.
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include "flow_netlink.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/cpumask.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <linux/sort.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#define TBL_MIN_BUCKETS		1024
+#define MASK_ARRAY_SIZE_MIN	16
+#define REHASH_INTERVAL		(10 * 60 * HZ)
+
+#define MC_DEFAULT_HASH_ENTRIES	256
+#define MC_HASH_SHIFT		8
+#define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
+static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+	return range->end - range->start;
+}
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       bool full, const struct sw_flow_mask *mask)
+{
+	int start = full ? 0 : mask->range.start;
+	int len = full ? sizeof *dst : range_n_bytes(&mask->range);
+	const long *m = (const long *)((const u8 *)&mask->key + start);
+	const long *s = (const long *)((const u8 *)src + start);
+	long *d = (long *)((u8 *)dst + start);
+	int i;
+
+	/* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
+	 * if 'full' is false the memory outside of the 'mask->range' is left
+	 * uninitialized. This can be used as an optimization when further
+	 * operations on 'dst' only use contents within 'mask->range'.
+	 */
+	for (i = 0; i < len; i += sizeof(long))
+		*d++ = *s++ & *m++;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+	struct sw_flow *flow;
+	struct sw_flow_stats *stats;
+
+	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	flow->stats_last_writer = -1;
+	flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids];
+
+	/* Initialize the default stat node. */
+	stats = kmem_cache_alloc_node(flow_stats_cache,
+				      GFP_KERNEL | __GFP_ZERO,
+				      node_online(0) ? 0 : NUMA_NO_NODE);
+	if (!stats)
+		goto err;
+
+	spin_lock_init(&stats->lock);
+
+	RCU_INIT_POINTER(flow->stats[0], stats);
+
+	cpumask_set_cpu(0, flow->cpu_used_mask);
+
+	return flow;
+err:
+	kmem_cache_free(flow_cache, flow);
+	return ERR_PTR(-ENOMEM);
+}
+
+int ovs_flow_tbl_count(const struct flow_table *table)
+{
+	return table->count;
+}
+
+static void flow_free(struct sw_flow *flow)
+{
+	unsigned int cpu;
+
+	if (ovs_identifier_is_key(&flow->id))
+		kfree(flow->id.unmasked_key);
+	if (flow->sf_acts)
+		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
+					  flow->sf_acts);
+
+	for_each_cpu(cpu, flow->cpu_used_mask) {
+		if (flow->stats[cpu])
+			kmem_cache_free(flow_stats_cache,
+					(struct sw_flow_stats __force *)flow->stats[cpu]);
+	}
+
+	kmem_cache_free(flow_cache, flow);
+}
+
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+	flow_free(flow);
+}
+
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
+{
+	if (!flow)
+		return;
+
+	if (deferred)
+		call_rcu(&flow->rcu, rcu_free_flow_callback);
+	else
+		flow_free(flow);
+}
+
+static void __table_instance_destroy(struct table_instance *ti)
+{
+	kvfree(ti->buckets);
+	kfree(ti);
+}
+
+static struct table_instance *table_instance_alloc(int new_size)
+{
+	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	int i;
+
+	if (!ti)
+		return NULL;
+
+	ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
+				     GFP_KERNEL);
+	if (!ti->buckets) {
+		kfree(ti);
+		return NULL;
+	}
+
+	for (i = 0; i < new_size; i++)
+		INIT_HLIST_HEAD(&ti->buckets[i]);
+
+	ti->n_buckets = new_size;
+	ti->node_ver = 0;
+	get_random_bytes(&ti->hash_seed, sizeof(u32));
+
+	return ti;
+}
+
+static void __mask_array_destroy(struct mask_array *ma)
+{
+	free_percpu(ma->masks_usage_stats);
+	kfree(ma);
+}
+
+static void mask_array_rcu_cb(struct rcu_head *rcu)
+{
+	struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
+
+	__mask_array_destroy(ma);
+}
+
+static void tbl_mask_array_reset_counters(struct mask_array *ma)
+{
+	int i, cpu;
+
+	/* As the per CPU counters are not atomic we can not go ahead and
+	 * reset them from another CPU. To be able to still have an approximate
+	 * zero based counter we store the value at reset, and subtract it
+	 * later when processing.
+	 */
+	for (i = 0; i < ma->max; i++) {
+		ma->masks_usage_zero_cntr[i] = 0;
+
+		for_each_possible_cpu(cpu) {
+			struct mask_array_stats *stats;
+			unsigned int start;
+			u64 counter;
+
+			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
+			do {
+				start = u64_stats_fetch_begin(&stats->syncp);
+				counter = stats->usage_cntrs[i];
+			} while (u64_stats_fetch_retry(&stats->syncp, start));
+
+			ma->masks_usage_zero_cntr[i] += counter;
+		}
+	}
+}
+
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+	struct mask_array *new;
+
+	size = max(MASK_ARRAY_SIZE_MIN, size);
+	new = kzalloc(struct_size(new, masks, size) +
+		      sizeof(u64) * size, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
+					     struct_size(new, masks, size));
+
+	new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
+						sizeof(u64) * size,
+						__alignof__(u64));
+	if (!new->masks_usage_stats) {
+		kfree(new);
+		return NULL;
+	}
+
+	new->count = 0;
+	new->max = size;
+
+	return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+	struct mask_array *old;
+	struct mask_array *new;
+
+	new = tbl_mask_array_alloc(size);
+	if (!new)
+		return -ENOMEM;
+
+	old = ovsl_dereference(tbl->mask_array);
+	if (old) {
+		int i;
+
+		for (i = 0; i < old->max; i++) {
+			if (ovsl_dereference(old->masks[i]))
+				new->masks[new->count++] = old->masks[i];
+		}
+		call_rcu(&old->rcu, mask_array_rcu_cb);
+	}
+
+	rcu_assign_pointer(tbl->mask_array, new);
+
+	return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+				   struct sw_flow_mask *new)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int err, ma_count = READ_ONCE(ma->count);
+
+	if (ma_count >= ma->max) {
+		err = tbl_mask_array_realloc(tbl, ma->max +
+						  MASK_ARRAY_SIZE_MIN);
+		if (err)
+			return err;
+
+		ma = ovsl_dereference(tbl->mask_array);
+	} else {
+		/* On every add or delete we need to reset the counters so
+		 * every new mask gets a fair chance of being prioritized.
+		 */
+		tbl_mask_array_reset_counters(ma);
+	}
+
+	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+	rcu_assign_pointer(ma->masks[ma_count], new);
+	WRITE_ONCE(ma->count, ma_count + 1);
+
+	return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+				    struct sw_flow_mask *mask)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int i, ma_count = READ_ONCE(ma->count);
+
+	/* Remove the deleted mask pointers from the array */
+	for (i = 0; i < ma_count; i++) {
+		if (mask == ovsl_dereference(ma->masks[i]))
+			goto found;
+	}
+
+	BUG();
+	return;
+
+found:
+	WRITE_ONCE(ma->count, ma_count - 1);
+
+	rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]);
+	RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL);
+
+	kfree_rcu(mask, rcu);
+
+	/* Shrink the mask array if necessary. */
+	if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+	    ma_count <= (ma->max / 3))
+		tbl_mask_array_realloc(tbl, ma->max / 2);
+	else
+		tbl_mask_array_reset_counters(ma);
+
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+	if (mask) {
+		/* ovs-lock is required to protect mask-refcount and
+		 * mask list.
+		 */
+		ASSERT_OVSL();
+		BUG_ON(!mask->ref_count);
+		mask->ref_count--;
+
+		if (!mask->ref_count)
+			tbl_mask_array_del_mask(tbl, mask);
+	}
+}
+
+static void __mask_cache_destroy(struct mask_cache *mc)
+{
+	free_percpu(mc->mask_cache);
+	kfree(mc);
+}
+
+static void mask_cache_rcu_cb(struct rcu_head *rcu)
+{
+	struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
+
+	__mask_cache_destroy(mc);
+}
+
+static struct mask_cache *tbl_mask_cache_alloc(u32 size)
+{
+	struct mask_cache_entry __percpu *cache = NULL;
+	struct mask_cache *new;
+
+	/* Only allow size to be 0, or a power of 2, and does not exceed
+	 * percpu allocation size.
+	 */
+	if ((!is_power_of_2(size) && size != 0) ||
+	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+		return NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->cache_size = size;
+	if (new->cache_size > 0) {
+		cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
+						  new->cache_size),
+				       __alignof__(struct mask_cache_entry));
+		if (!cache) {
+			kfree(new);
+			return NULL;
+		}
+	}
+
+	new->mask_cache = cache;
+	return new;
+}
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
+{
+	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
+	struct mask_cache *new;
+
+	if (size == mc->cache_size)
+		return 0;
+
+	if ((!is_power_of_2(size) && size != 0) ||
+	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+		return -EINVAL;
+
+	new = tbl_mask_cache_alloc(size);
+	if (!new)
+		return -ENOMEM;
+
+	rcu_assign_pointer(table->mask_cache, new);
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+
+	return 0;
+}
+
+int ovs_flow_tbl_init(struct flow_table *table)
+{
+	struct table_instance *ti, *ufid_ti;
+	struct mask_cache *mc;
+	struct mask_array *ma;
+
+	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
+	if (!mc)
+		return -ENOMEM;
+
+	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+	if (!ma)
+		goto free_mask_cache;
+
+	ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!ti)
+		goto free_mask_array;
+
+	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!ufid_ti)
+		goto free_ti;
+
+	rcu_assign_pointer(table->ti, ti);
+	rcu_assign_pointer(table->ufid_ti, ufid_ti);
+	rcu_assign_pointer(table->mask_array, ma);
+	rcu_assign_pointer(table->mask_cache, mc);
+	table->last_rehash = jiffies;
+	table->count = 0;
+	table->ufid_count = 0;
+	return 0;
+
+free_ti:
+	__table_instance_destroy(ti);
+free_mask_array:
+	__mask_array_destroy(ma);
+free_mask_cache:
+	__mask_cache_destroy(mc);
+	return -ENOMEM;
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	struct table_instance *ti;
+
+	ti = container_of(rcu, struct table_instance, rcu);
+	__table_instance_destroy(ti);
+}
+
+static void table_instance_flow_free(struct flow_table *table,
+				     struct table_instance *ti,
+				     struct table_instance *ufid_ti,
+				     struct sw_flow *flow)
+{
+	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+	table->count--;
+
+	if (ovs_identifier_is_ufid(&flow->id)) {
+		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+		table->ufid_count--;
+	}
+
+	flow_mask_remove(table, flow->mask);
+}
+
+/* Must be called with OVS mutex held. */
+void table_instance_flow_flush(struct flow_table *table,
+			       struct table_instance *ti,
+			       struct table_instance *ufid_ti)
+{
+	int i;
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct hlist_head *head = &ti->buckets[i];
+		struct hlist_node *n;
+		struct sw_flow *flow;
+
+		hlist_for_each_entry_safe(flow, n, head,
+					  flow_table.node[ti->node_ver]) {
+
+			table_instance_flow_free(table, ti, ufid_ti,
+						 flow);
+			ovs_flow_free(flow, true);
+		}
+	}
+
+	if (WARN_ON(table->count != 0 ||
+		    table->ufid_count != 0)) {
+		table->count = 0;
+		table->ufid_count = 0;
+	}
+}
+
+static void table_instance_destroy(struct table_instance *ti,
+				   struct table_instance *ufid_ti)
+{
+	call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+	call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
+}
+
+/* No need for locking this function is called from RCU callback or
+ * error path.
+ */
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+	struct table_instance *ti = rcu_dereference_raw(table->ti);
+	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
+	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
+	struct mask_array *ma = rcu_dereference_raw(table->mask_array);
+
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+	call_rcu(&ma->rcu, mask_array_rcu_cb);
+	table_instance_destroy(ti, ufid_ti);
+}
+
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
+				       u32 *bucket, u32 *last)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	int ver;
+	int i;
+
+	ver = ti->node_ver;
+	while (*bucket < ti->n_buckets) {
+		i = 0;
+		head = &ti->buckets[*bucket];
+		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
+			if (i < *last) {
+				i++;
+				continue;
+			}
+			*last = i + 1;
+			return flow;
+		}
+		(*bucket)++;
+		*last = 0;
+	}
+
+	return NULL;
+}
+
+static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
+{
+	hash = jhash_1word(hash, ti->hash_seed);
+	return &ti->buckets[hash & (ti->n_buckets - 1)];
+}
+
+static void table_instance_insert(struct table_instance *ti,
+				  struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->flow_table.hash);
+	hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+				       struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->ufid_table.hash);
+	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
+}
+
+static void flow_table_copy_flows(struct table_instance *old,
+				  struct table_instance *new, bool ufid)
+{
+	int old_ver;
+	int i;
+
+	old_ver = old->node_ver;
+	new->node_ver = !old_ver;
+
+	/* Insert in new table. */
+	for (i = 0; i < old->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head = &old->buckets[i];
+
+		if (ufid)
+			hlist_for_each_entry_rcu(flow, head,
+						 ufid_table.node[old_ver],
+						 lockdep_ovsl_is_held())
+				ufid_table_instance_insert(new, flow);
+		else
+			hlist_for_each_entry_rcu(flow, head,
+						 flow_table.node[old_ver],
+						 lockdep_ovsl_is_held())
+				table_instance_insert(new, flow);
+	}
+}
+
+static struct table_instance *table_instance_rehash(struct table_instance *ti,
+						    int n_buckets, bool ufid)
+{
+	struct table_instance *new_ti;
+
+	new_ti = table_instance_alloc(n_buckets);
+	if (!new_ti)
+		return NULL;
+
+	flow_table_copy_flows(ti, new_ti, ufid);
+
+	return new_ti;
+}
+
+int ovs_flow_tbl_flush(struct flow_table *flow_table)
+{
+	struct table_instance *old_ti, *new_ti;
+	struct table_instance *old_ufid_ti, *new_ufid_ti;
+
+	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!new_ti)
+		return -ENOMEM;
+	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!new_ufid_ti)
+		goto err_free_ti;
+
+	old_ti = ovsl_dereference(flow_table->ti);
+	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
+
+	rcu_assign_pointer(flow_table->ti, new_ti);
+	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
+	flow_table->last_rehash = jiffies;
+
+	table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
+	table_instance_destroy(old_ti, old_ufid_ti);
+	return 0;
+
+err_free_ti:
+	__table_instance_destroy(new_ti);
+	return -ENOMEM;
+}
+
+static u32 flow_hash(const struct sw_flow_key *key,
+		     const struct sw_flow_key_range *range)
+{
+	const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
+
+	/* Make sure number of hash bytes are multiple of u32. */
+	int hash_u32s = range_n_bytes(range) >> 2;
+
+	return jhash2(hash_key, hash_u32s, 0);
+}
+
+static int flow_key_start(const struct sw_flow_key *key)
+{
+	if (key->tun_proto)
+		return 0;
+	else
+		return rounddown(offsetof(struct sw_flow_key, phy),
+				 sizeof(long));
+}
+
+static bool cmp_key(const struct sw_flow_key *key1,
+		    const struct sw_flow_key *key2,
+		    int key_start, int key_end)
+{
+	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
+	int i;
+
+	for (i = key_start; i < key_end; i += sizeof(long))
+		if (*cp1++ ^ *cp2++)
+			return false;
+
+	return true;
+}
+
+static bool flow_cmp_masked_key(const struct sw_flow *flow,
+				const struct sw_flow_key *key,
+				const struct sw_flow_key_range *range)
+{
+	return cmp_key(&flow->key, key, range->start, range->end);
+}
+
+static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+				      const struct sw_flow_match *match)
+{
+	struct sw_flow_key *key = match->key;
+	int key_start = flow_key_start(key);
+	int key_end = match->range.end;
+
+	BUG_ON(ovs_identifier_is_ufid(&flow->id));
+	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
+}
+
+static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
+					  const struct sw_flow_key *unmasked,
+					  const struct sw_flow_mask *mask,
+					  u32 *n_mask_hit)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u32 hash;
+	struct sw_flow_key masked_key;
+
+	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
+	hash = flow_hash(&masked_key, &mask->range);
+	head = find_bucket(ti, hash);
+	(*n_mask_hit)++;
+
+	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
+				 lockdep_ovsl_is_held()) {
+		if (flow->mask == mask && flow->flow_table.hash == hash &&
+		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
+			return flow;
+	}
+	return NULL;
+}
+
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ * This function MUST be called with BH disabled due to the use
+ * of CPU specific variables.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+				   struct table_instance *ti,
+				   struct mask_array *ma,
+				   const struct sw_flow_key *key,
+				   u32 *n_mask_hit,
+				   u32 *n_cache_hit,
+				   u32 *index)
+{
+	struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
+	struct sw_flow *flow;
+	struct sw_flow_mask *mask;
+	int i;
+
+	if (likely(*index < ma->max)) {
+		mask = rcu_dereference_ovsl(ma->masks[*index]);
+		if (mask) {
+			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+			if (flow) {
+				u64_stats_update_begin(&stats->syncp);
+				stats->usage_cntrs[*index]++;
+				u64_stats_update_end(&stats->syncp);
+				(*n_cache_hit)++;
+				return flow;
+			}
+		}
+	}
+
+	for (i = 0; i < ma->max; i++)  {
+
+		if (i == *index)
+			continue;
+
+		mask = rcu_dereference_ovsl(ma->masks[i]);
+		if (unlikely(!mask))
+			break;
+
+		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+		if (flow) { /* Found */
+			*index = i;
+			u64_stats_update_begin(&stats->syncp);
+			stats->usage_cntrs[*index]++;
+			u64_stats_update_end(&stats->syncp);
+			return flow;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to  mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+					  const struct sw_flow_key *key,
+					  u32 skb_hash,
+					  u32 *n_mask_hit,
+					  u32 *n_cache_hit)
+{
+	struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
+	struct mask_array *ma = rcu_dereference(tbl->mask_array);
+	struct table_instance *ti = rcu_dereference(tbl->ti);
+	struct mask_cache_entry *entries, *ce;
+	struct sw_flow *flow;
+	u32 hash;
+	int seg;
+
+	*n_mask_hit = 0;
+	*n_cache_hit = 0;
+	if (unlikely(!skb_hash || mc->cache_size == 0)) {
+		u32 mask_index = 0;
+		u32 cache = 0;
+
+		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache,
+				   &mask_index);
+	}
+
+	/* Pre and post recirulation flows usually have the same skb_hash
+	 * value. To avoid hash collisions, rehash the 'skb_hash' with
+	 * 'recirc_id'.  */
+	if (key->recirc_id)
+		skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+	ce = NULL;
+	hash = skb_hash;
+	entries = this_cpu_ptr(mc->mask_cache);
+
+	/* Find the cache entry 'ce' to operate on. */
+	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+		int index = hash & (mc->cache_size - 1);
+		struct mask_cache_entry *e;
+
+		e = &entries[index];
+		if (e->skb_hash == skb_hash) {
+			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+					   n_cache_hit, &e->mask_index);
+			if (!flow)
+				e->skb_hash = 0;
+			return flow;
+		}
+
+		if (!ce || e->skb_hash < ce->skb_hash)
+			ce = e;  /* A better replacement cache candidate. */
+
+		hash >>= MC_HASH_SHIFT;
+	}
+
+	/* Cache miss, do full lookup. */
+	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit,
+			   &ce->mask_index);
+	if (flow)
+		ce->skb_hash = skb_hash;
+
+	*n_cache_hit = 0;
+	return flow;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
+				    const struct sw_flow_key *key)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
+	u32 __always_unused n_mask_hit;
+	u32 __always_unused n_cache_hit;
+	struct sw_flow *flow;
+	u32 index = 0;
+
+	/* This function gets called trough the netlink interface and therefore
+	 * is preemptible. However, flow_lookup() function needs to be called
+	 * with BH disabled due to CPU specific variables.
+	 */
+	local_bh_disable();
+	flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+	local_bh_enable();
+	return flow;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  const struct sw_flow_match *match)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int i;
+
+	/* Always called under ovs-mutex. */
+	for (i = 0; i < ma->max; i++) {
+		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+		u32 __always_unused n_mask_hit;
+		struct sw_flow_mask *mask;
+		struct sw_flow *flow;
+
+		mask = ovsl_dereference(ma->masks[i]);
+		if (!mask)
+			continue;
+
+		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
+		if (flow && ovs_identifier_is_key(&flow->id) &&
+		    ovs_flow_cmp_unmasked_key(flow, match)) {
+			return flow;
+		}
+	}
+
+	return NULL;
+}
+
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+	return jhash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
+			      const struct sw_flow_id *sfid)
+{
+	if (flow->id.ufid_len != sfid->ufid_len)
+		return false;
+
+	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+bool ovs_flow_cmp(const struct sw_flow *flow,
+		  const struct sw_flow_match *match)
+{
+	if (ovs_identifier_is_ufid(&flow->id))
+		return flow_cmp_masked_key(flow, match->key, &match->range);
+
+	return ovs_flow_cmp_unmasked_key(flow, match);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
+					 const struct sw_flow_id *ufid)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u32 hash;
+
+	hash = ufid_hash(ufid);
+	head = find_bucket(ti, hash);
+	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
+				 lockdep_ovsl_is_held()) {
+		if (flow->ufid_table.hash == hash &&
+		    ovs_flow_cmp_ufid(flow, ufid))
+			return flow;
+	}
+	return NULL;
+}
+
+int ovs_flow_tbl_num_masks(const struct flow_table *table)
+{
+	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+	return READ_ONCE(ma->count);
+}
+
+u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
+{
+	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
+
+	return READ_ONCE(mc->cache_size);
+}
+
+static struct table_instance *table_instance_expand(struct table_instance *ti,
+						    bool ufid)
+{
+	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
+}
+
+/* Must be called with OVS mutex held. */
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+	struct table_instance *ti = ovsl_dereference(table->ti);
+	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
+
+	BUG_ON(table->count == 0);
+	table_instance_flow_free(table, ti, ufid_ti, flow);
+}
+
+static struct sw_flow_mask *mask_alloc(void)
+{
+	struct sw_flow_mask *mask;
+
+	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+	if (mask)
+		mask->ref_count = 1;
+
+	return mask;
+}
+
+static bool mask_equal(const struct sw_flow_mask *a,
+		       const struct sw_flow_mask *b)
+{
+	const u8 *a_ = (const u8 *)&a->key + a->range.start;
+	const u8 *b_ = (const u8 *)&b->key + b->range.start;
+
+	return  (a->range.end == b->range.end)
+		&& (a->range.start == b->range.start)
+		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
+					   const struct sw_flow_mask *mask)
+{
+	struct mask_array *ma;
+	int i;
+
+	ma = ovsl_dereference(tbl->mask_array);
+	for (i = 0; i < ma->max; i++) {
+		struct sw_flow_mask *t;
+		t = ovsl_dereference(ma->masks[i]);
+
+		if (t && mask_equal(mask, t))
+			return t;
+	}
+
+	return NULL;
+}
+
+/* Add 'mask' into the mask list, if it is not already there. */
+static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
+			    const struct sw_flow_mask *new)
+{
+	struct sw_flow_mask *mask;
+
+	mask = flow_mask_find(tbl, new);
+	if (!mask) {
+		/* Allocate a new mask if none exists. */
+		mask = mask_alloc();
+		if (!mask)
+			return -ENOMEM;
+		mask->key = new->key;
+		mask->range = new->range;
+
+		/* Add mask to mask-list. */
+		if (tbl_mask_array_add_mask(tbl, mask)) {
+			kfree(mask);
+			return -ENOMEM;
+		}
+	} else {
+		BUG_ON(!mask->ref_count);
+		mask->ref_count++;
+	}
+
+	flow->mask = mask;
+	return 0;
+}
+
+/* Must be called with OVS mutex held. */
+static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct table_instance *new_ti = NULL;
+	struct table_instance *ti;
+
+	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
+	ti = ovsl_dereference(table->ti);
+	table_instance_insert(ti, flow);
+	table->count++;
+
+	/* Expand table, if necessary, to make room. */
+	if (table->count > ti->n_buckets)
+		new_ti = table_instance_expand(ti, false);
+	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
+		new_ti = table_instance_rehash(ti, ti->n_buckets, false);
+
+	if (new_ti) {
+		rcu_assign_pointer(table->ti, new_ti);
+		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+		table->last_rehash = jiffies;
+	}
+}
+
+/* Must be called with OVS mutex held. */
+static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct table_instance *ti;
+
+	flow->ufid_table.hash = ufid_hash(&flow->id);
+	ti = ovsl_dereference(table->ufid_ti);
+	ufid_table_instance_insert(ti, flow);
+	table->ufid_count++;
+
+	/* Expand table, if necessary, to make room. */
+	if (table->ufid_count > ti->n_buckets) {
+		struct table_instance *new_ti;
+
+		new_ti = table_instance_expand(ti, true);
+		if (new_ti) {
+			rcu_assign_pointer(table->ufid_ti, new_ti);
+			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+		}
+	}
+}
+
+/* Must be called with OVS mutex held. */
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+			const struct sw_flow_mask *mask)
+{
+	int err;
+
+	err = flow_mask_insert(table, flow, mask);
+	if (err)
+		return err;
+	flow_key_insert(table, flow);
+	if (ovs_identifier_is_ufid(&flow->id))
+		flow_ufid_insert(table, flow);
+
+	return 0;
+}
+
+static int compare_mask_and_count(const void *a, const void *b)
+{
+	const struct mask_count *mc_a = a;
+	const struct mask_count *mc_b = b;
+
+	return (s64)mc_b->counter - (s64)mc_a->counter;
+}
+
+/* Must be called with OVS mutex held. */
+void ovs_flow_masks_rebalance(struct flow_table *table)
+{
+	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+	struct mask_count *masks_and_count;
+	struct mask_array *new;
+	int masks_entries = 0;
+	int i;
+
+	/* Build array of all current entries with use counters. */
+	masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
+					GFP_KERNEL);
+	if (!masks_and_count)
+		return;
+
+	for (i = 0; i < ma->max; i++) {
+		struct sw_flow_mask *mask;
+		int cpu;
+
+		mask = rcu_dereference_ovsl(ma->masks[i]);
+		if (unlikely(!mask))
+			break;
+
+		masks_and_count[i].index = i;
+		masks_and_count[i].counter = 0;
+
+		for_each_possible_cpu(cpu) {
+			struct mask_array_stats *stats;
+			unsigned int start;
+			u64 counter;
+
+			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
+			do {
+				start = u64_stats_fetch_begin(&stats->syncp);
+				counter = stats->usage_cntrs[i];
+			} while (u64_stats_fetch_retry(&stats->syncp, start));
+
+			masks_and_count[i].counter += counter;
+		}
+
+		/* Subtract the zero count value. */
+		masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];
+
+		/* Rather than calling tbl_mask_array_reset_counters()
+		 * below when no change is needed, do it inline here.
+		 */
+		ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
+	}
+
+	if (i == 0)
+		goto free_mask_entries;
+
+	/* Sort the entries */
+	masks_entries = i;
+	sort(masks_and_count, masks_entries, sizeof(*masks_and_count),
+	     compare_mask_and_count, NULL);
+
+	/* If the order is the same, nothing to do... */
+	for (i = 0; i < masks_entries; i++) {
+		if (i != masks_and_count[i].index)
+			break;
+	}
+	if (i == masks_entries)
+		goto free_mask_entries;
+
+	/* Rebuilt the new list in order of usage. */
+	new = tbl_mask_array_alloc(ma->max);
+	if (!new)
+		goto free_mask_entries;
+
+	for (i = 0; i < masks_entries; i++) {
+		int index = masks_and_count[i].index;
+
+		if (ovsl_dereference(ma->masks[index]))
+			new->masks[new->count++] = ma->masks[index];
+	}
+
+	rcu_assign_pointer(table->mask_array, new);
+	call_rcu(&ma->rcu, mask_array_rcu_cb);
+
+free_mask_entries:
+	kfree(masks_and_count);
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
+	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+				       + (nr_cpu_ids
+					  * sizeof(struct sw_flow_stats *))
+				       + cpumask_size(),
+				       0, 0, NULL);
+	if (flow_cache == NULL)
+		return -ENOMEM;
+
+	flow_stats_cache
+		= kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
+				    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (flow_stats_cache == NULL) {
+		kmem_cache_destroy(flow_cache);
+		flow_cache = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+	kmem_cache_destroy(flow_stats_cache);
+	kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
new file mode 100644
index 000000000000..f524dc3e4862
--- /dev/null
+++ b/net/openvswitch/flow_table.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ */
+
+#ifndef FLOW_TABLE_H
+#define FLOW_TABLE_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+struct mask_cache_entry {
+	u32 skb_hash;
+	u32 mask_index;
+};
+
+struct mask_cache {
+	struct rcu_head rcu;
+	u32 cache_size;  /* Must be ^2 value. */
+	struct mask_cache_entry __percpu *mask_cache;
+};
+
+struct mask_count {
+	int index;
+	u64 counter;
+};
+
+struct mask_array_stats {
+	struct u64_stats_sync syncp;
+	u64 usage_cntrs[];
+};
+
+struct mask_array {
+	struct rcu_head rcu;
+	int count, max;
+	struct mask_array_stats __percpu *masks_usage_stats;
+	u64 *masks_usage_zero_cntr;
+	struct sw_flow_mask __rcu *masks[] __counted_by(max);
+};
+
+struct table_instance {
+	struct hlist_head *buckets;
+	unsigned int n_buckets;
+	struct rcu_head rcu;
+	int node_ver;
+	u32 hash_seed;
+};
+
+struct flow_table {
+	struct table_instance __rcu *ti;
+	struct table_instance __rcu *ufid_ti;
+	struct mask_cache __rcu *mask_cache;
+	struct mask_array __rcu *mask_array;
+	unsigned long last_rehash;
+	unsigned int count;
+	unsigned int ufid_count;
+};
+
+extern struct kmem_cache *flow_stats_cache;
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_free(struct sw_flow *, bool deferred);
+
+int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_count(const struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+int ovs_flow_tbl_flush(struct flow_table *flow_table);
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+			const struct sw_flow_mask *mask);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+int  ovs_flow_tbl_num_masks(const struct flow_table *table);
+u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
+int  ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
+				       u32 *bucket, u32 *idx);
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
+					  const struct sw_flow_key *,
+					  u32 skb_hash,
+					  u32 *n_mask_hit,
+					  u32 *n_cache_hit);
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
+				    const struct sw_flow_key *);
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  const struct sw_flow_match *match);
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
+					 const struct sw_flow_id *);
+
+bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       bool full, const struct sw_flow_mask *mask);
+
+void ovs_flow_masks_rebalance(struct flow_table *table);
+void table_instance_flow_flush(struct flow_table *table,
+			       struct table_instance *ti,
+			       struct table_instance *ufid_ti);
+
+#endif /* flow_table.h */
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..cc08e0403909
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,766 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/openvswitch.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "meter.h"
+
+static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
+	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
+	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
+	[OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+	[OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
+	[OVS_METER_ATTR_USED] = { .type = NLA_U64 },
+	[OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
+	[OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
+	[OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+};
+
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+	return id % ti->n_meters;
+}
+
+static void ovs_meter_free(struct dp_meter *meter)
+{
+	if (!meter)
+		return;
+
+	kfree_rcu(meter, rcu);
+}
+
+/* Call with ovs_mutex or RCU read lock. */
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
+				     u32 meter_id)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter_id);
+	struct dp_meter *meter;
+
+	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+	if (meter && likely(meter->id == meter_id))
+		return meter;
+
+	return NULL;
+}
+
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+	struct dp_meter_instance *ti;
+
+	ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->n_meters = size;
+
+	return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+	kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+	struct dp_meter_instance *ti;
+
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	int n_meters = min(size, ti->n_meters);
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(size);
+	if (!new_ti)
+		return -ENOMEM;
+
+	for (i = 0; i < n_meters; i++)
+		if (rcu_dereference_ovsl(ti->dp_meters[i]))
+			new_ti->dp_meters[i] = ti->dp_meters[i];
+
+	rcu_assign_pointer(tbl->ti, new_ti);
+	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+	return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
+}
+
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter->id);
+	int err;
+
+	/* In generally, slots selected should be empty, because
+	 * OvS uses id-pool to fetch a available id.
+	 */
+	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+		return -EBUSY;
+
+	dp_meter_instance_insert(ti, meter);
+
+	/* That function is thread-safe. */
+	tbl->count++;
+	if (tbl->count >= tbl->max_meters_allowed) {
+		err = -EFBIG;
+		goto attach_err;
+	}
+
+	if (tbl->count >= ti->n_meters &&
+	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+		err = -ENOMEM;
+		goto attach_err;
+	}
+
+	return 0;
+
+attach_err:
+	dp_meter_instance_remove(ti, meter);
+	tbl->count--;
+	return err;
+}
+
+static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti;
+
+	ASSERT_OVSL();
+	if (!meter)
+		return 0;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_remove(ti, meter);
+
+	tbl->count--;
+
+	/* Shrink the meter array if necessary. */
+	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+	    tbl->count <= (ti->n_meters / 4)) {
+		int half_size = ti->n_meters / 2;
+		int i;
+
+		/* Avoid hash collision, don't move slots to other place.
+		 * Make sure there are no references of meters in array
+		 * which will be released.
+		 */
+		for (i = half_size; i < ti->n_meters; i++)
+			if (rcu_dereference_ovsl(ti->dp_meters[i]))
+				goto out;
+
+		if (dp_meter_instance_realloc(tbl, half_size))
+			goto shrink_err;
+	}
+
+out:
+	return 0;
+
+shrink_err:
+	dp_meter_instance_insert(ti, meter);
+	tbl->count++;
+	return -ENOMEM;
+}
+
+static struct sk_buff *
+ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
+			  struct ovs_header **ovs_reply_header)
+{
+	struct sk_buff *skb;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+					info->snd_seq,
+					&dp_meter_genl_family, 0, cmd);
+	if (!*ovs_reply_header) {
+		nlmsg_free(skb);
+		return ERR_PTR(-EMSGSIZE);
+	}
+	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+	return skb;
+}
+
+static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
+				     struct dp_meter *meter)
+{
+	struct nlattr *nla;
+	struct dp_meter_band *band;
+	u16 i;
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
+		goto error;
+
+	if (nla_put(reply, OVS_METER_ATTR_STATS,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+			      OVS_METER_ATTR_PAD))
+		goto error;
+
+	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
+	if (!nla)
+		goto error;
+
+	band = meter->bands;
+
+	for (i = 0; i < meter->n_bands; ++i, ++band) {
+		struct nlattr *band_nla;
+
+		band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
+		if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
+					 sizeof(struct ovs_flow_stats),
+					 &band->stats))
+			goto error;
+		nla_nest_end(reply, band_nla);
+	}
+	nla_nest_end(reply, nla);
+
+	return 0;
+error:
+	return -EMSGSIZE;
+}
+
+static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct ovs_header *ovs_reply_header;
+	struct nlattr *nla, *band_nla;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err = -EMSGSIZE;
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
+			dp->meter_tbl.max_meters_allowed))
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+		goto nla_put_failure;
+
+	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
+	if (!nla)
+		goto nla_put_failure;
+
+	band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
+	if (!band_nla)
+		goto nla_put_failure;
+	/* Currently only DROP band type is supported. */
+	if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
+		goto nla_put_failure;
+	nla_nest_end(reply, band_nla);
+	nla_nest_end(reply, nla);
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+nla_put_failure:
+	nlmsg_free(reply);
+	return err;
+}
+
+static struct dp_meter *dp_meter_create(struct nlattr **a)
+{
+	struct nlattr *nla;
+	int rem;
+	u16 n_bands = 0;
+	struct dp_meter *meter;
+	struct dp_meter_band *band;
+	int err;
+
+	/* Validate attributes, count the bands. */
+	if (!a[OVS_METER_ATTR_BANDS])
+		return ERR_PTR(-EINVAL);
+
+	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
+		if (++n_bands > DP_MAX_BANDS)
+			return ERR_PTR(-EINVAL);
+
+	/* Allocate and set up the meter before locking anything. */
+	meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT);
+	if (!meter)
+		return ERR_PTR(-ENOMEM);
+
+	meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
+	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
+	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
+	spin_lock_init(&meter->lock);
+	if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
+		meter->stats = *(struct ovs_flow_stats *)
+			nla_data(a[OVS_METER_ATTR_STATS]);
+	}
+	meter->n_bands = n_bands;
+
+	/* Set up meter bands. */
+	band = meter->bands;
+	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
+		struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
+		u32 band_max_delta_t;
+
+		err = nla_parse_deprecated((struct nlattr **)&attr,
+					   OVS_BAND_ATTR_MAX, nla_data(nla),
+					   nla_len(nla), band_policy, NULL);
+		if (err)
+			goto exit_free_meter;
+
+		if (!attr[OVS_BAND_ATTR_TYPE] ||
+		    !attr[OVS_BAND_ATTR_RATE] ||
+		    !attr[OVS_BAND_ATTR_BURST]) {
+			err = -EINVAL;
+			goto exit_free_meter;
+		}
+
+		band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
+		band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+		if (band->rate == 0) {
+			err = -EINVAL;
+			goto exit_free_meter;
+		}
+
+		band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
+		/* Figure out max delta_t that is enough to fill any bucket.
+		 * Keep max_delta_t size to the bucket units:
+		 * pkts => 1/1000 packets, kilobits => bits.
+		 *
+		 * Start with a full bucket.
+		 */
+		band->bucket = band->burst_size * 1000ULL;
+		band_max_delta_t = div_u64(band->bucket, band->rate);
+		if (band_max_delta_t > meter->max_delta_t)
+			meter->max_delta_t = band_max_delta_t;
+		band++;
+	}
+
+	return meter;
+
+exit_free_meter:
+	kfree(meter);
+	return ERR_PTR(err);
+}
+
+static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter, *old_meter;
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct dp_meter_table *meter_tbl;
+	struct datapath *dp;
+	int err;
+	u32 meter_id;
+	bool failed;
+
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
+
+	meter = dp_meter_create(a);
+	if (IS_ERR(meter))
+		return PTR_ERR(meter);
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
+					  &ovs_reply_header);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto exit_free_meter;
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	meter_tbl = &dp->meter_tbl;
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+	old_meter = lookup_meter(meter_tbl, meter_id);
+	err = detach_meter(meter_tbl, old_meter);
+	if (err)
+		goto exit_unlock;
+
+	err = attach_meter(meter_tbl, meter);
+	if (err)
+		goto exit_free_old_meter;
+
+	ovs_unlock();
+
+	/* Build response with the meter_id and stats from
+	 * the old meter, if any.
+	 */
+	failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
+	WARN_ON(failed);
+	if (old_meter) {
+		spin_lock_bh(&old_meter->lock);
+		if (old_meter->keep_stats) {
+			err = ovs_meter_cmd_reply_stats(reply, meter_id,
+							old_meter);
+			WARN_ON(err);
+		}
+		spin_unlock_bh(&old_meter->lock);
+		ovs_meter_free(old_meter);
+	}
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_free_old_meter:
+	ovs_meter_free(old_meter);
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+exit_free_meter:
+	kfree(meter);
+	return err;
+}
+
+static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	u32 meter_id;
+	int err;
+
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
+
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	ovs_lock();
+
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	/* Locate meter, copy stats. */
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
+	if (!meter) {
+		err = -ENOENT;
+		goto exit_unlock;
+	}
+
+	spin_lock_bh(&meter->lock);
+	err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
+	spin_unlock_bh(&meter->lock);
+	if (err)
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+	return err;
+}
+
+static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = genl_info_userhdr(info);
+	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *old_meter;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	u32 meter_id;
+	int err;
+
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	ovs_lock();
+
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	old_meter = lookup_meter(&dp->meter_tbl, meter_id);
+	if (old_meter) {
+		spin_lock_bh(&old_meter->lock);
+		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
+		WARN_ON(err);
+		spin_unlock_bh(&old_meter->lock);
+
+		err = detach_meter(&dp->meter_tbl, old_meter);
+		if (err)
+			goto exit_unlock;
+	}
+
+	ovs_unlock();
+	ovs_meter_free(old_meter);
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+	return err;
+}
+
+/* Meter action execution.
+ *
+ * Return true 'meter_id' drop band is triggered. The 'skb' should be
+ * dropped by the caller'.
+ */
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+		       struct sw_flow_key *key, u32 meter_id)
+{
+	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
+	long long int long_delta_ms;
+	struct dp_meter_band *band;
+	struct dp_meter *meter;
+	int i, band_exceeded_max = -1;
+	u32 band_exceeded_rate = 0;
+	u32 delta_ms;
+	u32 cost;
+
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
+	/* Do not drop the packet when there is no meter. */
+	if (!meter)
+		return false;
+
+	/* Lock the meter while using it. */
+	spin_lock(&meter->lock);
+
+	long_delta_ms = (now_ms - meter->used); /* ms */
+	if (long_delta_ms < 0) {
+		/* This condition means that we have several threads fighting
+		 * for a meter lock, and the one who received the packets a
+		 * bit later wins. Assuming that all racing threads received
+		 * packets at the same time to avoid overflow.
+		 */
+		long_delta_ms = 0;
+	}
+
+	/* Make sure delta_ms will not be too large, so that bucket will not
+	 * wrap around below.
+	 */
+	delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
+		   ? meter->max_delta_t : (u32)long_delta_ms;
+
+	/* Update meter statistics.
+	 */
+	meter->used = now_ms;
+	meter->stats.n_packets += 1;
+	meter->stats.n_bytes += skb->len;
+
+	/* Bucket rate is either in kilobits per second, or in packets per
+	 * second.  We maintain the bucket in the units of either bits or
+	 * 1/1000th of a packet, correspondingly.
+	 * Then, when rate is multiplied with milliseconds, we get the
+	 * bucket units:
+	 * msec * kbps = bits, and
+	 * msec * packets/sec = 1/1000 packets.
+	 *
+	 * 'cost' is the number of bucket units in this packet.
+	 */
+	cost = (meter->kbps) ? skb->len * 8 : 1000;
+
+	/* Update all bands and find the one hit with the highest rate. */
+	for (i = 0; i < meter->n_bands; ++i) {
+		long long int max_bucket_size;
+
+		band = &meter->bands[i];
+		max_bucket_size = band->burst_size * 1000LL;
+
+		band->bucket += delta_ms * band->rate;
+		if (band->bucket > max_bucket_size)
+			band->bucket = max_bucket_size;
+
+		if (band->bucket >= cost) {
+			band->bucket -= cost;
+		} else if (band->rate > band_exceeded_rate) {
+			band_exceeded_rate = band->rate;
+			band_exceeded_max = i;
+		}
+	}
+
+	if (band_exceeded_max >= 0) {
+		/* Update band statistics. */
+		band = &meter->bands[band_exceeded_max];
+		band->stats.n_packets += 1;
+		band->stats.n_bytes += skb->len;
+
+		/* Drop band triggered, let the caller drop the 'skb'.  */
+		if (band->type == OVS_METER_BAND_TYPE_DROP) {
+			spin_unlock(&meter->lock);
+			return true;
+		}
+	}
+
+	spin_unlock(&meter->lock);
+	return false;
+}
+
+static const struct genl_small_ops dp_meter_genl_ops[] = {
+	{ .cmd = OVS_METER_CMD_FEATURES,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = 0,		  /* OK for unprivileged users. */
+		.doit = ovs_meter_cmd_features
+	},
+	{ .cmd = OVS_METER_CMD_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					       *  privilege.
+					       */
+		.doit = ovs_meter_cmd_set,
+	},
+	{ .cmd = OVS_METER_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = 0,		  /* OK for unprivileged users. */
+		.doit = ovs_meter_cmd_get,
+	},
+	{ .cmd = OVS_METER_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					       *  privilege.
+					       */
+		.doit = ovs_meter_cmd_del
+	},
+};
+
+static const struct genl_multicast_group ovs_meter_multicast_group = {
+	.name = OVS_METER_MCGROUP,
+};
+
+struct genl_family dp_meter_genl_family __ro_after_init = {
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_METER_FAMILY,
+	.version = OVS_METER_VERSION,
+	.maxattr = OVS_METER_ATTR_MAX,
+	.policy = meter_policy,
+	.netnsok = true,
+	.parallel_ops = true,
+	.small_ops = dp_meter_genl_ops,
+	.n_small_ops = ARRAY_SIZE(dp_meter_genl_ops),
+	.resv_start_op = OVS_METER_CMD_GET + 1,
+	.mcgrps = &ovs_meter_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
+};
+
+int ovs_meters_init(struct datapath *dp)
+{
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti;
+	unsigned long free_mem_bytes;
+
+	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+	if (!ti)
+		return -ENOMEM;
+
+	/* Allow meters in a datapath to use ~3.12% of physical memory. */
+	free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
+	tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
+				      DP_METER_NUM_MAX);
+	if (!tbl->max_meters_allowed)
+		goto out_err;
+
+	rcu_assign_pointer(tbl->ti, ti);
+	tbl->count = 0;
+
+	return 0;
+
+out_err:
+	dp_meter_instance_free(ti);
+	return -ENOMEM;
+}
+
+void ovs_meters_exit(struct datapath *dp)
+{
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
+	int i;
+
+	for (i = 0; i < ti->n_meters; i++)
+		ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i]));
+
+	dp_meter_instance_free(ti);
+}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..8bbf983cd244
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ */
+
+#ifndef METER_H
+#define METER_H 1
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/skbuff.h>
+#include <linux/bits.h>
+
+#include "flow.h"
+struct datapath;
+
+#define DP_MAX_BANDS		1
+#define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
+#define DP_METER_NUM_MAX	(200000UL)
+
+struct dp_meter_band {
+	u32 type;
+	u32 rate;
+	u32 burst_size;
+	u64 bucket; /* 1/1000 packets, or in bits */
+	struct ovs_flow_stats stats;
+};
+
+struct dp_meter {
+	spinlock_t lock;    /* Per meter lock */
+	struct rcu_head rcu;
+	u32 id;
+	u16 kbps:1, keep_stats:1;
+	u16 n_bands;
+	u32 max_delta_t;
+	u64 used;
+	struct ovs_flow_stats stats;
+	struct dp_meter_band bands[] __counted_by(n_bands);
+};
+
+struct dp_meter_instance {
+	struct rcu_head rcu;
+	u32 n_meters;
+	struct dp_meter __rcu *dp_meters[] __counted_by(n_meters);
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+	u32 max_meters_allowed;
+};
+
+extern struct genl_family dp_meter_genl_family;
+int ovs_meters_init(struct datapath *dp);
+void ovs_meters_exit(struct datapath *dp);
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+		       struct sw_flow_key *key, u32 meter_id);
+
+#endif /* meter.h */
diff --git a/net/openvswitch/openvswitch_trace.c b/net/openvswitch/openvswitch_trace.c
new file mode 100644
index 000000000000..62c5f7d6f023
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* bug in tracepoint.h, it should include this */
+#include <linux/module.h>
+
+/* sparse isn't too happy with all macros... */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "openvswitch_trace.h"
+
+#endif
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
new file mode 100644
index 000000000000..74d75aaebef4
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM openvswitch
+
+#if !defined(_TRACE_OPENVSWITCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OPENVSWITCH_H
+
+#include <linux/tracepoint.h>
+
+#include "datapath.h"
+
+TRACE_EVENT(ovs_do_execute_action,
+
+	TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *a, int rem),
+
+	TP_ARGS(dp, skb, key, a, rem),
+
+	TP_STRUCT__entry(
+		__field(	void *,		dpaddr			)
+		__string(	dp_name,	ovs_dp_name(dp)		)
+		__string(	dev_name,	skb->dev->name		)
+		__field(	void *,		skbaddr			)
+		__field(	unsigned int,	len			)
+		__field(	unsigned int,	data_len		)
+		__field(	unsigned int,	truesize		)
+		__field(	u8,		nr_frags		)
+		__field(	u16,		gso_size		)
+		__field(	u16,		gso_type		)
+		__field(	u32,		ovs_flow_hash		)
+		__field(	u32,		recirc_id		)
+		__field(	void *,		keyaddr			)
+		__field(	u16,		key_eth_type		)
+		__field(	u8,		key_ct_state		)
+		__field(	u8,		key_ct_orig_proto	)
+		__field(	u16,		key_ct_zone		)
+		__field(	unsigned int,	flow_key_valid		)
+		__field(	u8,		action_type		)
+		__field(	unsigned int,	action_len		)
+		__field(	void *,		action_data		)
+		__field(	u8,		is_last			)
+	),
+
+	TP_fast_assign(
+		__entry->dpaddr = dp;
+		__assign_str(dp_name);
+		__assign_str(dev_name);
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__entry->data_len = skb->data_len;
+		__entry->truesize = skb->truesize;
+		__entry->nr_frags = skb_shinfo(skb)->nr_frags;
+		__entry->gso_size = skb_shinfo(skb)->gso_size;
+		__entry->gso_type = skb_shinfo(skb)->gso_type;
+		__entry->ovs_flow_hash = key->ovs_flow_hash;
+		__entry->recirc_id = key->recirc_id;
+		__entry->keyaddr = key;
+		__entry->key_eth_type = key->eth.type;
+		__entry->key_ct_state = key->ct_state;
+		__entry->key_ct_orig_proto = key->ct_orig_proto;
+		__entry->key_ct_zone = key->ct_zone;
+		__entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+		__entry->action_type = nla_type(a);
+		__entry->action_len = nla_len(a);
+		__entry->action_data = nla_data(a);
+		__entry->is_last = nla_is_last(a, rem);
+	),
+
+	TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_Zone=%04x flow_key_valid=%d action_type=%u action_len=%u action_data=%p is_last=%d",
+		  __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+		  __entry->skbaddr, __entry->len, __entry->data_len,
+		  __entry->truesize, __entry->nr_frags, __entry->gso_size,
+		  __entry->gso_type, __entry->ovs_flow_hash,
+		  __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+		  __entry->key_ct_state, __entry->key_ct_orig_proto,
+		  __entry->key_ct_zone,
+		  __entry->flow_key_valid,
+		  __entry->action_type, __entry->action_len,
+		  __entry->action_data, __entry->is_last)
+);
+
+TRACE_EVENT(ovs_dp_upcall,
+
+	TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+		 const struct sw_flow_key *key,
+		 const struct dp_upcall_info *upcall_info),
+
+	TP_ARGS(dp, skb, key, upcall_info),
+
+	TP_STRUCT__entry(
+		__field(	void *,		dpaddr			)
+		__string(	dp_name,	ovs_dp_name(dp)		)
+		__string(	dev_name,	skb->dev->name		)
+		__field(	void *,		skbaddr			)
+		__field(	unsigned int,	len			)
+		__field(	unsigned int,	data_len		)
+		__field(	unsigned int,	truesize		)
+		__field(	u8,		nr_frags		)
+		__field(	u16,		gso_size		)
+		__field(	u16,		gso_type		)
+		__field(	u32,		ovs_flow_hash		)
+		__field(	u32,		recirc_id		)
+		__field(	const void *,	keyaddr			)
+		__field(	u16,		key_eth_type		)
+		__field(	u8,		key_ct_state		)
+		__field(	u8,		key_ct_orig_proto	)
+		__field(	u16,		key_ct_zone		)
+		__field(	unsigned int,	flow_key_valid		)
+		__field(	u8,		upcall_cmd		)
+		__field(	u32,		upcall_port		)
+		__field(	u16,		upcall_mru		)
+	),
+
+	TP_fast_assign(
+		__entry->dpaddr = dp;
+		__assign_str(dp_name);
+		__assign_str(dev_name);
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__entry->data_len = skb->data_len;
+		__entry->truesize = skb->truesize;
+		__entry->nr_frags = skb_shinfo(skb)->nr_frags;
+		__entry->gso_size = skb_shinfo(skb)->gso_size;
+		__entry->gso_type = skb_shinfo(skb)->gso_type;
+		__entry->ovs_flow_hash = key->ovs_flow_hash;
+		__entry->recirc_id = key->recirc_id;
+		__entry->keyaddr = key;
+		__entry->key_eth_type = key->eth.type;
+		__entry->key_ct_state = key->ct_state;
+		__entry->key_ct_orig_proto = key->ct_orig_proto;
+		__entry->key_ct_zone = key->ct_zone;
+		__entry->flow_key_valid =  !(key->mac_proto & SW_FLOW_KEY_INVALID);
+		__entry->upcall_cmd = upcall_info->cmd;
+		__entry->upcall_port = upcall_info->portid;
+		__entry->upcall_mru = upcall_info->mru;
+	),
+
+	TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d upcall_cmd=%u upcall_port=%u upcall_mru=%u",
+		  __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+		  __entry->skbaddr, __entry->len, __entry->data_len,
+		  __entry->truesize, __entry->nr_frags, __entry->gso_size,
+		  __entry->gso_type, __entry->ovs_flow_hash,
+		  __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+		  __entry->key_ct_state, __entry->key_ct_orig_proto,
+		  __entry->key_ct_zone,
+		  __entry->flow_key_valid,
+		  __entry->upcall_cmd, __entry->upcall_port,
+		  __entry->upcall_mru)
+);
+
+#endif /* _TRACE_OPENVSWITCH_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE openvswitch_trace
+#include <trace/define_trace.h>
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
new file mode 100644
index 000000000000..b10e1602c6b1
--- /dev/null
+++ b/net/openvswitch/vport-geneve.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
+
+#include <net/geneve.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "vport.h"
+#include "vport-netdev.h"
+
+static struct vport_ops ovs_geneve_vport_ops;
+/**
+ * struct geneve_port - Keeps track of open UDP ports
+ * @dst_port: destination port.
+ */
+struct geneve_port {
+	u16 dst_port;
+};
+
+static inline struct geneve_port *geneve_vport(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+static int geneve_get_options(const struct vport *vport,
+			      struct sk_buff *skb)
+{
+	struct geneve_port *geneve_port = geneve_vport(vport);
+
+	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->dst_port))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static struct vport *geneve_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct geneve_port *geneve_port;
+	struct net_device *dev;
+	struct vport *vport;
+	struct nlattr *a;
+	u16 dst_port;
+	int err;
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		dst_port = nla_get_u16(a);
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(sizeof(struct geneve_port),
+				&ovs_geneve_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	geneve_port = geneve_vport(vport);
+	geneve_port->dst_port = dst_port;
+
+	rtnl_lock();
+	dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
+	if (IS_ERR(dev)) {
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_CAST(dev);
+	}
+
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+	if (err < 0) {
+		rtnl_delete_link(dev, 0, NULL);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
+	rtnl_unlock();
+	return vport;
+error:
+	return ERR_PTR(err);
+}
+
+static struct vport *geneve_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = geneve_tnl_create(parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	return ovs_netdev_link(vport, parms->name);
+}
+
+static struct vport_ops ovs_geneve_vport_ops = {
+	.type		= OVS_VPORT_TYPE_GENEVE,
+	.create		= geneve_create,
+	.destroy	= ovs_netdev_tunnel_destroy,
+	.get_options	= geneve_get_options,
+	.send		= dev_queue_xmit,
+};
+
+static int __init ovs_geneve_tnl_init(void)
+{
+	return ovs_vport_ops_register(&ovs_geneve_vport_ops);
+}
+
+static void __exit ovs_geneve_tnl_exit(void)
+{
+	ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
+}
+
+module_init(ovs_geneve_tnl_init);
+module_exit(ovs_geneve_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: Geneve switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 493e9775dcda..4014c9b5eb79 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -1,22 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2014 Nicira, Inc.
  */
 
-#ifdef CONFIG_OPENVSWITCH_GRE
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/if.h>
@@ -25,13 +11,12 @@
 #include <linux/if_tunnel.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
 #include <linux/in_route.h>
 #include <linux/inetdevice.h>
 #include <linux/jhash.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/workqueue.h>
 #include <linux/rculist.h>
 #include <net/route.h>
@@ -47,229 +32,72 @@
 
 #include "datapath.h"
 #include "vport.h"
+#include "vport-netdev.h"
 
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 be64_get_low32(__be64 x)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be32)x;
-#else
-	return (__force __be32)((__force u64)x >> 32);
-#endif
-}
-
-static __be16 filter_tnl_flags(__be16 flags)
-{
-	return flags & (TUNNEL_CSUM | TUNNEL_KEY);
-}
-
-static struct sk_buff *__build_header(struct sk_buff *skb,
-				      int tunnel_hlen)
-{
-	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
-	struct tnl_ptk_info tpi;
-
-	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		return NULL;
-
-	tpi.flags = filter_tnl_flags(tun_key->tun_flags);
-	tpi.proto = htons(ETH_P_TEB);
-	tpi.key = be64_get_low32(tun_key->tun_id);
-	tpi.seq = 0;
-	gre_build_header(skb, &tpi, tunnel_hlen);
+static struct vport_ops ovs_gre_vport_ops;
 
-	return skb;
-}
-
-static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
+static struct vport *gre_tnl_create(const struct vport_parms *parms)
 {
-#ifdef __BIG_ENDIAN
-	return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
-#else
-	return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
-#endif
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb,
-		   const struct tnl_ptk_info *tpi)
-{
-	struct ovs_key_ipv4_tunnel tun_key;
-	struct ovs_net *ovs_net;
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct net_device *dev;
 	struct vport *vport;
-	__be64 key;
-
-	ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
-	vport = rcu_dereference(ovs_net->vport_net.gre_vport);
-	if (unlikely(!vport))
-		return PACKET_REJECT;
-
-	key = key_to_tunnel_id(tpi->key, tpi->seq);
-	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
-			      filter_tnl_flags(tpi->flags));
-
-	ovs_vport_receive(vport, skb, &tun_key);
-	return PACKET_RCVD;
-}
-
-static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
-	struct net *net = ovs_dp_get_net(vport->dp);
-	struct flowi4 fl;
-	struct rtable *rt;
-	int min_headroom;
-	int tunnel_hlen;
-	__be16 df;
 	int err;
 
-	if (unlikely(!OVS_CB(skb)->tun_key)) {
-		err = -EINVAL;
-		goto error;
-	}
-
-	/* Route lookup */
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
-	fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
-	fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
-	if (IS_ERR(rt))
-		return PTR_ERR(rt);
-
-	tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
-
-	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
-			+ tunnel_hlen + sizeof(struct iphdr)
-			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
-	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
-		int head_delta = SKB_DATA_ALIGN(min_headroom -
-						skb_headroom(skb) +
-						16);
-		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
-					0, GFP_ATOMIC);
-		if (unlikely(err))
-			goto err_free_rt;
-	}
-
-	if (vlan_tx_tag_present(skb)) {
-		if (unlikely(!__vlan_put_tag(skb,
-					     skb->vlan_proto,
-					     vlan_tx_tag_get(skb)))) {
-			err = -ENOMEM;
-			goto err_free_rt;
-		}
-		skb->vlan_tci = 0;
+	vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	rtnl_lock();
+	dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
+	if (IS_ERR(dev)) {
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_CAST(dev);
 	}
 
-	/* Push Tunnel header. */
-	skb = __build_header(skb, tunnel_hlen);
-	if (unlikely(!skb)) {
-		err = 0;
-		goto err_free_rt;
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+	if (err < 0) {
+		rtnl_delete_link(dev, 0, NULL);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_PTR(err);
 	}
 
-	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
-		htons(IP_DF) : 0;
-
-	skb->local_df = 1;
-
-	return iptunnel_xmit(net, rt, skb, fl.saddr,
-			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
-			     OVS_CB(skb)->tun_key->ipv4_tos,
-			     OVS_CB(skb)->tun_key->ipv4_ttl, df);
-err_free_rt:
-	ip_rt_put(rt);
-error:
-	return err;
-}
-
-static struct gre_cisco_protocol gre_protocol = {
-	.handler        = gre_rcv,
-	.priority       = 1,
-};
-
-static int gre_ports;
-static int gre_init(void)
-{
-	int err;
-
-	gre_ports++;
-	if (gre_ports > 1)
-		return 0;
-
-	err = gre_cisco_register(&gre_protocol);
-	if (err)
-		pr_warn("cannot register gre protocol handler\n");
-
-	return err;
-}
-
-static void gre_exit(void)
-{
-	gre_ports--;
-	if (gre_ports > 0)
-		return;
-
-	gre_cisco_unregister(&gre_protocol);
-}
-
-static const char *gre_get_name(const struct vport *vport)
-{
-	return vport_priv(vport);
+	rtnl_unlock();
+	return vport;
 }
 
 static struct vport *gre_create(const struct vport_parms *parms)
 {
-	struct net *net = ovs_dp_get_net(parms->dp);
-	struct ovs_net *ovs_net;
 	struct vport *vport;
-	int err;
 
-	err = gre_init();
-	if (err)
-		return ERR_PTR(err);
-
-	ovs_net = net_generic(net, ovs_net_id);
-	if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
-		vport = ERR_PTR(-EEXIST);
-		goto error;
-	}
-
-	vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+	vport = gre_tnl_create(parms);
 	if (IS_ERR(vport))
-		goto error;
+		return vport;
 
-	strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
-	rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
-	return vport;
-
-error:
-	gre_exit();
-	return vport;
+	return ovs_netdev_link(vport, parms->name);
 }
 
-static void gre_tnl_destroy(struct vport *vport)
-{
-	struct net *net = ovs_dp_get_net(vport->dp);
-	struct ovs_net *ovs_net;
+static struct vport_ops ovs_gre_vport_ops = {
+	.type		= OVS_VPORT_TYPE_GRE,
+	.create		= gre_create,
+	.send		= dev_queue_xmit,
+	.destroy	= ovs_netdev_tunnel_destroy,
+};
 
-	ovs_net = net_generic(net, ovs_net_id);
+static int __init ovs_gre_tnl_init(void)
+{
+	return ovs_vport_ops_register(&ovs_gre_vport_ops);
+}
 
-	rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
-	ovs_vport_deferred_free(vport);
-	gre_exit();
+static void __exit ovs_gre_tnl_exit(void)
+{
+	ovs_vport_ops_unregister(&ovs_gre_vport_ops);
 }
 
-const struct vport_ops ovs_gre_vport_ops = {
-	.type		= OVS_VPORT_TYPE_GRE,
-	.create		= gre_create,
-	.destroy	= gre_tnl_destroy,
-	.get_name	= gre_get_name,
-	.send		= gre_tnl_send,
-};
+module_init(ovs_gre_tnl_init);
+module_exit(ovs_gre_tnl_exit);
 
-#endif /* OPENVSWITCH_GRE */
+MODULE_DESCRIPTION("OVS: GRE switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-3");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 98d3edbbc235..125d310871e9 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -1,22 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
-#include <linux/hardirq.h>
 #include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
@@ -26,6 +12,7 @@
 
 #include <net/dst.h>
 #include <net/xfrm.h>
+#include <net/rtnetlink.h>
 
 #include "datapath.h"
 #include "vport-internal_dev.h"
@@ -35,41 +22,32 @@ struct internal_dev {
 	struct vport *vport;
 };
 
+static struct vport_ops ovs_internal_vport_ops;
+
 static struct internal_dev *internal_dev_priv(struct net_device *netdev)
 {
 	return netdev_priv(netdev);
 }
 
-/* This function is only called by the kernel network layer.*/
-static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
-							struct rtnl_link_stats64 *stats)
-{
-	struct vport *vport = ovs_internal_dev_get_vport(netdev);
-	struct ovs_vport_stats vport_stats;
-
-	ovs_vport_get_stats(vport, &vport_stats);
-
-	/* The tx and rx stats need to be swapped because the
-	 * switch and host OS have opposite perspectives. */
-	stats->rx_packets	= vport_stats.tx_packets;
-	stats->tx_packets	= vport_stats.rx_packets;
-	stats->rx_bytes		= vport_stats.tx_bytes;
-	stats->tx_bytes		= vport_stats.rx_bytes;
-	stats->rx_errors	= vport_stats.tx_errors;
-	stats->tx_errors	= vport_stats.rx_errors;
-	stats->rx_dropped	= vport_stats.tx_dropped;
-	stats->tx_dropped	= vport_stats.rx_dropped;
-
-	return stats;
-}
-
 /* Called with rcu_read_lock_bh. */
-static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
+	int len, err;
+
+	/* store len value because skb can be freed inside ovs_vport_receive() */
+	len = skb->len;
+
 	rcu_read_lock();
-	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
+	err = ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
 	rcu_read_unlock();
-	return 0;
+
+	if (likely(!err))
+		dev_sw_netstats_tx_add(netdev, 1, len);
+	else
+		netdev->stats.tx_errors++;
+
+	return NETDEV_TX_OK;
 }
 
 static int internal_dev_open(struct net_device *netdev)
@@ -87,7 +65,7 @@ static int internal_dev_stop(struct net_device *netdev)
 static void internal_dev_getinfo(struct net_device *netdev,
 				 struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, "openvswitch", sizeof(info->driver));
+	strscpy(info->driver, "openvswitch", sizeof(info->driver));
 }
 
 static const struct ethtool_ops internal_dev_ethtool_ops = {
@@ -95,21 +73,11 @@ static const struct ethtool_ops internal_dev_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
 
-static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	if (new_mtu < 68)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-	return 0;
-}
-
 static void internal_dev_destructor(struct net_device *dev)
 {
 	struct vport *vport = ovs_internal_dev_get_vport(dev);
 
 	ovs_vport_free(vport);
-	free_netdev(dev);
 }
 
 static const struct net_device_ops internal_dev_netdev_ops = {
@@ -117,76 +85,87 @@ static const struct net_device_ops internal_dev_netdev_ops = {
 	.ndo_stop = internal_dev_stop,
 	.ndo_start_xmit = internal_dev_xmit,
 	.ndo_set_mac_address = eth_mac_addr,
-	.ndo_change_mtu = internal_dev_change_mtu,
-	.ndo_get_stats64 = internal_dev_get_stats,
+};
+
+static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
+	.kind = "openvswitch",
 };
 
 static void do_setup(struct net_device *netdev)
 {
 	ether_setup(netdev);
 
+	netdev->max_mtu = ETH_MAX_MTU;
+
 	netdev->netdev_ops = &internal_dev_netdev_ops;
 
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
-	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-	netdev->destructor = internal_dev_destructor;
-	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
-	netdev->tx_queue_len = 0;
-
-	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
-			   NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
+			      IFF_NO_QUEUE;
+	netdev->lltx = true;
+	netdev->needs_free_netdev = true;
+	netdev->priv_destructor = NULL;
+	netdev->ethtool_ops = &internal_dev_ethtool_ops;
+	netdev->rtnl_link_ops = &internal_dev_link_ops;
+
+	netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+			   NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+			   NETIF_F_GSO_ENCAP_ALL;
 
 	netdev->vlan_features = netdev->features;
-	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
-	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+	netdev->hw_enc_features = netdev->features;
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+	netdev->hw_features = netdev->features;
+
 	eth_hw_addr_random(netdev);
 }
 
 static struct vport *internal_dev_create(const struct vport_parms *parms)
 {
 	struct vport *vport;
-	struct netdev_vport *netdev_vport;
 	struct internal_dev *internal_dev;
+	struct net_device *dev;
 	int err;
 
-	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
-				&ovs_internal_vport_ops, parms);
+	vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
 	if (IS_ERR(vport)) {
 		err = PTR_ERR(vport);
 		goto error;
 	}
 
-	netdev_vport = netdev_vport_priv(vport);
-
-	netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
-					 parms->name, do_setup);
-	if (!netdev_vport->dev) {
+	dev = alloc_netdev(sizeof(struct internal_dev),
+			   parms->name, NET_NAME_USER, do_setup);
+	vport->dev = dev;
+	if (!vport->dev) {
 		err = -ENOMEM;
 		goto error_free_vport;
 	}
+	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
 
-	dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
-	internal_dev = internal_dev_priv(netdev_vport->dev);
+	dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
+	dev->ifindex = parms->desired_ifindex;
+	internal_dev = internal_dev_priv(vport->dev);
 	internal_dev->vport = vport;
 
 	/* Restrict bridge port to current netns. */
 	if (vport->port_no == OVSP_LOCAL)
-		netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+		vport->dev->netns_immutable = true;
 
 	rtnl_lock();
-	err = register_netdevice(netdev_vport->dev);
+	err = register_netdevice(vport->dev);
 	if (err)
-		goto error_free_netdev;
+		goto error_unlock;
+	vport->dev->priv_destructor = internal_dev_destructor;
 
-	dev_set_promiscuity(netdev_vport->dev, 1);
+	dev_set_promiscuity(vport->dev, 1);
 	rtnl_unlock();
-	netif_start_queue(netdev_vport->dev);
+	netif_start_queue(vport->dev);
 
 	return vport;
 
-error_free_netdev:
+error_unlock:
 	rtnl_unlock();
-	free_netdev(netdev_vport->dev);
+	free_netdev(dev);
 error_free_vport:
 	ovs_vport_free(vport);
 error:
@@ -195,44 +174,41 @@ error:
 
 static void internal_dev_destroy(struct vport *vport)
 {
-	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-
-	netif_stop_queue(netdev_vport->dev);
+	netif_stop_queue(vport->dev);
 	rtnl_lock();
-	dev_set_promiscuity(netdev_vport->dev, -1);
+	dev_set_promiscuity(vport->dev, -1);
 
 	/* unregister_netdevice() waits for an RCU grace period. */
-	unregister_netdevice(netdev_vport->dev);
-
+	unregister_netdevice(vport->dev);
 	rtnl_unlock();
 }
 
-static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+static int internal_dev_recv(struct sk_buff *skb)
 {
-	struct net_device *netdev = netdev_vport_priv(vport)->dev;
-	int len;
+	struct net_device *netdev = skb->dev;
 
-	len = skb->len;
+	if (unlikely(!(netdev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		netdev->stats.rx_dropped++;
+		return NETDEV_TX_OK;
+	}
 
 	skb_dst_drop(skb);
-	nf_reset(skb);
-	secpath_reset(skb);
+	nf_reset_ct(skb);
 
-	skb->dev = netdev;
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, netdev);
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+	dev_sw_netstats_rx_add(netdev, skb->len);
 
 	netif_rx(skb);
-
-	return len;
+	return NETDEV_TX_OK;
 }
 
-const struct vport_ops ovs_internal_vport_ops = {
+static struct vport_ops ovs_internal_vport_ops = {
 	.type		= OVS_VPORT_TYPE_INTERNAL,
 	.create		= internal_dev_create,
 	.destroy	= internal_dev_destroy,
-	.get_name	= ovs_netdev_get_name,
 	.send		= internal_dev_recv,
 };
 
@@ -248,3 +224,24 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
 
 	return internal_dev_priv(netdev)->vport;
 }
+
+int ovs_internal_dev_rtnl_link_register(void)
+{
+	int err;
+
+	err = rtnl_link_register(&internal_dev_link_ops);
+	if (err < 0)
+		return err;
+
+	err = ovs_vport_ops_register(&ovs_internal_vport_ops);
+	if (err < 0)
+		rtnl_link_unregister(&internal_dev_link_ops);
+
+	return err;
+}
+
+void ovs_internal_dev_rtnl_link_unregister(void)
+{
+	ovs_vport_ops_unregister(&ovs_internal_vport_ops);
+	rtnl_link_unregister(&internal_dev_link_ops);
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
index 9a7d30ecc6a2..0112d1b09d4b 100644
--- a/net/openvswitch/vport-internal_dev.h
+++ b/net/openvswitch/vport-internal_dev.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_INTERNAL_DEV_H
@@ -24,5 +11,7 @@
 
 int ovs_is_internal_dev(const struct net_device *);
 struct vport *ovs_internal_dev_get_vport(struct net_device *);
+int ovs_internal_dev_rtnl_link_register(void);
+void ovs_internal_dev_rtnl_link_unregister(void);
 
 #endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 5982f3f62835..91a11067e458 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -25,16 +12,25 @@
 #include <linux/llc.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
+#include <linux/openvswitch.h>
+#include <linux/export.h>
 
-#include <net/llc.h>
+#include <net/ip_tunnels.h>
+#include <net/rtnetlink.h>
 
 #include "datapath.h"
+#include "vport.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
+static struct vport_ops ovs_netdev_vport_ops;
+
 /* Must be called with rcu_read_lock. */
-static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
+static void netdev_port_receive(struct sk_buff *skb)
 {
+	struct vport *vport;
+
+	vport = ovs_netdev_get_vport(skb->dev);
 	if (unlikely(!vport))
 		goto error;
 
@@ -48,12 +44,11 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!skb))
 		return;
 
-	skb_push(skb, ETH_HLEN);
-	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+	if (skb->dev->type == ARPHRD_ETHER)
+		skb_push_rcsum(skb, ETH_HLEN);
 
-	ovs_vport_receive(vport, skb, NULL);
+	ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
 	return;
-
 error:
 	kfree_skb(skb);
 }
@@ -62,144 +57,160 @@ error:
 static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
-	struct vport *vport;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return RX_HANDLER_PASS;
 
-	vport = ovs_netdev_get_vport(skb->dev);
-
-	netdev_port_receive(vport, skb);
-
+	netdev_port_receive(skb);
 	return RX_HANDLER_CONSUMED;
 }
 
-static struct vport *netdev_create(const struct vport_parms *parms)
+static struct net_device *get_dpdev(const struct datapath *dp)
 {
-	struct vport *vport;
-	struct netdev_vport *netdev_vport;
-	int err;
+	struct vport *local;
 
-	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
-				&ovs_netdev_vport_ops, parms);
-	if (IS_ERR(vport)) {
-		err = PTR_ERR(vport);
-		goto error;
-	}
+	local = ovs_vport_ovsl(dp, OVSP_LOCAL);
+	return local->dev;
+}
 
-	netdev_vport = netdev_vport_priv(vport);
+struct vport *ovs_netdev_link(struct vport *vport, const char *name)
+{
+	int err;
 
-	netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
-	if (!netdev_vport->dev) {
+	vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
+	if (!vport->dev) {
 		err = -ENODEV;
 		goto error_free_vport;
 	}
-
-	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
-	    netdev_vport->dev->type != ARPHRD_ETHER ||
-	    ovs_is_internal_dev(netdev_vport->dev)) {
+	/* Ensure that the device exists and that the provided
+	 * name is not one of its aliases.
+	 */
+	if (strcmp(name, ovs_vport_name(vport))) {
+		err = -ENODEV;
+		goto error_put;
+	}
+	netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
+	if (vport->dev->flags & IFF_LOOPBACK ||
+	    (vport->dev->type != ARPHRD_ETHER &&
+	     vport->dev->type != ARPHRD_NONE) ||
+	    ovs_is_internal_dev(vport->dev)) {
 		err = -EINVAL;
 		goto error_put;
 	}
 
 	rtnl_lock();
-	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
-					 vport);
+	err = netdev_master_upper_dev_link(vport->dev,
+					   get_dpdev(vport->dp),
+					   NULL, NULL, NULL);
 	if (err)
 		goto error_unlock;
 
-	dev_set_promiscuity(netdev_vport->dev, 1);
-	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+	err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
+					 vport);
+	if (err)
+		goto error_master_upper_dev_unlink;
+
+	dev_disable_lro(vport->dev);
+	dev_set_promiscuity(vport->dev, 1);
+	vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 	rtnl_unlock();
 
 	return vport;
 
+error_master_upper_dev_unlink:
+	netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
 error_unlock:
 	rtnl_unlock();
 error_put:
-	dev_put(netdev_vport->dev);
+	netdev_put(vport->dev, &vport->dev_tracker);
 error_free_vport:
 	ovs_vport_free(vport);
-error:
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(ovs_netdev_link);
 
-static void free_port_rcu(struct rcu_head *rcu)
+static struct vport *netdev_create(const struct vport_parms *parms)
 {
-	struct netdev_vport *netdev_vport = container_of(rcu,
-					struct netdev_vport, rcu);
+	struct vport *vport;
+
+	vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
 
-	dev_put(netdev_vport->dev);
-	ovs_vport_free(vport_from_priv(netdev_vport));
+	return ovs_netdev_link(vport, parms->name);
 }
 
-static void netdev_destroy(struct vport *vport)
+static void vport_netdev_free(struct rcu_head *rcu)
 {
-	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	struct vport *vport = container_of(rcu, struct vport, rcu);
 
-	rtnl_lock();
-	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
-	netdev_rx_handler_unregister(netdev_vport->dev);
-	dev_set_promiscuity(netdev_vport->dev, -1);
-	rtnl_unlock();
-
-	call_rcu(&netdev_vport->rcu, free_port_rcu);
+	netdev_put(vport->dev, &vport->dev_tracker);
+	ovs_vport_free(vport);
 }
 
-const char *ovs_netdev_get_name(const struct vport *vport)
+void ovs_netdev_detach_dev(struct vport *vport)
 {
-	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-	return netdev_vport->dev->name;
+	ASSERT_RTNL();
+	vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+	netdev_rx_handler_unregister(vport->dev);
+	netdev_upper_dev_unlink(vport->dev,
+				netdev_master_upper_dev_get(vport->dev));
+	dev_set_promiscuity(vport->dev, -1);
 }
 
-static unsigned int packet_length(const struct sk_buff *skb)
+static void netdev_destroy(struct vport *vport)
 {
-	unsigned int length = skb->len - ETH_HLEN;
-
-	if (skb->protocol == htons(ETH_P_8021Q))
-		length -= VLAN_HLEN;
+	rtnl_lock();
+	if (netif_is_ovs_port(vport->dev))
+		ovs_netdev_detach_dev(vport);
+	rtnl_unlock();
 
-	return length;
+	call_rcu(&vport->rcu, vport_netdev_free);
 }
 
-static int netdev_send(struct vport *vport, struct sk_buff *skb)
+void ovs_netdev_tunnel_destroy(struct vport *vport)
 {
-	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-	int mtu = netdev_vport->dev->mtu;
-	int len;
-
-	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
-		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
-				     netdev_vport->dev->name,
-				     packet_length(skb), mtu);
-		goto drop;
-	}
-
-	skb->dev = netdev_vport->dev;
-	len = skb->len;
-	dev_queue_xmit(skb);
+	rtnl_lock();
+	if (netif_is_ovs_port(vport->dev))
+		ovs_netdev_detach_dev(vport);
 
-	return len;
+	/* We can be invoked by both explicit vport deletion and
+	 * underlying netdev deregistration; delete the link only
+	 * if it's not already shutting down.
+	 */
+	if (vport->dev->reg_state == NETREG_REGISTERED)
+		rtnl_delete_link(vport->dev, 0, NULL);
+	netdev_put(vport->dev, &vport->dev_tracker);
+	vport->dev = NULL;
+	rtnl_unlock();
 
-drop:
-	kfree_skb(skb);
-	return 0;
+	call_rcu(&vport->rcu, vport_netdev_free);
 }
+EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
 
 /* Returns null if this device is not attached to a datapath. */
 struct vport *ovs_netdev_get_vport(struct net_device *dev)
 {
-	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+	if (likely(netif_is_ovs_port(dev)))
 		return (struct vport *)
 			rcu_dereference_rtnl(dev->rx_handler_data);
 	else
 		return NULL;
 }
 
-const struct vport_ops ovs_netdev_vport_ops = {
+static struct vport_ops ovs_netdev_vport_ops = {
 	.type		= OVS_VPORT_TYPE_NETDEV,
 	.create		= netdev_create,
 	.destroy	= netdev_destroy,
-	.get_name	= ovs_netdev_get_name,
-	.send		= netdev_send,
+	.send		= dev_queue_xmit,
 };
+
+int __init ovs_netdev_init(void)
+{
+	return ovs_vport_ops_register(&ovs_netdev_vport_ops);
+}
+
+void ovs_netdev_exit(void)
+{
+	ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
+}
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index dd298b5c5cdb..c5d83a43bfc4 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_NETDEV_H
@@ -26,18 +13,11 @@
 
 struct vport *ovs_netdev_get_vport(struct net_device *dev);
 
-struct netdev_vport {
-	struct rcu_head rcu;
+struct vport *ovs_netdev_link(struct vport *vport, const char *name);
+void ovs_netdev_detach_dev(struct vport *);
 
-	struct net_device *dev;
-};
-
-static inline struct netdev_vport *
-netdev_vport_priv(const struct vport *vport)
-{
-	return vport_priv(vport);
-}
-
-const char *ovs_netdev_get_name(const struct vport *);
+int __init ovs_netdev_init(void);
+void ovs_netdev_exit(void);
 
+void ovs_netdev_tunnel_destroy(struct vport *vport);
 #endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
new file mode 100644
index 000000000000..0b881b043bcf
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ * Copyright (c) 2013 Cisco Systems, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
+#include <linux/module.h>
+#include <net/udp.h>
+#include <net/ip_tunnels.h>
+#include <net/rtnetlink.h>
+#include <net/vxlan.h>
+
+#include "datapath.h"
+#include "vport.h"
+#include "vport-netdev.h"
+
+static struct vport_ops ovs_vxlan_netdev_vport_ops;
+
+static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
+	__be16 dst_port = vxlan->cfg.dst_port;
+
+	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
+		return -EMSGSIZE;
+
+	if (vxlan->cfg.flags & VXLAN_F_GBP) {
+		struct nlattr *exts;
+
+		exts = nla_nest_start_noflag(skb, OVS_TUNNEL_ATTR_EXTENSION);
+		if (!exts)
+			return -EMSGSIZE;
+
+		if (vxlan->cfg.flags & VXLAN_F_GBP &&
+		    nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
+			return -EMSGSIZE;
+
+		nla_nest_end(skb, exts);
+	}
+
+	return 0;
+}
+
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
+	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_FLAG, },
+};
+
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
+				struct vxlan_config *conf)
+{
+	struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
+	int err;
+
+	if (nla_len(attr) < sizeof(struct nlattr))
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr,
+					  exts_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (exts[OVS_VXLAN_EXT_GBP])
+		conf->flags |= VXLAN_F_GBP;
+
+	return 0;
+}
+
+static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct net_device *dev;
+	struct vport *vport;
+	struct nlattr *a;
+	int err;
+	struct vxlan_config conf = {
+		.no_share = true,
+		.flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+		/* Don't restrict the packets that can be sent by MTU */
+		.mtu = IP_MAX_MTU,
+	};
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		conf.dst_port = htons(nla_get_u16(a));
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
+	if (a) {
+		err = vxlan_configure_exts(vport, a, &conf);
+		if (err) {
+			ovs_vport_free(vport);
+			goto error;
+		}
+	}
+
+	rtnl_lock();
+	dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
+	if (IS_ERR(dev)) {
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_CAST(dev);
+	}
+
+	err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+	if (err < 0) {
+		rtnl_delete_link(dev, 0, NULL);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
+	rtnl_unlock();
+	return vport;
+error:
+	return ERR_PTR(err);
+}
+
+static struct vport *vxlan_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = vxlan_tnl_create(parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	return ovs_netdev_link(vport, parms->name);
+}
+
+static struct vport_ops ovs_vxlan_netdev_vport_ops = {
+	.type			= OVS_VPORT_TYPE_VXLAN,
+	.create			= vxlan_create,
+	.destroy		= ovs_netdev_tunnel_destroy,
+	.get_options		= vxlan_get_options,
+	.send			= dev_queue_xmit,
+};
+
+static int __init ovs_vxlan_tnl_init(void)
+{
+	return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
+}
+
+static void __exit ovs_vxlan_tnl_exit(void)
+{
+	ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
+}
+
+module_init(ovs_vxlan_tnl_init);
+module_exit(ovs_vxlan_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: VXLAN switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-4");
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d4c7fa04ce08..6bbbc16ab778 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
+ * Copyright (c) 2007-2014 Nicira, Inc.
  */
 
 #include <linux/etherdevice.h>
@@ -28,21 +15,13 @@
 #include <linux/rtnetlink.h>
 #include <linux/compat.h>
 #include <net/net_namespace.h>
+#include <linux/module.h>
 
 #include "datapath.h"
 #include "vport.h"
 #include "vport-internal_dev.h"
 
-/* List of statically compiled vport implementations.  Don't forget to also
- * add yours to the list at the bottom of vport.h. */
-static const struct vport_ops *vport_ops_list[] = {
-	&ovs_netdev_vport_ops,
-	&ovs_internal_vport_ops,
-
-#ifdef CONFIG_OPENVSWITCH_GRE
-	&ovs_gre_vport_ops,
-#endif
-};
+static LIST_HEAD(vport_ops_list);
 
 /* Protected by RCU read lock for reading, ovs_mutex for writing. */
 static struct hlist_head *dev_table;
@@ -55,7 +34,7 @@ static struct hlist_head *dev_table;
  */
 int ovs_vport_init(void)
 {
-	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+	dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
 			    GFP_KERNEL);
 	if (!dev_table)
 		return -ENOMEM;
@@ -73,26 +52,54 @@ void ovs_vport_exit(void)
 	kfree(dev_table);
 }
 
-static struct hlist_head *hash_bucket(struct net *net, const char *name)
+static struct hlist_head *hash_bucket(const struct net *net, const char *name)
 {
 	unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
 	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
 }
 
+int __ovs_vport_ops_register(struct vport_ops *ops)
+{
+	int err = -EEXIST;
+	struct vport_ops *o;
+
+	ovs_lock();
+	list_for_each_entry(o, &vport_ops_list, list)
+		if (ops->type == o->type)
+			goto errout;
+
+	list_add_tail(&ops->list, &vport_ops_list);
+	err = 0;
+errout:
+	ovs_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
+
+void ovs_vport_ops_unregister(struct vport_ops *ops)
+{
+	ovs_lock();
+	list_del(&ops->list);
+	ovs_unlock();
+}
+EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
+
 /**
  *	ovs_vport_locate - find a port that has already been created
  *
+ * @net: network namespace
  * @name: name of port to find
  *
  * Must be called with ovs or RCU read lock.
  */
-struct vport *ovs_vport_locate(struct net *net, const char *name)
+struct vport *ovs_vport_locate(const struct net *net, const char *name)
 {
 	struct hlist_head *bucket = hash_bucket(net, name);
 	struct vport *vport;
 
-	hlist_for_each_entry_rcu(vport, bucket, hash_node)
-		if (!strcmp(name, vport->ops->get_name(vport)) &&
+	hlist_for_each_entry_rcu(vport, bucket, hash_node,
+				 lockdep_ovsl_is_held())
+		if (!strcmp(name, ovs_vport_name(vport)) &&
 		    net_eq(ovs_dp_get_net(vport->dp), net))
 			return vport;
 
@@ -104,17 +111,20 @@ struct vport *ovs_vport_locate(struct net *net, const char *name)
  *
  * @priv_size: Size of private data area to allocate.
  * @ops: vport device ops
+ * @parms: information about new vport.
  *
  * Allocate and initialize a new vport defined by @ops.  The vport will contain
  * a private data area of size @priv_size that can be accessed using
- * vport_priv().  vports that are no longer needed should be released with
+ * vport_priv().  Some parameters of the vport will be initialized from @parms.
+ * @vports that are no longer needed should be released with
  * vport_free().
  */
 struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
-			  const struct vport_parms *parms)
+			      const struct vport_parms *parms)
 {
 	struct vport *vport;
 	size_t alloc_size;
+	int err;
 
 	alloc_size = sizeof(struct vport);
 	if (priv_size) {
@@ -126,22 +136,31 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 	if (!vport)
 		return ERR_PTR(-ENOMEM);
 
+	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+	if (!vport->upcall_stats) {
+		err = -ENOMEM;
+		goto err_kfree_vport;
+	}
+
 	vport->dp = parms->dp;
 	vport->port_no = parms->port_no;
-	vport->upcall_portid = parms->upcall_portid;
 	vport->ops = ops;
 	INIT_HLIST_NODE(&vport->dp_hash_node);
 
-	vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
-	if (!vport->percpu_stats) {
-		kfree(vport);
-		return ERR_PTR(-ENOMEM);
+	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
+		err = -EINVAL;
+		goto err_free_percpu;
 	}
 
-	spin_lock_init(&vport->stats_lock);
-
 	return vport;
+
+err_free_percpu:
+	free_percpu(vport->upcall_stats);
+err_kfree_vport:
+	kfree(vport);
+	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(ovs_vport_alloc);
 
 /**
  *	ovs_vport_free - uninitialize and free vport
@@ -155,9 +174,25 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
  */
 void ovs_vport_free(struct vport *vport)
 {
-	free_percpu(vport->percpu_stats);
+	/* vport is freed from RCU callback or error path, Therefore
+	 * it is safe to use raw dereference.
+	 */
+	kfree(rcu_dereference_raw(vport->upcall_portids));
+	free_percpu(vport->upcall_stats);
 	kfree(vport);
 }
+EXPORT_SYMBOL_GPL(ovs_vport_free);
+
+static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
+{
+	struct vport_ops *ops;
+
+	list_for_each_entry(ops, &vport_ops_list, list)
+		if (ops->type == parms->type)
+			return ops;
+
+	return NULL;
+}
 
 /**
  *	ovs_vport_add - add vport device (for kernel callers)
@@ -169,38 +204,47 @@ void ovs_vport_free(struct vport *vport)
  */
 struct vport *ovs_vport_add(const struct vport_parms *parms)
 {
+	struct vport_ops *ops;
 	struct vport *vport;
-	int err = 0;
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
-		if (vport_ops_list[i]->type == parms->type) {
-			struct hlist_head *bucket;
+	ops = ovs_vport_lookup(parms);
+	if (ops) {
+		struct hlist_head *bucket;
 
-			vport = vport_ops_list[i]->create(parms);
-			if (IS_ERR(vport)) {
-				err = PTR_ERR(vport);
-				goto out;
-			}
+		if (!try_module_get(ops->owner))
+			return ERR_PTR(-EAFNOSUPPORT);
 
-			bucket = hash_bucket(ovs_dp_get_net(vport->dp),
-					     vport->ops->get_name(vport));
-			hlist_add_head_rcu(&vport->hash_node, bucket);
+		vport = ops->create(parms);
+		if (IS_ERR(vport)) {
+			module_put(ops->owner);
 			return vport;
 		}
-	}
 
-	err = -EAFNOSUPPORT;
+		bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+				     ovs_vport_name(vport));
+		hlist_add_head_rcu(&vport->hash_node, bucket);
+		return vport;
+	}
 
-out:
-	return ERR_PTR(err);
+	/* Unlock to attempt module load and return -EAGAIN if load
+	 * was successful as we need to restart the port addition
+	 * workflow.
+	 */
+	ovs_unlock();
+	request_module("vport-type-%d", parms->type);
+	ovs_lock();
+
+	if (!ovs_vport_lookup(parms))
+		return ERR_PTR(-EAFNOSUPPORT);
+	else
+		return ERR_PTR(-EAGAIN);
 }
 
 /**
  *	ovs_vport_set_options - modify existing vport device (for kernel callers)
  *
  * @vport: vport to modify.
- * @port: New configuration.
+ * @options: New configuration.
  *
  * Modifies an existing device with the specified configuration (which is
  * dependent on device type).  ovs_mutex must be held.
@@ -217,15 +261,13 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
  *
  * @vport: vport to delete.
  *
- * Detaches @vport from its datapath and destroys it.  It is possible to fail
- * for reasons such as lack of memory.  ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it.  ovs_mutex must
+ * be held.
  */
 void ovs_vport_del(struct vport *vport)
 {
-	ASSERT_OVSL();
-
 	hlist_del_rcu(&vport->hash_node);
-
+	module_put(vport->ops->owner);
 	vport->ops->destroy(vport);
 }
 
@@ -241,45 +283,69 @@ void ovs_vport_del(struct vport *vport)
  */
 void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 {
-	int i;
-
-	memset(stats, 0, sizeof(*stats));
-
-	/* We potentially have 2 sources of stats that need to be combined:
-	 * those we have collected (split into err_stats and percpu_stats) from
-	 * set_stats() and device error stats from netdev->get_stats() (for
-	 * errors that happen  downstream and therefore aren't reported through
-	 * our vport_record_error() function).
-	 * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
-	 * netdev-stats can be directly read over netlink-ioctl.
-	 */
-
-	spin_lock_bh(&vport->stats_lock);
+	const struct rtnl_link_stats64 *dev_stats;
+	struct rtnl_link_stats64 temp;
+
+	dev_stats = dev_get_stats(vport->dev, &temp);
+	stats->rx_errors  = dev_stats->rx_errors;
+	stats->tx_errors  = dev_stats->tx_errors;
+	stats->tx_dropped = dev_stats->tx_dropped;
+	stats->rx_dropped = dev_stats->rx_dropped;
+
+	stats->rx_bytes	  = dev_stats->rx_bytes;
+	stats->rx_packets = dev_stats->rx_packets;
+	stats->tx_bytes	  = dev_stats->tx_bytes;
+	stats->tx_packets = dev_stats->tx_packets;
+}
 
-	stats->rx_errors	= vport->err_stats.rx_errors;
-	stats->tx_errors	= vport->err_stats.tx_errors;
-	stats->tx_dropped	= vport->err_stats.tx_dropped;
-	stats->rx_dropped	= vport->err_stats.rx_dropped;
+/**
+ *	ovs_vport_get_upcall_stats - retrieve upcall stats
+ *
+ * @vport: vport from which to retrieve the stats.
+ * @skb: sk_buff where upcall stats should be appended.
+ *
+ * Retrieves upcall stats for the given device.
+ *
+ * Must be called with ovs_mutex or rcu_read_lock.
+ */
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb)
+{
+	struct nlattr *nla;
+	int i;
 
-	spin_unlock_bh(&vport->stats_lock);
+	__u64 tx_success = 0;
+	__u64 tx_fail = 0;
 
 	for_each_possible_cpu(i) {
-		const struct pcpu_tstats *percpu_stats;
-		struct pcpu_tstats local_stats;
+		const struct vport_upcall_stats_percpu *stats;
 		unsigned int start;
 
-		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
-
+		stats = per_cpu_ptr(vport->upcall_stats, i);
 		do {
-			start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
-			local_stats = *percpu_stats;
-		} while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
-
-		stats->rx_bytes		+= local_stats.rx_bytes;
-		stats->rx_packets	+= local_stats.rx_packets;
-		stats->tx_bytes		+= local_stats.tx_bytes;
-		stats->tx_packets	+= local_stats.tx_packets;
+			start = u64_stats_fetch_begin(&stats->syncp);
+			tx_success += u64_stats_read(&stats->n_success);
+			tx_fail += u64_stats_read(&stats->n_fail);
+		} while (u64_stats_fetch_retry(&stats->syncp, start));
+	}
+
+	nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success,
+			      OVS_VPORT_ATTR_PAD)) {
+		nla_nest_cancel(skb, nla);
+		return -EMSGSIZE;
 	}
+
+	if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail,
+			      OVS_VPORT_ATTR_PAD)) {
+		nla_nest_cancel(skb, nla);
+		return -EMSGSIZE;
+	}
+	nla_nest_end(skb, nla);
+
+	return 0;
 }
 
 /**
@@ -306,7 +372,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 	if (!vport->ops->get_options)
 		return 0;
 
-	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+	nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
 	if (!nla)
 		return -EMSGSIZE;
 
@@ -321,105 +387,196 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 }
 
 /**
- *	ovs_vport_receive - pass up received packet to the datapath for processing
+ *	ovs_vport_set_upcall_portids - set upcall portids of @vport.
  *
- * @vport: vport that received the packet
- * @skb: skb that was received
+ * @vport: vport to modify.
+ * @ids: new configuration, an array of port ids.
  *
- * Must be called with rcu_read_lock.  The packet cannot be shared and
- * skb->data should point to the Ethernet header.
+ * Sets the vport's upcall_portids to @ids.
+ *
+ * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
+ * as an array of U32.
+ *
+ * Must be called with ovs_mutex.
  */
-void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       struct ovs_key_ipv4_tunnel *tun_key)
+int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 {
-	struct pcpu_tstats *stats;
+	struct vport_portids *old, *vport_portids;
+
+	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+		return -EINVAL;
 
-	stats = this_cpu_ptr(vport->percpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	stats->rx_packets++;
-	stats->rx_bytes += skb->len;
-	u64_stats_update_end(&stats->syncp);
+	old = ovsl_dereference(vport->upcall_portids);
+
+	vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
+				GFP_KERNEL);
+	if (!vport_portids)
+		return -ENOMEM;
 
-	OVS_CB(skb)->tun_key = tun_key;
-	ovs_dp_process_received_packet(vport, skb);
+	vport_portids->n_ids = nla_len(ids) / sizeof(u32);
+	vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
+	nla_memcpy(vport_portids->ids, ids, nla_len(ids));
+
+	rcu_assign_pointer(vport->upcall_portids, vport_portids);
+
+	if (old)
+		kfree_rcu(old, rcu);
+	return 0;
 }
 
 /**
- *	ovs_vport_send - send a packet on a device
+ *	ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
  *
- * @vport: vport on which to send the packet
- * @skb: skb to send
+ * @vport: vport from which to retrieve the portids.
+ * @skb: sk_buff where portids should be appended.
  *
- * Sends the given packet and returns the length of data sent.  Either ovs
- * lock or rcu_read_lock must be held.
+ * Retrieves the configuration of the given vport, appending the
+ * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
+ * portids to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
+ * If an error occurs, @skb is left unmodified.  Must be called with
+ * ovs_mutex or rcu_read_lock.
  */
-int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+int ovs_vport_get_upcall_portids(const struct vport *vport,
+				 struct sk_buff *skb)
 {
-	int sent = vport->ops->send(vport, skb);
-
-	if (likely(sent > 0)) {
-		struct pcpu_tstats *stats;
+	struct vport_portids *ids;
 
-		stats = this_cpu_ptr(vport->percpu_stats);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->tx_packets++;
-		stats->tx_bytes += sent;
-		u64_stats_update_end(&stats->syncp);
-	} else if (sent < 0) {
-		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
-		kfree_skb(skb);
-	} else
-		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+	ids = rcu_dereference_ovsl(vport->upcall_portids);
 
-	return sent;
+	if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
+		return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
+			       ids->n_ids * sizeof(u32), (void *)ids->ids);
+	else
+		return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
 }
 
 /**
- *	ovs_vport_record_error - indicate device error to generic stats layer
+ *	ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
  *
- * @vport: vport that encountered the error
- * @err_type: one of enum vport_err_type types to indicate the error type
+ * @vport: vport from which the missed packet is received.
+ * @skb: skb that the missed packet was received.
  *
- * If using the vport generic stats layer indicate that an error of the given
- * type has occurred.
+ * Uses the skb_get_hash() to select the upcall portid to send the
+ * upcall.
+ *
+ * Returns the portid of the target socket.  Must be called with rcu_read_lock.
  */
-void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
+u32 ovs_vport_find_upcall_portid(const struct vport *vport,
+				 struct sk_buff *skb)
 {
-	spin_lock(&vport->stats_lock);
+	struct vport_portids *ids;
+	u32 ids_index;
+	u32 hash;
 
-	switch (err_type) {
-	case VPORT_E_RX_DROPPED:
-		vport->err_stats.rx_dropped++;
-		break;
+	ids = rcu_dereference(vport->upcall_portids);
 
-	case VPORT_E_RX_ERROR:
-		vport->err_stats.rx_errors++;
-		break;
+	/* If there is only one portid, select it in the fast-path. */
+	if (ids->n_ids == 1)
+		return ids->ids[0];
 
-	case VPORT_E_TX_DROPPED:
-		vport->err_stats.tx_dropped++;
-		break;
+	hash = skb_get_hash(skb);
+	ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
+	return ids->ids[ids_index];
+}
 
-	case VPORT_E_TX_ERROR:
-		vport->err_stats.tx_errors++;
-		break;
+/**
+ *	ovs_vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ * @tun_info: tunnel (if any) that carried packet
+ *
+ * Must be called with rcu_read_lock.  The packet cannot be shared and
+ * skb->data should point to the Ethernet header.
+ */
+int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
+		      const struct ip_tunnel_info *tun_info)
+{
+	struct sw_flow_key key;
+	int error;
+
+	OVS_CB(skb)->input_vport = vport;
+	OVS_CB(skb)->mru = 0;
+	OVS_CB(skb)->cutlen = 0;
+	OVS_CB(skb)->probability = 0;
+	OVS_CB(skb)->upcall_pid = 0;
+	if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
+		u32 mark;
+
+		mark = skb->mark;
+		skb_scrub_packet(skb, true);
+		skb->mark = mark;
+		tun_info = NULL;
 	}
 
-	spin_unlock(&vport->stats_lock);
+	/* Extract flow from 'skb' into 'key'. */
+	error = ovs_flow_key_extract(tun_info, skb, &key);
+	if (unlikely(error)) {
+		kfree_skb(skb);
+		return error;
+	}
+	ovs_dp_process_packet(skb, &key);
+	return 0;
 }
 
-static void free_vport_rcu(struct rcu_head *rcu)
+static int packet_length(const struct sk_buff *skb,
+			 struct net_device *dev)
 {
-	struct vport *vport = container_of(rcu, struct vport, rcu);
+	int length = skb->len - dev->hard_header_len;
 
-	ovs_vport_free(vport);
+	if (!skb_vlan_tag_present(skb) &&
+	    eth_type_vlan(skb->protocol))
+		length -= VLAN_HLEN;
+
+	/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
+	 * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
+	 * account for 802.1ad. e.g. is_skb_forwardable().
+	 */
+
+	return length > 0 ? length : 0;
 }
 
-void ovs_vport_deferred_free(struct vport *vport)
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 {
-	if (!vport)
-		return;
+	int mtu = vport->dev->mtu;
+
+	switch (vport->dev->type) {
+	case ARPHRD_NONE:
+		if (mac_proto == MAC_PROTO_ETHERNET) {
+			skb_reset_network_header(skb);
+			skb_reset_mac_len(skb);
+			skb->protocol = htons(ETH_P_TEB);
+		} else if (mac_proto != MAC_PROTO_NONE) {
+			WARN_ON_ONCE(1);
+			goto drop;
+		}
+		break;
+	case ARPHRD_ETHER:
+		if (mac_proto != MAC_PROTO_ETHERNET)
+			goto drop;
+		break;
+	default:
+		goto drop;
+	}
+
+	if (unlikely(packet_length(skb, vport->dev) > mtu &&
+		     !skb_is_gso(skb))) {
+		vport->dev->stats.tx_errors++;
+		if (vport->dev->flags & IFF_UP)
+			net_warn_ratelimited("%s: dropped over-mtu packet: "
+					     "%d > %d\n", vport->dev->name,
+					     packet_length(skb, vport->dev),
+					     mtu);
+		goto drop;
+	}
+
+	skb->dev = vport->dev;
+	skb_clear_tstamp(skb);
+	vport->ops->send(skb);
+	return;
 
-	call_rcu(&vport->rcu, free_vport_rcu);
+drop:
+	kfree_skb(skb);
 }
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 376045c42f8b..9f67b9dd49f9 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -1,19 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */
 
 #ifndef VPORT_H
@@ -23,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/netlink.h>
 #include <linux/openvswitch.h>
+#include <linux/reciprocal_div.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/u64_stats_sync.h>
@@ -32,12 +20,7 @@
 struct vport;
 struct vport_parms;
 
-/* The following definitions are for users of the vport subsytem: */
-
-/* The following definitions are for users of the vport subsytem: */
-struct vport_net {
-	struct vport __rcu *gre_vport;
-};
+/* The following definitions are for users of the vport subsystem: */
 
 int ovs_vport_init(void);
 void ovs_vport_exit(void);
@@ -45,52 +28,63 @@ void ovs_vport_exit(void);
 struct vport *ovs_vport_add(const struct vport_parms *);
 void ovs_vport_del(struct vport *);
 
-struct vport *ovs_vport_locate(struct net *net, const char *name);
+struct vport *ovs_vport_locate(const struct net *net, const char *name);
 
 void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
 
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb);
+
 int ovs_vport_set_options(struct vport *, struct nlattr *options);
 int ovs_vport_get_options(const struct vport *, struct sk_buff *);
 
-int ovs_vport_send(struct vport *, struct sk_buff *);
+int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
+int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
+u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
 
-/* The following definitions are for implementers of vport devices: */
-
-struct vport_err_stats {
-	u64 rx_dropped;
-	u64 rx_errors;
-	u64 tx_dropped;
-	u64 tx_errors;
+/**
+ * struct vport_portids - array of netlink portids of a vport.
+ *                        must be protected by rcu.
+ * @rn_ids: The reciprocal value of @n_ids.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_ids: Size of @ids array.
+ * @ids: Array storing the Netlink socket pids to be used for packets received
+ * on this port that miss the flow table.
+ */
+struct vport_portids {
+	struct reciprocal_value rn_ids;
+	struct rcu_head rcu;
+	u32 n_ids;
+	u32 ids[];
 };
 
 /**
  * struct vport - one port within a datapath
- * @rcu: RCU callback head for deferred destruction.
+ * @dev: Pointer to net_device.
+ * @dev_tracker: refcount tracker for @dev reference
  * @dp: Datapath to which this port belongs.
- * @upcall_portid: The Netlink port to use for packets received on this port that
- * miss the flow table.
+ * @upcall_portids: RCU protected 'struct vport_portids'.
  * @port_no: Index into @dp's @ports array.
  * @hash_node: Element in @dev_table hash table in vport.c.
  * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
  * @ops: Class structure.
- * @percpu_stats: Points to per-CPU statistics used and maintained by vport
- * @stats_lock: Protects @err_stats;
- * @err_stats: Points to error statistics used and maintained by vport
+ * @upcall_stats: Upcall stats of every ports.
+ * @detach_list: list used for detaching vport in net-exit call.
+ * @rcu: RCU callback head for deferred destruction.
  */
 struct vport {
-	struct rcu_head rcu;
+	struct net_device *dev;
+	netdevice_tracker dev_tracker;
 	struct datapath	*dp;
-	u32 upcall_portid;
+	struct vport_portids __rcu *upcall_portids;
 	u16 port_no;
 
 	struct hlist_node hash_node;
 	struct hlist_node dp_hash_node;
 	const struct vport_ops *ops;
+	struct vport_upcall_stats_percpu __percpu *upcall_stats;
 
-	struct pcpu_tstats __percpu *percpu_stats;
-
-	spinlock_t stats_lock;
-	struct vport_err_stats err_stats;
+	struct list_head detach_list;
+	struct rcu_head rcu;
 };
 
 /**
@@ -100,18 +94,22 @@ struct vport {
  * @type: New vport's type.
  * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
  * none was supplied.
+ * @desired_ifindex: New vport's ifindex.
  * @dp: New vport's datapath.
  * @port_no: New vport's port number.
+ * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message,
+ * %NULL if none was supplied.
  */
 struct vport_parms {
 	const char *name;
 	enum ovs_vport_type type;
+	int desired_ifindex;
 	struct nlattr *options;
 
 	/* For ovs_vport_alloc(). */
 	struct datapath *dp;
 	u16 port_no;
-	u32 upcall_portid;
+	struct nlattr *upcall_portids;
 };
 
 /**
@@ -127,9 +125,10 @@ struct vport_parms {
  * @get_options: Appends vport-specific attributes for the configuration of an
  * existing vport to a &struct sk_buff.  May be %NULL for a vport that does not
  * have any configuration.
- * @get_name: Get the device's name.
- * @send: Send a packet on the device.  Returns the length of the packet sent,
+ * @send: Send a packet on the device.
  * zero for dropped packets or negative for error.
+ * @owner: Module that implements this vport type.
+ * @list: List entry in the global list of vport types.
  */
 struct vport_ops {
 	enum ovs_vport_type type;
@@ -141,23 +140,27 @@ struct vport_ops {
 	int (*set_options)(struct vport *, struct nlattr *);
 	int (*get_options)(const struct vport *, struct sk_buff *);
 
-	/* Called with rcu_read_lock or ovs_mutex. */
-	const char *(*get_name)(const struct vport *);
-
-	int (*send)(struct vport *, struct sk_buff *);
+	int (*send)(struct sk_buff *skb);
+	struct module *owner;
+	struct list_head list;
 };
 
-enum vport_err_type {
-	VPORT_E_RX_DROPPED,
-	VPORT_E_RX_ERROR,
-	VPORT_E_TX_DROPPED,
-	VPORT_E_TX_ERROR,
+/**
+ * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for
+ * a given vport.
+ * @syncp:     Synchronization point for 64bit counters.
+ * @n_success: Number of packets that upcall to userspace succeed.
+ * @n_fail:    Number of packets that upcall to userspace failed.
+ */
+struct vport_upcall_stats_percpu {
+	struct u64_stats_sync syncp;
+	u64_stats_t n_success;
+	u64_stats_t n_fail;
 };
 
 struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
 			      const struct vport_parms *);
 void ovs_vport_free(struct vport *);
-void ovs_vport_deferred_free(struct vport *vport);
 
 #define VPORT_ALIGN 8
 
@@ -166,13 +169,15 @@ void ovs_vport_deferred_free(struct vport *vport);
  *
  * @vport: vport to access
  *
+ * Returns: A void pointer to a private data allocated in the @vport.
+ *
  * If a nonzero size was passed in priv_size of vport_alloc() a private data
  * area was allocated on creation.  This allows that area to be accessed and
  * used for any purpose needed by the vport implementer.
  */
 static inline void *vport_priv(const struct vport *vport)
 {
-	return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+	return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
 }
 
 /**
@@ -180,31 +185,34 @@ static inline void *vport_priv(const struct vport *vport)
  *
  * @priv: Start of private data area.
  *
+ * Returns: A reference to a vport structure that contains @priv.
+ *
  * It is sometimes useful to translate from a pointer to the private data
  * area to the vport, such as in the case where the private data pointer is
  * the result of a hash table lookup.  @priv must point to the start of the
  * private data area.
  */
-static inline struct vport *vport_from_priv(const void *priv)
+static inline struct vport *vport_from_priv(void *priv)
 {
-	return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+	return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
 }
 
-void ovs_vport_receive(struct vport *, struct sk_buff *,
-		       struct ovs_key_ipv4_tunnel *);
-void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
+int ovs_vport_receive(struct vport *, struct sk_buff *,
+		      const struct ip_tunnel_info *);
 
-/* List of statically compiled vport implementations.  Don't forget to also
- * add yours to the list at the top of vport.c. */
-extern const struct vport_ops ovs_netdev_vport_ops;
-extern const struct vport_ops ovs_internal_vport_ops;
-extern const struct vport_ops ovs_gre_vport_ops;
-
-static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
-				      const void *start, unsigned int len)
+static inline const char *ovs_vport_name(struct vport *vport)
 {
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
+	return vport->dev->name;
 }
 
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops)		\
+	({					\
+		(ops)->owner = THIS_MODULE;	\
+		__ovs_vport_ops_register(ops);	\
+	})
+
+void ovs_vport_ops_unregister(struct vport_ops *ops);
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
+
 #endif /* vport.h */