diff options
Diffstat (limited to 'net/openvswitch/flow.c')
| -rw-r--r-- | net/openvswitch/flow.c | 1848 |
1 files changed, 729 insertions, 1119 deletions
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5c519b121e1b..66366982f604 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -1,23 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2007-2011 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * Copyright (c) 2007-2014 Nicira, Inc. */ -#include "flow.h" -#include "datapath.h" #include <linux/uaccess.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -31,9 +16,13 @@ #include <linux/module.h> #include <linux/in.h> #include <linux/rcupdate.h> +#include <linux/cpumask.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/mpls.h> +#include <linux/sctp.h> +#include <linux/smp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> @@ -42,9 +31,146 @@ #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/ipv6.h> +#include <net/mpls.h> #include <net/ndisc.h> +#include <net/nsh.h> +#include <net/pkt_cls.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#include "conntrack.h" +#include "datapath.h" +#include "flow.h" +#include "flow_netlink.h" +#include "vport.h" + +u64 ovs_flow_used_time(unsigned long flow_jiffies) +{ + struct timespec64 cur_ts; + u64 cur_ms, idle_ms; + + ktime_get_ts64(&cur_ts); + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); + cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ts.tv_nsec / NSEC_PER_MSEC; -static struct kmem_cache *flow_cache; + return cur_ms - idle_ms; +} + +#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) + +void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, + const struct sk_buff *skb) +{ + struct sw_flow_stats *stats; + unsigned int cpu = smp_processor_id(); + int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); + + stats = rcu_dereference(flow->stats[cpu]); + + /* Check if already have CPU-specific stats. */ + if (likely(stats)) { + spin_lock(&stats->lock); + /* Mark if we write on the pre-allocated stats. */ + if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) + flow->stats_last_writer = cpu; + } else { + stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ + spin_lock(&stats->lock); + + /* If the current CPU is the only writer on the + * pre-allocated stats keep using them. + */ + if (unlikely(flow->stats_last_writer != cpu)) { + /* A previous locker may have already allocated the + * stats, so we need to check again. If CPU-specific + * stats were already allocated, we update the pre- + * allocated stats as we have already locked them. + */ + if (likely(flow->stats_last_writer != -1) && + likely(!rcu_access_pointer(flow->stats[cpu]))) { + /* Try to allocate CPU-specific stats. */ + struct sw_flow_stats *new_stats; + + new_stats = + kmem_cache_alloc_node(flow_stats_cache, + GFP_NOWAIT | + __GFP_THISNODE | + __GFP_NOWARN | + __GFP_NOMEMALLOC, + numa_node_id()); + if (likely(new_stats)) { + new_stats->used = jiffies; + new_stats->packet_count = 1; + new_stats->byte_count = len; + new_stats->tcp_flags = tcp_flags; + spin_lock_init(&new_stats->lock); + + rcu_assign_pointer(flow->stats[cpu], + new_stats); + cpumask_set_cpu(cpu, + flow->cpu_used_mask); + goto unlock; + } + } + flow->stats_last_writer = cpu; + } + } + + stats->used = jiffies; + stats->packet_count++; + stats->byte_count += len; + stats->tcp_flags |= tcp_flags; +unlock: + spin_unlock(&stats->lock); +} + +/* Must be called with rcu_read_lock or ovs_mutex. */ +void ovs_flow_stats_get(const struct sw_flow *flow, + struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + unsigned int cpu; + + *used = 0; + *tcp_flags = 0; + memset(ovs_stats, 0, sizeof(*ovs_stats)); + + for_each_cpu(cpu, flow->cpu_used_mask) { + struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); + + if (stats) { + /* Local CPU may write on non-local stats, so we must + * block bottom-halves here. + */ + spin_lock_bh(&stats->lock); + if (!*used || time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock_bh(&stats->lock); + } + } +} + +/* Called with ovs_mutex. */ +void ovs_flow_stats_clear(struct sw_flow *flow) +{ + unsigned int cpu; + + for_each_cpu(cpu, flow->cpu_used_mask) { + struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]); + + if (stats) { + spin_lock_bh(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock_bh(&stats->lock); + } + } +} static int check_header(struct sk_buff *skb, int len) { @@ -102,49 +228,172 @@ static bool udphdr_ok(struct sk_buff *skb) sizeof(struct udphdr)); } +static bool sctphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr)); +} + static bool icmphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct icmphdr)); } -u64 ovs_flow_used_time(unsigned long flow_jiffies) +/** + * get_ipv6_ext_hdrs() - Parses packet and sets IPv6 extension header flags. + * + * @skb: buffer where extension header data starts in packet + * @nh: ipv6 header + * @ext_hdrs: flags are stored here + * + * OFPIEH12_UNREP is set if more than one of a given IPv6 extension header + * is unexpectedly encountered. (Two destination options headers may be + * expected and would not cause this bit to be set.) + * + * OFPIEH12_UNSEQ is set if IPv6 extension headers were not in the order + * preferred (but not required) by RFC 2460: + * + * When more than one extension header is used in the same packet, it is + * recommended that those headers appear in the following order: + * IPv6 header + * Hop-by-Hop Options header + * Destination Options header + * Routing header + * Fragment header + * Authentication header + * Encapsulating Security Payload header + * Destination Options header + * upper-layer header + */ +static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh, + u16 *ext_hdrs) { - struct timespec cur_ts; - u64 cur_ms, idle_ms; + u8 next_type = nh->nexthdr; + unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); + int dest_options_header_count = 0; + + *ext_hdrs = 0; + + while (ipv6_ext_hdr(next_type)) { + struct ipv6_opt_hdr _hdr, *hp; + + switch (next_type) { + case IPPROTO_NONE: + *ext_hdrs |= OFPIEH12_NONEXT; + /* stop parsing */ + return; + + case IPPROTO_ESP: + if (*ext_hdrs & OFPIEH12_ESP) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST | + OFPIEH12_ROUTER | IPPROTO_FRAGMENT | + OFPIEH12_AUTH | OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_ESP; + break; - ktime_get_ts(&cur_ts); - idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + - cur_ts.tv_nsec / NSEC_PER_MSEC; + case IPPROTO_AH: + if (*ext_hdrs & OFPIEH12_AUTH) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & + ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER | + IPPROTO_FRAGMENT | OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_AUTH; + break; - return cur_ms - idle_ms; -} + case IPPROTO_DSTOPTS: + if (dest_options_header_count == 0) { + if (*ext_hdrs & + ~(OFPIEH12_HOP | OFPIEH12_UNREP)) + *ext_hdrs |= OFPIEH12_UNSEQ; + *ext_hdrs |= OFPIEH12_DEST; + } else if (dest_options_header_count == 1) { + if (*ext_hdrs & + ~(OFPIEH12_HOP | OFPIEH12_DEST | + OFPIEH12_ROUTER | OFPIEH12_FRAG | + OFPIEH12_AUTH | OFPIEH12_ESP | + OFPIEH12_UNREP)) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + } else { + *ext_hdrs |= OFPIEH12_UNREP; + } + dest_options_header_count++; + break; + + case IPPROTO_FRAGMENT: + if (*ext_hdrs & OFPIEH12_FRAG) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | + OFPIEH12_DEST | + OFPIEH12_ROUTER | + OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_FRAG; + break; + + case IPPROTO_ROUTING: + if (*ext_hdrs & OFPIEH12_ROUTER) + *ext_hdrs |= OFPIEH12_UNREP; + if ((*ext_hdrs & ~(OFPIEH12_HOP | + OFPIEH12_DEST | + OFPIEH12_UNREP)) || + dest_options_header_count >= 2) { + *ext_hdrs |= OFPIEH12_UNSEQ; + } + *ext_hdrs |= OFPIEH12_ROUTER; + break; + + case IPPROTO_HOPOPTS: + if (*ext_hdrs & OFPIEH12_HOP) + *ext_hdrs |= OFPIEH12_UNREP; + /* OFPIEH12_HOP is set to 1 if a hop-by-hop IPv6 + * extension header is present as the first + * extension header in the packet. + */ + if (*ext_hdrs == 0) + *ext_hdrs |= OFPIEH12_HOP; + else + *ext_hdrs |= OFPIEH12_UNSEQ; + break; + + default: + return; + } -#define SW_FLOW_KEY_OFFSET(field) \ - (offsetof(struct sw_flow_key, field) + \ - FIELD_SIZEOF(struct sw_flow_key, field)) + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (!hp) + break; + next_type = hp->nexthdr; + start += ipv6_optlen(hp); + } +} -static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, - int *key_lenp) +static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { + unsigned short frag_off; + unsigned int payload_ofs = 0; unsigned int nh_ofs = skb_network_offset(skb); unsigned int nh_len; - int payload_ofs; struct ipv6hdr *nh; - uint8_t nexthdr; - __be16 frag_off; - int err; - - *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label); + int err, nexthdr, flags = 0; err = check_header(skb, nh_ofs + sizeof(*nh)); if (unlikely(err)) return err; nh = ipv6_hdr(skb); - nexthdr = nh->nexthdr; - payload_ofs = (u8 *)(nh + 1) - skb->data; + + get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs); key->ip.proto = NEXTHDR_NONE; key->ip.tos = ipv6_get_dsfield(nh); @@ -153,17 +402,25 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, key->ipv6.addr.src = nh->saddr; key->ipv6.addr.dst = nh->daddr; - payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off); - if (unlikely(payload_ofs < 0)) - return -EINVAL; - - if (frag_off) { - if (frag_off & htons(~0x7)) + nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); + if (flags & IP6_FH_F_FRAG) { + if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - else - key->ip.frag = OVS_FRAG_TYPE_FIRST; + key->ip.proto = NEXTHDR_FRAGMENT; + return 0; + } + key->ip.frag = OVS_FRAG_TYPE_FIRST; + } else { + key->ip.frag = OVS_FRAG_TYPE_NONE; } + /* Delayed handling of error in ipv6_find_hdr() as it + * always sets flags and frag_off to a valid value which may be + * used to set key->ip.frag above. + */ + if (unlikely(nexthdr < 0)) + return -EPROTO; + nh_len = payload_ofs - nh_ofs; skb_set_transport_header(skb, nh_ofs + nh_len); key->ip.proto = nexthdr; @@ -176,284 +433,78 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -#define TCP_FLAGS_OFFSET 13 -#define TCP_FLAG_MASK 0x3f - -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) -{ - u8 tcp_flags = 0; - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - u8 *tcp = (u8 *)tcp_hdr(skb); - tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; - } - - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); -} - -struct sw_flow_actions *ovs_flow_actions_alloc(int size) -{ - struct sw_flow_actions *sfa; - - if (size > MAX_ACTIONS_BUFSIZE) - return ERR_PTR(-EINVAL); - - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); - if (!sfa) - return ERR_PTR(-ENOMEM); - - sfa->actions_len = 0; - return sfa; -} - -struct sw_flow *ovs_flow_alloc(void) -{ - struct sw_flow *flow; - - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&flow->lock); - flow->sf_acts = NULL; - - return flow; -} - -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) -{ - hash = jhash_1word(hash, table->hash_seed); - return flex_array_get(table->buckets, - (hash & (table->n_buckets - 1))); -} - -static struct flex_array *alloc_buckets(unsigned int n_buckets) -{ - struct flex_array *buckets; - int i, err; - - buckets = flex_array_alloc(sizeof(struct hlist_head *), - n_buckets, GFP_KERNEL); - if (!buckets) - return NULL; - - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); - if (err) { - flex_array_free(buckets); - return NULL; - } - - for (i = 0; i < n_buckets; i++) - INIT_HLIST_HEAD((struct hlist_head *) - flex_array_get(buckets, i)); - - return buckets; -} - -static void free_buckets(struct flex_array *buckets) -{ - flex_array_free(buckets); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size) -{ - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); - - if (!table) - return NULL; - - table->buckets = alloc_buckets(new_size); - - if (!table->buckets) { - kfree(table); - return NULL; - } - table->n_buckets = new_size; - table->count = 0; - table->node_ver = 0; - table->keep_flows = false; - get_random_bytes(&table->hash_seed, sizeof(u32)); - - return table; -} - -void ovs_flow_tbl_destroy(struct flow_table *table) -{ - int i; - - if (!table) - return; - - if (table->keep_flows) - goto skip_flows; - - for (i = 0; i < table->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *n; - int ver = table->node_ver; - - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); - ovs_flow_free(flow); - } - } - -skip_flows: - free_buckets(table->buckets); - kfree(table); -} - -static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) -{ - struct flow_table *table = container_of(rcu, struct flow_table, rcu); - - ovs_flow_tbl_destroy(table); -} - -void ovs_flow_tbl_deferred_destroy(struct flow_table *table) -{ - if (!table) - return; - - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); -} - -struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) +/** + * parse_vlan_tag - Parse vlan tag from vlan header. + * @skb: skb containing frame to parse + * @key_vh: pointer to parsed vlan tag + * @untag_vlan: should the vlan header be removed from the frame + * + * Return: ERROR on memory error. + * %0 if it encounters a non-vlan or incomplete packet. + * %1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, + bool untag_vlan) { - struct sw_flow *flow; - struct hlist_head *head; - int ver; - int i; - - ver = table->node_ver; - while (*bucket < table->n_buckets) { - i = 0; - head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { - if (i < *last) { - i++; - continue; - } - *last = i + 1; - return flow; - } - (*bucket)++; - *last = 0; - } + struct vlan_head *vh = (struct vlan_head *)skb->data; - return NULL; -} - -static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) -{ - struct hlist_head *head; - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - table->count++; -} + if (likely(!eth_type_vlan(vh->tpid))) + return 0; -static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) -{ - int old_ver; - int i; + if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16))) + return 0; - old_ver = old->node_ver; - new->node_ver = !old_ver; + if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + + sizeof(__be16)))) + return -ENOMEM; - /* Insert in new table. */ - for (i = 0; i < old->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head; + vh = (struct vlan_head *)skb->data; + key_vh->tci = vh->tci | htons(VLAN_CFI_MASK); + key_vh->tpid = vh->tpid; - head = flex_array_get(old->buckets, i); + if (unlikely(untag_vlan)) { + int offset = skb->data - skb_mac_header(skb); + u16 tci; + int err; - hlist_for_each_entry(flow, head, hash_node[old_ver]) - __flow_tbl_insert(new, flow); + __skb_push(skb, offset); + err = __skb_vlan_pop(skb, &tci); + __skb_pull(skb, offset); + if (err) + return err; + __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci); + } else { + __skb_pull(skb, sizeof(struct vlan_head)); } - old->keep_flows = true; -} - -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) -{ - struct flow_table *new_table; - - new_table = ovs_flow_tbl_alloc(n_buckets); - if (!new_table) - return ERR_PTR(-ENOMEM); - - flow_table_copy_flows(table, new_table); - - return new_table; -} - -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); -} - -void ovs_flow_free(struct sw_flow *flow) -{ - if (unlikely(!flow)) - return; - - kfree((struct sf_flow_acts __force *)flow->sf_acts); - kmem_cache_free(flow_cache, flow); + return 1; } -/* RCU callback used by ovs_flow_deferred_free. */ -static void rcu_free_flow_callback(struct rcu_head *rcu) +static void clear_vlan(struct sw_flow_key *key) { - struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - - ovs_flow_free(flow); -} - -/* Schedules 'flow' to be freed after the next RCU grace period. - * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_flow_deferred_free(struct sw_flow *flow) -{ - call_rcu(&flow->rcu, rcu_free_flow_callback); -} - -/* Schedules 'sf_acts' to be freed after the next RCU grace period. - * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) -{ - kfree_rcu(sf_acts, rcu); + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + key->eth.cvlan.tci = 0; + key->eth.cvlan.tpid = 0; } static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) { - struct qtag_prefix { - __be16 eth_type; /* ETH_P_8021Q */ - __be16 tci; - }; - struct qtag_prefix *qp; - - if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) - return 0; + int res; - if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + - sizeof(__be16)))) - return -ENOMEM; + if (skb_vlan_tag_present(skb)) { + key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK); + key->eth.vlan.tpid = skb->vlan_proto; + } else { + /* Parse outer vlan tag in the non-accelerated case. */ + res = parse_vlan_tag(skb, &key->eth.vlan, true); + if (res <= 0) + return res; + } - qp = (struct qtag_prefix *) skb->data; - key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); - __skb_pull(skb, sizeof(struct qtag_prefix)); + /* Parse inner vlan tag. */ + res = parse_vlan_tag(skb, &key->eth.cvlan, false); + if (res <= 0) + return res; return 0; } @@ -473,7 +524,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(proto)) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -490,25 +541,22 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(llc->ethertype)) return llc->ethertype; return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, - int *key_lenp, int nh_len) + int nh_len) { struct icmp6hdr *icmp = icmp6_hdr(skb); - int error = 0; - int key_len; /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ - key->ipv6.tp.src = htons(icmp->icmp6_type); - key->ipv6.tp.dst = htons(icmp->icmp6_code); - key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + key->tp.src = htons(icmp->icmp6_type); + key->tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -517,21 +565,19 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; - key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); /* In order to process neighbor discovery options, we need the * entire packet. */ if (unlikely(icmp_len < sizeof(*nd))) - goto out; - if (unlikely(skb_linearize(skb))) { - error = -ENOMEM; - goto out; - } + return 0; + + if (unlikely(skb_linearize(skb))) + return -ENOMEM; nd = (struct nd_msg *)skb_transport_header(skb); key->ipv6.nd.target = nd->target; - key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); icmp_len -= sizeof(*nd); offset = 0; @@ -541,7 +587,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, int opt_len = nd_opt->nd_opt_len * 8; if (unlikely(!opt_len || opt_len > icmp_len)) - goto invalid; + return 0; /* Store the link layer address if the appropriate * option is provided. It is considered an error if @@ -551,14 +597,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) goto invalid; - memcpy(key->ipv6.nd.sll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.sll, + &nd->opt[offset+sizeof(*nd_opt)]); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) goto invalid; - memcpy(key->ipv6.nd.tll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.tll, + &nd->opt[offset+sizeof(*nd_opt)]); } icmp_len -= opt_len; @@ -566,98 +612,88 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, } } - goto out; + return 0; invalid: memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); -out: - *key_lenp = key_len; - return error; + return 0; } -/** - * ovs_flow_extract - extracts a flow key from an Ethernet frame. - * @skb: sk_buff that contains the frame, with skb->data pointing to the - * Ethernet header - * @in_port: port number on which @skb was received. - * @key: output flow key - * @key_lenp: length of output flow key - * - * The caller must ensure that skb->len >= ETH_HLEN. - * - * Returns 0 if successful, otherwise a negative errno value. - * - * Initializes @skb header pointers as follows: - * - * - skb->mac_header: the Ethernet header. - * - * - skb->network_header: just past the Ethernet header, or just past the - * VLAN header, to the first byte of the Ethernet payload. - * - * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 - * on output, then just past the IP header, if one is present and - * of a correct length, otherwise the same as skb->network_header. - * For other key->eth.type values it is left untouched. - */ -int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, - int *key_lenp) +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) { - int error = 0; - int key_len = SW_FLOW_KEY_OFFSET(eth); - struct ethhdr *eth; - - memset(key, 0, sizeof(*key)); + struct nshhdr *nh; + unsigned int nh_ofs = skb_network_offset(skb); + u8 version, length; + int err; - key->phy.priority = skb->priority; - if (OVS_CB(skb)->tun_key) - memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); - key->phy.in_port = in_port; - key->phy.skb_mark = skb->mark; + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); + if (unlikely(err)) + return err; - skb_reset_mac_header(skb); + nh = nsh_hdr(skb); + version = nsh_get_ver(nh); + length = nsh_hdr_len(nh); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. - */ - eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + if (version != 0) + return -EINVAL; - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ + err = check_header(skb, nh_ofs + length); + if (unlikely(err)) + return err; - if (vlan_tx_tag_present(skb)) - key->eth.tci = htons(skb->vlan_tci); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + nh = nsh_hdr(skb); + key->nsh.base.flags = nsh_get_flags(nh); + key->nsh.base.ttl = nsh_get_ttl(nh); + key->nsh.base.mdtype = nh->mdtype; + key->nsh.base.np = nh->np; + key->nsh.base.path_hdr = nh->path_hdr; + switch (key->nsh.base.mdtype) { + case NSH_M_TYPE1: + if (length != NSH_M_TYPE1_LEN) + return -EINVAL; + memcpy(key->nsh.context, nh->md1.context, + sizeof(nh->md1)); + break; + case NSH_M_TYPE2: + memset(key->nsh.context, 0, + sizeof(nh->md1)); + break; + default: + return -EINVAL; + } - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + return 0; +} - skb_reset_network_header(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); +/** + * key_extract_l3l4 - extracts L3/L4 header information. + * @skb: sk_buff that contains the frame, with skb->data pointing to the + * L3 header + * @key: output flow key + * + * Return: %0 if successful, otherwise a negative errno value. + */ +static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) +{ + int error; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { struct iphdr *nh; __be16 offset; - key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); - error = check_iphdr(skb); if (unlikely(error)) { + memset(&key->ip, 0, sizeof(key->ip)); + memset(&key->ipv4, 0, sizeof(key->ipv4)); if (error == -EINVAL) { skb->transport_header = skb->network_header; error = 0; } - goto out; + return error; } nh = ip_hdr(skb); @@ -671,836 +707,410 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, offset = nh->frag_off & htons(IP_OFFSET); if (offset) { key->ip.frag = OVS_FRAG_TYPE_LATER; - goto out; + memset(&key->tp, 0, sizeof(key->tp)); + return 0; } if (nh->frag_off & htons(IP_MF) || - skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; + else + key->ip.frag = OVS_FRAG_TYPE_NONE; /* Transport layer. */ if (key->ip.proto == IPPROTO_TCP) { - key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv4.tp.src = tcp->source; - key->ipv4.tp.dst = tcp->dest; + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); + } else { + memset(&key->tp, 0, sizeof(key->tp)); } + } else if (key->ip.proto == IPPROTO_UDP) { - key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv4.tp.src = udp->source; - key->ipv4.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; + } else { + memset(&key->tp, 0, sizeof(key->tp)); + } + } else if (key->ip.proto == IPPROTO_SCTP) { + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; + } else { + memset(&key->tp, 0, sizeof(key->tp)); } } else if (key->ip.proto == IPPROTO_ICMP) { - key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); if (icmphdr_ok(skb)) { struct icmphdr *icmp = icmp_hdr(skb); /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. */ - key->ipv4.tp.src = htons(icmp->type); - key->ipv4.tp.dst = htons(icmp->code); + key->tp.src = htons(icmp->type); + key->tp.dst = htons(icmp->code); + } else { + memset(&key->tp, 0, sizeof(key->tp)); } } - } else if ((key->eth.type == htons(ETH_P_ARP) || - key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { + } else if (key->eth.type == htons(ETH_P_ARP) || + key->eth.type == htons(ETH_P_RARP)) { struct arp_eth_header *arp; + bool arp_available = arphdr_ok(skb); arp = (struct arp_eth_header *)skb_network_header(skb); - if (arp->ar_hrd == htons(ARPHRD_ETHER) - && arp->ar_pro == htons(ETH_P_IP) - && arp->ar_hln == ETH_ALEN - && arp->ar_pln == 4) { + if (arp_available && + arp->ar_hrd == htons(ARPHRD_ETHER) && + arp->ar_pro == htons(ETH_P_IP) && + arp->ar_hln == ETH_ALEN && + arp->ar_pln == 4) { /* We only match on the lower 8 bits of the opcode. */ if (ntohs(arp->ar_op) <= 0xff) key->ip.proto = ntohs(arp->ar_op); + else + key->ip.proto = 0; + memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); - memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); - memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); - key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); + ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha); + ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha); + } else { + memset(&key->ip, 0, sizeof(key->ip)); + memset(&key->ipv4, 0, sizeof(key->ipv4)); } + } else if (eth_p_mpls(key->eth.type)) { + size_t label_count = 1; + + memset(&key->mpls, 0, sizeof(key->mpls)); + skb_set_inner_network_header(skb, skb->mac_len); + while (1) { + __be32 lse; + + error = check_header(skb, skb->mac_len + + label_count * MPLS_HLEN); + if (unlikely(error)) + return 0; + + memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); + + if (label_count <= MPLS_LABEL_DEPTH) + memcpy(&key->mpls.lse[label_count - 1], &lse, + MPLS_HLEN); + + skb_set_inner_network_header(skb, skb->mac_len + + label_count * MPLS_HLEN); + if (lse & htonl(MPLS_LS_S_MASK)) + break; + + label_count++; + } + if (label_count > MPLS_LABEL_DEPTH) + label_count = MPLS_LABEL_DEPTH; + + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ - nh_len = parse_ipv6hdr(skb, key, &key_len); + nh_len = parse_ipv6hdr(skb, key); if (unlikely(nh_len < 0)) { - if (nh_len == -EINVAL) + switch (nh_len) { + case -EINVAL: + memset(&key->ip, 0, sizeof(key->ip)); + memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr)); + fallthrough; + case -EPROTO: skb->transport_header = skb->network_header; - else + error = 0; + break; + default: error = nh_len; - goto out; + } + return error; } - if (key->ip.frag == OVS_FRAG_TYPE_LATER) - goto out; + if (key->ip.frag == OVS_FRAG_TYPE_LATER) { + memset(&key->tp, 0, sizeof(key->tp)); + return 0; + } if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { - key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv6.tp.src = tcp->source; - key->ipv6.tp.dst = tcp->dest; + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); + } else { + memset(&key->tp, 0, sizeof(key->tp)); } } else if (key->ip.proto == NEXTHDR_UDP) { - key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv6.tp.src = udp->source; - key->ipv6.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; + } else { + memset(&key->tp, 0, sizeof(key->tp)); + } + } else if (key->ip.proto == NEXTHDR_SCTP) { + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; + } else { + memset(&key->tp, 0, sizeof(key->tp)); } } else if (key->ip.proto == NEXTHDR_ICMP) { - key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); if (icmp6hdr_ok(skb)) { - error = parse_icmpv6(skb, key, &key_len, nh_len); - if (error < 0) - goto out; + error = parse_icmpv6(skb, key, nh_len); + if (error) + return error; + } else { + memset(&key->tp, 0, sizeof(key->tp)); } } + } else if (key->eth.type == htons(ETH_P_NSH)) { + error = parse_nsh(skb, key); + if (error) + return error; } - -out: - *key_lenp = key_len; - return error; -} - -static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) -{ - return jhash2((u32 *)((u8 *)key + key_start), - DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); -} - -static int flow_key_start(struct sw_flow_key *key) -{ - if (key->tun_key.ipv4_dst) - return 0; - else - return offsetof(struct sw_flow_key, phy); -} - -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, - struct sw_flow_key *key, int key_len) -{ - struct sw_flow *flow; - struct hlist_head *head; - u8 *_key; - int key_start; - u32 hash; - - key_start = flow_key_start(key); - hash = ovs_flow_hash(key, key_start, key_len); - - _key = (u8 *) key + key_start; - head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { - - if (flow->hash == hash && - !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { - return flow; - } - } - return NULL; -} - -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - struct sw_flow_key *key, int key_len) -{ - flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); - memcpy(&flow->key, key, sizeof(flow->key)); - __flow_tbl_insert(table, flow); -} - -void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) -{ - BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); - table->count--; + return 0; } -/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_TUNNEL] = -1, -}; - -static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, - const struct nlattr *a[], u32 *attrs) +/** + * key_extract - extracts a flow key from an Ethernet frame. + * @skb: sk_buff that contains the frame, with skb->data pointing to the + * Ethernet header + * @key: output flow key + * + * The caller must ensure that skb->len >= ETH_HLEN. + * + * Initializes @skb header fields as follows: + * + * - skb->mac_header: the L2 header. + * + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. + * + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 + * on output, then just past the IP header, if one is present and + * of a correct length, otherwise the same as skb->network_header. + * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. + * + * Return: %0 if successful, otherwise a negative errno value. + */ +static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { - const struct ovs_key_icmp *icmp_key; - const struct ovs_key_tcp *tcp_key; - const struct ovs_key_udp *udp_key; - - switch (swkey->ip.proto) { - case IPPROTO_TCP: - if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_TCP); - - *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - swkey->ipv4.tp.src = tcp_key->tcp_src; - swkey->ipv4.tp.dst = tcp_key->tcp_dst; - break; + struct ethhdr *eth; - case IPPROTO_UDP: - if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_UDP); + /* Flags are always used as part of stats */ + key->tp.flags = 0; - *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - swkey->ipv4.tp.src = udp_key->udp_src; - swkey->ipv4.tp.dst = udp_key->udp_dst; - break; + skb_reset_mac_header(skb); - case IPPROTO_ICMP: - if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP))) + /* Link layer. */ + clear_vlan(key); + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_ICMP); - *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); - icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - swkey->ipv4.tp.src = htons(icmp_key->icmp_type); - swkey->ipv4.tp.dst = htons(icmp_key->icmp_code); - break; - } - - return 0; -} - -static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, - const struct nlattr *a[], u32 *attrs) -{ - const struct ovs_key_icmpv6 *icmpv6_key; - const struct ovs_key_tcp *tcp_key; - const struct ovs_key_udp *udp_key; + skb_reset_network_header(skb); + key->eth.type = skb->protocol; + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); - switch (swkey->ip.proto) { - case IPPROTO_TCP: - if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_TCP); + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ - *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - swkey->ipv6.tp.src = tcp_key->tcp_src; - swkey->ipv6.tp.dst = tcp_key->tcp_dst; - break; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; - case IPPROTO_UDP: - if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_UDP); + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) + return -ENOMEM; - *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - swkey->ipv6.tp.src = udp_key->udp_src; - swkey->ipv6.tp.dst = udp_key->udp_dst; - break; + /* Multiple tagged packets need to retain TPID to satisfy + * skb_vlan_pop(), which will later shift the ethertype into + * skb->protocol. + */ + if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK)) + skb->protocol = key->eth.cvlan.tpid; + else + skb->protocol = key->eth.type; - case IPPROTO_ICMPV6: - if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); - - *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); - icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type); - swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code); - - if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { - const struct ovs_key_nd *nd_key; - - if (!(*attrs & (1 << OVS_KEY_ATTR_ND))) - return -EINVAL; - *attrs &= ~(1 << OVS_KEY_ATTR_ND); - - *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); - nd_key = nla_data(a[OVS_KEY_ATTR_ND]); - memcpy(&swkey->ipv6.nd.target, nd_key->nd_target, - sizeof(swkey->ipv6.nd.target)); - memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN); - memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN); - } - break; + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); } - return 0; + skb_reset_mac_len(skb); + + /* Fill out L3/L4 key info, if any */ + return key_extract_l3l4(skb, key); } -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u32 *attrsp) +/* In the case of conntrack fragment handling it expects L3 headers, + * add a helper. + */ +int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key) { - const struct nlattr *nla; - u32 attrs; - int rem; - - attrs = 0; - nla_for_each_nested(nla, attr, rem) { - u16 type = nla_type(nla); - int expected_len; - - if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type)) - return -EINVAL; - - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) - return -EINVAL; - - attrs |= 1 << type; - a[type] = nla; - } - if (rem) - return -EINVAL; - - *attrsp = attrs; - return 0; + return key_extract_l3l4(skb, key); } -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct ovs_key_ipv4_tunnel *tun_key) +int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - struct nlattr *a; - int rem; - bool ttl = false; - - memset(tun_key, 0, sizeof(*tun_key)); - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - }; - - if (type > OVS_TUNNEL_KEY_ATTR_MAX || - ovs_tunnel_key_lens[type] != nla_len(a)) - return -EINVAL; - - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: - tun_key->tun_id = nla_get_be64(a); - tun_key->tun_flags |= TUNNEL_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - tun_key->ipv4_src = nla_get_be32(a); - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - tun_key->ipv4_dst = nla_get_be32(a); - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - tun_key->ipv4_tos = nla_get_u8(a); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - tun_key->ipv4_ttl = nla_get_u8(a); - ttl = true; - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_key->tun_flags |= TUNNEL_CSUM; - break; - default: - return -EINVAL; + int res; - } - } - if (rem > 0) - return -EINVAL; - - if (!tun_key->ipv4_dst) - return -EINVAL; + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; - if (!ttl) - return -EINVAL; - - return 0; + return res; } -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key) +static int key_extract_mac_proto(struct sk_buff *skb) { - struct nlattr *nla; - - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); - if (!nla) - return -EMSGSIZE; - - if (tun_key->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) - return -EMSGSIZE; - if (tun_key->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) - return -EMSGSIZE; - if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) - return -EMSGSIZE; - if (tun_key->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) - return -EMSGSIZE; - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) - return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) - return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_CSUM) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) - return -EMSGSIZE; - - nla_nest_end(skb, nla); - return 0; + switch (skb->dev->type) { + case ARPHRD_ETHER: + return MAC_PROTO_ETHERNET; + case ARPHRD_NONE: + if (skb->protocol == htons(ETH_P_TEB)) + return MAC_PROTO_ETHERNET; + return MAC_PROTO_NONE; + } + WARN_ON_ONCE(1); + return -EINVAL; } -/** - * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. - * @swkey: receives the extracted flow key. - * @key_lenp: number of bytes used in @swkey. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. - */ -int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, - const struct nlattr *attr) +int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, + struct sk_buff *skb, struct sw_flow_key *key) { - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct ovs_key_ethernet *eth_key; - int key_len; - u32 attrs; - int err; - - memset(swkey, 0, sizeof(struct sw_flow_key)); - key_len = SW_FLOW_KEY_OFFSET(eth); - - err = parse_flow_nlattrs(attr, a, &attrs); - if (err) - return err; - - /* Metadata attributes. */ - if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { - swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]); - attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); - } - if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { - u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); - if (in_port >= DP_MAX_PORTS) - return -EINVAL; - swkey->phy.in_port = in_port; - attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); - } else { - swkey->phy.in_port = DP_MAX_PORTS; - } - if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { - swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); - attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); - } - - if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { - err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); - if (err) - return err; - - attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); - } - - /* Data attributes. */ - if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); - memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); - memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); - - if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && - nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) { - const struct nlattr *encap; - __be16 tci; - - if (attrs != ((1 << OVS_KEY_ATTR_VLAN) | - (1 << OVS_KEY_ATTR_ETHERTYPE) | - (1 << OVS_KEY_ATTR_ENCAP))) - return -EINVAL; - - encap = a[OVS_KEY_ATTR_ENCAP]; - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (tci & htons(VLAN_TAG_PRESENT)) { - swkey->eth.tci = tci; - - err = parse_flow_nlattrs(encap, a, &attrs); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) - return -EINVAL; - - swkey->eth.type = htons(ETH_P_8021Q); - *key_lenp = key_len; - return 0; +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif + bool post_ct = false, post_ct_snat = false, post_ct_dnat = false; + int res, err; + u16 zone = 0; + + /* Extract metadata from packet. */ + if (tun_info) { + key->tun_proto = ip_tunnel_info_af(tun_info); + memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); + + if (tun_info->options_len) { + BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * + 8)) - 1 + > sizeof(key->tun_opts)); + + ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len), + tun_info); + key->tun_opts_len = tun_info->options_len; } else { - return -EINVAL; + key->tun_opts_len = 0; } + } else { + key->tun_proto = 0; + key->tun_opts_len = 0; + memset(&key->tun_key, 0, sizeof(key->tun_key)); } - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + key->phy.priority = skb->priority; + key->phy.in_port = OVS_CB(skb)->input_vport->port_no; + key->phy.skb_mark = skb->mark; + key->ovs_flow_hash = 0; + res = key_extract_mac_proto(skb); + if (res < 0) + return res; + key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (tc_skb_ext_tc_enabled()) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext && !tc_ext->act_miss ? + tc_ext->chain : 0; + OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; + post_ct = tc_ext ? tc_ext->post_ct : false; + post_ct_snat = post_ct ? tc_ext->post_ct_snat : false; + post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false; + zone = post_ct ? tc_ext->zone : 0; } else { - swkey->eth.type = htons(ETH_P_802_2); - } - - if (swkey->eth.type == htons(ETH_P_IP)) { - const struct ovs_key_ipv4 *ipv4_key; - - if (!(attrs & (1 << OVS_KEY_ATTR_IPV4))) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - - key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); - ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); - if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) - return -EINVAL; - swkey->ip.proto = ipv4_key->ipv4_proto; - swkey->ip.tos = ipv4_key->ipv4_tos; - swkey->ip.ttl = ipv4_key->ipv4_ttl; - swkey->ip.frag = ipv4_key->ipv4_frag; - swkey->ipv4.addr.src = ipv4_key->ipv4_src; - swkey->ipv4.addr.dst = ipv4_key->ipv4_dst; - - if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs); - if (err) - return err; - } - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - const struct ovs_key_ipv6 *ipv6_key; - - if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - - key_len = SW_FLOW_KEY_OFFSET(ipv6.label); - ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); - if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) - return -EINVAL; - swkey->ipv6.label = ipv6_key->ipv6_label; - swkey->ip.proto = ipv6_key->ipv6_proto; - swkey->ip.tos = ipv6_key->ipv6_tclass; - swkey->ip.ttl = ipv6_key->ipv6_hlimit; - swkey->ip.frag = ipv6_key->ipv6_frag; - memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(swkey->ipv6.addr.src)); - memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(swkey->ipv6.addr.dst)); - - if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs); - if (err) - return err; - } - } else if (swkey->eth.type == htons(ETH_P_ARP) || - swkey->eth.type == htons(ETH_P_RARP)) { - const struct ovs_key_arp *arp_key; - - if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) - return -EINVAL; - attrs &= ~(1 << OVS_KEY_ATTR_ARP); - - key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); - arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); - swkey->ipv4.addr.src = arp_key->arp_sip; - swkey->ipv4.addr.dst = arp_key->arp_tip; - if (arp_key->arp_op & htons(0xff00)) - return -EINVAL; - swkey->ip.proto = ntohs(arp_key->arp_op); - memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); - memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); + key->recirc_id = 0; } - - if (attrs) - return -EINVAL; - *key_lenp = key_len; - - return 0; -} - -/** - * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @flow: Receives extracted in_port, priority, tun_key and skb_mark. - * @key_len: Length of key in @flow. Used for calculating flow hash. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. - * - * This parses a series of Netlink attributes that form a flow key, which must - * take the same form accepted by flow_from_nlattrs(), but only enough of it to - * get the metadata, that is, the parts of the flow key that cannot be - * extracted from the packet itself. - */ -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, - const struct nlattr *attr) -{ - struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; - const struct nlattr *nla; - int rem; - - flow->key.phy.in_port = DP_MAX_PORTS; - flow->key.phy.priority = 0; - flow->key.phy.skb_mark = 0; - memset(tun_key, 0, sizeof(flow->key.tun_key)); - - nla_for_each_nested(nla, attr, rem) { - int type = nla_type(nla); - - if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { - int err; - - if (nla_len(nla) != ovs_key_lens[type]) - return -EINVAL; - - switch (type) { - case OVS_KEY_ATTR_PRIORITY: - flow->key.phy.priority = nla_get_u32(nla); - break; - - case OVS_KEY_ATTR_TUNNEL: - err = ovs_ipv4_tun_from_nlattr(nla, tun_key); - if (err) - return err; - break; - - case OVS_KEY_ATTR_IN_PORT: - if (nla_get_u32(nla) >= DP_MAX_PORTS) - return -EINVAL; - flow->key.phy.in_port = nla_get_u32(nla); - break; - - case OVS_KEY_ATTR_SKB_MARK: - flow->key.phy.skb_mark = nla_get_u32(nla); - break; +#else + key->recirc_id = 0; +#endif + + err = key_extract(skb, key); + if (!err) { + ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ + if (post_ct) { + if (!skb_get_nfct(skb)) { + key->ct_zone = zone; + } else { + if (!post_ct_dnat) + key->ct_state &= ~OVS_CS_F_DST_NAT; + if (!post_ct_snat) + key->ct_state &= ~OVS_CS_F_SRC_NAT; } } } - if (rem) - return -EINVAL; - - flow->hash = ovs_flow_hash(&flow->key, - flow_key_start(&flow->key), key_len); - - return 0; + return err; } -int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) +int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, + struct sk_buff *skb, + struct sw_flow_key *key, bool log) { - struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; - - if (swkey->phy.priority && - nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) - goto nla_put_failure; - - if (swkey->tun_key.ipv4_dst && - ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) - goto nla_put_failure; - - if (swkey->phy.in_port != DP_MAX_PORTS && - nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) - goto nla_put_failure; - - if (swkey->phy.skb_mark && - nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) - goto nla_put_failure; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - eth_key = nla_data(nla); - memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); - - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci)) - goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) - goto unencap; - } else { - encap = NULL; - } - - if (swkey->eth.type == htons(ETH_P_802_2)) - goto unencap; - - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) - goto nla_put_failure; - - if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ipv4 *ipv4_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); - if (!nla) - goto nla_put_failure; - ipv4_key = nla_data(nla); - ipv4_key->ipv4_src = swkey->ipv4.addr.src; - ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; - ipv4_key->ipv4_proto = swkey->ip.proto; - ipv4_key->ipv4_tos = swkey->ip.tos; - ipv4_key->ipv4_ttl = swkey->ip.ttl; - ipv4_key->ipv4_frag = swkey->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ipv6 *ipv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); - if (!nla) - goto nla_put_failure; - ipv6_key = nla_data(nla); - memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, - sizeof(ipv6_key->ipv6_src)); - memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, - sizeof(ipv6_key->ipv6_dst)); - ipv6_key->ipv6_label = swkey->ipv6.label; - ipv6_key->ipv6_proto = swkey->ip.proto; - ipv6_key->ipv6_tclass = swkey->ip.tos; - ipv6_key->ipv6_hlimit = swkey->ip.ttl; - ipv6_key->ipv6_frag = swkey->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP) || - swkey->eth.type == htons(ETH_P_RARP)) { - struct ovs_key_arp *arp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); - if (!nla) - goto nla_put_failure; - arp_key = nla_data(nla); - memset(arp_key, 0, sizeof(struct ovs_key_arp)); - arp_key->arp_sip = swkey->ipv4.addr.src; - arp_key->arp_tip = swkey->ipv4.addr.dst; - arp_key->arp_op = htons(swkey->ip.proto); - memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); - } + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; + int err; - if ((swkey->eth.type == htons(ETH_P_IP) || - swkey->eth.type == htons(ETH_P_IPV6)) && - swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - - if (swkey->ip.proto == IPPROTO_TCP) { - struct ovs_key_tcp *tcp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); - if (!nla) - goto nla_put_failure; - tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = swkey->ipv4.tp.src; - tcp_key->tcp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = swkey->ipv6.tp.src; - tcp_key->tcp_dst = swkey->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_UDP) { - struct ovs_key_udp *udp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); - if (!nla) - goto nla_put_failure; - udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = swkey->ipv4.tp.src; - udp_key->udp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = swkey->ipv6.tp.src; - udp_key->udp_dst = swkey->ipv6.tp.dst; - } - } else if (swkey->eth.type == htons(ETH_P_IP) && - swkey->ip.proto == IPPROTO_ICMP) { - struct ovs_key_icmp *icmp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); - if (!nla) - goto nla_put_failure; - icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); - icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); - } else if (swkey->eth.type == htons(ETH_P_IPV6) && - swkey->ip.proto == IPPROTO_ICMPV6) { - struct ovs_key_icmpv6 *icmpv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, - sizeof(*icmpv6_key)); - if (!nla) - goto nla_put_failure; - icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); - - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { - struct ovs_key_nd *nd_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); - if (!nla) - goto nla_put_failure; - nd_key = nla_data(nla); - memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, - sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); - } - } - } + err = parse_flow_nlattrs(attr, a, &attrs, log); + if (err) + return -EINVAL; -unencap: - if (encap) - nla_nest_end(skb, encap); + /* Extract metadata from netlink attributes. */ + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log); + if (err) + return err; - return 0; + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets received from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + * For L2 packet key eth type would be zero. skb protocol + * would be set to correct value later during key-extact. + */ -nla_put_failure: - return -EMSGSIZE; -} + skb->protocol = key->eth.type; + err = key_extract(skb, key); + if (err) + return err; -/* Initializes the flow module. - * Returns zero if successful or a negative error code. */ -int ovs_flow_init(void) -{ - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); - if (flow_cache == NULL) - return -ENOMEM; + /* Check that we have conntrack original direction tuple metadata only + * for packets for which it makes sense. Otherwise the key may be + * corrupted due to overlapping key fields. + */ + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) && + key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) && + (key->eth.type != htons(ETH_P_IPV6) || + sw_flow_key_is_nd(key))) + return -EINVAL; return 0; } - -/* Uninitializes the flow module. */ -void ovs_flow_exit(void) -{ - kmem_cache_destroy(flow_cache); -} |
