diff options
| author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2015-11-23 09:04:05 +0100 | 
|---|---|---|
| committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2015-11-23 09:04:05 +0100 | 
| commit | 92907cbbef8625bb3998d1eb385fc88f23c97a3f (patch) | |
| tree | 15626ff9287e37c3cb81c7286d6db5a7fd77c854 /net/openvswitch | |
| parent | 15fbfccfe92c62ae8d1ecc647c44157ed01ac02e (diff) | |
| parent | 1ec218373b8ebda821aec00bb156a9c94fad9cd4 (diff) | |
Merge tag 'v4.4-rc2' into drm-intel-next-queued
Linux 4.4-rc2
Backmerge to get at
commit 1b0e3a049efe471c399674fd954500ce97438d30
Author: Imre Deak <imre.deak@intel.com>
Date:   Thu Nov 5 23:04:11 2015 +0200
    drm/i915/skl: disable display side power well support for now
so that we can properly re-enable skl power wells in -next.
Conflicts are just adjacent lines changed, except for intel_fbdev.c
where we need to interleave the changes. Nothing nefarious.
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Diffstat (limited to 'net/openvswitch')
| -rw-r--r-- | net/openvswitch/actions.c | 45 | ||||
| -rw-r--r-- | net/openvswitch/conntrack.c | 143 | ||||
| -rw-r--r-- | net/openvswitch/conntrack.h | 9 | ||||
| -rw-r--r-- | net/openvswitch/datapath.c | 10 | ||||
| -rw-r--r-- | net/openvswitch/datapath.h | 1 | ||||
| -rw-r--r-- | net/openvswitch/flow.c | 4 | ||||
| -rw-r--r-- | net/openvswitch/flow.h | 3 | ||||
| -rw-r--r-- | net/openvswitch/flow_netlink.c | 171 | ||||
| -rw-r--r-- | net/openvswitch/flow_netlink.h | 6 | ||||
| -rw-r--r-- | net/openvswitch/flow_table.c | 5 | ||||
| -rw-r--r-- | net/openvswitch/vport-geneve.c | 15 | ||||
| -rw-r--r-- | net/openvswitch/vport-gre.c | 10 | ||||
| -rw-r--r-- | net/openvswitch/vport-internal_dev.c | 54 | ||||
| -rw-r--r-- | net/openvswitch/vport-netdev.c | 33 | ||||
| -rw-r--r-- | net/openvswitch/vport-netdev.h | 1 | ||||
| -rw-r--r-- | net/openvswitch/vport-vxlan.c | 21 | ||||
| -rw-r--r-- | net/openvswitch/vport.c | 121 | ||||
| -rw-r--r-- | net/openvswitch/vport.h | 23 | 
18 files changed, 340 insertions, 335 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 315f5330b6e5..c88d0f2d3e01 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,  	return 0;  } -static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) +static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)  {  	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);  	struct vport *vport = data->vport; @@ -679,12 +679,12 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)  	skb_pull(skb, hlen);  } -static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, -			 __be16 ethertype) +static void ovs_fragment(struct net *net, struct vport *vport, +			 struct sk_buff *skb, u16 mru, __be16 ethertype)  {  	if (skb_network_offset(skb) > MAX_L2_LEN) {  		OVS_NLERR(1, "L2 header too long to fragment"); -		return; +		goto err;  	}  	if (ethertype == htons(ETH_P_IP)) { @@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,  		skb_dst_set_noref(skb, &ovs_dst);  		IPCB(skb)->frag_max_size = mru; -		ip_do_fragment(skb->sk, skb, ovs_vport_output); +		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);  		refdst_drop(orig_dst);  	} else if (ethertype == htons(ETH_P_IPV6)) {  		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); @@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,  		struct rt6_info ovs_rt;  		if (!v6ops) { -			kfree_skb(skb); -			return; +			goto err;  		}  		prepare_frag(vport, skb); @@ -722,14 +721,18 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,  		skb_dst_set_noref(skb, &ovs_rt.dst);  		IP6CB(skb)->frag_max_size = mru; -		v6ops->fragment(skb->sk, skb, ovs_vport_output); +		v6ops->fragment(net, skb->sk, skb, ovs_vport_output);  		refdst_drop(orig_dst);  	} else {  		WARN_ONCE(1, 
"Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",  			  ovs_vport_name(vport), ntohs(ethertype), mru,  			  vport->dev->mtu); -		kfree_skb(skb); +		goto err;  	} + +	return; +err: +	kfree_skb(skb);  }  static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, @@ -743,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,  		if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {  			ovs_vport_send(vport, skb);  		} else if (mru <= vport->dev->mtu) { +			struct net *net = read_pnet(&dp->net);  			__be16 ethertype = key->eth.type;  			if (!is_flow_key_valid(key)) { @@ -752,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,  					ethertype = vlan_get_protocol(skb);  			} -			ovs_fragment(vport, skb, mru, ethertype); +			ovs_fragment(net, vport, skb, mru, ethertype);  		} else {  			kfree_skb(skb);  		} @@ -765,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,  			    struct sw_flow_key *key, const struct nlattr *attr,  			    const struct nlattr *actions, int actions_len)  { -	struct ip_tunnel_info info;  	struct dp_upcall_info upcall;  	const struct nlattr *a;  	int rem; @@ -793,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,  			if (vport) {  				int err; -				upcall.egress_tun_info = &info; -				err = ovs_vport_get_egress_tun_info(vport, skb, -								    &upcall); -				if (err) -					upcall.egress_tun_info = NULL; +				err = dev_fill_metadata_dst(vport->dev, skb); +				if (!err) +					upcall.egress_tun_info = skb_tunnel_info(skb);  			}  			break; @@ -968,7 +969,7 @@ static int execute_masked_set_action(struct sk_buff *skb,  	case OVS_KEY_ATTR_CT_STATE:  	case OVS_KEY_ATTR_CT_ZONE:  	case OVS_KEY_ATTR_CT_MARK: -	case OVS_KEY_ATTR_CT_LABEL: +	case OVS_KEY_ATTR_CT_LABELS:  		err = -EINVAL;  		break;  	} @@ -1099,12 +1100,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,  			break;  		
case OVS_ACTION_ATTR_CT: +			if (!is_flow_key_valid(key)) { +				err = ovs_flow_key_update(skb, key); +				if (err) +					return err; +			} +  			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,  					     nla_data(a));  			/* Hide stolen IP fragments from user space. */ -			if (err == -EINPROGRESS) -				return 0; +			if (err) +				return err == -EINPROGRESS ? 0 : err;  			break;  		} diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 002a755fa07e..c2cc11168fd5 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -37,9 +37,9 @@ struct md_mark {  };  /* Metadata label for masked write to conntrack label. */ -struct md_label { -	struct ovs_key_ct_label value; -	struct ovs_key_ct_label mask; +struct md_labels { +	struct ovs_key_ct_labels value; +	struct ovs_key_ct_labels mask;  };  /* Conntrack action context for execution. */ @@ -47,10 +47,10 @@ struct ovs_conntrack_info {  	struct nf_conntrack_helper *helper;  	struct nf_conntrack_zone zone;  	struct nf_conn *ct; -	u32 flags; +	u8 commit : 1;  	u16 family;  	struct md_mark mark; -	struct md_label label; +	struct md_labels labels;  };  static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)  #endif  } -static void ovs_ct_get_label(const struct nf_conn *ct, -			     struct ovs_key_ct_label *label) +static void ovs_ct_get_labels(const struct nf_conn *ct, +			      struct ovs_key_ct_labels *labels)  {  	struct nf_conn_labels *cl = ct ? 
nf_ct_labels_find(ct) : NULL;  	if (cl) {  		size_t len = cl->words * sizeof(long); -		if (len > OVS_CT_LABEL_LEN) -			len = OVS_CT_LABEL_LEN; -		else if (len < OVS_CT_LABEL_LEN) -			memset(label, 0, OVS_CT_LABEL_LEN); -		memcpy(label, cl->bits, len); +		if (len > OVS_CT_LABELS_LEN) +			len = OVS_CT_LABELS_LEN; +		else if (len < OVS_CT_LABELS_LEN) +			memset(labels, 0, OVS_CT_LABELS_LEN); +		memcpy(labels, cl->bits, len);  	} else { -		memset(label, 0, OVS_CT_LABEL_LEN); +		memset(labels, 0, OVS_CT_LABELS_LEN);  	}  } @@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,  	key->ct.state = state;  	key->ct.zone = zone->id;  	key->ct.mark = ovs_ct_get_mark(ct); -	ovs_ct_get_label(ct, &key->ct.label); +	ovs_ct_get_labels(ct, &key->ct.labels);  }  /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,  	ct = nf_ct_get(skb, &ctinfo);  	if (ct) {  		state = ovs_ct_get_state(ctinfo); +		if (!nf_ct_is_confirmed(ct)) +			state |= OVS_CS_F_NEW;  		if (ct->master)  			state |= OVS_CS_F_RELATED;  		zone = nf_ct_zone(ct); @@ -167,7 +169,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)  int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)  { -	if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) +	if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))  		return -EMSGSIZE;  	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && @@ -179,8 +181,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)  		return -EMSGSIZE;  	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && -	    nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), -		    &key->ct.label)) +	    nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), +		    &key->ct.labels))  		return -EMSGSIZE;  	return 0; @@ -213,18 +215,15 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,  #endif  } -static 
int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, -			    const struct ovs_key_ct_label *label, -			    const struct ovs_key_ct_label *mask) +static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, +			     const struct ovs_key_ct_labels *labels, +			     const struct ovs_key_ct_labels *mask)  {  	enum ip_conntrack_info ctinfo;  	struct nf_conn_labels *cl;  	struct nf_conn *ct;  	int err; -	if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) -		return -ENOTSUPP; -  	/* The connection could be invalid, in which case set_label is no-op.*/  	ct = nf_ct_get(skb, &ctinfo);  	if (!ct) @@ -235,15 +234,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,  		nf_ct_labels_ext_add(ct);  		cl = nf_ct_labels_find(ct);  	} -	if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) +	if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)  		return -ENOSPC; -	err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, -				    OVS_CT_LABEL_LEN / sizeof(u32)); +	err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, +				    OVS_CT_LABELS_LEN / sizeof(u32));  	if (err)  		return err; -	ovs_ct_get_label(ct, &key->ct.label); +	ovs_ct_get_labels(ct, &key->ct.labels);  	return 0;  } @@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)  	return helper->help(skb, protoff, ct, ctinfo);  } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. 
+ */  static int handle_fragments(struct net *net, struct sw_flow_key *key,  			    u16 zone, struct sk_buff *skb)  { @@ -304,32 +306,40 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,  		int err;  		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); -		err = ip_defrag(skb, user); +		err = ip_defrag(net, skb, user);  		if (err)  			return err;  		ovs_cb.mru = IPCB(skb)->frag_max_size; -	} else if (key->eth.type == htons(ETH_P_IPV6)) {  #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) +	} else if (key->eth.type == htons(ETH_P_IPV6)) {  		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;  		struct sk_buff *reasm;  		memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); -		reasm = nf_ct_frag6_gather(skb, user); +		reasm = nf_ct_frag6_gather(net, skb, user);  		if (!reasm)  			return -EINPROGRESS; -		if (skb == reasm) +		if (skb == reasm) { +			kfree_skb(skb);  			return -EINVAL; +		} + +		/* Don't free 'skb' even though it is one of the original +		 * fragments, as we're going to morph it into the head. 
+		 */ +		skb_get(skb); +		nf_ct_frag6_consume_orig(reasm);  		key->ip.proto = ipv6_hdr(reasm)->nexthdr;  		skb_morph(skb, reasm); +		skb->next = reasm->next;  		consume_skb(reasm);  		ovs_cb.mru = IP6CB(skb)->frag_max_size; -#else -		return -EPFNOSUPPORT;  #endif  	} else { +		kfree_skb(skb);  		return -EPFNOSUPPORT;  	} @@ -347,7 +357,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,  {  	struct nf_conntrack_tuple tuple; -	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) +	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))  		return NULL;  	return __nf_ct_expect_find(net, zone, &tuple);  } @@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,  	return true;  } -static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, +static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,  			   const struct ovs_conntrack_info *info,  			   struct sk_buff *skb)  { @@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,  		}  	} +	ovs_ct_update_key(skb, key, true); +  	return 0;  } @@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,  		err = __ovs_ct_lookup(net, key, info, skb);  		if (err)  			return err; - -		ovs_ct_update_key(skb, key, true);  	}  	return 0; @@ -460,22 +470,23 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,  	if (nf_conntrack_confirm(skb) != NF_ACCEPT)  		return -EINVAL; -	ovs_ct_update_key(skb, key, true); -  	return 0;  } -static bool label_nonzero(const struct ovs_key_ct_label *label) +static bool labels_nonzero(const struct ovs_key_ct_labels *labels)  {  	size_t i; -	for (i = 0; i < sizeof(*label); i++) -		if (label->ct_label[i]) +	for (i = 0; i < sizeof(*labels); i++) +		if (labels->ct_labels[i])  			return true;  	return false;  } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value 
if 'skb' is freed. + */  int ovs_ct_execute(struct net *net, struct sk_buff *skb,  		   struct sw_flow_key *key,  		   const struct ovs_conntrack_info *info) @@ -493,7 +504,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,  			return err;  	} -	if (info->flags & OVS_CT_F_COMMIT) +	if (info->commit)  		err = ovs_ct_commit(net, key, info, skb);  	else  		err = ovs_ct_lookup(net, key, info, skb); @@ -506,11 +517,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,  		if (err)  			goto err;  	} -	if (label_nonzero(&info->label.mask)) -		err = ovs_ct_set_label(skb, key, &info->label.value, -				       &info->label.mask); +	if (labels_nonzero(&info->labels.mask)) +		err = ovs_ct_set_labels(skb, key, &info->labels.value, +					&info->labels.mask);  err:  	skb_push(skb, nh_ofs); +	if (err) +		kfree_skb(skb);  	return err;  } @@ -539,14 +552,13 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,  }  static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { -	[OVS_CT_ATTR_FLAGS]	= { .minlen = sizeof(u32), -				    .maxlen = sizeof(u32) }, +	[OVS_CT_ATTR_COMMIT]	= { .minlen = 0, .maxlen = 0 },  	[OVS_CT_ATTR_ZONE]	= { .minlen = sizeof(u16),  				    .maxlen = sizeof(u16) },  	[OVS_CT_ATTR_MARK]	= { .minlen = sizeof(struct md_mark),  				    .maxlen = sizeof(struct md_mark) }, -	[OVS_CT_ATTR_LABEL]	= { .minlen = sizeof(struct md_label), -				    .maxlen = sizeof(struct md_label) }, +	[OVS_CT_ATTR_LABELS]	= { .minlen = sizeof(struct md_labels), +				    .maxlen = sizeof(struct md_labels) },  	[OVS_CT_ATTR_HELPER]	= { .minlen = 1,  				    .maxlen = NF_CT_HELPER_NAME_LEN }  }; @@ -576,8 +588,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,  		}  		switch (type) { -		case OVS_CT_ATTR_FLAGS: -			info->flags = nla_get_u32(a); +		case OVS_CT_ATTR_COMMIT: +			info->commit = true;  			break;  #ifdef CONFIG_NF_CONNTRACK_ZONES  		case OVS_CT_ATTR_ZONE: @@ -588,15 +600,23 @@ static 
int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,  		case OVS_CT_ATTR_MARK: {  			struct md_mark *mark = nla_data(a); +			if (!mark->mask) { +				OVS_NLERR(log, "ct_mark mask cannot be 0"); +				return -EINVAL; +			}  			info->mark = *mark;  			break;  		}  #endif  #ifdef CONFIG_NF_CONNTRACK_LABELS -		case OVS_CT_ATTR_LABEL: { -			struct md_label *label = nla_data(a); +		case OVS_CT_ATTR_LABELS: { +			struct md_labels *labels = nla_data(a); -			info->label = *label; +			if (!labels_nonzero(&labels->mask)) { +				OVS_NLERR(log, "ct_labels mask cannot be 0"); +				return -EINVAL; +			} +			info->labels = *labels;  			break;  		}  #endif @@ -633,7 +653,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)  	    attr == OVS_KEY_ATTR_CT_MARK)  		return true;  	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && -	    attr == OVS_KEY_ATTR_CT_LABEL) { +	    attr == OVS_KEY_ATTR_CT_LABELS) {  		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);  		return ovs_net->xt_label; @@ -701,18 +721,19 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,  	if (!start)  		return -EMSGSIZE; -	if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) +	if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))  		return -EMSGSIZE;  	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&  	    nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))  		return -EMSGSIZE; -	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && +	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&  	    nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),  		    &ct_info->mark))  		return -EMSGSIZE;  	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && -	    nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), -		    &ct_info->label)) +	    labels_nonzero(&ct_info->labels.mask) && +	    nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), +		    &ct_info->labels))  		return -EMSGSIZE;  	if (ct_info->helper) {  		if (nla_put_string(skb, OVS_CT_ATTR_HELPER, @@ 
-737,7 +758,7 @@ void ovs_ct_free_action(const struct nlattr *a)  void ovs_ct_init(struct net *net)  { -	unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; +	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;  	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);  	if (nf_connlabels_get(net, n_bits)) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 43f5dd7a5577..a7544f405c16 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -34,6 +34,10 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,  void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);  int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);  void ovs_ct_free_action(const struct nlattr *a); + +#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ +			   OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ +			   OVS_CS_F_INVALID | OVS_CS_F_TRACKED)  #else  #include <linux/errno.h> @@ -63,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,  				 struct sw_flow_key *key,  				 const struct ovs_conntrack_info *info)  { +	kfree_skb(skb);  	return -ENOTSUPP;  } @@ -72,7 +77,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,  	key->ct.state = 0;  	key->ct.zone = 0;  	key->ct.mark = 0; -	memset(&key->ct.label, 0, sizeof(key->ct.label)); +	memset(&key->ct.labels, 0, sizeof(key->ct.labels));  }  static inline int ovs_ct_put_key(const struct sw_flow_key *key, @@ -82,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,  }  static inline void ovs_ct_free_action(const struct nlattr *a) { } + +#define CT_SUPPORTED_MASK 0  #endif /* CONFIG_NF_CONNTRACK */  #endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b816ff871528..91a8b004dc51 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -91,8 +91,7 @@ static bool 
ovs_must_notify(struct genl_family *family, struct genl_info *info,  static void ovs_notify(struct genl_family *family,  		       struct sk_buff *skb, struct genl_info *info)  { -	genl_notify(family, skb, genl_info_net(info), info->snd_portid, -		    0, info->nlhdr, GFP_KERNEL); +	genl_notify(family, skb, info, 0, GFP_KERNEL);  }  /** @@ -490,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,  	if (upcall_info->egress_tun_info) {  		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); -		err = ovs_nla_put_egress_tunnel_key(user_skb, -						    upcall_info->egress_tun_info, -						    upcall_info->egress_tun_opts); +		err = ovs_nla_put_tunnel_info(user_skb, +					      upcall_info->egress_tun_info);  		BUG_ON(err);  		nla_nest_end(user_skb, nla);  	} @@ -1177,7 +1175,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)  						info, OVS_FLOW_CMD_NEW, false,  						ufid_flags); -		if (unlikely(IS_ERR(reply))) { +		if (IS_ERR(reply)) {  			error = PTR_ERR(reply);  			goto err_unlock_ovs;  		} diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 +117,6 @@ struct ovs_skb_cb {   */  struct dp_upcall_info {  	struct ip_tunnel_info *egress_tun_info; -	const void *egress_tun_opts;  	const struct nlattr *userdata;  	const struct nlattr *actions;  	int actions_len; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c8db44ab2ee7..0ea128eeeab2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,  {  	/* Extract metadata from packet. 
*/  	if (tun_info) { -		if (ip_tunnel_info_af(tun_info) != AF_INET) -			return -EINVAL; +		key->tun_proto = ip_tunnel_info_af(tun_info);  		memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));  		if (tun_info->options_len) { @@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,  			key->tun_opts_len = 0;  		}  	} else  { +		key->tun_proto = 0;  		key->tun_opts_len = 0;  		memset(&key->tun_key, 0, sizeof(key->tun_key));  	} diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index fe527d2dd4b7..1d055c559eaf 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -63,6 +63,7 @@ struct sw_flow_key {  		u32	skb_mark;	/* SKB mark. */  		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */  	} __packed phy; /* Safe when right after 'tun_key'. */ +	u8 tun_proto;			/* Protocol of encapsulating tunnel. */  	u32 ovs_flow_hash;		/* Datapath computed hash value.  */  	u32 recirc_id;			/* Recirculation ID.  */  	struct { @@ -116,7 +117,7 @@ struct sw_flow_key {  		u16 zone;  		u32 mark;  		u8 state; -		struct ovs_key_ct_label label; +		struct ovs_key_ct_labels labels;  	} ct;  } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5c030a4d7338..907d6fd28ede 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void)  	 * updating this function.  	 
*/  	return    nla_total_size(8)    /* OVS_TUNNEL_KEY_ATTR_ID */ -		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ -		+ nla_total_size(4)    /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ +		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ +		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */  		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */  		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */  		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ @@ -291,10 +291,10 @@ size_t ovs_key_attr_size(void)  		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */  		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */  		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */ -		+ nla_total_size(1)   /* OVS_KEY_ATTR_CT_STATE */ +		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */  		+ nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */  		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */ -		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABEL */ +		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */  		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */  		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */  		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */ @@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]  	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },  	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,  						.next = ovs_vxlan_ext_key_lens }, +	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) }, +	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },  };  /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  
*/ @@ -349,10 +351,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {  	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,  				     .next = ovs_tunnel_key_lens, },  	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) }, -	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u8) }, +	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },  	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },  	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) }, -	[OVS_KEY_ATTR_CT_LABEL]	 = { .len = sizeof(struct ovs_key_ct_label) }, +	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },  };  static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -542,15 +544,15 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,  	return 0;  } -static int ipv4_tun_from_nlattr(const struct nlattr *attr, -				struct sw_flow_match *match, bool is_mask, -				bool log) +static int ip_tun_from_nlattr(const struct nlattr *attr, +			      struct sw_flow_match *match, bool is_mask, +			      bool log)  { -	struct nlattr *a; -	int rem; -	bool ttl = false; +	bool ttl = false, ipv4 = false, ipv6 = false;  	__be16 tun_flags = 0;  	int opts_type = 0; +	struct nlattr *a; +	int rem;  	nla_for_each_nested(a, attr, rem) {  		int type = nla_type(a); @@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,  		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:  			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,  					nla_get_in_addr(a), is_mask); +			ipv4 = true;  			break;  		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:  			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,  					nla_get_in_addr(a), is_mask); +			ipv4 = true; +			break; +		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: +			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, +					nla_get_in6_addr(a), is_mask); +			ipv6 = true; +			break; +		case OVS_TUNNEL_KEY_ATTR_IPV6_DST: +			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, +					nla_get_in6_addr(a), is_mask); +			ipv6 = true;  			break;  		case 
OVS_TUNNEL_KEY_ATTR_TOS:  			SW_FLOW_KEY_PUT(match, tun_key.tos, @@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,  			opts_type = type;  			break;  		default: -			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", +			OVS_NLERR(log, "Unknown IP tunnel attribute %d",  				  type);  			return -EINVAL;  		}  	}  	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); +	if (is_mask) +		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); +	else +		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET, +				false);  	if (rem > 0) { -		OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", +		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",  			  rem);  		return -EINVAL;  	} +	if (ipv4 && ipv6) { +		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); +		return -EINVAL; +	} +  	if (!is_mask) { -		if (!match->key->tun_key.u.ipv4.dst) { +		if (!ipv4 && !ipv6) { +			OVS_NLERR(log, "IP tunnel dst address not specified"); +			return -EINVAL; +		} +		if (ipv4 && !match->key->tun_key.u.ipv4.dst) {  			OVS_NLERR(log, "IPv4 tunnel dst address is zero");  			return -EINVAL;  		} +		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { +			OVS_NLERR(log, "IPv6 tunnel dst address is zero"); +			return -EINVAL; +		}  		if (!ttl) { -			OVS_NLERR(log, "IPv4 tunnel TTL not specified."); +			OVS_NLERR(log, "IP tunnel TTL not specified.");  			return -EINVAL;  		}  	} @@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb,  	return 0;  } -static int __ipv4_tun_to_nlattr(struct sk_buff *skb, -				const struct ip_tunnel_key *output, -				const void *tun_opts, int swkey_tun_opts_len) +static int __ip_tun_to_nlattr(struct sk_buff *skb, +			      const struct ip_tunnel_key *output, +			      const void *tun_opts, int swkey_tun_opts_len, +			      unsigned short tun_proto)  {  	if (output->tun_flags & TUNNEL_KEY &&  	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))  		return -EMSGSIZE; 
-	if (output->u.ipv4.src && -	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, -			    output->u.ipv4.src)) -		return -EMSGSIZE; -	if (output->u.ipv4.dst && -	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, -			    output->u.ipv4.dst)) -		return -EMSGSIZE; +	switch (tun_proto) { +	case AF_INET: +		if (output->u.ipv4.src && +		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, +				    output->u.ipv4.src)) +			return -EMSGSIZE; +		if (output->u.ipv4.dst && +		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, +				    output->u.ipv4.dst)) +			return -EMSGSIZE; +		break; +	case AF_INET6: +		if (!ipv6_addr_any(&output->u.ipv6.src) && +		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, +				     &output->u.ipv6.src)) +			return -EMSGSIZE; +		if (!ipv6_addr_any(&output->u.ipv6.dst) && +		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, +				     &output->u.ipv6.dst)) +			return -EMSGSIZE; +		break; +	}  	if (output->tos &&  	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))  		return -EMSGSIZE; @@ -717,7 +764,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,  	if ((output->tun_flags & TUNNEL_OAM) &&  	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))  		return -EMSGSIZE; -	if (tun_opts) { +	if (swkey_tun_opts_len) {  		if (output->tun_flags & TUNNEL_GENEVE_OPT &&  		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,  			    swkey_tun_opts_len, tun_opts)) @@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,  	return 0;  } -static int ipv4_tun_to_nlattr(struct sk_buff *skb, -			      const struct ip_tunnel_key *output, -			      const void *tun_opts, int swkey_tun_opts_len) +static int ip_tun_to_nlattr(struct sk_buff *skb, +			    const struct ip_tunnel_key *output, +			    const void *tun_opts, int swkey_tun_opts_len, +			    unsigned short tun_proto)  {  	struct nlattr *nla;  	int err; @@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,  	if (!nla)  		return -EMSGSIZE; -	err = 
__ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); +	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, +				 tun_proto);  	if (err)  		return err; @@ -749,13 +798,13 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,  	return 0;  } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, -				  const struct ip_tunnel_info *egress_tun_info, -				  const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, +			    struct ip_tunnel_info *tun_info)  { -	return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, -				    egress_tun_opts, -				    egress_tun_info->options_len); +	return __ip_tun_to_nlattr(skb, &tun_info->key, +				  ip_tunnel_info_opts(tun_info), +				  tun_info->options_len, +				  ip_tunnel_info_af(tun_info));  }  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -806,15 +855,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,  		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);  	}  	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { -		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, -					 is_mask, log) < 0) +		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, +				       is_mask, log) < 0)  			return -EINVAL;  		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);  	}  	if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&  	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { -		u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); +		u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); + +		if (ct_state & ~CT_SUPPORTED_MASK) { +			OVS_NLERR(log, "ct_state flags %08x unsupported", +				  ct_state); +			return -EINVAL; +		}  		SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);  		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); @@ -833,14 +888,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,  		SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);  		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);  	} -	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) 
&& -	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { -		const struct ovs_key_ct_label *cl; +	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && +	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { +		const struct ovs_key_ct_labels *cl; -		cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); -		SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, +		cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); +		SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,  				   sizeof(*cl), is_mask); -		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); +		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);  	}  	return 0;  } @@ -1093,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,  		} else {  			memset(nla_data(nla), val, nla_len(nla));  		} + +		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) +			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;  	}  } @@ -1194,7 +1252,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,  			/* The userspace does not send tunnel attributes that  			 * are 0, but we should not wildcard them nonetheless.  			 
*/ -			if (match->key->tun_key.u.ipv4.dst) +			if (match->key->tun_proto)  				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,  							 0xff, true); @@ -1367,14 +1425,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,  	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))  		goto nla_put_failure; -	if ((swkey->tun_key.u.ipv4.dst || is_mask)) { +	if ((swkey->tun_proto || is_mask)) {  		const void *opts = NULL;  		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)  			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); -		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, -				       swkey->tun_opts_len)) +		if (ip_tun_to_nlattr(skb, &output->tun_key, opts, +				     swkey->tun_opts_len, swkey->tun_proto))  			goto nla_put_failure;  	} @@ -1877,7 +1935,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,  	int err = 0, start, opts_type;  	ovs_match_init(&match, &key, NULL); -	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); +	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);  	if (opts_type < 0)  		return opts_type; @@ -1913,6 +1971,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,  	tun_info = &tun_dst->u.tun_info;  	tun_info->mode = IP_TUNNEL_INFO_TX; +	if (key.tun_proto == AF_INET6) +		tun_info->mode |= IP_TUNNEL_INFO_IPV6;  	tun_info->key = key.tun_key;  	/* We need to store the options in the action itself since @@ -1973,7 +2033,7 @@ static int validate_set(const struct nlattr *a,  	case OVS_KEY_ATTR_PRIORITY:  	case OVS_KEY_ATTR_SKB_MARK:  	case OVS_KEY_ATTR_CT_MARK: -	case OVS_KEY_ATTR_CT_LABEL: +	case OVS_KEY_ATTR_CT_LABELS:  	case OVS_KEY_ATTR_ETHERNET:  		break; @@ -2374,10 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)  		if (!start)  			return -EMSGSIZE; -		err = ipv4_tun_to_nlattr(skb, &tun_info->key, -					 tun_info->options_len ? 
-					     ip_tunnel_info_opts(tun_info) : NULL, -					 tun_info->options_len); +		err = ovs_nla_put_tunnel_info(skb, tun_info);  		if (err)  			return err;  		nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);  int ovs_nla_get_match(struct net *, struct sw_flow_match *,  		      const struct nlattr *key, const struct nlattr *mask,  		      bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, -				  const struct ip_tunnel_info *, -				  const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, +			    struct ip_tunnel_info *tun_info);  bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);  int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index f2ea83ba4763..d073fff82fdb 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void)  	/* Initialize the default stat node. */  	stats = kmem_cache_alloc_node(flow_stats_cache, -				      GFP_KERNEL | __GFP_ZERO, 0); +				      GFP_KERNEL | __GFP_ZERO, +				      node_online(0) ? 
0 : NUMA_NO_NODE);  	if (!stats)  		goto err; @@ -427,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key,  static int flow_key_start(const struct sw_flow_key *key)  { -	if (key->tun_key.u.ipv4.dst) +	if (key->tun_proto)  		return 0;  	else  		return rounddown(offsetof(struct sw_flow_key, phy), diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2735e9c4a3b8..efb736bb6855 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,  	return 0;  } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -				      struct dp_upcall_info *upcall) -{ -	struct geneve_port *geneve_port = geneve_vport(vport); -	struct net *net = ovs_dp_get_net(vport->dp); -	__be16 dport = htons(geneve_port->port_no); -	__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - -	return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), -					  skb, IPPROTO_UDP, sport, dport); -} -  static struct vport *geneve_tnl_create(const struct vport_parms *parms)  {  	struct net *net = ovs_dp_get_net(parms->dp); @@ -128,9 +116,8 @@ static struct vport_ops ovs_geneve_vport_ops = {  	.create		= geneve_create,  	.destroy	= ovs_netdev_tunnel_destroy,  	.get_options	= geneve_get_options, -	.send		= ovs_netdev_send, +	.send		= dev_queue_xmit,  	.owner          = THIS_MODULE, -	.get_egress_tun_info	= geneve_get_egress_tun_info,  };  static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4d24481669c9..c3257d78d3d2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)  	return ovs_netdev_link(vport, parms->name);  } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -				   struct dp_upcall_info *upcall) -{ -	return 
ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), -					  skb, IPPROTO_GRE, 0, 0); -} -  static struct vport_ops ovs_gre_vport_ops = {  	.type		= OVS_VPORT_TYPE_GRE,  	.create		= gre_create, -	.send		= ovs_netdev_send, -	.get_egress_tun_info	= gre_get_egress_tun_info, +	.send		= dev_queue_xmit,  	.destroy	= ovs_netdev_tunnel_destroy,  	.owner		= THIS_MODULE,  }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 388b8a6bf112..ec76398a792f 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)  	free_netdev(dev);  } +static struct rtnl_link_stats64 * +internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ +	int i; + +	memset(stats, 0, sizeof(*stats)); +	stats->rx_errors  = dev->stats.rx_errors; +	stats->tx_errors  = dev->stats.tx_errors; +	stats->tx_dropped = dev->stats.tx_dropped; +	stats->rx_dropped = dev->stats.rx_dropped; + +	for_each_possible_cpu(i) { +		const struct pcpu_sw_netstats *percpu_stats; +		struct pcpu_sw_netstats local_stats; +		unsigned int start; + +		percpu_stats = per_cpu_ptr(dev->tstats, i); + +		do { +			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); +			local_stats = *percpu_stats; +		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + +		stats->rx_bytes         += local_stats.rx_bytes; +		stats->rx_packets       += local_stats.rx_packets; +		stats->tx_bytes         += local_stats.tx_bytes; +		stats->tx_packets       += local_stats.tx_packets; +	} + +	return stats; +} +  static const struct net_device_ops internal_dev_netdev_ops = {  	.ndo_open = internal_dev_open,  	.ndo_stop = internal_dev_stop,  	.ndo_start_xmit = internal_dev_xmit,  	.ndo_set_mac_address = eth_mac_addr,  	.ndo_change_mtu = internal_dev_change_mtu, +	.ndo_get_stats64 = internal_get_stats,  };  static struct rtnl_link_ops internal_dev_link_ops 
__read_mostly = { @@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)  		err = -ENOMEM;  		goto error_free_vport;  	} +	vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); +	if (!vport->dev->tstats) { +		err = -ENOMEM; +		goto error_free_netdev; +	}  	dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));  	internal_dev = internal_dev_priv(vport->dev); @@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)  	rtnl_lock();  	err = register_netdevice(vport->dev);  	if (err) -		goto error_free_netdev; +		goto error_unlock;  	dev_set_promiscuity(vport->dev, 1);  	rtnl_unlock(); @@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)  	return vport; -error_free_netdev: +error_unlock:  	rtnl_unlock(); +	free_percpu(vport->dev->tstats); +error_free_netdev:  	free_netdev(vport->dev);  error_free_vport:  	ovs_vport_free(vport); @@ -198,26 +238,25 @@ static void internal_dev_destroy(struct vport *vport)  	/* unregister_netdevice() waits for an RCU grace period. 
*/  	unregister_netdevice(vport->dev); - +	free_percpu(vport->dev->tstats);  	rtnl_unlock();  } -static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) +static netdev_tx_t internal_dev_recv(struct sk_buff *skb)  { -	struct net_device *netdev = vport->dev; +	struct net_device *netdev = skb->dev;  	struct pcpu_sw_netstats *stats;  	if (unlikely(!(netdev->flags & IFF_UP))) {  		kfree_skb(skb);  		netdev->stats.rx_dropped++; -		return; +		return NETDEV_TX_OK;  	}  	skb_dst_drop(skb);  	nf_reset(skb);  	secpath_reset(skb); -	skb->dev = netdev;  	skb->pkt_type = PACKET_HOST;  	skb->protocol = eth_type_trans(skb, netdev);  	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -229,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)  	u64_stats_update_end(&stats->syncp);  	netif_rx(skb); +	return NETDEV_TX_OK;  }  static struct vport_ops ovs_internal_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index f7e8dcce7ada..b327368a3848 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)  }  EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); -static unsigned int packet_length(const struct sk_buff *skb) -{ -	unsigned int length = skb->len - ETH_HLEN; - -	if (skb->protocol == htons(ETH_P_8021Q)) -		length -= VLAN_HLEN; - -	return length; -} - -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb) -{ -	int mtu = vport->dev->mtu; - -	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { -		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", -				     vport->dev->name, -				     packet_length(skb), mtu); -		vport->dev->stats.tx_errors++; -		goto drop; -	} - -	skb->dev = vport->dev; -	dev_queue_xmit(skb); -	return; - -drop: -	kfree_skb(skb); -} -EXPORT_SYMBOL_GPL(ovs_netdev_send); -  /* Returns null if this device is not attached to a datapath. 
*/  struct vport *ovs_netdev_get_vport(struct net_device *dev)  { @@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = {  	.type		= OVS_VPORT_TYPE_NETDEV,  	.create		= netdev_create,  	.destroy	= netdev_destroy, -	.send		= ovs_netdev_send, +	.send		= dev_queue_xmit,  };  int __init ovs_netdev_init(void) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index bf22fcedbc69..19e29c12adcc 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -27,7 +27,6 @@  struct vport *ovs_netdev_get_vport(struct net_device *dev);  struct vport *ovs_netdev_link(struct vport *vport, const char *name); -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb);  void ovs_netdev_detach_dev(struct vport *);  int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index c11413d5075f..1605691d9414 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)  	return ovs_netdev_link(vport, parms->name);  } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -				     struct dp_upcall_info *upcall) -{ -	struct vxlan_dev *vxlan = netdev_priv(vport->dev); -	struct net *net = ovs_dp_get_net(vport->dp); -	__be16 dst_port = vxlan_dev_dst_port(vxlan); -	__be16 src_port; -	int port_min; -	int port_max; - -	inet_get_local_port_range(net, &port_min, &port_max); -	src_port = udp_flow_src_port(net, skb, 0, 0, true); - -	return ovs_tunnel_get_egress_info(upcall, net, -					  skb, IPPROTO_UDP, -					  src_port, dst_port); -} -  static struct vport_ops ovs_vxlan_netdev_vport_ops = {  	.type			= OVS_VPORT_TYPE_VXLAN,  	.create			= vxlan_create,  	.destroy		= ovs_netdev_tunnel_destroy,  	.get_options		= vxlan_get_options, -	.send			= ovs_netdev_send, -	.get_egress_tun_info	= vxlan_get_egress_tun_info, +	.send			= dev_queue_xmit,  };  static int 
__init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index dc81dc619aa2..0ac0fd004d7e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport)   */  void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)  { -	struct net_device *dev = vport->dev; -	int i; - -	memset(stats, 0, sizeof(*stats)); -	stats->rx_errors  = dev->stats.rx_errors; -	stats->tx_errors  = dev->stats.tx_errors; -	stats->tx_dropped = dev->stats.tx_dropped; -	stats->rx_dropped = dev->stats.rx_dropped; - -	stats->rx_dropped += atomic_long_read(&dev->rx_dropped); -	stats->tx_dropped += atomic_long_read(&dev->tx_dropped); - -	for_each_possible_cpu(i) { -		const struct pcpu_sw_netstats *percpu_stats; -		struct pcpu_sw_netstats local_stats; -		unsigned int start; - -		percpu_stats = per_cpu_ptr(dev->tstats, i); - -		do { -			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); -			local_stats = *percpu_stats; -		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); - -		stats->rx_bytes		+= local_stats.rx_bytes; -		stats->rx_packets	+= local_stats.rx_packets; -		stats->tx_bytes		+= local_stats.tx_bytes; -		stats->tx_packets	+= local_stats.tx_packets; -	} +	const struct rtnl_link_stats64 *dev_stats; +	struct rtnl_link_stats64 temp; + +	dev_stats = dev_get_stats(vport->dev, &temp); +	stats->rx_errors  = dev_stats->rx_errors; +	stats->tx_errors  = dev_stats->tx_errors; +	stats->tx_dropped = dev_stats->tx_dropped; +	stats->rx_dropped = dev_stats->rx_dropped; + +	stats->rx_bytes	  = dev_stats->rx_bytes; +	stats->rx_packets = dev_stats->rx_packets; +	stats->tx_bytes	  = dev_stats->tx_bytes; +	stats->tx_packets = dev_stats->tx_packets;  }  /** @@ -460,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,  	OVS_CB(skb)->input_vport = vport;  	OVS_CB(skb)->mru = 0; +	if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { +		u32 mark; 
+ +		mark = skb->mark; +		skb_scrub_packet(skb, true); +		skb->mark = mark; +		tun_info = NULL; +	} +  	/* Extract flow from 'skb' into 'key'. */  	error = ovs_flow_key_extract(tun_info, skb, &key);  	if (unlikely(error)) { @@ -487,60 +480,32 @@ void ovs_vport_deferred_free(struct vport *vport)  }  EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, -			       struct net *net, -			       struct sk_buff *skb, -			       u8 ipproto, -			       __be16 tp_src, -			       __be16 tp_dst) +static unsigned int packet_length(const struct sk_buff *skb)  { -	struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; -	const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); -	const struct ip_tunnel_key *tun_key; -	u32 skb_mark = skb->mark; -	struct rtable *rt; -	struct flowi4 fl; - -	if (unlikely(!tun_info)) -		return -EINVAL; -	if (ip_tunnel_info_af(tun_info) != AF_INET) -		return -EINVAL; - -	tun_key = &tun_info->key; +	unsigned int length = skb->len - ETH_HLEN; -	/* Route lookup to get srouce IP address. -	 * The process may need to be changed if the corresponding process -	 * in vports ops changed. 
-	 */ -	rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); -	if (IS_ERR(rt)) -		return PTR_ERR(rt); - -	ip_rt_put(rt); +	if (skb->protocol == htons(ETH_P_8021Q)) +		length -= VLAN_HLEN; -	/* Generate egress_tun_info based on tun_info, -	 * saddr, tp_src and tp_dst -	 */ -	ip_tunnel_key_init(&egress_tun_info->key, -			   fl.saddr, tun_key->u.ipv4.dst, -			   tun_key->tos, -			   tun_key->ttl, -			   tp_src, tp_dst, -			   tun_key->tun_id, -			   tun_key->tun_flags); -	egress_tun_info->options_len = tun_info->options_len; -	egress_tun_info->mode = tun_info->mode; -	upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); -	return 0; +	return length;  } -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -				  struct dp_upcall_info *upcall) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb)  { -	/* get_egress_tun_info() is only implemented on tunnel ports. */ -	if (unlikely(!vport->ops->get_egress_tun_info)) -		return -EINVAL; +	int mtu = vport->dev->mtu; + +	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { +		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", +				     vport->dev->name, +				     packet_length(skb), mtu); +		vport->dev->stats.tx_errors++; +		goto drop; +	} + +	skb->dev = vport->dev; +	vport->ops->send(skb); +	return; -	return vport->ops->get_egress_tun_info(vport, skb, upcall); +drop: +	kfree_skb(skb);  } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index a413f3ae6a7b..bdfd82a7c064 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@  #include <linux/skbuff.h>  #include <linux/spinlock.h>  #include <linux/u64_stats_sync.h> -#include <net/route.h>  #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);  int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);  u32 
ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, -			       struct net *net, -			       struct sk_buff *, -			       u8 ipproto, -			       __be16 tp_src, -			       __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, -				  struct dp_upcall_info *upcall); -  /**   * struct vport_portids - array of netlink portids of a vport.   *                        must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms {   * have any configuration.   * @send: Send a packet on the device.   * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet.   */  struct vport_ops {  	enum ovs_vport_type type; @@ -153,10 +140,7 @@ struct vport_ops {  	int (*set_options)(struct vport *, struct nlattr *);  	int (*get_options)(const struct vport *, struct sk_buff *); -	void (*send)(struct vport *, struct sk_buff *); -	int (*get_egress_tun_info)(struct vport *, struct sk_buff *, -				   struct dp_upcall_info *upcall); - +	netdev_tx_t (*send) (struct sk_buff *skb);  	struct module *owner;  	struct list_head list;  }; @@ -234,9 +218,6 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,  	return rt;  } -static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) -{ -	vport->ops->send(vport, skb); -} +void ovs_vport_send(struct vport *vport, struct sk_buff *skb);  #endif /* vport.h */  | 
