diff options
| -rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/action.c | 139 | ||||
| -rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 33 | ||||
| -rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/main.h | 1 | ||||
| -rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/match.c | 34 | ||||
| -rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/offload.c | 42 | ||||
| -rw-r--r-- | include/net/flow_dissector.h | 17 | ||||
| -rw-r--r-- | include/uapi/linux/pkt_cls.h | 26 | ||||
| -rw-r--r-- | net/core/flow_dissector.c | 19 | ||||
| -rw-r--r-- | net/sched/cls_flower.c | 244 | 
9 files changed, 525 insertions, 30 deletions
| diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index e56b815a8dc6..0ba0356ec4e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -32,6 +32,7 @@   */  #include <linux/bitfield.h> +#include <net/geneve.h>  #include <net/pkt_cls.h>  #include <net/switchdev.h>  #include <net/tc_act/tc_csum.h> @@ -45,7 +46,15 @@  #include "main.h"  #include "../nfp_net_repr.h" -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS	(TUNNEL_CSUM | TUNNEL_KEY) +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM			cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY			cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT		cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS	(NFP_FL_TUNNEL_CSUM | \ +						 NFP_FL_TUNNEL_KEY | \ +						 NFP_FL_TUNNEL_GENEVE_OPT)  static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)  { @@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)  }  static int -nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, +			   const struct tc_action *action) +{ +	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); +	int opt_len, opt_cnt, act_start, tot_push_len; +	u8 *src = ip_tunnel_info_opts(ip_tun); + +	/* We need to populate the options in reverse order for HW. +	 * Therefore we go through the options, calculating the +	 * number of options and the total size, then we populate +	 * them in reverse order in the action list. 
+	 */ +	opt_cnt = 0; +	tot_push_len = 0; +	opt_len = ip_tun->options_len; +	while (opt_len > 0) { +		struct geneve_opt *opt = (struct geneve_opt *)src; + +		opt_cnt++; +		if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) +			return -EOPNOTSUPP; + +		tot_push_len += sizeof(struct nfp_fl_push_geneve) + +			       opt->length * 4; +		if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) +			return -EOPNOTSUPP; + +		opt_len -= sizeof(struct geneve_opt) + opt->length * 4; +		src += sizeof(struct geneve_opt) + opt->length * 4; +	} + +	if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) +		return -EOPNOTSUPP; + +	act_start = *list_len; +	*list_len += tot_push_len; +	src = ip_tunnel_info_opts(ip_tun); +	while (opt_cnt) { +		struct geneve_opt *opt = (struct geneve_opt *)src; +		struct nfp_fl_push_geneve *push; +		size_t act_size, len; + +		opt_cnt--; +		act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; +		tot_push_len -= act_size; +		len = act_start + tot_push_len; + +		push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; +		push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; +		push->head.len_lw = act_size >> NFP_FL_LW_SIZ; +		push->reserved = 0; +		push->class = opt->opt_class; +		push->type = opt->type; +		push->length = opt->length; +		memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + +		src += sizeof(struct geneve_opt) + opt->length * 4; +	} + +	return 0; +} + +static int +nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, +			struct nfp_fl_set_ipv4_udp_tun *set_tun,  			const struct tc_action *action,  			struct nfp_fl_pre_tunnel *pre_tun,  			enum nfp_flower_tun_type tun_type, @@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,  {  	size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);  	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); +	struct nfp_flower_priv *priv = app->priv;  	u32 tmp_set_ip_tun_type_index = 0; -	struct flowi4 flow = {};  	/* Currently support one pre-tunnel so index is always 0. 
*/  	int pretun_idx = 0; -	struct rtable *rt; -	struct net *net; -	int err; -	if (ip_tun->options_len) +	BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || +		     NFP_FL_TUNNEL_KEY	!= TUNNEL_KEY || +		     NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); +	if (ip_tun->options_len && +	    (tun_type != NFP_FL_TUNNEL_GENEVE || +	    !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)))  		return -EOPNOTSUPP; -	net = dev_net(netdev); -  	set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;  	set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,  	set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);  	set_tun->tun_id = ip_tun->key.tun_id; -	/* Do a route lookup to determine ttl - if fails then use default. -	 * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so -	 * must be defined here. -	 */ -	flow.daddr = ip_tun->key.u.ipv4.dst; -	flow.flowi4_proto = IPPROTO_UDP; -	rt = ip_route_output_key(net, &flow); -	err = PTR_ERR_OR_ZERO(rt); -	if (!err) { -		set_tun->ttl = ip4_dst_hoplimit(&rt->dst); -		ip_rt_put(rt); +	if (ip_tun->key.ttl) { +		set_tun->ttl = ip_tun->key.ttl;  	} else { -		set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; +		struct net *net = dev_net(netdev); +		struct flowi4 flow = {}; +		struct rtable *rt; +		int err; + +		/* Do a route lookup to determine ttl - if fails then use +		 * default. Note that CONFIG_INET is a requirement of +		 * CONFIG_NET_SWITCHDEV so must be defined here. 
+		 */ +		flow.daddr = ip_tun->key.u.ipv4.dst; +		flow.flowi4_proto = IPPROTO_UDP; +		rt = ip_route_output_key(net, &flow); +		err = PTR_ERR_OR_ZERO(rt); +		if (!err) { +			set_tun->ttl = ip4_dst_hoplimit(&rt->dst); +			ip_rt_put(rt); +		} else { +			set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; +		}  	}  	set_tun->tos = ip_tun->key.tos; -	if (!(ip_tun->key.tun_flags & TUNNEL_KEY) || +	if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||  	    ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)  		return -EOPNOTSUPP;  	set_tun->tun_flags = ip_tun->key.tun_flags; +	if (tun_type == NFP_FL_TUNNEL_GENEVE) { +		set_tun->tun_proto = htons(ETH_P_TEB); +		set_tun->tun_len = ip_tun->options_len / 4; +	} +  	/* Complete pre_tunnel action. */  	pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,  		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);  		*a_len += sizeof(struct nfp_fl_pre_tunnel); +		err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); +		if (err) +			return err; +  		set_tun = (void *)&nfp_fl->action_data[*a_len]; -		err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, -					      netdev); +		err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, +					      *tun_type, netdev);  		if (err)  			return err;  		*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f1eacd76b6..325954b829c8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -37,6 +37,7 @@  #include <linux/bitfield.h>  #include <linux/skbuff.h>  #include <linux/types.h> +#include <net/geneve.h>  #include "../nfp_app.h"  #include "../nfpcore/nfp_cpp.h" @@ -51,6 +52,7 @@  #define NFP_FLOWER_LAYER_VXLAN		BIT(7)  #define NFP_FLOWER_LAYER2_GENEVE	BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP	BIT(6)  #define 
NFP_FLOWER_MASK_VLAN_PRIO	GENMASK(15, 13)  #define NFP_FLOWER_MASK_VLAN_CFI	BIT(12) @@ -81,6 +83,11 @@  #define NFP_FL_MAX_A_SIZ		1216  #define NFP_FL_LW_SIZ			2 +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT	32 +#define NFP_FL_MAX_GENEVE_OPT_CNT	64 +#define NFP_FL_MAX_GENEVE_OPT_KEY	32 +  /* Action opcodes */  #define NFP_FL_ACTION_OPCODE_OUTPUT		0  #define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1 @@ -94,6 +101,7 @@  #define NFP_FL_ACTION_OPCODE_SET_TCP		15  #define NFP_FL_ACTION_OPCODE_PRE_LAG		16  #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL		17 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE	26  #define NFP_FL_ACTION_OPCODE_NUM		32  #define NFP_FL_OUT_FLAGS_LAST		BIT(15) @@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun {  	__be16 tun_flags;  	u8 ttl;  	u8 tos; -	__be32 extra[2]; +	__be32 extra; +	u8 tun_len; +	u8 res2; +	__be16 tun_proto; +}; + +struct nfp_fl_push_geneve { +	struct nfp_fl_act_head head; +	__be16 reserved; +	__be16 class; +	u8 type; +	u8 length; +	u8 opt_data[];  };  /* Metadata with L2 (1W/4B) @@ -346,7 +366,7 @@ struct nfp_flower_ipv6 {   * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+   * |                         ipv4_addr_dst                         |   * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |                            Reserved                           | + * |           Reserved            |      tos      |      ttl      |   * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+   * |                            Reserved                           |   * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -356,10 +376,17 @@ struct nfp_flower_ipv6 {  struct nfp_flower_ipv4_udp_tun {  	__be32 ip_src;  	__be32 ip_dst; -	__be32 reserved[2]; +	__be16 reserved1; +	u8 tos; +	u8 ttl; +	__be32 reserved2;  	__be32 tun_id;  }; +struct nfp_flower_geneve_options { +	u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; +  #define NFP_FL_TUN_VNI_OFFSET 8  /* The base header 
for a control message packet. diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index ef2114d13387..85f8209bf007 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -69,6 +69,7 @@ struct nfp_app;  /* Extra features bitmap. */  #define NFP_FL_FEATS_GENEVE		BIT(0)  #define NFP_FL_NBI_MTU_SETTING		BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT		BIT(2)  #define NFP_FL_FEATS_LAG		BIT(31)  struct nfp_fl_mask_id { diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 84f7a5dbea9d..a0c72f277faa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,  	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);  } +static int +nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, +			      bool mask_version) +{ +	struct fl_flow_key *target = mask_version ? flow->mask : flow->key; +	struct flow_dissector_key_enc_opts *opts; + +	opts = skb_flow_dissector_target(flow->dissector, +					 FLOW_DISSECTOR_KEY_ENC_OPTS, +					 target); +	memcpy(key_buf, opts->data, opts->len); + +	return 0; +} +  static void  nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,  				struct tc_cls_flower_offload *flow, @@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,  	struct fl_flow_key *target = mask_version ? 
flow->mask : flow->key;  	struct flow_dissector_key_ipv4_addrs *tun_ips;  	struct flow_dissector_key_keyid *vni; +	struct flow_dissector_key_ip *ip;  	memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,  		frame->ip_src = tun_ips->src;  		frame->ip_dst = tun_ips->dst;  	} + +	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { +		ip = skb_flow_dissector_target(flow->dissector, +					       FLOW_DISSECTOR_KEY_ENC_IP, +					       target); +		frame->tos = ip->tos; +		frame->ttl = ip->ttl; +	}  }  int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, @@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,  			nfp_flow->nfp_tun_ipv4_addr = tun_dst;  			nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);  		} + +		if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { +			err = nfp_flower_compile_geneve_opt(ext, flow, false); +			if (err) +				return err; + +			err = nfp_flower_compile_geneve_opt(msk, flow, true); +			if (err) +				return err; +		}  	}  	return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 6bc8a97f7e03..2edab01c3beb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,8 @@  	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \  	 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \  	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ +	 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ +	 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \  	 BIT(FLOW_DISSECTOR_KEY_MPLS) | \  	 BIT(FLOW_DISSECTOR_KEY_IP)) @@ -74,7 +76,9 @@  	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \  	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \  	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ -	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) +	 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ +	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ +	 
BIT(FLOW_DISSECTOR_KEY_ENC_IP))  #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \  	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ @@ -139,6 +143,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)  }  static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, +			  u32 *key_layer_two, int *key_size) +{ +	if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) +		return -EOPNOTSUPP; + +	if (enc_opts->len > 0) { +		*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; +		*key_size += sizeof(struct nfp_flower_geneve_options); +	} + +	return 0; +} + +static int  nfp_flower_calculate_key_layers(struct nfp_app *app,  				struct nfp_fl_key_ls *ret_key_ls,  				struct tc_cls_flower_offload *flow, @@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,  	u32 key_layer_two;  	u8 key_layer;  	int key_size; +	int err;  	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)  		return -EOPNOTSUPP; @@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,  			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {  		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;  		struct flow_dissector_key_ports *mask_enc_ports = NULL; +		struct flow_dissector_key_enc_opts *enc_op = NULL;  		struct flow_dissector_key_ports *enc_ports = NULL;  		struct flow_dissector_key_control *mask_enc_ctl =  			skb_flow_dissector_target(flow->dissector, @@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,  		if (mask_enc_ports->dst != cpu_to_be16(~0))  			return -EOPNOTSUPP; +		if (dissector_uses_key(flow->dissector, +				       FLOW_DISSECTOR_KEY_ENC_OPTS)) { +			enc_op = skb_flow_dissector_target(flow->dissector, +							   FLOW_DISSECTOR_KEY_ENC_OPTS, +							   flow->key); +		} +  		switch (enc_ports->dst) {  		case htons(NFP_FL_VXLAN_PORT):  			*tun_type = NFP_FL_TUNNEL_VXLAN;  			key_layer |= NFP_FLOWER_LAYER_VXLAN;  			key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + +			if (enc_op) +				return 
-EOPNOTSUPP;  			break;  		case htons(NFP_FL_GENEVE_PORT):  			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) @@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,  			key_size += sizeof(struct nfp_flower_ext_meta);  			key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;  			key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + +			if (!enc_op) +				break; +			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) +				return -EOPNOTSUPP; +			err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, +							&key_size); +			if (err) +				return err;  			break;  		default:  			return -EOPNOTSUPP; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2a17f041f7a1..6a4586dcdede 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {  		mpls_label:20;  }; +#define FLOW_DIS_TUN_OPTS_MAX 255 +/** + * struct flow_dissector_key_enc_opts: + * @data: tunnel option data + * @len: length of tunnel option data + * @dst_opt_type: tunnel option type + */ +struct flow_dissector_key_enc_opts { +	u8 data[FLOW_DIS_TUN_OPTS_MAX];	/* Using IP_TUNNEL_OPTS_MAX is desired +					 * here but seems difficult to #include +					 */ +	u8 len; +	__be16 dst_opt_type; +}; +  struct flow_dissector_key_keyid {  	__be32	keyid;  }; @@ -208,6 +223,8 @@ enum flow_dissector_key_id {  	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */  	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */  	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ +	FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ +  	FLOW_DISSECTOR_KEY_MAX,  }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 48e5b5d49a34..be382fb0592d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -480,12 +480,38 @@ enum {  	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */  	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */ +	TCA_FLOWER_KEY_ENC_OPTS, +	
TCA_FLOWER_KEY_ENC_OPTS_MASK, +  	__TCA_FLOWER_MAX,  };  #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)  enum { +	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, +	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested +					 * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ +					 * attributes +					 */ +	__TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { +	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, +	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */ +	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */ +	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */ + +	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ +		(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum {  	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),  	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),  }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 08a5184f4b34..ce9eeeb7c024 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,  	    !dissector_uses_key(flow_dissector,  				FLOW_DISSECTOR_KEY_ENC_PORTS) &&  	    !dissector_uses_key(flow_dissector, -				FLOW_DISSECTOR_KEY_ENC_IP)) +				FLOW_DISSECTOR_KEY_ENC_IP) && +	    !dissector_uses_key(flow_dissector, +				FLOW_DISSECTOR_KEY_ENC_OPTS))  		return;  	info = skb_tunnel_info(skb); @@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,  		ip->tos = key->tos;  		ip->ttl = key->ttl;  	} + +	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { +		struct flow_dissector_key_enc_opts *enc_opt; + +		enc_opt = skb_flow_dissector_target(flow_dissector, +						    FLOW_DISSECTOR_KEY_ENC_OPTS, +						    target_container); + +		if (info->options_len) { +			enc_opt->len = info->options_len; +			ip_tunnel_info_opts_get(enc_opt->data, info); +			enc_opt->dst_opt_type = info->key.tun_flags & +						TUNNEL_OPTIONS_PRESENT; +		} +	}  
}  EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3b69bb6f4b0..9da244235170 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -24,6 +24,7 @@  #include <net/pkt_cls.h>  #include <net/ip.h>  #include <net/flow_dissector.h> +#include <net/geneve.h>  #include <net/dst.h>  #include <net/dst_metadata.h> @@ -53,6 +54,7 @@ struct fl_flow_key {  	struct flow_dissector_key_tcp tcp;  	struct flow_dissector_key_ip ip;  	struct flow_dissector_key_ip enc_ip; +	struct flow_dissector_key_enc_opts enc_opts;  } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */  struct fl_flow_mask_range { @@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {  	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },  	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },  	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, +	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED }, +	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED }, +}; + +static const struct nla_policy +enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { +	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { +	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 }, +	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 }, +	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY, +						       .len = 128 },  };  static void fl_set_key_val(struct nlattr **tb, @@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,  	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));  } +static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, +			     int depth, int option_len, +			     struct netlink_ext_ack *extack) +{ +	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1]; +	struct 
nlattr *class = NULL, *type = NULL, *data = NULL; +	struct geneve_opt *opt; +	int err, data_len = 0; + +	if (option_len > sizeof(struct geneve_opt)) +		data_len = option_len - sizeof(struct geneve_opt); + +	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; +	memset(opt, 0xff, option_len); +	opt->length = data_len / 4; +	opt->r1 = 0; +	opt->r2 = 0; +	opt->r3 = 0; + +	/* If no mask has been provided we assume an exact match. */ +	if (!depth) +		return sizeof(struct geneve_opt) + data_len; + +	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) { +		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask"); +		return -EINVAL; +	} + +	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +			       nla, geneve_opt_policy, extack); +	if (err < 0) +		return err; + +	/* We are not allowed to omit any of CLASS, TYPE or DATA +	 * fields from the key. +	 */ +	if (!option_len && +	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] || +	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] || +	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) { +		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); +		return -EINVAL; +	} + +	/* Omitting any of CLASS, TYPE or DATA fields is allowed +	 * for the mask. 
+	 */ +	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) { +		int new_len = key->enc_opts.len; + +		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]; +		data_len = nla_len(data); +		if (data_len < 4) { +			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); +			return -ERANGE; +		} +		if (data_len % 4) { +			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); +			return -ERANGE; +		} + +		new_len += sizeof(struct geneve_opt) + data_len; +		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX); +		if (new_len > FLOW_DIS_TUN_OPTS_MAX) { +			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); +			return -ERANGE; +		} +		opt->length = data_len / 4; +		memcpy(opt->opt_data, nla_data(data), data_len); +	} + +	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) { +		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]; +		opt->opt_class = nla_get_be16(class); +	} + +	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) { +		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]; +		opt->type = nla_get_u8(type); +	} + +	return sizeof(struct geneve_opt) + data_len; +} + +static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, +			  struct fl_flow_key *mask, +			  struct netlink_ext_ack *extack) +{ +	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; +	int option_len, key_depth, msk_depth = 0; + +	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); + +	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { +		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); +		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); +	} + +	nla_for_each_attr(nla_opt_key, nla_enc_key, +			  nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { +		switch (nla_type(nla_opt_key)) { +		case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: +			option_len = 0; +			key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; +			option_len = fl_set_geneve_opt(nla_opt_key, key, +						       key_depth, option_len, +						       extack); +			if (option_len < 0) +				
return option_len; + +			key->enc_opts.len += option_len; +			/* At the same time we need to parse through the mask +			 * in order to verify exact and mask attribute lengths. +			 */ +			mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; +			option_len = fl_set_geneve_opt(nla_opt_msk, mask, +						       msk_depth, option_len, +						       extack); +			if (option_len < 0) +				return option_len; + +			mask->enc_opts.len += option_len; +			if (key->enc_opts.len != mask->enc_opts.len) { +				NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); +				return -EINVAL; +			} + +			if (msk_depth) +				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); +			break; +		default: +			NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); +			return -EINVAL; +		} +	} + +	return 0; +} +  static int fl_set_key(struct net *net, struct nlattr **tb,  		      struct fl_flow_key *key, struct fl_flow_key *mask,  		      struct netlink_ext_ack *extack) @@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,  	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); +	if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { +		ret = fl_set_enc_opt(tb, key, mask, extack); +		if (ret) +			return ret; +	} +  	if (tb[TCA_FLOWER_KEY_FLAGS])  		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,  			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);  	FL_KEY_SET_IF_MASKED(mask, keys, cnt,  			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); +	FL_KEY_SET_IF_MASKED(mask, keys, cnt, +			     FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);  	skb_flow_dissector_init(dissector, keys, cnt);  } @@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)  	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);  } +static int fl_dump_key_geneve_opt(struct sk_buff *skb, +				  struct flow_dissector_key_enc_opts *enc_opts) +{ +	struct geneve_opt *opt; +	struct nlattr *nest; +	int 
opt_off = 0; + +	nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); +	if (!nest) +		goto nla_put_failure; + +	while (enc_opts->len > opt_off) { +		opt = (struct geneve_opt *)&enc_opts->data[opt_off]; + +		if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, +				 opt->opt_class)) +			goto nla_put_failure; +		if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, +			       opt->type)) +			goto nla_put_failure; +		if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, +			    opt->length * 4, opt->opt_data)) +			goto nla_put_failure; + +		opt_off += sizeof(struct geneve_opt) + opt->length * 4; +	} +	nla_nest_end(skb, nest); +	return 0; + +nla_put_failure: +	nla_nest_cancel(skb, nest); +	return -EMSGSIZE; +} + +static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, +			       struct flow_dissector_key_enc_opts *enc_opts) +{ +	struct nlattr *nest; +	int err; + +	if (!enc_opts->len) +		return 0; + +	nest = nla_nest_start(skb, enc_opt_type); +	if (!nest) +		goto nla_put_failure; + +	switch (enc_opts->dst_opt_type) { +	case TUNNEL_GENEVE_OPT: +		err = fl_dump_key_geneve_opt(skb, enc_opts); +		if (err) +			goto nla_put_failure; +		break; +	default: +		goto nla_put_failure; +	} +	nla_nest_end(skb, nest); +	return 0; + +nla_put_failure: +	nla_nest_cancel(skb, nest); +	return -EMSGSIZE; +} + +static int fl_dump_key_enc_opt(struct sk_buff *skb, +			       struct flow_dissector_key_enc_opts *key_opts, +			       struct flow_dissector_key_enc_opts *msk_opts) +{ +	int err; + +	err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts); +	if (err) +		return err; + +	return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts); +} +  static int fl_dump_key(struct sk_buff *skb, struct net *net,  		       struct fl_flow_key *key, struct fl_flow_key *mask)  { @@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,  			    &mask->enc_tp.dst,  			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,  			    
sizeof(key->enc_tp.dst)) || -	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) +	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) || +	    fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))  		goto nla_put_failure;  	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) | 
