Diffstat (limited to 'net/openvswitch/actions.c')
-rw-r--r--	net/openvswitch/actions.c	| 324
1 file changed, 146 insertions(+), 178 deletions(-)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ca3ebfdb3023..792ca44a461d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -17,71 +17,38 @@
 #include <linux/if_vlan.h>
 #include <net/dst.h>
+#include <net/gso.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
+#include "drop.h"
 #include "flow.h"
 #include "conntrack.h"
 #include "vport.h"
 #include "flow_netlink.h"
 #include "openvswitch_trace.h"
 
-struct deferred_action {
-	struct sk_buff *skb;
-	const struct nlattr *actions;
-	int actions_len;
-
-	/* Store pkt_key clone when creating deferred action. */
-	struct sw_flow_key pkt_key;
-};
-
-#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
-struct ovs_frag_data {
-	unsigned long dst;
-	struct vport *vport;
-	struct ovs_skb_cb cb;
-	__be16 inner_protocol;
-	u16 network_offset;	/* valid only for MPLS */
-	u16 vlan_tci;
-	__be16 vlan_proto;
-	unsigned int l2_len;
-	u8 mac_proto;
-	u8 l2_data[MAX_L2_LEN];
-};
-
-static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
-
-#define DEFERRED_ACTION_FIFO_SIZE 10
-#define OVS_RECURSION_LIMIT 5
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
-struct action_fifo {
-	int head;
-	int tail;
-	/* Deferred action fifo queue storage. */
-	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
-};
-
-struct action_flow_keys {
-	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
-};
-
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
-static DEFINE_PER_CPU(int, exec_actions_level);
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
 
 /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
  * space. Return NULL if out of key spaces.
  */
 static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
 {
-	struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
-	int level = this_cpu_read(exec_actions_level);
+	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
+	struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
+	int level = ovs_pcpu->exec_level;
 	struct sw_flow_key *key = NULL;
 
 	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
@@ -125,10 +92,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
 				    const struct nlattr *actions,
 				    const int actions_len)
 {
-	struct action_fifo *fifo;
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
 	struct deferred_action *da;
 
-	fifo = this_cpu_ptr(action_fifos);
 	da = action_fifo_put(fifo);
 	if (da) {
 		da->skb = skb;
@@ -230,14 +196,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_vlan *vlan)
 {
+	int err;
+
 	if (skb_vlan_tag_present(skb)) {
 		invalidate_flow_key(key);
 	} else {
 		key->eth.vlan.tci = vlan->vlan_tci;
 		key->eth.vlan.tpid = vlan->vlan_tpid;
 	}
-	return skb_vlan_push(skb, vlan->vlan_tpid,
-			     ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+	err = skb_vlan_push(skb, vlan->vlan_tpid,
+			    ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+	skb_reset_mac_len(skb);
+	return err;
 }
 
 /* 'src' is already properly masked. */
@@ -309,11 +279,18 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
-static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct nshhdr *nh)
+static noinline_for_stack int push_nsh(struct sk_buff *skb,
+				       struct sw_flow_key *key,
+				       const struct nlattr *a)
 {
+	u8 buffer[NSH_HDR_MAX_LEN];
+	struct nshhdr *nh = (struct nshhdr *)buffer;
 	int err;
 
+	err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
+	if (err)
+		return err;
+
 	err = nsh_push(skb, nh);
 	if (err)
 		return err;
@@ -595,69 +572,6 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	return 0;
 }
 
-static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
-		   const struct nlattr *a)
-{
-	struct nshhdr *nh;
-	size_t length;
-	int err;
-	u8 flags;
-	u8 ttl;
-	int i;
-
-	struct ovs_key_nsh key;
-	struct ovs_key_nsh mask;
-
-	err = nsh_key_from_nlattr(a, &key, &mask);
-	if (err)
-		return err;
-
-	/* Make sure the NSH base header is there */
-	if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
-		return -ENOMEM;
-
-	nh = nsh_hdr(skb);
-	length = nsh_hdr_len(nh);
-
-	/* Make sure the whole NSH header is there */
-	err = skb_ensure_writable(skb, skb_network_offset(skb) +
-				       length);
-	if (unlikely(err))
-		return err;
-
-	nh = nsh_hdr(skb);
-	skb_postpull_rcsum(skb, nh, length);
-	flags = nsh_get_flags(nh);
-	flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
-	flow_key->nsh.base.flags = flags;
-	ttl = nsh_get_ttl(nh);
-	ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
-	flow_key->nsh.base.ttl = ttl;
-	nsh_set_flags_and_ttl(nh, flags, ttl);
-	nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
-				  mask.base.path_hdr);
-	flow_key->nsh.base.path_hdr = nh->path_hdr;
-	switch (nh->mdtype) {
-	case NSH_M_TYPE1:
-		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
-			nh->md1.context[i] =
-			    OVS_MASKED(nh->md1.context[i], key.context[i],
-				       mask.context[i]);
-		}
-		memcpy(flow_key->nsh.context, nh->md1.context,
-		       sizeof(nh->md1.context));
-		break;
-	case NSH_M_TYPE2:
-		memset(flow_key->nsh.context, 0,
-		       sizeof(flow_key->nsh.context));
-		break;
-	default:
-		return -EINVAL;
-	}
-	skb_postpush_rcsum(skb, nh, length);
-	return 0;
-}
-
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			__be16 new_port, __sum16 *check)
@@ -776,11 +690,11 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 static int ovs_vport_output(struct net *net, struct sock *sk,
 			    struct sk_buff *skb)
 {
-	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+	struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
 	struct vport *vport = data->vport;
 
 	if (skb_cow_head(skb, data->l2_len) < 0) {
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
 		return -ENOMEM;
 	}
 
@@ -828,7 +742,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
 	unsigned int hlen = skb_network_offset(skb);
 	struct ovs_frag_data *data;
 
-	data = this_cpu_ptr(&ovs_frag_data_storage);
+	data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
 	data->dst = skb->_skb_refdst;
 	data->vport = vport;
 	data->cb = *OVS_CB(skb);
@@ -851,6 +765,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 			 struct sk_buff *skb, u16 mru,
 			 struct sw_flow_key *key)
 {
+	enum ovs_drop_reason reason;
 	u16 orig_network_offset = 0;
 
 	if (eth_p_mpls(skb->protocol)) {
@@ -860,6 +775,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 
 	if (skb_network_offset(skb) > MAX_L2_LEN) {
 		OVS_NLERR(1, "L2 header too long to fragment");
+		reason = OVS_DROP_FRAG_L2_TOO_LONG;
 		goto err;
 	}
 
@@ -869,7 +785,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 		prepare_frag(vport, skb, orig_network_offset,
 			     ovs_key_mac_proto(key));
-		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE,
 			 DST_NOCOUNT);
 		ovs_rt.dst.dev = vport->dev;
 
@@ -886,7 +802,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 		prepare_frag(vport, skb, orig_network_offset,
 			     ovs_key_mac_proto(key));
 		memset(&ovs_rt, 0, sizeof(ovs_rt));
-		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE,
 			 DST_NOCOUNT);
 		ovs_rt.dst.dev = vport->dev;
 
@@ -900,12 +816,13 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
 			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
 			  vport->dev->mtu);
+		reason = OVS_DROP_FRAG_INVALID_PROTO;
 		goto err;
 	}
 
 	return;
err:
-	kfree_skb(skb);
+	ovs_kfree_skb_reason(skb, reason);
 }
 
 static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -913,7 +830,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
 {
 	struct vport *vport = ovs_vport_rcu(dp, out_port);
 
-	if (likely(vport)) {
+	if (likely(vport &&
+		   netif_running(vport->dev) &&
+		   netif_carrier_ok(vport->dev))) {
 		u16 mru = OVS_CB(skb)->mru;
 		u32 cutlen = OVS_CB(skb)->cutlen;
 
@@ -932,10 +851,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
 
 			ovs_fragment(net, vport, skb, mru, key);
 		} else {
-			kfree_skb(skb);
+			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
 		}
 	} else {
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
 	}
 }
 
@@ -952,16 +871,17 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	upcall.cmd = OVS_PACKET_CMD_ACTION;
 	upcall.mru = OVS_CB(skb)->mru;
 
-	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
-	     a = nla_next(a, &rem)) {
+	nla_for_each_nested(a, attr, rem) {
 		switch (nla_type(a)) {
 		case OVS_USERSPACE_ATTR_USERDATA:
 			upcall.userdata = a;
 			break;
 
 		case OVS_USERSPACE_ATTR_PID:
-			if (dp->user_features &
-			    OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+			if (OVS_CB(skb)->upcall_pid)
+				upcall.portid = OVS_CB(skb)->upcall_pid;
+			else if (dp->user_features &
+				 OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
 				upcall.portid =
 				   ovs_dp_get_upcall_portid(dp,
							    smp_processor_id());
@@ -1009,7 +929,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
 		return clone_execute(dp, skb, key, 0, nla_data(actions),
 				     nla_len(actions), true, false);
 
-	consume_skb(skb);
+	ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
 	return 0;
 }
 
@@ -1025,23 +945,33 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 	struct nlattr *sample_arg;
 	int rem = nla_len(attr);
 	const struct sample_arg *arg;
+	u32 init_probability;
 	bool clone_flow_key;
+	int err;
 
 	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
 	sample_arg = nla_data(attr);
 	arg = nla_data(sample_arg);
 	actions = nla_next(sample_arg, &rem);
+	init_probability = OVS_CB(skb)->probability;
 
 	if ((arg->probability != U32_MAX) &&
 	    (!arg->probability || get_random_u32() > arg->probability)) {
 		if (last)
-			consume_skb(skb);
+			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
 		return 0;
 	}
 
+	OVS_CB(skb)->probability = arg->probability;
+
 	clone_flow_key = !arg->exec;
-	return clone_execute(dp, skb, key, 0, actions, rem, last,
-			     clone_flow_key);
+	err = clone_execute(dp, skb, key, 0, actions, rem, last,
+			    clone_flow_key);
+
+	if (!last)
+		OVS_CB(skb)->probability = init_probability;
+
+	return err;
 }
 
 /* When 'last' is true, clone() should always consume the 'skb'.
@@ -1072,8 +1002,16 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 	struct ovs_action_hash *hash_act = nla_data(attr);
 	u32 hash = 0;
 
-	/* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
-	hash = skb_get_hash(skb);
+	if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+		/* OVS_HASH_ALG_L4 hashing type. */
+		hash = skb_get_hash(skb);
+	} else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
+		/* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't
+		 * extend past an encapsulated header.
+		 */
+		hash = __skb_get_hash_symmetric(skb);
+	}
+
 	hash = jhash_1word(hash, hash_act->hash_basis);
 	if (!hash)
 		hash = 0x1;
@@ -1129,10 +1067,6 @@ static int execute_masked_set_action(struct sk_buff *skb,
 				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
-	case OVS_KEY_ATTR_NSH:
-		err = set_nsh(skb, flow_key, a);
-		break;
-
 	case OVS_KEY_ATTR_IPV4:
 		err = set_ipv4(skb, flow_key, nla_data(a),
 			       get_mask(a, struct ovs_key_ipv4 *));
@@ -1169,6 +1103,7 @@ static int execute_masked_set_action(struct sk_buff *skb,
 	case OVS_KEY_ATTR_CT_LABELS:
 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
+	case OVS_KEY_ATTR_NSH:
 		err = -EINVAL;
 		break;
 	}
@@ -1273,6 +1208,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{
+	struct psample_group psample_group = {};
+	struct psample_metadata md = {};
+	const struct nlattr *a;
+	u32 rate;
+	int rem;
+
+	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+		switch (nla_type(a)) {
+		case OVS_PSAMPLE_ATTR_GROUP:
+			psample_group.group_num = nla_get_u32(a);
+			break;
+
+		case OVS_PSAMPLE_ATTR_COOKIE:
+			md.user_cookie = nla_data(a);
+			md.user_cookie_len = nla_len(a);
+			break;
+		}
+	}
+
+	psample_group.net = ovs_dp_get_net(dp);
+	md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+	md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+	md.rate_as_probability = 1;
+
+	rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+	psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{}
+#endif
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
@@ -1288,6 +1261,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		if (trace_ovs_do_execute_action_enabled())
 			trace_ovs_do_execute_action(dp, skb, key, a, rem);
 
+		/* Actions that rightfully have to consume the skb should do it
+		 * and return directly.
+		 */
 		switch (nla_type(a)) {
 		case OVS_ACTION_ATTR_OUTPUT: {
 			int port = nla_get_u32(a);
@@ -1323,6 +1299,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			output_userspace(dp, skb, key, a, attr,
 					 len, OVS_CB(skb)->cutlen);
 			OVS_CB(skb)->cutlen = 0;
+			if (nla_is_last(a, rem)) {
+				consume_skb(skb);
+				return 0;
+			}
 			break;
 
 		case OVS_ACTION_ATTR_HASH:
@@ -1419,17 +1399,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = pop_eth(skb, key);
 			break;
 
-		case OVS_ACTION_ATTR_PUSH_NSH: {
-			u8 buffer[NSH_HDR_MAX_LEN];
-			struct nshhdr *nh = (struct nshhdr *)buffer;
-
-			err = nsh_hdr_from_nlattr(nla_data(a), nh,
-						  NSH_HDR_MAX_LEN);
-			if (unlikely(err))
-				break;
-			err = push_nsh(skb, key, nh);
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			err = push_nsh(skb, key, nla_data(a));
 			break;
-		}
 
 		case OVS_ACTION_ATTR_POP_NSH:
 			err = pop_nsh(skb, key);
@@ -1437,7 +1409,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 		case OVS_ACTION_ATTR_METER:
 			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
-				consume_skb(skb);
+				ovs_kfree_skb_reason(skb, OVS_DROP_METER);
 				return 0;
 			}
 			break;
@@ -1468,15 +1440,33 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				return dec_ttl_exception_handler(dp, skb,
 								 key, a);
 			break;
+
+		case OVS_ACTION_ATTR_DROP: {
+			enum ovs_drop_reason reason = nla_get_u32(a)
+				? OVS_DROP_EXPLICIT_WITH_ERROR
+				: OVS_DROP_EXPLICIT;
+
+			ovs_kfree_skb_reason(skb, reason);
+			return 0;
+		}
+
+		case OVS_ACTION_ATTR_PSAMPLE:
+			execute_psample(dp, skb, a);
+			OVS_CB(skb)->cutlen = 0;
+			if (nla_is_last(a, rem)) {
+				consume_skb(skb);
+				return 0;
+			}
+			break;
 		}
 
 		if (unlikely(err)) {
-			kfree_skb(skb);
+			ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
 			return err;
 		}
 	}
 
-	consume_skb(skb);
+	ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
 	return 0;
 }
 
@@ -1510,16 +1500,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 	clone = clone_flow_key ? clone_key(key) : key;
 	if (clone) {
 		int err = 0;
-
 		if (actions) { /* Sample action */
 			if (clone_flow_key)
-				__this_cpu_inc(exec_actions_level);
+				__this_cpu_inc(ovs_pcpu_storage->exec_level);
 
 			err = do_execute_actions(dp, skb, clone,
 						 actions, len);
 
 			if (clone_flow_key)
-				__this_cpu_dec(exec_actions_level);
+				__this_cpu_dec(ovs_pcpu_storage->exec_level);
 		} else { /* Recirc action */
 			clone->recirc_id = recirc_id;
 			ovs_dp_process_packet(skb, clone);
@@ -1538,7 +1527,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 	/* Out of per CPU action FIFO space. Drop the 'skb' and
 	 * log an error.
 	 */
-	kfree_skb(skb);
+	ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
 
 	if (net_ratelimit()) {
 		if (actions) { /* Sample action */
@@ -1555,7 +1544,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 
 static void process_deferred_actions(struct datapath *dp)
 {
-	struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
 
 	/* Do not touch the FIFO in case there is no deferred actions. */
 	if (action_fifo_is_empty(fifo))
@@ -1586,11 +1575,11 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 {
 	int err, level;
 
-	level = __this_cpu_inc_return(exec_actions_level);
+	level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
 	if (unlikely(level > OVS_RECURSION_LIMIT)) {
 		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
 				     ovs_dp_name(dp));
-		kfree_skb(skb);
+		ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
 		err = -ENETDOWN;
 		goto out;
 	}
@@ -1603,27 +1592,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	process_deferred_actions(dp);
 
 out:
-	__this_cpu_dec(exec_actions_level);
+	__this_cpu_dec(ovs_pcpu_storage->exec_level);
 	return err;
 }
-
-int action_fifos_init(void)
-{
-	action_fifos = alloc_percpu(struct action_fifo);
-	if (!action_fifos)
-		return -ENOMEM;
-
-	flow_keys = alloc_percpu(struct action_flow_keys);
-	if (!flow_keys) {
-		free_percpu(action_fifos);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void action_fifos_exit(void)
-{
-	free_percpu(action_fifos);
-	free_percpu(flow_keys);
-}
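
The ovs_pcpu_storage object referenced throughout the diff consolidates what used to be three separate per-CPU allocations (action_fifos, flow_keys, exec_actions_level) plus the ovs_frag_data_storage variable. Its definition is not part of this file; below is a minimal sketch, assuming only the member names implied by the accessors above (->action_fifos, ->flow_keys, ->frag_data, ->exec_level) and the struct types that previously lived in actions.c:

/* Sketch only -- the authoritative definition lives in datapath.h in this
 * tree, and the real struct may carry additional members (e.g. locking
 * state).  Member names below are inferred from the accessors in the diff.
 */
struct ovs_pcpu_storage {
	struct action_fifo action_fifos;   /* deferred-action FIFO queue */
	struct action_flow_keys flow_keys; /* one key clone per recursion level */
	struct ovs_frag_data frag_data;    /* state handed to ovs_vport_output() */
	int exec_level;                    /* was the exec_actions_level per-CPU int */
};

This also explains why action_fifos_init() and action_fifos_exit() disappear at the end of the diff: with the FIFO and key storage embedded in a single per-CPU object, the separate alloc_percpu()/free_percpu() pairs in this file are no longer needed.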

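The drop accounting added above relies on enum ovs_drop_reason and ovs_kfree_skb_reason() from the new "drop.h" header, which this diff only includes. The following is a hedged sketch of the pattern, assuming the standard drop-reason subsystem identifiers from <net/dropreason.h>; the real header defines the complete reason list, and only a few illustrative values are shown here:

#include <linux/skbuff.h>
#include <net/dropreason.h>

/* Illustrative sketch, not the actual net/openvswitch/drop.h.  OVS-specific
 * reasons occupy a private range tagged with the subsystem id, so tracing
 * tools (e.g. the skb:kfree_skb tracepoint) can attribute them to OVS.
 */
enum ovs_drop_reason {
	__OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
			    SKB_DROP_REASON_SUBSYS_SHIFT,
	OVS_DROP_LAST_ACTION,  /* action list ended without an output */
	OVS_DROP_ACTION_ERROR, /* an action returned an error */
	OVS_DROP_EXPLICIT,     /* OVS_ACTION_ATTR_DROP without error */
	/* ... remaining reasons elided ... */
};

static inline void ovs_kfree_skb_reason(struct sk_buff *skb,
					enum ovs_drop_reason reason)
{
	kfree_skb_reason(skb, (u32)reason);
}

Note the semantic consequence visible in do_execute_actions(): the final consume_skb() becomes ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION), so a packet that simply runs out of actions is now reported to drop monitors rather than counted as consumed.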