diff options
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- | net/openvswitch/Kconfig | 3 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 167 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 59 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 58 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 75 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 5 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 118 | ||||
-rw-r--r-- | net/openvswitch/meter.h | 1 | ||||
-rw-r--r-- | net/openvswitch/openvswitch_trace.h | 8 | ||||
-rw-r--r-- | net/openvswitch/vport-internal_dev.c | 22 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 7 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 1 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 9 |
13 files changed, 345 insertions, 188 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 29a7081858cd..e6aaee92dba4 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -10,8 +10,9 @@ config OPENVSWITCH (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) - select LIBCRC32C + depends on PSAMPLE || !PSAMPLE select MPLS + select NET_CRC32C select NET_MPLS_GSO select DST_CACHE select NET_NSH diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6fcd7e2ca81f..e7269a3eec79 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -24,6 +24,11 @@ #include <net/checksum.h> #include <net/dsfield.h> #include <net/mpls.h> + +#if IS_ENABLED(CONFIG_PSAMPLE) +#include <net/psample.h> +#endif + #include <net/sctp/checksum.h> #include "datapath.h" @@ -34,56 +39,18 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -struct deferred_action { - struct sk_buff *skb; - const struct nlattr *actions; - int actions_len; - - /* Store pkt_key clone when creating deferred action. */ - struct sw_flow_key pkt_key; -}; - -#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) -struct ovs_frag_data { - unsigned long dst; - struct vport *vport; - struct ovs_skb_cb cb; - __be16 inner_protocol; - u16 network_offset; /* valid only for MPLS */ - u16 vlan_tci; - __be16 vlan_proto; - unsigned int l2_len; - u8 mac_proto; - u8 l2_data[MAX_L2_LEN]; -}; - -static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); - -#define DEFERRED_ACTION_FIFO_SIZE 10 -#define OVS_RECURSION_LIMIT 5 -#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) -struct action_fifo { - int head; - int tail; - /* Deferred action fifo queue storage. */ - struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; -}; - -struct action_flow_keys { - struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -static struct action_fifo __percpu *action_fifos; -static struct action_flow_keys __percpu *flow_keys; -static DEFINE_PER_CPU(int, exec_actions_level); - /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. */ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { - struct action_flow_keys *keys = this_cpu_ptr(flow_keys); - int level = this_cpu_read(exec_actions_level); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct action_flow_keys *keys = &ovs_pcpu->flow_keys; + int level = ovs_pcpu->exec_level; struct sw_flow_key *key = NULL; if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { @@ -127,10 +94,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, const struct nlattr *actions, const int actions_len) { - struct action_fifo *fifo; + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); struct deferred_action *da; - fifo = this_cpu_ptr(action_fifos); da = action_fifo_put(fifo); if (da) { da->skb = skb; @@ -232,14 +198,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { + int err; + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); } else { key->eth.vlan.tci = vlan->vlan_tci; key->eth.vlan.tpid = vlan->vlan_tpid; } - return skb_vlan_push(skb, vlan->vlan_tpid, - ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK); + err = skb_vlan_push(skb, vlan->vlan_tpid, + ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK); + skb_reset_mac_len(skb); + return err; } /* 'src' is already properly masked. */ @@ -785,7 +755,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); + struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { @@ -837,7 +807,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; - data = this_cpu_ptr(&ovs_frag_data_storage); + data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); data->dst = skb->_skb_refdst; data->vport = vport; data->cb = *OVS_CB(skb); @@ -925,7 +895,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport && netif_carrier_ok(vport->dev))) { + if (likely(vport && + netif_running(vport->dev) && + netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; @@ -964,8 +936,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; @@ -1037,12 +1008,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb, struct nlattr *sample_arg; int rem = nla_len(attr); const struct sample_arg *arg; + u32 init_probability; bool clone_flow_key; + int err; /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ sample_arg = nla_data(attr); arg = nla_data(sample_arg); actions = nla_next(sample_arg, &rem); + init_probability = OVS_CB(skb)->probability; if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { @@ -1051,9 +1025,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb, return 0; } + OVS_CB(skb)->probability = arg->probability; + clone_flow_key = !arg->exec; - return clone_execute(dp, skb, key, 0, actions, rem, last, - clone_flow_key); + err = clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); + + if (!last) + OVS_CB(skb)->probability = init_probability; + + return err; } /* When 'last' is true, clone() should always consume the 'skb'. @@ -1293,6 +1274,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +#if IS_ENABLED(CONFIG_PSAMPLE) +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + const struct nlattr *a; + u32 rate; + int rem; + + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_PSAMPLE_ATTR_GROUP: + psample_group.group_num = nla_get_u32(a); + break; + + case OVS_PSAMPLE_ATTR_COOKIE: + md.user_cookie = nla_data(a); + md.user_cookie_len = nla_len(a); + break; + } + } + + psample_group.net = ovs_dp_get_net(dp); + md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; + md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + md.rate_as_probability = 1; + + rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX; + + psample_sample_packet(&psample_group, skb, rate, &md); +} +#else +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{} +#endif + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1496,6 +1515,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ovs_kfree_skb_reason(skb, reason); return 0; } + + case OVS_ACTION_ATTR_PSAMPLE: + execute_psample(dp, skb, a); + OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } + break; } if (unlikely(err)) { @@ -1541,13 +1569,13 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, if (actions) { /* Sample action */ if (clone_flow_key) - __this_cpu_inc(exec_actions_level); + __this_cpu_inc(ovs_pcpu_storage.exec_level); err = do_execute_actions(dp, skb, clone, actions, len); if (clone_flow_key) - __this_cpu_dec(exec_actions_level); + __this_cpu_dec(ovs_pcpu_storage.exec_level); } else { /* Recirc action */ clone->recirc_id = recirc_id; ovs_dp_process_packet(skb, clone); @@ -1583,7 +1611,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, static void process_deferred_actions(struct datapath *dp) { - struct action_fifo *fifo = this_cpu_ptr(action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); /* Do not touch the FIFO in case there is no deferred actions. */ if (action_fifo_is_empty(fifo)) @@ -1614,7 +1642,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, { int err, level; - level = __this_cpu_inc_return(exec_actions_level); + level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level); if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); @@ -1631,27 +1659,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, process_deferred_actions(dp); out: - __this_cpu_dec(exec_actions_level); + __this_cpu_dec(ovs_pcpu_storage.exec_level); return err; } - -int action_fifos_init(void) -{ - action_fifos = alloc_percpu(struct action_fifo); - if (!action_fifos) - return -ENOMEM; - - flow_keys = alloc_percpu(struct action_flow_keys); - if (!flow_keys) { - free_percpu(action_fifos); - return -ENOMEM; - } - - return 0; -} - -void action_fifos_exit(void) -{ - free_percpu(action_fifos); - free_percpu(flow_keys); -} diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 2928c142a2dd..e573e9221302 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) static void ovs_ct_get_labels(const struct nf_conn *ct, struct ovs_key_ct_labels *labels) { - struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; + struct nf_conn_labels *cl = NULL; + if (ct) { + if (ct->master && !nf_ct_is_confirmed(ct)) + ct = ct->master; + cl = nf_ct_labels_find(ct); + } if (cl) memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); else @@ -674,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, action |= BIT(NF_NAT_MANIP_DST); err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); + if (err != NF_ACCEPT) + return err; if (action & BIT(NF_NAT_MANIP_SRC)) ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); @@ -692,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } #endif +static int verdict_to_errno(unsigned int verdict) +{ + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + return 0; + case NF_DROP: + return -EINVAL; + case NF_STOLEN: + return -EINPROGRESS; + default: + break; + } + + return -EINVAL; +} + /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if * not done already. Update key with new CT state after passing the packet * through conntrack. @@ -730,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = nf_conntrack_in(skb, &state); if (err != NF_ACCEPT) - return -ENOENT; + return verdict_to_errno(err); /* Clear CT state NAT flags to mark that we have not yet done * NAT after the nf_conntrack_in() call. We can actually clear @@ -757,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * the key->ct_state. */ if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && - (nf_ct_is_confirmed(ct) || info->commit) && - ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { - return -EINVAL; + (nf_ct_is_confirmed(ct) || info->commit)) { + int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo); + + err = verdict_to_errno(err); + if (err) + return err; } /* Userspace may decide to perform a ct lookup without a helper @@ -790,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * - When committing an unconfirmed connection. */ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : - info->commit) && - nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) { - return -EINVAL; + info->commit)) { + int err = nf_ct_helper(skb, ct, ctinfo, info->family); + + err = verdict_to_errno(err); + if (err) + return err; } if (nf_ct_protonum(ct) == IPPROTO_TCP && @@ -996,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, /* This will take care of sending queued events even if the connection * is already confirmed. */ - if (nf_conntrack_confirm(skb) != NF_ACCEPT) - return -EINVAL; + err = nf_conntrack_confirm(skb); - return 0; + return verdict_to_errno(err); } /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero @@ -1034,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, else err = ovs_ct_lookup(net, key, info, skb); + /* conntrack core returned NF_STOLEN */ + if (err == -EINPROGRESS) + return err; + skb_push_rcsum(skb, nh_ofs); if (err) ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK); @@ -1571,8 +1603,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]); - ovs_net->ct_limit_info->data = - nf_conncount_init(net, NFPROTO_INET, sizeof(u32)); + ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32)); if (IS_ERR(ovs_net->ct_limit_info->data)) { err = PTR_ERR(ovs_net->ct_limit_info->data); @@ -1589,7 +1620,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info; int i; - nf_conncount_destroy(net, NFPROTO_INET, info->data); + nf_conncount_destroy(net, info->data); for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 11c69415c605..6a304ae2d959 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/time.h> #include <linux/etherdevice.h> -#include <linux/genetlink.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mutex.h> @@ -245,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. */ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct dp_stats_percpu *stats; + bool ovs_pcpu_locked = false; u64 *stats_counter; u32 n_mask_hit; u32 n_cache_hit; @@ -291,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); + /* This path can be invoked recursively: Use the current task to + * identify recursive invocation - the lock must be acquired only once. + * Even with disabled bottom halves this can be preempted on PREEMPT_RT. + * Limit the locking to RT to avoid assigning `owner' if it can be + * avoided. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + ovs_pcpu->owner = current; + ovs_pcpu_locked = true; + } + error = ovs_execute_actions(dp, skb, sf_acts, key); if (unlikely(error)) net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", ovs_dp_name(dp), error); + if (ovs_pcpu_locked) { + ovs_pcpu->owner = NULL; + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + } stats_counter = &stats->n_hit; @@ -672,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); local_bh_enable(); rcu_read_unlock(); @@ -1829,8 +1852,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.dp = dp; parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX] - ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0; + parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0); /* So far only local changes have been made, now need the lock. */ ovs_lock(); @@ -2103,6 +2125,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; + struct net *net_vport; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, @@ -2119,12 +2142,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; - if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); + rcu_read_lock(); + net_vport = dev_net_rcu(vport->dev); + if (!net_eq(net, net_vport)) { + int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) - goto nla_put_failure; + goto nla_put_failure_unlock; } + rcu_read_unlock(); ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, @@ -2145,6 +2171,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, genlmsg_end(skb, ovs_header); return 0; +nla_put_failure_unlock: + rcu_read_unlock(); nla_put_failure: err = -EMSGSIZE; error: @@ -2267,8 +2295,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL) return -EOPNOTSUPP; - port_no = a[OVS_VPORT_ATTR_PORT_NO] - ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0); if (port_no >= DP_MAX_PORTS) return -EFBIG; @@ -2305,8 +2332,8 @@ restart: parms.dp = dp; parms.port_no = port_no; parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; - parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX] - ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0; + parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX], + 0); vport = new_vport(&parms); err = PTR_ERR(vport); @@ -2707,7 +2734,7 @@ static struct pernet_operations ovs_net_ops = { }; static const char * const ovs_drop_reasons[] = { -#define S(x) (#x), +#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; @@ -2726,13 +2753,9 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); - err = action_fifos_init(); - if (err) - goto error; - err = ovs_internal_dev_rtnl_link_register(); if (err) - goto error_action_fifos_exit; + goto error; err = ovs_flow_init(); if (err) @@ -2775,8 +2798,6 @@ error_flow_exit: ovs_flow_exit(); error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); -error_action_fifos_exit: - action_fifos_exit(); error: return err; } @@ -2792,7 +2813,6 @@ static void dp_cleanup(void) ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); - action_fifos_exit(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 0cd29971a907..1b5348b0f559 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -13,6 +13,7 @@ #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> #include <net/ip_tunnels.h> +#include <net/mpls.h> #include "conntrack.h" #include "flow.h" @@ -29,8 +30,8 @@ * datapath. * @n_hit: Number of received packets for which a matching flow was found in * the flow table. - * @n_miss: Number of received packets that had no matching flow in the flow - * table. The sum of @n_hit and @n_miss is the number of packets that have + * @n_missed: Number of received packets that had no matching flow in the flow + * table. The sum of @n_hit and @n_missed is the number of packets that have * been received by the datapath. * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in @@ -40,6 +41,7 @@ * up per packet. * @n_cache_hit: The number of received packets that had their mask found using * the mask cache. + * @syncp: Synchronization point for 64bit counters. */ struct dp_stats_percpu { u64 n_hit; @@ -74,8 +76,10 @@ struct dp_nlsk_pids { * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @max_headroom: the maximum headroom of all vports in this datapath; it will + * @user_features: Bitmap of enabled %OVS_DP_F_* features. + * @max_headroom: The maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @meter_tbl: Meter table. * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional @@ -115,20 +119,26 @@ struct datapath { * fragmented. * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. + * @probability: The sampling probability that was applied to this skb; 0 means + * no sampling has occurred; U32_MAX means 100% probability. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; u16 acts_origlen; u32 cutlen; + u32 probability; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) /** - * struct dp_upcall - metadata to include with a packet to send to userspace + * struct dp_upcall_info - metadata to include with a packet sent to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. + * @actions: If nonnull, its variable-length value is passed to userspace as + * %OVS_PACKET_ATTR_ACTIONS. + * @actions_len: The length of the @actions. * @portid: Netlink portid to which packet should be sent. If @portid is 0 * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. @@ -149,6 +159,10 @@ struct dp_upcall_info { * struct ovs_net - Per net-namespace data for ovs. * @dps: List of datapaths to enable dumping them all out. * Protected by genl_mutex. + * @dp_notify_work: A work notifier to handle port unregistering. + * @masks_rebalance: A work to periodically optimize flow table caches. + * @ct_limit_info: A hash table of conntrack zone connection limits. + * @xt_label: Whether connlables are configured for the network or not. */ struct ovs_net { struct list_head dps; @@ -157,11 +171,57 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif - - /* Module reference for configuring conntrack. */ bool xt_label; }; +#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) +struct ovs_frag_data { + unsigned long dst; + struct vport *vport; + struct ovs_skb_cb cb; + __be16 inner_protocol; + u16 network_offset; /* valid only for MPLS */ + u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 mac_proto; + u8 l2_data[MAX_L2_LEN]; +}; + +struct deferred_action { + struct sk_buff *skb; + const struct nlattr *actions; + int actions_len; + + /* Store pkt_key clone when creating deferred action. */ + struct sw_flow_key pkt_key; +}; + +#define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) + +struct action_fifo { + int head; + int tail; + /* Deferred action fifo queue storage. */ + struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; +}; + +struct action_flow_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + +struct ovs_pcpu_storage { + struct action_fifo action_fifos; + struct action_flow_keys flow_keys; + struct ovs_frag_data frag_data; + int exec_level; + struct task_struct *owner; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + /** * enum ovs_pkt_hash_types - hash info to include with a packet * to send to userspace. @@ -270,9 +330,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, void ovs_dp_notify_wq(struct work_struct *work); -int action_fifos_init(void); -void action_fifos_exit(void); - /* 'KEY' must not have any bits set outside of the 'MASK' */ #define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) #define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33b21a0c0548..b80bd3a90773 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); - memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); + /* In order to process neighbor discovery options, we need the * entire packet. */ @@ -787,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ipv4, 0, sizeof(key->ipv4)); } } else if (eth_p_mpls(key->eth.type)) { - u8 label_count = 1; + size_t label_count = 1; memset(&key->mpls, 0, sizeof(key->mpls)); skb_set_inner_network_header(skb, skb->mac_len); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ebc5728aab4e..ad64bb9ab5e2 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_DROP: + case OVS_ACTION_ATTR_PSAMPLE: break; case OVS_ACTION_ATTR_CT: @@ -152,6 +153,13 @@ static void update_range(struct sw_flow_match *match, sizeof((match)->key->field)); \ } while (0) +#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \ + update_range(match, offsetof(struct sw_flow_key, field), \ + bitmap_size(nbits), is_mask); \ + bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \ + value_p, nbits); \ +}) + static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { @@ -670,8 +678,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, bool log) { bool ttl = false, ipv4 = false, ipv6 = false; + IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; bool info_bridge_mode = false; - __be16 tun_flags = 0; int opts_type = 0; struct nlattr *a; int rem; @@ -697,7 +705,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_ID: SW_FLOW_KEY_PUT(match, tun_key.tun_id, nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, @@ -729,10 +737,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ttl = true; break; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_TP_SRC: SW_FLOW_KEY_PUT(match, tun_key.tp_src, @@ -743,7 +751,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, nla_get_be16(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_OAM: - tun_flags |= TUNNEL_OAM; + __set_bit(IP_TUNNEL_OAM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: if (opts_type) { @@ -755,7 +763,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: @@ -768,7 +776,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_PAD: @@ -784,7 +792,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE: @@ -798,7 +806,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } } - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags, + __IP_TUNNEL_FLAG_NUM, is_mask); if (is_mask) SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); else @@ -823,13 +832,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } if (ipv4) { if (info_bridge_mode) { + __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags); + if (match->key->tun_key.u.ipv4.src || match->key->tun_key.u.ipv4.dst || match->key->tun_key.tp_src || match->key->tun_key.tp_dst || match->key->tun_key.ttl || match->key->tun_key.tos || - tun_flags & ~TUNNEL_KEY) { + !ip_tunnel_flags_empty(tun_flags)) { OVS_NLERR(log, "IPv4 tun info is not correct"); return -EINVAL; } @@ -874,7 +885,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, const void *tun_opts, int swkey_tun_opts_len, unsigned short tun_proto, u8 mode) { - if (output->tun_flags & TUNNEL_KEY && + if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; @@ -910,10 +921,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && + if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (output->tp_src && @@ -922,18 +933,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, if (output->tp_dst && nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_OAM) && + if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (swkey_tun_opts_len) { - if (output->tun_flags & TUNNEL_GENEVE_OPT && + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_VXLAN_OPT && + else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, + output->tun_flags) && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; @@ -1925,7 +1938,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) { - return attr ? nla_get_u32(attr) : 0; + return nla_get_u32_default(attr, 0); } /** @@ -2029,7 +2042,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; - if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) + if (ip_tunnel_is_options_present(output->tun_key.tun_flags)) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, @@ -2304,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2397,7 +2406,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25); if (!actions) return; @@ -2467,18 +2476,9 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) - return (void *)acts; + return ERR_CAST(acts); memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); acts->actions_len = (*sfa)->actions_len; @@ -2752,7 +2752,8 @@ static int validate_geneve_opts(struct sw_flow_key *key) opts_len -= len; } - key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + if (crit_opt) + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags); return 0; } @@ -2760,6 +2761,7 @@ static int validate_geneve_opts(struct sw_flow_key *key) static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { + IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { }; struct sw_flow_match match; struct sw_flow_key key; struct metadata_dst *tun_dst; @@ -2767,9 +2769,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct ovs_tunnel_info *ovs_tun; struct nlattr *a; int err = 0, start, opts_type; - __be16 dst_opt_type; - dst_opt_type = 0; ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) @@ -2781,13 +2781,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, err = validate_geneve_opts(&key); if (err < 0) return err; - dst_opt_type = TUNNEL_GENEVE_OPT; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: - dst_opt_type = TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - dst_opt_type = TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type); break; } } @@ -2875,7 +2876,8 @@ static int validate_set(const struct nlattr *a, size_t key_len; /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + if (!nla_ok(ovs_key, nla_len(a)) || + nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; key_len = nla_len(ovs_key); @@ -3047,7 +3049,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr, + error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX, + nla_data(attr), nla_len(attr), userspace_policy, NULL); if (error) return error; @@ -3144,6 +3147,28 @@ static int validate_and_copy_check_pkt_len(struct net *net, return 0; } +static int validate_psample(const struct nlattr *attr) +{ + static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = { + [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 }, + [OVS_PSAMPLE_ATTR_COOKIE] = { + .type = NLA_BINARY, + .len = OVS_PSAMPLE_COOKIE_MAX_SIZE, + }, + }; + struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1]; + int err; + + if (!IS_ENABLED(CONFIG_PSAMPLE)) + return -EOPNOTSUPP; + + err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL); + if (err) + return err; + + return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -3199,6 +3224,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, [OVS_ACTION_ATTR_DROP] = sizeof(u32), + [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3477,6 +3503,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; break; + case OVS_ACTION_ATTR_PSAMPLE: + err = validate_psample(a); + if (err) + return err; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -3502,7 +3534,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h index ed11cd12b512..8bbf983cd244 100644 --- a/net/openvswitch/meter.h +++ b/net/openvswitch/meter.h @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/openvswitch.h> -#include <linux/genetlink.h> #include <linux/skbuff.h> #include <linux/bits.h> diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h index 3eb35d9eb700..74d75aaebef4 100644 --- a/net/openvswitch/openvswitch_trace.h +++ b/net/openvswitch/openvswitch_trace.h @@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action, TP_fast_assign( __entry->dpaddr = dp; - __assign_str(dp_name, ovs_dp_name(dp)); - __assign_str(dev_name, skb->dev->name); + __assign_str(dp_name); + __assign_str(dev_name); __entry->skbaddr = skb; __entry->len = skb->len; __entry->data_len = skb->data_len; @@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall, TP_fast_assign( __entry->dpaddr = dp; - __assign_str(dp_name, ovs_dp_name(dp)); - __assign_str(dev_name, skb->dev->name); + __assign_str(dp_name); + __assign_str(dev_name); __entry->skbaddr = skb; __entry->len = skb->len; __entry->data_len = skb->data_len; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 74c88a6baa43..125d310871e9 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -85,7 +85,6 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_get_stats64 = dev_get_tstats64, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -103,19 +102,20 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_NO_QUEUE; + netdev->lltx = true; netdev->needs_free_netdev = true; netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; - netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | - NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL; + netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | + NETIF_F_GSO_ENCAP_ALL; netdev->vlan_features = netdev->features; netdev->hw_enc_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - netdev->hw_features = netdev->features & ~NETIF_F_LLTX; + netdev->hw_features = netdev->features; eth_hw_addr_random(netdev); } @@ -140,11 +140,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } - vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!vport->dev->tstats) { - err = -ENOMEM; - goto error_free_netdev; - } + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); dev->ifindex = parms->desired_ifindex; @@ -153,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) /* Restrict bridge port to current netns. */ if (vport->port_no == OVSP_LOCAL) - vport->dev->features |= NETIF_F_NETNS_LOCAL; + vport->dev->netns_immutable = true; rtnl_lock(); err = register_netdevice(vport->dev); @@ -169,8 +165,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) error_unlock: rtnl_unlock(); - free_percpu(dev->tstats); -error_free_netdev: free_netdev(dev); error_free_vport: ovs_vport_free(vport); @@ -186,7 +180,6 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(vport->dev); - free_percpu(vport->dev->tstats); rtnl_unlock(); } @@ -202,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb) skb_dst_drop(skb); nf_reset_ct(skb); - secpath_reset(skb); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 903537a5da22..91a11067e458 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -82,6 +82,13 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) err = -ENODEV; goto error_free_vport; } + /* Ensure that the device exists and that the provided + * name is not one of its aliases. + */ + if (strcmp(name, ovs_vport_name(vport))) { + err = -ENODEV; + goto error_put; + } netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); if (vport->dev->flags & IFF_LOOPBACK || (vport->dev->type != ARPHRD_ETHER && diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 972ae01a70f7..8732f6e51ae5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; + OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3e71ca8ad8a7..9f67b9dd49f9 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -97,6 +97,8 @@ struct vport { * @desired_ifindex: New vport's ifindex. * @dp: New vport's datapath. * @port_no: New vport's port number. + * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message, + * %NULL if none was supplied. */ struct vport_parms { const char *name; @@ -125,6 +127,8 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. + * @owner: Module that implements this vport type. + * @list: List entry in the global list of vport types. */ struct vport_ops { enum ovs_vport_type type; @@ -144,6 +148,7 @@ struct vport_ops { /** * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for * a given vport. + * @syncp: Synchronization point for 64bit counters. * @n_success: Number of packets that upcall to userspace succeed. * @n_fail: Number of packets that upcall to userspace failed. */ @@ -164,6 +169,8 @@ void ovs_vport_free(struct vport *); * * @vport: vport to access * + * Returns: A void pointer to a private data allocated in the @vport. + * * If a nonzero size was passed in priv_size of vport_alloc() a private data * area was allocated on creation. This allows that area to be accessed and * used for any purpose needed by the vport implementer. @@ -178,6 +185,8 @@ static inline void *vport_priv(const struct vport *vport) * * @priv: Start of private data area. * + * Returns: A reference to a vport structure that contains @priv. + * * It is sometimes useful to translate from a pointer to the private data * area to the vport, such as in the case where the private data pointer is * the result of a hash table lookup. @priv must point to the start of the |