summaryrefslogtreecommitdiff
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig3
-rw-r--r--net/openvswitch/actions.c167
-rw-r--r--net/openvswitch/conntrack.c59
-rw-r--r--net/openvswitch/datapath.c58
-rw-r--r--net/openvswitch/datapath.h75
-rw-r--r--net/openvswitch/flow.c5
-rw-r--r--net/openvswitch/flow_netlink.c118
-rw-r--r--net/openvswitch/meter.h1
-rw-r--r--net/openvswitch/openvswitch_trace.h8
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c7
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/openvswitch/vport.h9
13 files changed, 345 insertions, 188 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 29a7081858cd..e6aaee92dba4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,8 +10,9 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
- select LIBCRC32C
+ depends on PSAMPLE || !PSAMPLE
select MPLS
+ select NET_CRC32C
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..e7269a3eec79 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,11 @@
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -34,56 +39,18 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-struct deferred_action {
- struct sk_buff *skb;
- const struct nlattr *actions;
- int actions_len;
-
- /* Store pkt_key clone when creating deferred action. */
- struct sw_flow_key pkt_key;
-};
-
-#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
-struct ovs_frag_data {
- unsigned long dst;
- struct vport *vport;
- struct ovs_skb_cb cb;
- __be16 inner_protocol;
- u16 network_offset; /* valid only for MPLS */
- u16 vlan_tci;
- __be16 vlan_proto;
- unsigned int l2_len;
- u8 mac_proto;
- u8 l2_data[MAX_L2_LEN];
-};
-
-static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
-
-#define DEFERRED_ACTION_FIFO_SIZE 10
-#define OVS_RECURSION_LIMIT 5
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
-struct action_fifo {
- int head;
- int tail;
- /* Deferred action fifo queue storage. */
- struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
-};
-
-struct action_flow_keys {
- struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
-static DEFINE_PER_CPU(int, exec_actions_level);
-
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
- int level = this_cpu_read(exec_actions_level);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
+ int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
@@ -127,10 +94,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo;
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
struct deferred_action *da;
- fifo = this_cpu_ptr(action_fifos);
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
@@ -232,14 +198,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
+ int err;
+
if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
} else {
key->eth.vlan.tci = vlan->vlan_tci;
key->eth.vlan.tpid = vlan->vlan_tpid;
}
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ err = skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ skb_reset_mac_len(skb);
+ return err;
}
/* 'src' is already properly masked. */
@@ -785,7 +755,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -837,7 +807,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_frag_data_storage);
+ data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -925,7 +895,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport && netif_carrier_ok(vport->dev))) {
+ if (likely(vport &&
+ netif_running(vport->dev) &&
+ netif_carrier_ok(vport->dev))) {
u16 mru = OVS_CB(skb)->mru;
u32 cutlen = OVS_CB(skb)->cutlen;
@@ -964,8 +936,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.mru = OVS_CB(skb)->mru;
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
+ nla_for_each_nested(a, attr, rem) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
@@ -1037,12 +1008,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
+ u32 init_probability;
bool clone_flow_key;
+ int err;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
+ init_probability = OVS_CB(skb)->probability;
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
@@ -1051,9 +1025,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+ OVS_CB(skb)->probability = arg->probability;
+
clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
+ err = clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
+
+ if (!last)
+ OVS_CB(skb)->probability = init_probability;
+
+ return err;
}
/* When 'last' is true, clone() should always consume the 'skb'.
@@ -1293,6 +1274,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+ const struct nlattr *a;
+ u32 rate;
+ int rem;
+
+ nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(a)) {
+ case OVS_PSAMPLE_ATTR_GROUP:
+ psample_group.group_num = nla_get_u32(a);
+ break;
+
+ case OVS_PSAMPLE_ATTR_COOKIE:
+ md.user_cookie = nla_data(a);
+ md.user_cookie_len = nla_len(a);
+ break;
+ }
+ }
+
+ psample_group.net = ovs_dp_get_net(dp);
+ md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+ md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+ md.rate_as_probability = 1;
+
+ rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+ psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{}
+#endif
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@@ -1496,6 +1515,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
ovs_kfree_skb_reason(skb, reason);
return 0;
}
+
+ case OVS_ACTION_ATTR_PSAMPLE:
+ execute_psample(dp, skb, a);
+ OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
+ break;
}
if (unlikely(err)) {
@@ -1541,13 +1569,13 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(exec_actions_level);
+ __this_cpu_inc(ovs_pcpu_storage.exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage.exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1583,7 +1611,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1614,7 +1642,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(exec_actions_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
@@ -1631,27 +1659,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage.exec_level);
return err;
}
-
-int action_fifos_init(void)
-{
- action_fifos = alloc_percpu(struct action_fifo);
- if (!action_fifos)
- return -ENOMEM;
-
- flow_keys = alloc_percpu(struct action_flow_keys);
- if (!flow_keys) {
- free_percpu(action_fifos);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void action_fifos_exit(void)
-{
- free_percpu(action_fifos);
- free_percpu(flow_keys);
-}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 2928c142a2dd..e573e9221302 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
@@ -674,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+ if (err != NF_ACCEPT)
+ return err;
if (action & BIT(NF_NAT_MANIP_SRC))
ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
@@ -692,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
#endif
+static int verdict_to_errno(unsigned int verdict)
+{
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ return 0;
+ case NF_DROP:
+ return -EINVAL;
+ case NF_STOLEN:
+ return -EINPROGRESS;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet
* through conntrack.
@@ -730,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- return -ENOENT;
+ return verdict_to_errno(err);
/* Clear CT state NAT flags to mark that we have not yet done
* NAT after the nf_conntrack_in() call. We can actually clear
@@ -757,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* the key->ct_state.
*/
if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
+ (nf_ct_is_confirmed(ct) || info->commit)) {
+ int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
/* Userspace may decide to perform a ct lookup without a helper
@@ -790,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* - When committing an unconfirmed connection.
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
- return -EINVAL;
+ info->commit)) {
+ int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -996,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
/* This will take care of sending queued events even if the connection
* is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
+ err = nf_conntrack_confirm(skb);
- return 0;
+ return verdict_to_errno(err);
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1034,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
else
err = ovs_ct_lookup(net, key, info, skb);
+ /* conntrack core returned NF_STOLEN */
+ if (err == -EINPROGRESS)
+ return err;
+
skb_push_rcsum(skb, nh_ofs);
if (err)
ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
@@ -1571,8 +1603,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1589,7 +1620,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 11c69415c605..6a304ae2d959 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -15,7 +15,6 @@
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
-#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
@@ -245,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
+ bool ovs_pcpu_locked = false;
u64 *stats_counter;
u32 n_mask_hit;
u32 n_cache_hit;
@@ -291,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
+ /* This path can be invoked recursively: Use the current task to
+ * identify recursive invocation - the lock must be acquired only once.
+ * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
+ * Limit the locking to RT to avoid assigning `owner' if it can be
+ * avoided.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
+ local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ ovs_pcpu->owner = current;
+ ovs_pcpu_locked = true;
+ }
+
error = ovs_execute_actions(dp, skb, sf_acts, key);
if (unlikely(error))
net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
ovs_dp_name(dp), error);
+ if (ovs_pcpu_locked) {
+ ovs_pcpu->owner = NULL;
+ local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ }
stats_counter = &stats->n_hit;
@@ -672,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
+ local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage.owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage.owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -1829,8 +1852,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -2103,6 +2125,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
+ struct net *net_vport;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
@@ -2119,12 +2142,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
goto nla_put_failure;
- if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
+ rcu_read_lock();
+ net_vport = dev_net_rcu(vport->dev);
+ if (!net_eq(net, net_vport)) {
+ int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
- goto nla_put_failure;
+ goto nla_put_failure_unlock;
}
+ rcu_read_unlock();
ovs_vport_get_stats(vport, &vport_stats);
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
@@ -2145,6 +2171,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
genlmsg_end(skb, ovs_header);
return 0;
+nla_put_failure_unlock:
+ rcu_read_unlock();
nla_put_failure:
err = -EMSGSIZE;
error:
@@ -2267,8 +2295,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
return -EOPNOTSUPP;
- port_no = a[OVS_VPORT_ATTR_PORT_NO]
- ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
if (port_no >= DP_MAX_PORTS)
return -EFBIG;
@@ -2305,8 +2332,8 @@ restart:
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
+ 0);
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2707,7 +2734,7 @@ static struct pernet_operations ovs_net_ops = {
};
static const char * const ovs_drop_reasons[] = {
-#define S(x) (#x),
+#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
OVS_DROP_REASONS(S)
#undef S
};
@@ -2726,13 +2753,9 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
- err = action_fifos_init();
- if (err)
- goto error;
-
err = ovs_internal_dev_rtnl_link_register();
if (err)
- goto error_action_fifos_exit;
+ goto error;
err = ovs_flow_init();
if (err)
@@ -2775,8 +2798,6 @@ error_flow_exit:
ovs_flow_exit();
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
-error_action_fifos_exit:
- action_fifos_exit();
error:
return err;
}
@@ -2792,7 +2813,6 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
- action_fifos_exit();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0cd29971a907..1b5348b0f559 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
+#include <net/mpls.h>
#include "conntrack.h"
#include "flow.h"
@@ -29,8 +30,8 @@
* datapath.
* @n_hit: Number of received packets for which a matching flow was found in
* the flow table.
- * @n_miss: Number of received packets that had no matching flow in the flow
- * table. The sum of @n_hit and @n_miss is the number of packets that have
+ * @n_missed: Number of received packets that had no matching flow in the flow
+ * table. The sum of @n_hit and @n_missed is the number of packets that have
* been received by the datapath.
* @n_lost: Number of received packets that had no matching flow in the flow
* table that could not be sent to userspace (normally due to an overflow in
@@ -40,6 +41,7 @@
* up per packet.
* @n_cache_hit: The number of received packets that had their mask found using
* the mask cache.
+ * @syncp: Synchronization point for 64bit counters.
*/
struct dp_stats_percpu {
u64 n_hit;
@@ -74,8 +76,10 @@ struct dp_nlsk_pids {
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
- * @max_headroom: the maximum headroom of all vports in this datapath; it will
+ * @user_features: Bitmap of enabled %OVS_DP_F_* features.
+ * @max_headroom: The maximum headroom of all vports in this datapath; it will
* be used by all the internal vports in this dp.
+ * @meter_tbl: Meter table.
* @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
*
* Context: See the comment on locking at the top of datapath.c for additional
@@ -115,20 +119,26 @@ struct datapath {
* fragmented.
* @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
u16 acts_origlen;
u32 cutlen;
+ u32 probability;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
/**
- * struct dp_upcall - metadata to include with a packet to send to userspace
+ * struct dp_upcall_info - metadata to include with a packet sent to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
* @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
+ * @actions: If nonnull, its variable-length value is passed to userspace as
+ * %OVS_PACKET_ATTR_ACTIONS.
+ * @actions_len: The length of the @actions.
* @portid: Netlink portid to which packet should be sent. If @portid is 0
* then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
@@ -149,6 +159,10 @@ struct dp_upcall_info {
* struct ovs_net - Per net-namespace data for ovs.
* @dps: List of datapaths to enable dumping them all out.
* Protected by genl_mutex.
+ * @dp_notify_work: A work notifier to handle port unregistering.
+ * @masks_rebalance: A work to periodically optimize flow table caches.
+ * @ct_limit_info: A hash table of conntrack zone connection limits.
+ * @xt_label: Whether connlables are configured for the network or not.
*/
struct ovs_net {
struct list_head dps;
@@ -157,11 +171,57 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
-
- /* Module reference for configuring conntrack. */
bool xt_label;
};
+#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+ unsigned long dst;
+ struct vport *vport;
+ struct ovs_skb_cb cb;
+ __be16 inner_protocol;
+ u16 network_offset; /* valid only for MPLS */
+ u16 vlan_tci;
+ __be16 vlan_proto;
+ unsigned int l2_len;
+ u8 mac_proto;
+ u8 l2_data[MAX_L2_LEN];
+};
+
+struct deferred_action {
+ struct sk_buff *skb;
+ const struct nlattr *actions;
+ int actions_len;
+
+ /* Store pkt_key clone when creating deferred action. */
+ struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
+
+struct action_fifo {
+ int head;
+ int tail;
+ /* Deferred action fifo queue storage. */
+ struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+struct action_flow_keys {
+ struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
+struct ovs_pcpu_storage {
+ struct action_fifo action_fifos;
+ struct action_flow_keys flow_keys;
+ struct ovs_frag_data frag_data;
+ int exec_level;
+ struct task_struct *owner;
+ local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
* to send to userspace.
@@ -270,9 +330,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
void ovs_dp_notify_wq(struct work_struct *work);
-int action_fifos_init(void);
-void action_fifos_exit(void);
-
/* 'KEY' must not have any bits set outside of the 'MASK' */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 33b21a0c0548..b80bd3a90773 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*/
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
+ memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
+
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
@@ -787,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- u8 label_count = 1;
+ size_t label_count = 1;
memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ebc5728aab4e..ad64bb9ab5e2 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_DROP:
+ case OVS_ACTION_ATTR_PSAMPLE:
break;
case OVS_ACTION_ATTR_CT:
@@ -152,6 +153,13 @@ static void update_range(struct sw_flow_match *match,
sizeof((match)->key->field)); \
} while (0)
+#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ bitmap_size(nbits), is_mask); \
+ bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \
+ value_p, nbits); \
+})
+
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
@@ -670,8 +678,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
bool log)
{
bool ttl = false, ipv4 = false, ipv6 = false;
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
bool info_bridge_mode = false;
- __be16 tun_flags = 0;
int opts_type = 0;
struct nlattr *a;
int rem;
@@ -697,7 +705,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_ID:
SW_FLOW_KEY_PUT(match, tun_key.tun_id,
nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
@@ -729,10 +737,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_TP_SRC:
SW_FLOW_KEY_PUT(match, tun_key.tp_src,
@@ -743,7 +751,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
nla_get_be16(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_OAM:
- tun_flags |= TUNNEL_OAM;
+ __set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
if (opts_type) {
@@ -755,7 +763,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
@@ -768,7 +776,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
@@ -784,7 +792,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
@@ -798,7 +806,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
}
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
+ __IP_TUNNEL_FLAG_NUM, is_mask);
if (is_mask)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
else
@@ -823,13 +832,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
if (ipv4) {
if (info_bridge_mode) {
+ __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+
if (match->key->tun_key.u.ipv4.src ||
match->key->tun_key.u.ipv4.dst ||
match->key->tun_key.tp_src ||
match->key->tun_key.tp_dst ||
match->key->tun_key.ttl ||
match->key->tun_key.tos ||
- tun_flags & ~TUNNEL_KEY) {
+ !ip_tunnel_flags_empty(tun_flags)) {
OVS_NLERR(log, "IPv4 tun info is not correct");
return -EINVAL;
}
@@ -874,7 +885,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len,
unsigned short tun_proto, u8 mode)
{
- if (output->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
OVS_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -910,10 +921,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (output->tp_src &&
@@ -922,18 +933,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if (output->tp_dst &&
nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (swkey_tun_opts_len) {
- if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
+ output->tun_flags) &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -1925,7 +1938,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
- return attr ? nla_get_u32(attr) : 0;
+ return nla_get_u32_default(attr, 0);
}
/**
@@ -2029,7 +2042,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
- if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+ if (ip_tunnel_is_options_present(output->tun_key.tun_flags))
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
@@ -2304,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
OVS_FLOW_ATTR_MASK, true, skb);
}
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
-
sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
if (!sfa)
return ERR_PTR(-ENOMEM);
@@ -2397,7 +2406,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
if (!actions)
return;
@@ -2467,18 +2476,9 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size exceeds max %u",
- MAX_ACTIONS_BUFSIZE);
- return ERR_PTR(-EMSGSIZE);
- }
- new_acts_size = MAX_ACTIONS_BUFSIZE;
- }
-
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
- return (void *)acts;
+ return ERR_CAST(acts);
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
@@ -2752,7 +2752,8 @@ static int validate_geneve_opts(struct sw_flow_key *key)
opts_len -= len;
}
- key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ if (crit_opt)
+ __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);
return 0;
}
@@ -2760,6 +2761,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
+ IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
struct sw_flow_match match;
struct sw_flow_key key;
struct metadata_dst *tun_dst;
@@ -2767,9 +2769,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
int err = 0, start, opts_type;
- __be16 dst_opt_type;
- dst_opt_type = 0;
ovs_match_init(&match, &key, true, NULL);
opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
@@ -2781,13 +2781,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
err = validate_geneve_opts(&key);
if (err < 0)
return err;
- dst_opt_type = TUNNEL_GENEVE_OPT;
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
- dst_opt_type = TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- dst_opt_type = TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
break;
}
}
@@ -2875,7 +2876,8 @@ static int validate_set(const struct nlattr *a,
size_t key_len;
/* There can be only one key in a action */
- if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+ if (!nla_ok(ovs_key, nla_len(a)) ||
+ nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
key_len = nla_len(ovs_key);
@@ -3047,7 +3049,8 @@ static int validate_userspace(const struct nlattr *attr)
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
- error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
+ error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX,
+ nla_data(attr), nla_len(attr),
userspace_policy, NULL);
if (error)
return error;
@@ -3144,6 +3147,28 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return 0;
}
+static int validate_psample(const struct nlattr *attr)
+{
+ static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
+ [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
+ [OVS_PSAMPLE_ATTR_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
+ },
+ };
+ struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
+ int err;
+
+ if (!IS_ENABLED(CONFIG_PSAMPLE))
+ return -EOPNOTSUPP;
+
+ err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
+ if (err)
+ return err;
+
+ return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
+}
+
static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa, bool log)
{
@@ -3199,6 +3224,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
[OVS_ACTION_ATTR_DROP] = sizeof(u32),
+ [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3477,6 +3503,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_PSAMPLE:
+ err = validate_psample(a);
+ if (err)
+ return err;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
@@ -3502,7 +3534,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int err;
u32 mpls_label_count = 0;
- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
+ *sfa = nla_alloc_flow_actions(nla_len(attr));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index ed11cd12b512..8bbf983cd244 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
-#include <linux/genetlink.h>
#include <linux/skbuff.h>
#include <linux/bits.h>
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
index 3eb35d9eb700..74d75aaebef4 100644
--- a/net/openvswitch/openvswitch_trace.h
+++ b/net/openvswitch/openvswitch_trace.h
@@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
@@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 74c88a6baa43..125d310871e9 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -85,7 +85,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_get_stats64 = dev_get_tstats64,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -103,19 +102,20 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
+ netdev->lltx = true;
netdev->needs_free_netdev = true;
netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+ netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ netdev->hw_features = netdev->features;
eth_hw_addr_random(netdev);
}
@@ -140,11 +140,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
- vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!vport->dev->tstats) {
- err = -ENOMEM;
- goto error_free_netdev;
- }
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
dev->ifindex = parms->desired_ifindex;
@@ -153,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->netns_immutable = true;
rtnl_lock();
err = register_netdevice(vport->dev);
@@ -169,8 +165,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
error_unlock:
rtnl_unlock();
- free_percpu(dev->tstats);
-error_free_netdev:
free_netdev(dev);
error_free_vport:
ovs_vport_free(vport);
@@ -186,7 +180,6 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
- free_percpu(vport->dev->tstats);
rtnl_unlock();
}
@@ -202,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb)
skb_dst_drop(skb);
nf_reset_ct(skb);
- secpath_reset(skb);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 903537a5da22..91a11067e458 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -82,6 +82,13 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
err = -ENODEV;
goto error_free_vport;
}
+ /* Ensure that the device exists and that the provided
+ * name is not one of its aliases.
+ */
+ if (strcmp(name, ovs_vport_name(vport))) {
+ err = -ENODEV;
+ goto error_put;
+ }
netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
if (vport->dev->flags & IFF_LOOPBACK ||
(vport->dev->type != ARPHRD_ETHER &&
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 972ae01a70f7..8732f6e51ae5 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
+ OVS_CB(skb)->probability = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3e71ca8ad8a7..9f67b9dd49f9 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -97,6 +97,8 @@ struct vport {
* @desired_ifindex: New vport's ifindex.
* @dp: New vport's datapath.
* @port_no: New vport's port number.
+ * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message,
+ * %NULL if none was supplied.
*/
struct vport_parms {
const char *name;
@@ -125,6 +127,8 @@ struct vport_parms {
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
+ * @owner: Module that implements this vport type.
+ * @list: List entry in the global list of vport types.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -144,6 +148,7 @@ struct vport_ops {
/**
* struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for
* a given vport.
+ * @syncp: Synchronization point for 64bit counters.
* @n_success: Number of packets that upcall to userspace succeed.
* @n_fail: Number of packets that upcall to userspace failed.
*/
@@ -164,6 +169,8 @@ void ovs_vport_free(struct vport *);
*
* @vport: vport to access
*
+ * Returns: A void pointer to a private data allocated in the @vport.
+ *
* If a nonzero size was passed in priv_size of vport_alloc() a private data
* area was allocated on creation. This allows that area to be accessed and
* used for any purpose needed by the vport implementer.
@@ -178,6 +185,8 @@ static inline void *vport_priv(const struct vport *vport)
*
* @priv: Start of private data area.
*
+ * Returns: A reference to a vport structure that contains @priv.
+ *
* It is sometimes useful to translate from a pointer to the private data
* area to the vport, such as in the case where the private data pointer is
* the result of a hash table lookup. @priv must point to the start of the