summaryrefslogtreecommitdiff
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig4
-rw-r--r--net/openvswitch/actions.c324
-rw-r--r--net/openvswitch/conntrack.c254
-rw-r--r--net/openvswitch/datapath.c167
-rw-r--r--net/openvswitch/datapath.h79
-rw-r--r--net/openvswitch/dp_notify.c2
-rw-r--r--net/openvswitch/drop.h41
-rw-r--r--net/openvswitch/flow.c21
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_netlink.c241
-rw-r--r--net/openvswitch/flow_netlink.h2
-rw-r--r--net/openvswitch/flow_table.c22
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/openvswitch/meter.c18
-rw-r--r--net/openvswitch/meter.h5
-rw-r--r--net/openvswitch/openvswitch_trace.h8
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c7
-rw-r--r--net/openvswitch/vport.c20
-rw-r--r--net/openvswitch/vport.h9
20 files changed, 663 insertions, 587 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 747d537a3f06..e6aaee92dba4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,11 +10,13 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
- select LIBCRC32C
+ depends on PSAMPLE || !PSAMPLE
select MPLS
+ select NET_CRC32C
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
+ select NF_CONNTRACK_OVS if NF_CONNTRACK
select NF_NAT_OVS if NF_NAT
help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ca3ebfdb3023..792ca44a461d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -17,71 +17,38 @@
#include <linux/if_vlan.h>
#include <net/dst.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
#include <net/sctp/checksum.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-struct deferred_action {
- struct sk_buff *skb;
- const struct nlattr *actions;
- int actions_len;
-
- /* Store pkt_key clone when creating deferred action. */
- struct sw_flow_key pkt_key;
-};
-
-#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
-struct ovs_frag_data {
- unsigned long dst;
- struct vport *vport;
- struct ovs_skb_cb cb;
- __be16 inner_protocol;
- u16 network_offset; /* valid only for MPLS */
- u16 vlan_tci;
- __be16 vlan_proto;
- unsigned int l2_len;
- u8 mac_proto;
- u8 l2_data[MAX_L2_LEN];
-};
-
-static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
-
-#define DEFERRED_ACTION_FIFO_SIZE 10
-#define OVS_RECURSION_LIMIT 5
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
-struct action_fifo {
- int head;
- int tail;
- /* Deferred action fifo queue storage. */
- struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
-};
-
-struct action_flow_keys {
- struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
-};
-
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
-static DEFINE_PER_CPU(int, exec_actions_level);
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
- int level = this_cpu_read(exec_actions_level);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
+ struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
+ int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
@@ -125,10 +92,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo;
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
struct deferred_action *da;
- fifo = this_cpu_ptr(action_fifos);
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
@@ -230,14 +196,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
+ int err;
+
if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
} else {
key->eth.vlan.tci = vlan->vlan_tci;
key->eth.vlan.tpid = vlan->vlan_tpid;
}
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ err = skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ skb_reset_mac_len(skb);
+ return err;
}
/* 'src' is already properly masked. */
@@ -309,11 +279,18 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nshhdr *nh)
+static noinline_for_stack int push_nsh(struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct nlattr *a)
{
+ u8 buffer[NSH_HDR_MAX_LEN];
+ struct nshhdr *nh = (struct nshhdr *)buffer;
int err;
+ err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
+ if (err)
+ return err;
+
err = nsh_push(skb, nh);
if (err)
return err;
@@ -595,69 +572,6 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct nlattr *a)
-{
- struct nshhdr *nh;
- size_t length;
- int err;
- u8 flags;
- u8 ttl;
- int i;
-
- struct ovs_key_nsh key;
- struct ovs_key_nsh mask;
-
- err = nsh_key_from_nlattr(a, &key, &mask);
- if (err)
- return err;
-
- /* Make sure the NSH base header is there */
- if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
- return -ENOMEM;
-
- nh = nsh_hdr(skb);
- length = nsh_hdr_len(nh);
-
- /* Make sure the whole NSH header is there */
- err = skb_ensure_writable(skb, skb_network_offset(skb) +
- length);
- if (unlikely(err))
- return err;
-
- nh = nsh_hdr(skb);
- skb_postpull_rcsum(skb, nh, length);
- flags = nsh_get_flags(nh);
- flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
- flow_key->nsh.base.flags = flags;
- ttl = nsh_get_ttl(nh);
- ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
- flow_key->nsh.base.ttl = ttl;
- nsh_set_flags_and_ttl(nh, flags, ttl);
- nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
- mask.base.path_hdr);
- flow_key->nsh.base.path_hdr = nh->path_hdr;
- switch (nh->mdtype) {
- case NSH_M_TYPE1:
- for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
- nh->md1.context[i] =
- OVS_MASKED(nh->md1.context[i], key.context[i],
- mask.context[i]);
- }
- memcpy(flow_key->nsh.context, nh->md1.context,
- sizeof(nh->md1.context));
- break;
- case NSH_M_TYPE2:
- memset(flow_key->nsh.context, 0,
- sizeof(flow_key->nsh.context));
- break;
- default:
- return -EINVAL;
- }
- skb_postpush_rcsum(skb, nh, length);
- return 0;
-}
-
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
@@ -776,11 +690,11 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
return -ENOMEM;
}
@@ -828,7 +742,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_frag_data_storage);
+ data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -851,6 +765,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru,
struct sw_flow_key *key)
{
+ enum ovs_drop_reason reason;
u16 orig_network_offset = 0;
if (eth_p_mpls(skb->protocol)) {
@@ -860,6 +775,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
+ reason = OVS_DROP_FRAG_L2_TOO_LONG;
goto err;
}
@@ -869,7 +785,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
@@ -886,7 +802,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
@@ -900,12 +816,13 @@ static void ovs_fragment(struct net *net, struct vport *vport,
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
+ reason = OVS_DROP_FRAG_INVALID_PROTO;
goto err;
}
return;
err:
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, reason);
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -913,7 +830,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport)) {
+ if (likely(vport &&
+ netif_running(vport->dev) &&
+ netif_carrier_ok(vport->dev))) {
u16 mru = OVS_CB(skb)->mru;
u32 cutlen = OVS_CB(skb)->cutlen;
@@ -932,10 +851,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ovs_fragment(net, vport, skb, mru, key);
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
}
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
}
}
@@ -952,16 +871,17 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.mru = OVS_CB(skb)->mru;
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
+ nla_for_each_nested(a, attr, rem) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
break;
case OVS_USERSPACE_ATTR_PID:
- if (dp->user_features &
- OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ if (OVS_CB(skb)->upcall_pid)
+ upcall.portid = OVS_CB(skb)->upcall_pid;
+ else if (dp->user_features &
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
upcall.portid =
ovs_dp_get_upcall_portid(dp,
smp_processor_id());
@@ -1009,7 +929,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
return clone_execute(dp, skb, key, 0, nla_data(actions),
nla_len(actions), true, false);
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
return 0;
}
@@ -1025,23 +945,33 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
+ u32 init_probability;
bool clone_flow_key;
+ int err;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
+ init_probability = OVS_CB(skb)->probability;
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
if (last)
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
+ OVS_CB(skb)->probability = arg->probability;
+
clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
+ err = clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
+
+ if (!last)
+ OVS_CB(skb)->probability = init_probability;
+
+ return err;
}
/* When 'last' is true, clone() should always consume the 'skb'.
@@ -1072,8 +1002,16 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
struct ovs_action_hash *hash_act = nla_data(attr);
u32 hash = 0;
- /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
- hash = skb_get_hash(skb);
+ if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+ /* OVS_HASH_ALG_L4 hasing type. */
+ hash = skb_get_hash(skb);
+ } else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
+ /* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't
+ * extend past an encapsulated header.
+ */
+ hash = __skb_get_hash_symmetric(skb);
+ }
+
hash = jhash_1word(hash, hash_act->hash_basis);
if (!hash)
hash = 0x1;
@@ -1129,10 +1067,6 @@ static int execute_masked_set_action(struct sk_buff *skb,
get_mask(a, struct ovs_key_ethernet *));
break;
- case OVS_KEY_ATTR_NSH:
- err = set_nsh(skb, flow_key, a);
- break;
-
case OVS_KEY_ATTR_IPV4:
err = set_ipv4(skb, flow_key, nla_data(a),
get_mask(a, struct ovs_key_ipv4 *));
@@ -1169,6 +1103,7 @@ static int execute_masked_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
+ case OVS_KEY_ATTR_NSH:
err = -EINVAL;
break;
}
@@ -1273,6 +1208,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+ const struct nlattr *a;
+ u32 rate;
+ int rem;
+
+ nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(a)) {
+ case OVS_PSAMPLE_ATTR_GROUP:
+ psample_group.group_num = nla_get_u32(a);
+ break;
+
+ case OVS_PSAMPLE_ATTR_COOKIE:
+ md.user_cookie = nla_data(a);
+ md.user_cookie_len = nla_len(a);
+ break;
+ }
+ }
+
+ psample_group.net = ovs_dp_get_net(dp);
+ md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+ md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+ md.rate_as_probability = 1;
+
+ rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+ psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{}
+#endif
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@@ -1288,6 +1261,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (trace_ovs_do_execute_action_enabled())
trace_ovs_do_execute_action(dp, skb, key, a, rem);
+ /* Actions that rightfully have to consume the skb should do it
+ * and return directly.
+ */
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
@@ -1323,6 +1299,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
output_userspace(dp, skb, key, a, attr,
len, OVS_CB(skb)->cutlen);
OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
break;
case OVS_ACTION_ATTR_HASH:
@@ -1419,17 +1399,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_eth(skb, key);
break;
- case OVS_ACTION_ATTR_PUSH_NSH: {
- u8 buffer[NSH_HDR_MAX_LEN];
- struct nshhdr *nh = (struct nshhdr *)buffer;
-
- err = nsh_hdr_from_nlattr(nla_data(a), nh,
- NSH_HDR_MAX_LEN);
- if (unlikely(err))
- break;
- err = push_nsh(skb, key, nh);
+ case OVS_ACTION_ATTR_PUSH_NSH:
+ err = push_nsh(skb, key, nla_data(a));
break;
- }
case OVS_ACTION_ATTR_POP_NSH:
err = pop_nsh(skb, key);
@@ -1437,7 +1409,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_METER:
if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_METER);
return 0;
}
break;
@@ -1468,15 +1440,33 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return dec_ttl_exception_handler(dp, skb,
key, a);
break;
+
+ case OVS_ACTION_ATTR_DROP: {
+ enum ovs_drop_reason reason = nla_get_u32(a)
+ ? OVS_DROP_EXPLICIT_WITH_ERROR
+ : OVS_DROP_EXPLICIT;
+
+ ovs_kfree_skb_reason(skb, reason);
+ return 0;
+ }
+
+ case OVS_ACTION_ATTR_PSAMPLE:
+ execute_psample(dp, skb, a);
+ OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
+ break;
}
if (unlikely(err)) {
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
return err;
}
}
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1510,16 +1500,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
clone = clone_flow_key ? clone_key(key) : key;
if (clone) {
int err = 0;
-
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(exec_actions_level);
+ __this_cpu_inc(ovs_pcpu_storage->exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1538,7 +1527,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
/* Out of per CPU action FIFO space. Drop the 'skb' and
* log an error.
*/
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
if (net_ratelimit()) {
if (actions) { /* Sample action */
@@ -1555,7 +1544,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1586,11 +1575,11 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(exec_actions_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
err = -ENETDOWN;
goto out;
}
@@ -1603,27 +1592,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
return err;
}
-
-int action_fifos_init(void)
-{
- action_fifos = alloc_percpu(struct action_fifo);
- if (!action_fifos)
- return -ENOMEM;
-
- flow_keys = alloc_percpu(struct action_flow_keys);
- if (!flow_keys) {
- free_percpu(action_fifos);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void action_fifos_exit(void)
-{
- free_percpu(action_fifos);
- free_percpu(flow_keys);
-}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c8b137649ca4..a0811e1fba65 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -9,6 +9,7 @@
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/static_key.h>
+#include <linux/string_helpers.h>
#include <net/ip.h>
#include <net/genetlink.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -28,6 +29,7 @@
#include <net/netfilter/nf_conntrack_act_ct.h>
#include "datapath.h"
+#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
@@ -166,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels)
{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+ struct nf_conn_labels *cl = NULL;
+ if (ct) {
+ if (ct->master && !nf_ct_is_confirmed(ct))
+ ct = ct->master;
+ cl = nf_ct_labels_find(ct);
+ }
if (cl)
memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
@@ -434,96 +441,26 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-static int handle_fragments(struct net *net, struct sw_flow_key *key,
- u16 zone, struct sk_buff *skb)
+static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, int family, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
- if (key->eth.type == htons(ETH_P_IP)) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(net, skb, user);
- if (err)
- return err;
-
- ovs_cb.mru = IPCB(skb)->frag_max_size;
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err) {
- if (err != -EINPROGRESS)
- kfree_skb(skb);
- return err;
- }
-
- key->ip.proto = ipv6_hdr(skb)->nexthdr;
- ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#endif
- } else {
- kfree_skb(skb);
- return -EPFNOSUPPORT;
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
+ if (err)
+ return err;
/* The key extracted from the fragment that completed this datagram
* likely didn't have an L4 header, so regenerate it.
*/
ovs_flow_key_update_l3l4(skb, key);
-
key->ip.frag = OVS_FRAG_TYPE_NONE;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
*OVS_CB(skb) = ovs_cb;
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -742,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+ if (err != NF_ACCEPT)
+ return err;
if (action & BIT(NF_NAT_MANIP_SRC))
ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
@@ -760,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
#endif
+static int verdict_to_errno(unsigned int verdict)
+{
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ return 0;
+ case NF_DROP:
+ return -EINVAL;
+ case NF_STOLEN:
+ return -EINPROGRESS;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet
* through conntrack.
@@ -798,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- return -ENOENT;
+ return verdict_to_errno(err);
/* Clear CT state NAT flags to mark that we have not yet done
* NAT after the nf_conntrack_in() call. We can actually clear
@@ -825,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* the key->ct_state.
*/
if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
+ (nf_ct_is_confirmed(ct) || info->commit)) {
+ int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
/* Userspace may decide to perform a ct lookup without a helper
@@ -858,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* - When committing an unconfirmed connection.
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
- return -EINVAL;
+ info->commit)) {
+ int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -882,36 +843,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -987,8 +928,8 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
}
static int ovs_ct_check_limit(struct net *net,
- const struct ovs_conntrack_info *info,
- const struct nf_conntrack_tuple *tuple)
+ const struct sk_buff *skb,
+ const struct ovs_conntrack_info *info)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
@@ -1001,8 +942,9 @@ static int ovs_ct_check_limit(struct net *net,
if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
return 0;
- connections = nf_conncount_count(net, ct_limit_info->data,
- &conncount_key, tuple, &info->zone);
+ connections = nf_conncount_count_skb(net, skb, info->family,
+ ct_limit_info->data,
+ &conncount_key);
if (connections > per_zone_limit)
return -ENOMEM;
@@ -1031,8 +973,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
if (!nf_ct_is_confirmed(ct)) {
- err = ovs_ct_check_limit(net, info,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ err = ovs_ct_check_limit(net, skb, info);
if (err) {
net_warn_ratelimited("openvswitch: zone: %u "
"exceeds conntrack limit\n",
@@ -1073,7 +1014,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (err)
return err;
- nf_conn_act_ct_ext_add(ct);
+ nf_conn_act_ct_ext_add(skb, ct, ctinfo);
} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
@@ -1084,40 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
/* This will take care of sending queued events even if the connection
* is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
+ err = nf_conntrack_confirm(skb);
- return 0;
-}
-
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int ovs_skb_network_trim(struct sk_buff *skb)
-{
- unsigned int len;
- int err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case htons(ETH_P_IPV6):
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
- if (err)
- kfree_skb(skb);
-
- return err;
+ return verdict_to_errno(err);
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1134,12 +1044,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
- err = ovs_skb_network_trim(skb);
- if (err)
+ err = nf_ct_skb_network_trim(skb, info->family);
+ if (err) {
+ kfree_skb(skb);
return err;
+ }
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
- err = handle_fragments(net, key, info->zone.id, skb);
+ err = ovs_ct_handle_fragments(net, key, info->zone.id,
+ info->family, skb);
if (err)
return err;
}
@@ -1149,9 +1062,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
else
err = ovs_ct_lookup(net, key, info, skb);
+ /* conntrack core returned NF_STOLEN */
+ if (err == -EINPROGRESS)
+ return err;
+
skb_push_rcsum(skb, nh_ofs);
if (err)
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
return err;
}
@@ -1383,7 +1300,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#endif
case OVS_CT_ATTR_HELPER:
*helper = nla_data(a);
- if (!memchr(*helper, '\0', nla_len(a))) {
+ if (!string_is_terminated(*helper, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack helper");
return -EINVAL;
}
@@ -1404,7 +1321,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
case OVS_CT_ATTR_TIMEOUT:
memcpy(info->timeout, nla_data(a), nla_len(a));
- if (!memchr(info->timeout, '\0', nla_len(a))) {
+ if (!string_is_terminated(info->timeout, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack timeout");
return -EINVAL;
}
@@ -1495,8 +1412,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
@@ -1517,7 +1435,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
@@ -1684,8 +1603,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1702,13 +1620,13 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
+ struct hlist_node *next;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
+ hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
kfree_rcu(ct_limit, rcu);
}
kfree(info->limits);
@@ -1719,7 +1637,7 @@ static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *skb;
skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1852,8 +1770,8 @@ static int __ovs_ct_limit_get_zone_limit(struct net *net,
zone_limit.limit = limit;
nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
- zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
- &ct_zone);
+ zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
+ &conncount_key);
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a71795355aec..d5b6e2002bc1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -15,7 +15,6 @@
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
-#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
@@ -35,11 +34,13 @@
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
+#include <net/gso.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
@@ -236,9 +237,6 @@ void ovs_dp_detach_port(struct vport *p)
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
- /* Free percpu memory */
- free_percpu(p->upcall_stats);
-
/* Then destroy it. */
ovs_vport_del(p);
}
@@ -246,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
+ bool ovs_pcpu_locked = false;
u64 *stats_counter;
u32 n_mask_hit;
u32 n_cache_hit;
@@ -267,7 +267,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ if (OVS_CB(skb)->upcall_pid)
+ upcall.portid = OVS_CB(skb)->upcall_pid;
+ else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
upcall.portid =
ovs_dp_get_upcall_portid(dp, smp_processor_id());
else
@@ -292,10 +294,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
+ /* This path can be invoked recursively: Use the current task to
+ * identify recursive invocation - the lock must be acquired only once.
+ * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
+ * Limit the locking to RT to avoid assigning `owner' if it can be
+ * avoided.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
+ ovs_pcpu->owner = current;
+ ovs_pcpu_locked = true;
+ }
+
error = ovs_execute_actions(dp, skb, sf_acts, key);
if (unlikely(error))
net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
ovs_dp_name(dp), error);
+ if (ovs_pcpu_locked) {
+ ovs_pcpu->owner = NULL;
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
+ }
stats_counter = &stats->n_hit;
@@ -591,7 +609,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
@@ -635,6 +653,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
!!(hash & OVS_PACKET_HASH_L4_BIT));
}
+ OVS_CB(packet)->upcall_pid =
+ nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0);
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -673,7 +694,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage->owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage->owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -697,6 +724,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
+ [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};
static const struct genl_small_ops dp_packet_genl_ops[] = {
@@ -968,7 +996,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow *flow = NULL, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
@@ -1004,14 +1032,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1047,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1154,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
@@ -1215,7 +1243,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow_key key;
struct sw_flow *flow;
struct sk_buff *reply = NULL;
@@ -1316,7 +1344,7 @@ error:
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1375,7 +1403,7 @@ unlock:
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1643,7 +1671,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
info->attrs);
if (IS_ERR(dp))
return;
@@ -1830,8 +1858,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1858,12 +1885,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_portids;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto err_destroy_vport;
- }
-
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
@@ -1876,8 +1897,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_vport:
- ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -1944,7 +1963,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -1977,7 +1997,8 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -2012,7 +2033,8 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
goto err_unlock_free;
@@ -2059,7 +2081,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
- [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+ [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2109,6 +2131,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
+ struct net *net_vport;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
@@ -2125,12 +2148,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
goto nla_put_failure;
- if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
+ rcu_read_lock();
+ net_vport = dev_net_rcu(vport->dev);
+ if (!net_eq(net, net_vport)) {
+ int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
- goto nla_put_failure;
+ goto nla_put_failure_unlock;
}
+ rcu_read_unlock();
ovs_vport_get_stats(vport, &vport_stats);
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
@@ -2151,6 +2177,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
genlmsg_end(skb, ovs_header);
return 0;
+nla_put_failure_unlock:
+ rcu_read_unlock();
nla_put_failure:
err = -EMSGSIZE;
error:
@@ -2255,7 +2283,7 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct vport_parms parms;
struct sk_buff *reply;
struct vport *vport;
@@ -2273,8 +2301,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
return -EOPNOTSUPP;
- port_no = a[OVS_VPORT_ATTR_PORT_NO]
- ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
if (port_no >= DP_MAX_PORTS)
return -EFBIG;
@@ -2311,8 +2338,8 @@ restart:
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
+ 0);
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2322,12 +2349,6 @@ restart:
goto exit_unlock_free;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto exit_unlock_free_vport;
- }
-
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2345,8 +2366,6 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
-exit_unlock_free_vport:
- ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
@@ -2365,7 +2384,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2421,7 +2440,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2464,7 +2483,7 @@ exit_unlock_free:
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *reply;
struct vport *vport;
int err;
@@ -2557,7 +2576,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
- [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
@@ -2720,6 +2739,39 @@ static struct pernet_operations ovs_net_ops = {
.size = sizeof(struct ovs_net),
};
+static const char * const ovs_drop_reasons[] = {
+#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
+ OVS_DROP_REASONS(S)
+#undef S
+};
+
+static struct drop_reason_list drop_reason_list_ovs = {
+ .reasons = ovs_drop_reasons,
+ .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
+};
+
+static int __init ovs_alloc_percpu_storage(void)
+{
+ unsigned int cpu;
+
+ ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+ if (!ovs_pcpu_storage)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct ovs_pcpu_storage *ovs_pcpu;
+
+ ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+ local_lock_init(&ovs_pcpu->bh_lock);
+ }
+ return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+ free_percpu(ovs_pcpu_storage);
+}
+
static int __init dp_init(void)
{
int err;
@@ -2729,13 +2781,13 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
- err = action_fifos_init();
+ err = ovs_alloc_percpu_storage();
if (err)
goto error;
err = ovs_internal_dev_rtnl_link_register();
if (err)
- goto error_action_fifos_exit;
+ goto error;
err = ovs_flow_init();
if (err)
@@ -2761,6 +2813,9 @@ static int __init dp_init(void)
if (err < 0)
goto error_unreg_netdev;
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+ &drop_reason_list_ovs);
+
return 0;
error_unreg_netdev:
@@ -2775,9 +2830,8 @@ error_flow_exit:
ovs_flow_exit();
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
-error_action_fifos_exit:
- action_fifos_exit();
error:
+ ovs_free_percpu_storage();
return err;
}
@@ -2787,11 +2841,12 @@ static void dp_cleanup(void)
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
- action_fifos_exit();
+ ovs_free_percpu_storage();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0cd29971a907..db0c3e69d66c 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
+#include <net/mpls.h>
#include "conntrack.h"
#include "flow.h"
@@ -29,8 +30,8 @@
* datapath.
* @n_hit: Number of received packets for which a matching flow was found in
* the flow table.
- * @n_miss: Number of received packets that had no matching flow in the flow
- * table. The sum of @n_hit and @n_miss is the number of packets that have
+ * @n_missed: Number of received packets that had no matching flow in the flow
+ * table. The sum of @n_hit and @n_missed is the number of packets that have
* been received by the datapath.
* @n_lost: Number of received packets that had no matching flow in the flow
* table that could not be sent to userspace (normally due to an overflow in
@@ -40,6 +41,7 @@
* up per packet.
* @n_cache_hit: The number of received packets that had their mask found using
* the mask cache.
+ * @syncp: Synchronization point for 64bit counters.
*/
struct dp_stats_percpu {
u64 n_hit;
@@ -74,8 +76,10 @@ struct dp_nlsk_pids {
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
- * @max_headroom: the maximum headroom of all vports in this datapath; it will
+ * @user_features: Bitmap of enabled %OVS_DP_F_* features.
+ * @max_headroom: The maximum headroom of all vports in this datapath; it will
* be used by all the internal vports in this dp.
+ * @meter_tbl: Meter table.
* @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
*
* Context: See the comment on locking at the top of datapath.c for additional
@@ -115,20 +119,29 @@ struct datapath {
* fragmented.
* @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
+ * @upcall_pid: Netlink socket PID to use for sending this packet to userspace;
+ * 0 means "not set" and default per-CPU or per-vport dispatch should be used.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
u16 acts_origlen;
u32 cutlen;
+ u32 probability;
+ u32 upcall_pid;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
/**
- * struct dp_upcall - metadata to include with a packet to send to userspace
+ * struct dp_upcall_info - metadata to include with a packet sent to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
* @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
+ * @actions: If nonnull, its variable-length value is passed to userspace as
+ * %OVS_PACKET_ATTR_ACTIONS.
+ * @actions_len: The length of the @actions.
* @portid: Netlink portid to which packet should be sent. If @portid is 0
* then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
@@ -149,6 +162,10 @@ struct dp_upcall_info {
* struct ovs_net - Per net-namespace data for ovs.
* @dps: List of datapaths to enable dumping them all out.
* Protected by genl_mutex.
+ * @dp_notify_work: A work notifier to handle port unregistering.
+ * @masks_rebalance: A work to periodically optimize flow table caches.
+ * @ct_limit_info: A hash table of conntrack zone connection limits.
+ * @xt_label: Whether connlables are configured for the network or not.
*/
struct ovs_net {
struct list_head dps;
@@ -157,11 +174,58 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
-
- /* Module reference for configuring conntrack. */
bool xt_label;
};
+#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+ unsigned long dst;
+ struct vport *vport;
+ struct ovs_skb_cb cb;
+ __be16 inner_protocol;
+ u16 network_offset; /* valid only for MPLS */
+ u16 vlan_tci;
+ __be16 vlan_proto;
+ unsigned int l2_len;
+ u8 mac_proto;
+ u8 l2_data[MAX_L2_LEN];
+};
+
+struct deferred_action {
+ struct sk_buff *skb;
+ const struct nlattr *actions;
+ int actions_len;
+
+ /* Store pkt_key clone when creating deferred action. */
+ struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
+
+struct action_fifo {
+ int head;
+ int tail;
+ /* Deferred action fifo queue storage. */
+ struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+struct action_flow_keys {
+ struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
+struct ovs_pcpu_storage {
+ struct action_fifo action_fifos;
+ struct action_flow_keys flow_keys;
+ struct ovs_frag_data frag_data;
+ int exec_level;
+ struct task_struct *owner;
+ local_lock_t bh_lock;
+};
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
+
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
* to send to userspace.
@@ -270,9 +334,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
void ovs_dp_notify_wq(struct work_struct *work);
-int action_fifos_init(void);
-void action_fifos_exit(void);
-
/* 'KEY' must not have any bits set outside of the 'MASK' */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 7af0cde8b293..a2af90ee99af 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -75,7 +75,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
/* schedule vport destroy, dev_put and genl notification */
ovs_net = net_generic(dev_net(dev), ovs_net_id);
- queue_work(system_wq, &ovs_net->dp_notify_work);
+ queue_work(system_percpu_wq, &ovs_net->dp_notify_work);
}
return NOTIFY_DONE;
diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h
new file mode 100644
index 000000000000..cedf9b7b5796
--- /dev/null
+++ b/net/openvswitch/drop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * OpenvSwitch drop reason list.
+ */
+
+#ifndef OPENVSWITCH_DROP_H
+#define OPENVSWITCH_DROP_H
+#include <linux/skbuff.h>
+#include <net/dropreason.h>
+
+#define OVS_DROP_REASONS(R) \
+ R(OVS_DROP_LAST_ACTION) \
+ R(OVS_DROP_ACTION_ERROR) \
+ R(OVS_DROP_EXPLICIT) \
+ R(OVS_DROP_EXPLICIT_WITH_ERROR) \
+ R(OVS_DROP_METER) \
+ R(OVS_DROP_RECURSION_LIMIT) \
+ R(OVS_DROP_DEFERRED_LIMIT) \
+ R(OVS_DROP_FRAG_L2_TOO_LONG) \
+ R(OVS_DROP_FRAG_INVALID_PROTO) \
+ R(OVS_DROP_CONNTRACK) \
+ R(OVS_DROP_IP_TTL) \
+ /* deliberate comment for trailing \ */
+
+enum ovs_drop_reason {
+ __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+#define ENUM(x) x,
+ OVS_DROP_REASONS(ENUM)
+#undef ENUM
+
+ OVS_DROP_MAX,
+};
+
+static inline void
+ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason)
+{
+ kfree_skb_reason(skb, (u32)reason);
+}
+
+#endif /* OPENVSWITCH_DROP_H */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e20d1a973417..66366982f604 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -107,7 +107,8 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
- cpumask_set_cpu(cpu, &flow->cpu_used_mask);
+ cpumask_set_cpu(cpu,
+ flow->cpu_used_mask);
goto unlock;
}
}
@@ -128,14 +129,13 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
- int cpu;
+ unsigned int cpu;
*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for_each_cpu(cpu, flow->cpu_used_mask) {
struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -156,10 +156,9 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
- int cpu;
+ unsigned int cpu;
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for_each_cpu(cpu, flow->cpu_used_mask) {
struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
@@ -558,7 +557,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*/
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -567,6 +565,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
+ memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
+
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
@@ -784,7 +784,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- u8 label_count = 1;
+ size_t label_count = 1;
memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
@@ -1038,7 +1038,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
- key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ key->recirc_id = tc_ext && !tc_ext->act_miss ?
+ tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 073ab73ffeaa..b5711aff6e76 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -229,7 +229,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
- struct cpumask cpu_used_mask;
+ struct cpumask *cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ead5418c126e..1cb4f97335d8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -38,6 +38,7 @@
#include <net/tun_proto.h>
#include <net/erspan.h>
+#include "drop.h"
#include "flow_netlink.h"
struct ovs_len_tbl {
@@ -47,6 +48,7 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
static bool actions_may_change_flow(const struct nlattr *actions)
{
@@ -61,6 +63,8 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_DROP:
+ case OVS_ACTION_ATTR_PSAMPLE:
break;
case OVS_ACTION_ATTR_CT:
@@ -149,6 +153,13 @@ static void update_range(struct sw_flow_match *match,
sizeof((match)->key->field)); \
} while (0)
+#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ bitmap_size(nbits), is_mask); \
+ bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \
+ value_p, nbits); \
+})
+
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
@@ -667,8 +678,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
bool log)
{
bool ttl = false, ipv4 = false, ipv6 = false;
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
bool info_bridge_mode = false;
- __be16 tun_flags = 0;
int opts_type = 0;
struct nlattr *a;
int rem;
@@ -694,7 +705,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_ID:
SW_FLOW_KEY_PUT(match, tun_key.tun_id,
nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
@@ -726,10 +737,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_TP_SRC:
SW_FLOW_KEY_PUT(match, tun_key.tp_src,
@@ -740,7 +751,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
nla_get_be16(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_OAM:
- tun_flags |= TUNNEL_OAM;
+ __set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
if (opts_type) {
@@ -752,7 +763,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
@@ -765,7 +776,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
@@ -781,7 +792,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
if (err)
return err;
- tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
@@ -795,7 +806,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
}
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
+ __IP_TUNNEL_FLAG_NUM, is_mask);
if (is_mask)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
else
@@ -820,13 +832,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
}
if (ipv4) {
if (info_bridge_mode) {
+ __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
+
if (match->key->tun_key.u.ipv4.src ||
match->key->tun_key.u.ipv4.dst ||
match->key->tun_key.tp_src ||
match->key->tun_key.tp_dst ||
match->key->tun_key.ttl ||
match->key->tun_key.tos ||
- tun_flags & ~TUNNEL_KEY) {
+ !ip_tunnel_flags_empty(tun_flags)) {
OVS_NLERR(log, "IPv4 tun info is not correct");
return -EINVAL;
}
@@ -871,7 +885,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len,
unsigned short tun_proto, u8 mode)
{
- if (output->tun_flags & TUNNEL_KEY &&
+ if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
OVS_TUNNEL_KEY_ATTR_PAD))
return -EMSGSIZE;
@@ -907,10 +921,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
if (output->tp_src &&
@@ -919,18 +933,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if (output->tp_dst &&
nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_OAM) &&
+ if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
if (swkey_tun_opts_len) {
- if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
+ output->tun_flags) &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ output->tun_flags) &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
@@ -1289,6 +1305,11 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return 0;
}
+/*
+ * Constructs NSH header 'nh' from attributes of OVS_ACTION_ATTR_PUSH_NSH,
+ * where 'nh' points to a memory block of 'size' bytes. It's assumed that
+ * attributes were previously validated with validate_push_nsh().
+ */
int nsh_hdr_from_nlattr(const struct nlattr *attr,
struct nshhdr *nh, size_t size)
{
@@ -1298,8 +1319,6 @@ int nsh_hdr_from_nlattr(const struct nlattr *attr,
u8 ttl = 0;
int mdlen = 0;
- /* validate_nsh has check this, so we needn't do duplicate check here
- */
if (size < NSH_BASE_HDR_LEN)
return -ENOBUFS;
@@ -1343,46 +1362,6 @@ int nsh_hdr_from_nlattr(const struct nlattr *attr,
return 0;
}
-int nsh_key_from_nlattr(const struct nlattr *attr,
- struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
-{
- struct nlattr *a;
- int rem;
-
- /* validate_nsh has check this, so we needn't do duplicate check here
- */
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
-
- switch (type) {
- case OVS_NSH_KEY_ATTR_BASE: {
- const struct ovs_nsh_key_base *base = nla_data(a);
- const struct ovs_nsh_key_base *base_mask = base + 1;
-
- nsh->base = *base;
- nsh_mask->base = *base_mask;
- break;
- }
- case OVS_NSH_KEY_ATTR_MD1: {
- const struct ovs_nsh_key_md1 *md1 = nla_data(a);
- const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
-
- memcpy(nsh->context, md1->context, sizeof(*md1));
- memcpy(nsh_mask->context, md1_mask->context,
- sizeof(*md1_mask));
- break;
- }
- case OVS_NSH_KEY_ATTR_MD2:
- /* Not supported yet */
- return -ENOTSUPP;
- default:
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
static int nsh_key_put_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool is_push_nsh, bool log)
@@ -1922,7 +1901,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
- return attr ? nla_get_u32(attr) : 0;
+ return nla_get_u32_default(attr, 0);
}
/**
@@ -2026,7 +2005,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
- if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+ if (ip_tunnel_is_options_present(output->tun_key.tun_flags))
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
@@ -2301,14 +2280,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
OVS_FLOW_ATTR_MASK, true, skb);
}
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
-
sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
if (!sfa)
return ERR_PTR(-ENOMEM);
@@ -2394,7 +2369,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
if (!actions)
return;
@@ -2464,18 +2439,9 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size exceeds max %u",
- MAX_ACTIONS_BUFSIZE);
- return ERR_PTR(-EMSGSIZE);
- }
- new_acts_size = MAX_ACTIONS_BUFSIZE;
- }
-
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
- return (void *)acts;
+ return ERR_CAST(acts);
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
@@ -2543,13 +2509,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
+ u32 mpls_label_count, bool log,
+ u32 depth);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2600,7 +2568,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2615,7 +2584,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
int start, action_start, err, rem;
@@ -2658,7 +2628,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
return action_start;
err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
- vlan_tci, mpls_label_count, log);
+ vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2672,7 +2643,8 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
int start, err;
u32 exec;
@@ -2692,7 +2664,8 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2742,7 +2715,8 @@ static int validate_geneve_opts(struct sw_flow_key *key)
opts_len -= len;
}
- key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ if (crit_opt)
+ __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);
return 0;
}
@@ -2750,6 +2724,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
+ IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
struct sw_flow_match match;
struct sw_flow_key key;
struct metadata_dst *tun_dst;
@@ -2757,9 +2732,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
int err = 0, start, opts_type;
- __be16 dst_opt_type;
- dst_opt_type = 0;
ovs_match_init(&match, &key, true, NULL);
opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
@@ -2771,13 +2744,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
err = validate_geneve_opts(&key);
if (err < 0)
return err;
- dst_opt_type = TUNNEL_GENEVE_OPT;
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
- dst_opt_type = TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
break;
case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- dst_opt_type = TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
break;
}
}
@@ -2828,17 +2802,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
-static bool validate_nsh(const struct nlattr *attr, bool is_mask,
- bool is_push_nsh, bool log)
+static bool validate_push_nsh(const struct nlattr *attr, bool log)
{
struct sw_flow_match match;
struct sw_flow_key key;
- int ret = 0;
ovs_match_init(&match, &key, true, NULL);
- ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
- is_push_nsh, log);
- return !ret;
+ return !nsh_key_put_from_nlattr(attr, &match, false, true, log);
}
/* Return false if there are any non-masked bits set.
@@ -2865,7 +2835,8 @@ static int validate_set(const struct nlattr *a,
size_t key_len;
/* There can be only one key in a action */
- if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+ if (!nla_ok(ovs_key, nla_len(a)) ||
+ nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
key_len = nla_len(ovs_key);
@@ -2985,13 +2956,6 @@ static int validate_set(const struct nlattr *a,
break;
- case OVS_KEY_ATTR_NSH:
- if (eth_type != htons(ETH_P_NSH))
- return -EINVAL;
- if (!validate_nsh(nla_data(a), masked, false, log))
- return -EINVAL;
- break;
-
default:
return -EINVAL;
}
@@ -3037,7 +3001,8 @@ static int validate_userspace(const struct nlattr *attr)
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
- error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
+ error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX,
+ nla_data(attr), nla_len(attr),
userspace_policy, NULL);
if (error)
return error;
@@ -3061,7 +3026,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
u32 mpls_label_count,
- bool log, bool last)
+ bool log, bool last, u32 depth)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3109,7 +3074,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3122,7 +3088,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3132,6 +3099,28 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return 0;
}
+static int validate_psample(const struct nlattr *attr)
+{
+ static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
+ [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
+ [OVS_PSAMPLE_ATTR_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
+ },
+ };
+ struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
+ int err;
+
+ if (!IS_ENABLED(CONFIG_PSAMPLE))
+ return -EOPNOTSUPP;
+
+ err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
+ if (err)
+ return err;
+
+ return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
+}
+
static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa, bool log)
{
@@ -3150,12 +3139,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+ return -EOVERFLOW;
+
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3182,6 +3175,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+ [OVS_ACTION_ATTR_DROP] = sizeof(u32),
+ [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3221,6 +3216,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
switch (act_hash->hash_alg) {
case OVS_HASH_ALG_L4:
+ fallthrough;
+ case OVS_HASH_ALG_SYM_L4:
break;
default:
return -EINVAL;
@@ -3350,7 +3347,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3392,7 +3389,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
}
mac_proto = MAC_PROTO_NONE;
- if (!validate_nsh(nla_data(a), false, true, true))
+ if (!validate_push_nsh(nla_data(a), log))
return -EINVAL;
break;
@@ -3421,7 +3418,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3435,7 +3432,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
eth_type,
vlan_tci,
mpls_label_count,
- log, last);
+ log, last,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3445,12 +3443,24 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_DEC_TTL:
err = validate_and_copy_dec_ttl(net, a, key, sfa,
eth_type, vlan_tci,
- mpls_label_count, log);
+ mpls_label_count, log,
+ depth);
if (err)
return err;
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_DROP:
+ if (!nla_is_last(a, rem))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_PSAMPLE:
+ err = validate_psample(a);
+ if (err)
+ return err;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
@@ -3476,7 +3486,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int err;
u32 mpls_label_count = 0;
- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
+ *sfa = nla_alloc_flow_actions(nla_len(attr));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
@@ -3485,7 +3495,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
+ key->eth.vlan.tci, mpls_label_count, log,
+ 0);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index fe7f77fc5f18..ff8cdecbe346 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -65,8 +65,6 @@ int ovs_nla_put_actions(const struct nlattr *attr,
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
-int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
- struct ovs_key_nsh *nsh_mask);
int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
size_t size);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 0a0e4c283f02..ffc72a741a50 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -79,6 +79,7 @@ struct sw_flow *ovs_flow_alloc(void)
return ERR_PTR(-ENOMEM);
flow->stats_last_writer = -1;
+ flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids];
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -91,7 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
- cpumask_set_cpu(0, &flow->cpu_used_mask);
+ cpumask_set_cpu(0, flow->cpu_used_mask);
return flow;
err:
@@ -106,16 +107,15 @@ int ovs_flow_tbl_count(const struct flow_table *table)
static void flow_free(struct sw_flow *flow)
{
- int cpu;
+ unsigned int cpu;
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
flow->sf_acts);
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+
+ for_each_cpu(cpu, flow->cpu_used_mask) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
@@ -219,16 +219,13 @@ static struct mask_array *tbl_mask_array_alloc(int size)
struct mask_array *new;
size = max(MASK_ARRAY_SIZE_MIN, size);
- new = kzalloc(sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) * size +
+ new = kzalloc(struct_size(new, masks, size) +
sizeof(u64) * size, GFP_KERNEL);
if (!new)
return NULL;
new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
- sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) *
- size);
+ struct_size(new, masks, size));
new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
sizeof(u64) * size,
@@ -1012,7 +1009,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask = flow_mask_find(tbl, new);
if (!mask) {
- /* Allocate a new mask if none exsits. */
+ /* Allocate a new mask if none exists. */
mask = mask_alloc();
if (!mask)
return -ENOMEM;
@@ -1196,7 +1193,8 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct sw_flow_stats *)),
+ * sizeof(struct sw_flow_stats *))
+ + cpumask_size(),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 9e659db78c05..f524dc3e4862 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -48,7 +48,7 @@ struct mask_array {
int count, max;
struct mask_array_stats __percpu *masks_usage_stats;
u64 *masks_usage_zero_cntr;
- struct sw_flow_mask __rcu *masks[];
+ struct sw_flow_mask __rcu *masks[] __counted_by(max);
};
struct table_instance {
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 6e38f68f88c2..cc08e0403909 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -69,9 +69,7 @@ static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
{
struct dp_meter_instance *ti;
- ti = kvzalloc(sizeof(*ti) +
- sizeof(struct dp_meter *) * size,
- GFP_KERNEL);
+ ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL);
if (!ti)
return NULL;
@@ -213,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -274,7 +272,7 @@ error:
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla;
struct sk_buff *reply;
@@ -411,7 +409,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct dp_meter *meter, *old_meter;
struct sk_buff *reply;
struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct dp_meter_table *meter_tbl;
struct datapath *dp;
int err;
@@ -449,7 +447,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = attach_meter(meter_tbl, meter);
if (err)
- goto exit_unlock;
+ goto exit_free_old_meter;
ovs_unlock();
@@ -472,6 +470,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_free_old_meter:
+ ovs_meter_free(old_meter);
exit_unlock:
ovs_unlock();
nlmsg_free(reply);
@@ -482,7 +482,7 @@ exit_free_meter:
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *meter;
@@ -535,7 +535,7 @@ exit_unlock:
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *old_meter;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index 0c33889a8515..8bbf983cd244 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
-#include <linux/genetlink.h>
#include <linux/skbuff.h>
#include <linux/bits.h>
@@ -39,13 +38,13 @@ struct dp_meter {
u32 max_delta_t;
u64 used;
struct ovs_flow_stats stats;
- struct dp_meter_band bands[];
+ struct dp_meter_band bands[] __counted_by(n_bands);
};
struct dp_meter_instance {
struct rcu_head rcu;
u32 n_meters;
- struct dp_meter __rcu *dp_meters[];
+ struct dp_meter __rcu *dp_meters[] __counted_by(n_meters);
};
struct dp_meter_table {
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
index 3eb35d9eb700..74d75aaebef4 100644
--- a/net/openvswitch/openvswitch_trace.h
+++ b/net/openvswitch/openvswitch_trace.h
@@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
@@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 74c88a6baa43..125d310871e9 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -85,7 +85,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_get_stats64 = dev_get_tstats64,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -103,19 +102,20 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
+ netdev->lltx = true;
netdev->needs_free_netdev = true;
netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+ netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ netdev->hw_features = netdev->features;
eth_hw_addr_random(netdev);
}
@@ -140,11 +140,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
- vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!vport->dev->tstats) {
- err = -ENOMEM;
- goto error_free_netdev;
- }
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
dev->ifindex = parms->desired_ifindex;
@@ -153,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->netns_immutable = true;
rtnl_lock();
err = register_netdevice(vport->dev);
@@ -169,8 +165,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
error_unlock:
rtnl_unlock();
- free_percpu(dev->tstats);
-error_free_netdev:
free_netdev(dev);
error_free_vport:
ovs_vport_free(vport);
@@ -186,7 +180,6 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
- free_percpu(vport->dev->tstats);
rtnl_unlock();
}
@@ -202,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb)
skb_dst_drop(skb);
nf_reset_ct(skb);
- secpath_reset(skb);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 903537a5da22..91a11067e458 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -82,6 +82,13 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
err = -ENODEV;
goto error_free_vport;
}
+ /* Ensure that the device exists and that the provided
+ * name is not one of its aliases.
+ */
+ if (strcmp(name, ovs_vport_name(vport))) {
+ err = -ENODEV;
+ goto error_put;
+ }
netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
if (vport->dev->flags & IFF_LOOPBACK ||
(vport->dev->type != ARPHRD_ETHER &&
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7e0f5c45b512..6bbbc16ab778 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -124,6 +124,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
+ int err;
alloc_size = sizeof(struct vport);
if (priv_size) {
@@ -135,17 +136,29 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
if (!vport)
return ERR_PTR(-ENOMEM);
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto err_kfree_vport;
+ }
+
vport->dp = parms->dp;
vport->port_no = parms->port_no;
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
- kfree(vport);
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto err_free_percpu;
}
return vport;
+
+err_free_percpu:
+ free_percpu(vport->upcall_stats);
+err_kfree_vport:
+ kfree(vport);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(ovs_vport_alloc);
@@ -165,6 +178,7 @@ void ovs_vport_free(struct vport *vport)
* it is safe to use raw dereference.
*/
kfree(rcu_dereference_raw(vport->upcall_portids));
+ free_percpu(vport->upcall_stats);
kfree(vport);
}
EXPORT_SYMBOL_GPL(ovs_vport_free);
@@ -486,6 +500,8 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
+ OVS_CB(skb)->probability = 0;
+ OVS_CB(skb)->upcall_pid = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3e71ca8ad8a7..9f67b9dd49f9 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -97,6 +97,8 @@ struct vport {
* @desired_ifindex: New vport's ifindex.
* @dp: New vport's datapath.
* @port_no: New vport's port number.
+ * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message,
+ * %NULL if none was supplied.
*/
struct vport_parms {
const char *name;
@@ -125,6 +127,8 @@ struct vport_parms {
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
+ * @owner: Module that implements this vport type.
+ * @list: List entry in the global list of vport types.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -144,6 +148,7 @@ struct vport_ops {
/**
* struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for
* a given vport.
+ * @syncp: Synchronization point for 64bit counters.
* @n_success: Number of packets that upcall to userspace succeed.
* @n_fail: Number of packets that upcall to userspace failed.
*/
@@ -164,6 +169,8 @@ void ovs_vport_free(struct vport *);
*
* @vport: vport to access
*
+ * Returns: A void pointer to a private data allocated in the @vport.
+ *
* If a nonzero size was passed in priv_size of vport_alloc() a private data
* area was allocated on creation. This allows that area to be accessed and
* used for any purpose needed by the vport implementer.
@@ -178,6 +185,8 @@ static inline void *vport_priv(const struct vport *vport)
*
* @priv: Start of private data area.
*
+ * Returns: A reference to a vport structure that contains @priv.
+ *
* It is sometimes useful to translate from a pointer to the private data
* area to the vport, such as in the case where the private data pointer is
* the result of a hash table lookup. @priv must point to the start of the