summaryrefslogtreecommitdiff
path: root/net/openvswitch/meter.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch/meter.c')
-rw-r--r--net/openvswitch/meter.c372
1 files changed, 268 insertions, 104 deletions
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 43849d752a1e..cc08e0403909 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -22,8 +19,6 @@
#include "datapath.h"
#include "meter.h"
-#define METER_HASH_BUCKETS 1024
-
static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
@@ -42,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
};
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+ return id % ti->n_meters;
+}
+
static void ovs_meter_free(struct dp_meter *meter)
{
if (!meter)
@@ -50,39 +50,160 @@ static void ovs_meter_free(struct dp_meter *meter)
kfree_rcu(meter, rcu);
}
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
- u32 meter_id)
-{
- return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
/* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
u32 meter_id)
{
+ struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 hash = meter_hash(ti, meter_id);
struct dp_meter *meter;
- struct hlist_head *head;
- head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
- if (meter->id == meter_id)
- return meter;
- }
+ meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+ if (meter && likely(meter->id == meter_id))
+ return meter;
+
return NULL;
}
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+ struct dp_meter_instance *ti;
+
+ ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL);
+ if (!ti)
+ return NULL;
+
+ ti->n_meters = size;
+
+ return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+ kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+ struct dp_meter_instance *ti;
+
+ ti = container_of(rcu, struct dp_meter_instance, rcu);
+ kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+ struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ int n_meters = min(size, ti->n_meters);
+ struct dp_meter_instance *new_ti;
+ int i;
+
+ new_ti = dp_meter_instance_alloc(size);
+ if (!new_ti)
+ return -ENOMEM;
+
+ for (i = 0; i < n_meters; i++)
+ if (rcu_dereference_ovsl(ti->dp_meters[i]))
+ new_ti->dp_meters[i] = ti->dp_meters[i];
+
+ rcu_assign_pointer(tbl->ti, new_ti);
+ call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+ return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+ struct dp_meter *meter)
+{
+ u32 hash;
+
+ hash = meter_hash(ti, meter->id);
+ rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+ struct dp_meter *meter)
+{
+ u32 hash;
+
+ hash = meter_hash(ti, meter->id);
+ RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
+}
+
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
{
- struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+ struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 hash = meter_hash(ti, meter->id);
+ int err;
+
+ /* In generally, slots selected should be empty, because
+ * OvS uses id-pool to fetch a available id.
+ */
+ if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+ return -EBUSY;
+
+ dp_meter_instance_insert(ti, meter);
+
+ /* That function is thread-safe. */
+ tbl->count++;
+ if (tbl->count >= tbl->max_meters_allowed) {
+ err = -EFBIG;
+ goto attach_err;
+ }
+
+ if (tbl->count >= ti->n_meters &&
+ dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+ err = -ENOMEM;
+ goto attach_err;
+ }
- hlist_add_head_rcu(&meter->dp_hash_node, head);
+ return 0;
+
+attach_err:
+ dp_meter_instance_remove(ti, meter);
+ tbl->count--;
+ return err;
}
-static void detach_meter(struct dp_meter *meter)
+static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
{
+ struct dp_meter_instance *ti;
+
ASSERT_OVSL();
- if (meter)
- hlist_del_rcu(&meter->dp_hash_node);
+ if (!meter)
+ return 0;
+
+ ti = rcu_dereference_ovsl(tbl->ti);
+ dp_meter_instance_remove(ti, meter);
+
+ tbl->count--;
+
+ /* Shrink the meter array if necessary. */
+ if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+ tbl->count <= (ti->n_meters / 4)) {
+ int half_size = ti->n_meters / 2;
+ int i;
+
+ /* Avoid hash collision, don't move slots to other place.
+ * Make sure there are no references of meters in array
+ * which will be released.
+ */
+ for (i = half_size; i < ti->n_meters; i++)
+ if (rcu_dereference_ovsl(ti->dp_meters[i]))
+ goto out;
+
+ if (dp_meter_instance_realloc(tbl, half_size))
+ goto shrink_err;
+ }
+
+out:
+ return 0;
+
+shrink_err:
+ dp_meter_instance_insert(ti, meter);
+ tbl->count++;
+ return -ENOMEM;
}
static struct sk_buff *
@@ -90,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -118,16 +239,15 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
goto error;
- if (!meter)
- return 0;
-
if (nla_put(reply, OVS_METER_ATTR_STATS,
- sizeof(struct ovs_flow_stats), &meter->stats) ||
- nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+ sizeof(struct ovs_flow_stats), &meter->stats))
+ goto error;
+
+ if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
OVS_METER_ATTR_PAD))
goto error;
- nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
if (!nla)
goto error;
@@ -136,7 +256,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
for (i = 0; i < meter->n_bands; ++i, ++band) {
struct nlattr *band_nla;
- band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
sizeof(struct ovs_flow_stats),
&band->stats))
@@ -152,25 +272,39 @@ error:
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{
- struct sk_buff *reply;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla;
- int err;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ int err = -EMSGSIZE;
reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
&ovs_reply_header);
if (IS_ERR(reply))
return PTR_ERR(reply);
- if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
- nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
+ dp->meter_tbl.max_meters_allowed))
+ goto exit_unlock;
+
+ ovs_unlock();
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
goto nla_put_failure;
- nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
if (!nla)
goto nla_put_failure;
- band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
if (!band_nla)
goto nla_put_failure;
/* Currently only DROP band type is supported. */
@@ -182,9 +316,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_unlock:
+ ovs_unlock();
nla_put_failure:
nlmsg_free(reply);
- err = -EMSGSIZE;
return err;
}
@@ -206,7 +341,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
return ERR_PTR(-EINVAL);
/* Allocate and set up the meter before locking anything. */
- meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
+ meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT);
if (!meter)
return ERR_PTR(-ENOMEM);
@@ -227,9 +362,9 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
u32 band_max_delta_t;
- err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
- nla_data(nla), nla_len(nla), band_policy,
- NULL);
+ err = nla_parse_deprecated((struct nlattr **)&attr,
+ OVS_BAND_ATTR_MAX, nla_data(nla),
+ nla_len(nla), band_policy, NULL);
if (err)
goto exit_free_meter;
@@ -254,8 +389,8 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
*
* Start with a full bucket.
*/
- band->bucket = (band->burst_size + band->rate) * 1000;
- band_max_delta_t = band->bucket / band->rate;
+ band->bucket = band->burst_size * 1000ULL;
+ band_max_delta_t = div_u64(band->bucket, band->rate);
if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t;
band++;
@@ -274,18 +409,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct dp_meter *meter, *old_meter;
struct sk_buff *reply;
struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
+ struct dp_meter_table *meter_tbl;
struct datapath *dp;
int err;
u32 meter_id;
bool failed;
- if (!a[OVS_METER_ATTR_ID]) {
- return -ENODEV;
- }
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
meter = dp_meter_create(a);
- if (IS_ERR_OR_NULL(meter))
+ if (IS_ERR(meter))
return PTR_ERR(meter);
reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
@@ -302,12 +437,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock;
}
+ meter_tbl = &dp->meter_tbl;
meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
- /* Cannot fail after this. */
- old_meter = lookup_meter(dp, meter_id);
- detach_meter(old_meter);
- attach_meter(dp, meter);
+ old_meter = lookup_meter(meter_tbl, meter_id);
+ err = detach_meter(meter_tbl, old_meter);
+ if (err)
+ goto exit_unlock;
+
+ err = attach_meter(meter_tbl, meter);
+ if (err)
+ goto exit_free_old_meter;
+
ovs_unlock();
/* Build response with the meter_id and stats from
@@ -329,6 +470,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_free_old_meter:
+ ovs_meter_free(old_meter);
exit_unlock:
ovs_unlock();
nlmsg_free(reply);
@@ -339,14 +482,14 @@ exit_free_meter:
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr **a = info->attrs;
- u32 meter_id;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
+ struct nlattr **a = info->attrs;
+ struct dp_meter *meter;
+ struct sk_buff *reply;
struct datapath *dp;
+ u32 meter_id;
int err;
- struct sk_buff *reply;
- struct dp_meter *meter;
if (!a[OVS_METER_ATTR_ID])
return -EINVAL;
@@ -367,7 +510,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
/* Locate meter, copy stats. */
- meter = lookup_meter(dp, meter_id);
+ meter = lookup_meter(&dp->meter_tbl, meter_id);
if (!meter) {
err = -ENOENT;
goto exit_unlock;
@@ -392,18 +535,17 @@ exit_unlock:
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr **a = info->attrs;
- u32 meter_id;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
+ struct nlattr **a = info->attrs;
+ struct dp_meter *old_meter;
+ struct sk_buff *reply;
struct datapath *dp;
+ u32 meter_id;
int err;
- struct sk_buff *reply;
- struct dp_meter *old_meter;
if (!a[OVS_METER_ATTR_ID])
return -EINVAL;
- meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
&ovs_reply_header);
@@ -418,14 +560,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock;
}
- old_meter = lookup_meter(dp, meter_id);
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+ old_meter = lookup_meter(&dp->meter_tbl, meter_id);
if (old_meter) {
spin_lock_bh(&old_meter->lock);
err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
WARN_ON(err);
spin_unlock_bh(&old_meter->lock);
- detach_meter(old_meter);
+
+ err = detach_meter(&dp->meter_tbl, old_meter);
+ if (err)
+ goto exit_unlock;
}
+
ovs_unlock();
ovs_meter_free(old_meter);
genlmsg_end(reply, ovs_reply_header);
@@ -445,16 +592,16 @@ exit_unlock:
bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, u32 meter_id)
{
- struct dp_meter *meter;
- struct dp_meter_band *band;
long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
long long int long_delta_ms;
- u32 delta_ms;
- u32 cost;
+ struct dp_meter_band *band;
+ struct dp_meter *meter;
int i, band_exceeded_max = -1;
u32 band_exceeded_rate = 0;
+ u32 delta_ms;
+ u32 cost;
- meter = lookup_meter(dp, meter_id);
+ meter = lookup_meter(&dp->meter_tbl, meter_id);
/* Do not drop the packet when there is no meter. */
if (!meter)
return false;
@@ -463,6 +610,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
spin_lock(&meter->lock);
long_delta_ms = (now_ms - meter->used); /* ms */
+ if (long_delta_ms < 0) {
+ /* This condition means that we have several threads fighting
+ * for a meter lock, and the one who received the packets a
+ * bit later wins. Assuming that all racing threads received
+ * packets at the same time to avoid overflow.
+ */
+ long_delta_ms = 0;
+ }
/* Make sure delta_ms will not be too large, so that bucket will not
* wrap around below.
@@ -493,7 +648,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
long long int max_bucket_size;
band = &meter->bands[i];
- max_bucket_size = (band->burst_size + band->rate) * 1000LL;
+ max_bucket_size = band->burst_size * 1000LL;
band->bucket += delta_ms * band->rate;
if (band->bucket > max_bucket_size)
@@ -524,29 +679,29 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
return false;
}
-static struct genl_ops dp_meter_genl_ops[] = {
+static const struct genl_small_ops dp_meter_genl_ops[] = {
{ .cmd = OVS_METER_CMD_FEATURES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = 0, /* OK for unprivileged users. */
- .policy = meter_policy,
.doit = ovs_meter_cmd_features
},
{ .cmd = OVS_METER_CMD_SET,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
- .policy = meter_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_meter_cmd_set,
},
{ .cmd = OVS_METER_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = 0, /* OK for unprivileged users. */
- .policy = meter_policy,
.doit = ovs_meter_cmd_get,
},
{ .cmd = OVS_METER_CMD_DEL,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
- .policy = meter_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_meter_cmd_del
},
};
@@ -560,10 +715,12 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
.name = OVS_METER_FAMILY,
.version = OVS_METER_VERSION,
.maxattr = OVS_METER_ATTR_MAX,
+ .policy = meter_policy,
.netnsok = true,
.parallel_ops = true,
- .ops = dp_meter_genl_ops,
- .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .small_ops = dp_meter_genl_ops,
+ .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .resv_start_op = OVS_METER_CMD_GET + 1,
.mcgrps = &ovs_meter_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -571,32 +728,39 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
int ovs_meters_init(struct datapath *dp)
{
- int i;
-
- dp->meters = kmalloc_array(METER_HASH_BUCKETS,
- sizeof(struct hlist_head), GFP_KERNEL);
+ struct dp_meter_table *tbl = &dp->meter_tbl;
+ struct dp_meter_instance *ti;
+ unsigned long free_mem_bytes;
- if (!dp->meters)
+ ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+ if (!ti)
return -ENOMEM;
- for (i = 0; i < METER_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->meters[i]);
+ /* Allow meters in a datapath to use ~3.12% of physical memory. */
+ free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
+ tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
+ DP_METER_NUM_MAX);
+ if (!tbl->max_meters_allowed)
+ goto out_err;
+
+ rcu_assign_pointer(tbl->ti, ti);
+ tbl->count = 0;
return 0;
+
+out_err:
+ dp_meter_instance_free(ti);
+ return -ENOMEM;
}
void ovs_meters_exit(struct datapath *dp)
{
+ struct dp_meter_table *tbl = &dp->meter_tbl;
+ struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
int i;
- for (i = 0; i < METER_HASH_BUCKETS; i++) {
- struct hlist_head *head = &dp->meters[i];
- struct dp_meter *meter;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
- kfree(meter);
- }
+ for (i = 0; i < ti->n_meters; i++)
+ ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i]));
- kfree(dp->meters);
+ dp_meter_instance_free(ti);
}