diff options
Diffstat (limited to 'net/openvswitch/meter.c')
| -rw-r--r-- | net/openvswitch/meter.c | 372 |
1 files changed, 268 insertions, 104 deletions
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 43849d752a1e..cc08e0403909 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -22,8 +19,6 @@ #include "datapath.h" #include "meter.h" -#define METER_HASH_BUCKETS 1024 - static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, @@ -42,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, }; +static u32 meter_hash(struct dp_meter_instance *ti, u32 id) +{ + return id % ti->n_meters; +} + static void ovs_meter_free(struct dp_meter *meter) { if (!meter) @@ -50,39 +50,160 @@ static void ovs_meter_free(struct dp_meter *meter) kfree_rcu(meter, rcu); } -static struct hlist_head *meter_hash_bucket(const struct datapath *dp, - u32 meter_id) -{ - return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; -} - /* Call with ovs_mutex or RCU read lock. */ -static struct dp_meter *lookup_meter(const struct datapath *dp, +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl, u32 meter_id) { + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); + u32 hash = meter_hash(ti, meter_id); struct dp_meter *meter; - struct hlist_head *head; - head = meter_hash_bucket(dp, meter_id); - hlist_for_each_entry_rcu(meter, head, dp_hash_node) { - if (meter->id == meter_id) - return meter; - } + meter = rcu_dereference_ovsl(ti->dp_meters[hash]); + if (meter && likely(meter->id == meter_id)) + return meter; + return NULL; } -static void attach_meter(struct datapath *dp, struct dp_meter *meter) +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) +{ + struct dp_meter_instance *ti; + + ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL); + if (!ti) + return NULL; + + ti->n_meters = size; + + return ti; +} + +static void dp_meter_instance_free(struct dp_meter_instance *ti) +{ + kvfree(ti); +} + +static void dp_meter_instance_free_rcu(struct rcu_head *rcu) +{ + struct dp_meter_instance *ti; + + ti = container_of(rcu, struct dp_meter_instance, rcu); + kvfree(ti); +} + +static int +dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size) +{ + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); + int n_meters = min(size, ti->n_meters); + struct dp_meter_instance *new_ti; + int i; + + new_ti = dp_meter_instance_alloc(size); + if (!new_ti) + return -ENOMEM; + + for (i = 0; i < n_meters; i++) + if (rcu_dereference_ovsl(ti->dp_meters[i])) + new_ti->dp_meters[i] = ti->dp_meters[i]; + + rcu_assign_pointer(tbl->ti, new_ti); + call_rcu(&ti->rcu, dp_meter_instance_free_rcu); + + return 0; +} + +static void dp_meter_instance_insert(struct dp_meter_instance *ti, + struct dp_meter *meter) +{ + u32 hash; + + hash = meter_hash(ti, meter->id); + rcu_assign_pointer(ti->dp_meters[hash], meter); +} + +static void dp_meter_instance_remove(struct dp_meter_instance *ti, + struct dp_meter *meter) +{ + u32 hash; + + hash = meter_hash(ti, meter->id); + RCU_INIT_POINTER(ti->dp_meters[hash], NULL); +} + +static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) { - struct hlist_head *head = meter_hash_bucket(dp, meter->id); + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); + u32 hash = meter_hash(ti, meter->id); + int err; + + /* In generally, slots selected should be empty, because + * OvS uses id-pool to fetch a available id. + */ + if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash]))) + return -EBUSY; + + dp_meter_instance_insert(ti, meter); + + /* That function is thread-safe. */ + tbl->count++; + if (tbl->count >= tbl->max_meters_allowed) { + err = -EFBIG; + goto attach_err; + } + + if (tbl->count >= ti->n_meters && + dp_meter_instance_realloc(tbl, ti->n_meters * 2)) { + err = -ENOMEM; + goto attach_err; + } - hlist_add_head_rcu(&meter->dp_hash_node, head); + return 0; + +attach_err: + dp_meter_instance_remove(ti, meter); + tbl->count--; + return err; } -static void detach_meter(struct dp_meter *meter) +static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) { + struct dp_meter_instance *ti; + ASSERT_OVSL(); - if (meter) - hlist_del_rcu(&meter->dp_hash_node); + if (!meter) + return 0; + + ti = rcu_dereference_ovsl(tbl->ti); + dp_meter_instance_remove(ti, meter); + + tbl->count--; + + /* Shrink the meter array if necessary. */ + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && + tbl->count <= (ti->n_meters / 4)) { + int half_size = ti->n_meters / 2; + int i; + + /* Avoid hash collision, don't move slots to other place. + * Make sure there are no references of meters in array + * which will be released. + */ + for (i = half_size; i < ti->n_meters; i++) + if (rcu_dereference_ovsl(ti->dp_meters[i])) + goto out; + + if (dp_meter_instance_realloc(tbl, half_size)) + goto shrink_err; + } + +out: + return 0; + +shrink_err: + dp_meter_instance_insert(ti, meter); + tbl->count++; + return -ENOMEM; } static struct sk_buff * @@ -90,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, struct ovs_header **ovs_reply_header) { struct sk_buff *skb; - struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_header = genl_info_userhdr(info); skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -118,16 +239,15 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) goto error; - if (!meter) - return 0; - if (nla_put(reply, OVS_METER_ATTR_STATS, - sizeof(struct ovs_flow_stats), &meter->stats) || - nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, + sizeof(struct ovs_flow_stats), &meter->stats)) + goto error; + + if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, OVS_METER_ATTR_PAD)) goto error; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto error; @@ -136,7 +256,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, for (i = 0; i < meter->n_bands; ++i, ++band) { struct nlattr *band_nla; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, sizeof(struct ovs_flow_stats), &band->stats)) @@ -152,25 +272,39 @@ error: static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) { - struct sk_buff *reply; + struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; struct nlattr *nla, *band_nla; - int err; + struct sk_buff *reply; + struct datapath *dp; + int err = -EMSGSIZE; reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, &ovs_reply_header); if (IS_ERR(reply)) return PTR_ERR(reply); - if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || - nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, + dp->meter_tbl.max_meters_allowed)) + goto exit_unlock; + + ovs_unlock(); + + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) goto nla_put_failure; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto nla_put_failure; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla) goto nla_put_failure; /* Currently only DROP band type is supported. */ @@ -182,9 +316,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_unlock: + ovs_unlock(); nla_put_failure: nlmsg_free(reply); - err = -EMSGSIZE; return err; } @@ -206,7 +341,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) return ERR_PTR(-EINVAL); /* Allocate and set up the meter before locking anything. */ - meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); + meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT); if (!meter) return ERR_PTR(-ENOMEM); @@ -227,9 +362,9 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; u32 band_max_delta_t; - err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, - nla_data(nla), nla_len(nla), band_policy, - NULL); + err = nla_parse_deprecated((struct nlattr **)&attr, + OVS_BAND_ATTR_MAX, nla_data(nla), + nla_len(nla), band_policy, NULL); if (err) goto exit_free_meter; @@ -254,8 +389,8 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) * * Start with a full bucket. */ - band->bucket = (band->burst_size + band->rate) * 1000; - band_max_delta_t = band->bucket / band->rate; + band->bucket = band->burst_size * 1000ULL; + band_max_delta_t = div_u64(band->bucket, band->rate); if (band_max_delta_t > meter->max_delta_t) meter->max_delta_t = band_max_delta_t; band++; @@ -274,18 +409,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) struct dp_meter *meter, *old_meter; struct sk_buff *reply; struct ovs_header *ovs_reply_header; - struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_header = genl_info_userhdr(info); + struct dp_meter_table *meter_tbl; struct datapath *dp; int err; u32 meter_id; bool failed; - if (!a[OVS_METER_ATTR_ID]) { - return -ENODEV; - } + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; meter = dp_meter_create(a); - if (IS_ERR_OR_NULL(meter)) + if (IS_ERR(meter)) return PTR_ERR(meter); reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, @@ -302,12 +437,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } + meter_tbl = &dp->meter_tbl; meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); - /* Cannot fail after this. */ - old_meter = lookup_meter(dp, meter_id); - detach_meter(old_meter); - attach_meter(dp, meter); + old_meter = lookup_meter(meter_tbl, meter_id); + err = detach_meter(meter_tbl, old_meter); + if (err) + goto exit_unlock; + + err = attach_meter(meter_tbl, meter); + if (err) + goto exit_free_old_meter; + ovs_unlock(); /* Build response with the meter_id and stats from @@ -329,6 +470,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_free_old_meter: + ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); @@ -339,14 +482,14 @@ exit_free_meter: static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) { - struct nlattr **a = info->attrs; - u32 meter_id; - struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; + struct nlattr **a = info->attrs; + struct dp_meter *meter; + struct sk_buff *reply; struct datapath *dp; + u32 meter_id; int err; - struct sk_buff *reply; - struct dp_meter *meter; if (!a[OVS_METER_ATTR_ID]) return -EINVAL; @@ -367,7 +510,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) } /* Locate meter, copy stats. */ - meter = lookup_meter(dp, meter_id); + meter = lookup_meter(&dp->meter_tbl, meter_id); if (!meter) { err = -ENOENT; goto exit_unlock; @@ -392,18 +535,17 @@ exit_unlock: static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) { - struct nlattr **a = info->attrs; - u32 meter_id; - struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; + struct nlattr **a = info->attrs; + struct dp_meter *old_meter; + struct sk_buff *reply; struct datapath *dp; + u32 meter_id; int err; - struct sk_buff *reply; - struct dp_meter *old_meter; if (!a[OVS_METER_ATTR_ID]) return -EINVAL; - meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, &ovs_reply_header); @@ -418,14 +560,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - old_meter = lookup_meter(dp, meter_id); + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + old_meter = lookup_meter(&dp->meter_tbl, meter_id); if (old_meter) { spin_lock_bh(&old_meter->lock); err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); WARN_ON(err); spin_unlock_bh(&old_meter->lock); - detach_meter(old_meter); + + err = detach_meter(&dp->meter_tbl, old_meter); + if (err) + goto exit_unlock; } + ovs_unlock(); ovs_meter_free(old_meter); genlmsg_end(reply, ovs_reply_header); @@ -445,16 +592,16 @@ exit_unlock: bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, u32 meter_id) { - struct dp_meter *meter; - struct dp_meter_band *band; long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); long long int long_delta_ms; - u32 delta_ms; - u32 cost; + struct dp_meter_band *band; + struct dp_meter *meter; int i, band_exceeded_max = -1; u32 band_exceeded_rate = 0; + u32 delta_ms; + u32 cost; - meter = lookup_meter(dp, meter_id); + meter = lookup_meter(&dp->meter_tbl, meter_id); /* Do not drop the packet when there is no meter. */ if (!meter) return false; @@ -463,6 +610,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, spin_lock(&meter->lock); long_delta_ms = (now_ms - meter->used); /* ms */ + if (long_delta_ms < 0) { + /* This condition means that we have several threads fighting + * for a meter lock, and the one who received the packets a + * bit later wins. Assuming that all racing threads received + * packets at the same time to avoid overflow. + */ + long_delta_ms = 0; + } /* Make sure delta_ms will not be too large, so that bucket will not * wrap around below. @@ -493,7 +648,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, long long int max_bucket_size; band = &meter->bands[i]; - max_bucket_size = (band->burst_size + band->rate) * 1000LL; + max_bucket_size = band->burst_size * 1000LL; band->bucket += delta_ms * band->rate; if (band->bucket > max_bucket_size) @@ -524,29 +679,29 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, return false; } -static struct genl_ops dp_meter_genl_ops[] = { +static const struct genl_small_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. - */ - .policy = meter_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. - */ - .policy = meter_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_meter_cmd_del }, }; @@ -560,10 +715,12 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, + .policy = meter_policy, .netnsok = true, .parallel_ops = true, - .ops = dp_meter_genl_ops, - .n_ops = ARRAY_SIZE(dp_meter_genl_ops), + .small_ops = dp_meter_genl_ops, + .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops), + .resv_start_op = OVS_METER_CMD_GET + 1, .mcgrps = &ovs_meter_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, @@ -571,32 +728,39 @@ struct genl_family dp_meter_genl_family __ro_after_init = { int ovs_meters_init(struct datapath *dp) { - int i; - - dp->meters = kmalloc_array(METER_HASH_BUCKETS, - sizeof(struct hlist_head), GFP_KERNEL); + struct dp_meter_table *tbl = &dp->meter_tbl; + struct dp_meter_instance *ti; + unsigned long free_mem_bytes; - if (!dp->meters) + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); + if (!ti) return -ENOMEM; - for (i = 0; i < METER_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->meters[i]); + /* Allow meters in a datapath to use ~3.12% of physical memory. */ + free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5); + tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter), + DP_METER_NUM_MAX); + if (!tbl->max_meters_allowed) + goto out_err; + + rcu_assign_pointer(tbl->ti, ti); + tbl->count = 0; return 0; + +out_err: + dp_meter_instance_free(ti); + return -ENOMEM; } void ovs_meters_exit(struct datapath *dp) { + struct dp_meter_table *tbl = &dp->meter_tbl; + struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti); int i; - for (i = 0; i < METER_HASH_BUCKETS; i++) { - struct hlist_head *head = &dp->meters[i]; - struct dp_meter *meter; - struct hlist_node *n; - - hlist_for_each_entry_safe(meter, n, head, dp_hash_node) - kfree(meter); - } + for (i = 0; i < ti->n_meters; i++) + ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i])); - kfree(dp->meters); + dp_meter_instance_free(ti); } |
