summaryrefslogtreecommitdiff
path: root/net/ipv4/nexthop.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/nexthop.c')
-rw-r--r--net/ipv4/nexthop.c643
1 files changed, 483 insertions, 160 deletions
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bbff68b5b5d4..4397e89d3123 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -26,6 +26,9 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \
+ NHA_OP_FLAG_DUMP_HW_STATS)
+
static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ID] = { .type = NLA_U32 },
[NHA_GROUP] = { .type = NLA_BINARY },
@@ -37,10 +40,17 @@ static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ENCAP] = { .type = NLA_NESTED },
[NHA_FDB] = { .type = NLA_FLAG },
[NHA_RES_GROUP] = { .type = NLA_NESTED },
+ [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true),
};
static const struct nla_policy rtm_nh_policy_get[] = {
[NHA_ID] = { .type = NLA_U32 },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
+};
+
+static const struct nla_policy rtm_nh_policy_del[] = {
+ [NHA_ID] = { .type = NLA_U32 },
};
static const struct nla_policy rtm_nh_policy_dump[] = {
@@ -48,6 +58,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
[NHA_FDB] = { .type = NLA_FLAG },
+ [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ NHA_OP_FLAGS_DUMP_ALL),
};
static const struct nla_policy rtm_nh_res_policy_new[] = {
@@ -92,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
+ nh_info->id = nhi->nh_parent->id;
nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
@@ -131,13 +144,13 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
info->nh_grp->num_nh = num_nh;
info->nh_grp->is_fdb = nhg->fdb_nh;
+ info->nh_grp->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
nhi = rtnl_dereference(nhge->nh->nh_info);
- info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhi);
@@ -162,6 +175,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
return -ENOMEM;
info->nh_res_table->num_nh_buckets = num_nh_buckets;
+ info->nh_res_table->hw_stats = nhg->hw_stats;
for (i = 0; i < num_nh_buckets; i++) {
struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
@@ -393,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
struct nh_notifier_info info = {
.net = net,
.extack = extack,
+ .id = nh->id,
};
struct nh_group *nhg;
int err;
@@ -474,6 +489,7 @@ static void nexthop_free_group(struct nexthop *nh)
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
WARN_ON(!list_empty(&nhge->nh_list));
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
}
@@ -525,6 +541,7 @@ static struct nexthop *nexthop_alloc(void)
INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
INIT_LIST_HEAD(&nh->fdb_list);
+ spin_lock_init(&nh->lock);
}
return nh;
}
@@ -654,14 +671,213 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge)
+{
+ struct nh_grp_entry_stats *cpu_stats;
+
+ cpu_stats = get_cpu_ptr(nhge->stats);
+ u64_stats_update_begin(&cpu_stats->syncp);
+ u64_stats_inc(&cpu_stats->packets);
+ u64_stats_update_end(&cpu_stats->syncp);
+ put_cpu_ptr(cpu_stats);
+}
+
+static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge,
+ u64 *ret_packets)
+{
+ int i;
+
+ *ret_packets = 0;
+
+ for_each_possible_cpu(i) {
+ struct nh_grp_entry_stats *cpu_stats;
+ unsigned int start;
+ u64 packets;
+
+ cpu_stats = per_cpu_ptr(nhge->stats, i);
+ do {
+ start = u64_stats_fetch_begin(&cpu_stats->syncp);
+ packets = u64_stats_read(&cpu_stats->packets);
+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+ *ret_packets += packets;
+ }
+}
+
+static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info,
+ const struct nexthop *nh)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ info->id = nh->id;
+ info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS;
+ info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats,
+ stats, nhg->num_nh),
+ GFP_KERNEL);
+ if (!info->nh_grp_hw_stats)
+ return -ENOMEM;
+
+ info->nh_grp_hw_stats->num_nh = nhg->num_nh;
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ info->nh_grp_hw_stats->stats[i].id = nhge->nh->id;
+ }
+
+ return 0;
+}
+
+static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info)
+{
+ kfree(info->nh_grp_hw_stats);
+}
+
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets)
+{
+ info->hw_stats_used = true;
+ info->stats[nh_idx].packets += delta_packets;
+}
+EXPORT_SYMBOL(nh_grp_hw_stats_report_delta);
+
+static void nh_grp_hw_stats_apply_update(struct nexthop *nh,
+ struct nh_notifier_info *info)
+{
+ struct nh_group *nhg;
+ int i;
+
+ ASSERT_RTNL();
+ nhg = rtnl_dereference(nh->nh_grp);
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets;
+ }
+}
+
+static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
+{
+ struct nh_notifier_info info = {
+ .net = nh->net,
+ };
+ struct net *net = nh->net;
+ int err;
+
+ if (nexthop_notifiers_is_empty(net)) {
+ *hw_stats_used = false;
+ return 0;
+ }
+
+ err = nh_notifier_grp_hw_stats_init(&info, nh);
+ if (err)
+ return err;
+
+ err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
+ &info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ nh_grp_hw_stats_apply_update(nh, &info);
+ *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used;
+
+ nh_notifier_grp_hw_stats_fini(&info);
+ return notifier_to_errno(err);
+}
+
+static int nla_put_nh_group_stats_entry(struct sk_buff *skb,
+ struct nh_grp_entry *nhge,
+ u32 op_flags)
+{
+ struct nlattr *nest;
+ u64 packets;
+
+ nh_grp_entry_stats_read(nhge, &packets);
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) ||
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS,
+ packets + nhge->packets_hw))
+ goto nla_put_failure;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+ nhge->packets_hw))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nlattr *nest;
+ bool hw_stats_used;
+ int err;
+ int i;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats))
+ goto err_out;
+
+ if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+ nhg->hw_stats) {
+ err = nh_grp_hw_stats_update(nh, &hw_stats_used);
+ if (err)
+ goto out;
+
+ if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used))
+ goto err_out;
+ }
+
+ nest = nla_nest_start(skb, NHA_GROUP_STATS);
+ if (!nest)
+ goto err_out;
+
+ for (i = 0; i < nhg->num_nh; i++)
+ if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i],
+ op_flags))
+ goto cancel_out;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+cancel_out:
+ nla_nest_cancel(skb, nest);
+err_out:
+ err = -EMSGSIZE;
+out:
+ return err;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
+ u32 op_flags, u32 *resp_op_flags)
{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
u16 group_type = 0;
+ u16 weight;
int i;
+ *resp_op_flags |= NHA_OP_FLAG_RESP_GRP_RESVD_0;
+
if (nhg->hash_threshold)
group_type = NEXTHOP_GRP_TYPE_MPATH;
else if (nhg->resilient)
@@ -676,14 +892,23 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
- p->id = nhg->nh_entries[i].nh->id;
- p->weight = nhg->nh_entries[i].weight - 1;
- p += 1;
+ weight = nhg->nh_entries[i].weight - 1;
+
+ *p++ = (struct nexthop_grp) {
+ .id = nhg->nh_entries[i].nh->id,
+ .weight = weight,
+ .weight_high = weight >> 8,
+ };
}
if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
goto nla_put_failure;
+ if (op_flags & NHA_OP_FLAG_DUMP_STATS &&
+ (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) ||
+ nla_put_nh_group_stats(skb, nh, op_flags)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -691,7 +916,8 @@ nla_put_failure:
}
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
- int event, u32 portid, u32 seq, unsigned int nlflags)
+ int event, u32 portid, u32 seq, unsigned int nlflags,
+ u32 op_flags)
{
struct fib6_nh *fib6_nh;
struct fib_nh *fib_nh;
@@ -715,10 +941,12 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ u32 resp_op_flags = 0;
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nhg))
+ if (nla_put_nh_group(skb, nh, op_flags, &resp_op_flags) ||
+ nla_put_u32(skb, NHA_OP_FLAGS, resp_op_flags))
goto nla_put_failure;
goto out;
}
@@ -831,7 +1059,9 @@ static size_t nh_nlmsg_size(struct nexthop *nh)
sz += nla_total_size(4); /* NHA_ID */
if (nh->is_group)
- sz += nh_nlmsg_size_grp(nh);
+ sz += nh_nlmsg_size_grp(nh) +
+ nla_total_size(4) + /* NHA_OP_FLAGS */
+ 0;
else
sz += nh_nlmsg_size_single(nh);
@@ -849,7 +1079,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
if (!skb)
goto errout;
- err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+ err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -861,8 +1091,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
}
static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
@@ -982,8 +1211,7 @@ static void nexthop_bucket_notify(struct nh_res_table *res_table,
rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
@@ -1045,10 +1273,8 @@ static int nh_check_attr_group(struct net *net,
u16 nh_grp_type, struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
- u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int i, j;
- u8 nhg_fdb = 0;
if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
@@ -1061,11 +1287,14 @@ static int nh_check_attr_group(struct net *net,
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
- if (nhg[i].resvd1 || nhg[i].resvd2) {
- NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+ if (nhg[i].resvd2) {
+ NL_SET_ERR_MSG(extack, "Reserved field in nexthop_grp must be 0");
return -EINVAL;
}
- if (nhg[i].weight > 254) {
+ if (nexthop_grp_weight(&nhg[i]) == 0) {
+ /* 0xffff got passed in, representing weight of 0x10000,
+ * which is too heavy.
+ */
NL_SET_ERR_MSG(extack, "Invalid value for weight");
return -EINVAL;
}
@@ -1077,10 +1306,41 @@ static int nh_check_attr_group(struct net *net,
}
}
- if (tb[NHA_FDB])
- nhg_fdb = 1;
nhg = nla_data(tb[NHA_GROUP]);
- for (i = 0; i < len; ++i) {
+ for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
+ if (!tb[i])
+ continue;
+ switch (i) {
+ case NHA_HW_STATS_ENABLE:
+ case NHA_FDB:
+ continue;
+ case NHA_RES_GROUP:
+ if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+ continue;
+ break;
+ }
+ NL_SET_ERR_MSG(extack,
+ "No other attributes can be set in nexthop groups");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ u8 nh_family = AF_UNSPEC;
+ struct nexthop_grp *nhg;
+ unsigned int len;
+ unsigned int i;
+ u8 nhg_fdb;
+
+ len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg);
+ nhg = nla_data(tb[NHA_GROUP]);
+ nhg_fdb = !!tb[NHA_FDB];
+
+ for (i = 0; i < len; i++) {
struct nexthop *nh;
bool is_fdb_nh;
@@ -1100,21 +1360,6 @@ static int nh_check_attr_group(struct net *net,
return -EINVAL;
}
}
- for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
- if (!tb[i])
- continue;
- switch (i) {
- case NHA_FDB:
- continue;
- case NHA_RES_GROUP:
- if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
- continue;
- break;
- }
- NL_SET_ERR_MSG(extack,
- "No other attributes can be set in nexthop groups");
- return -EINVAL;
- }
return 0;
}
@@ -1176,6 +1421,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1185,7 +1431,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
+ struct nh_grp_entry *nhge0 = NULL;
int i;
if (nhg->fdb_nh)
@@ -1200,16 +1446,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
if (!nexthop_is_good_nh(nhge->nh))
continue;
- if (!rc)
- rc = nhge->nh;
+ if (!nhge0)
+ nhge0 = nhge;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
- return rc ? : nhg->nh_entries[0].nh;
+ if (!nhge0)
+ nhge0 = &nhg->nh_entries[0];
+ nh_grp_entry_stats_inc(nhge0);
+ return nhge0->nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -1225,6 +1475,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
bucket = &res_table->nh_buckets[bucket_index];
nh_res_bucket_set_busy(bucket);
nhge = rcu_dereference(bucket->nh_entry);
+ nh_grp_entry_stats_inc(nhge);
return nhge->nh;
}
@@ -1305,12 +1556,12 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
if (nh->is_group) {
struct nh_group *nhg;
- nhg = rtnl_dereference(nh->nh_grp);
+ nhg = rcu_dereference_rtnl(nh->nh_grp);
if (nhg->has_v4)
goto no_v4_nh;
is_fdb_nh = nhg->fdb_nh;
} else {
- nhi = rtnl_dereference(nh->nh_info);
+ nhi = rcu_dereference_rtnl(nh->nh_info);
if (nhi->family == AF_INET)
goto no_v4_nh;
is_fdb_nh = nhi->fdb_nh;
@@ -1654,9 +1905,9 @@ static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
static void nh_res_group_rebalance(struct nh_group *nhg,
struct nh_res_table *res_table)
{
- int prev_upper_bound = 0;
- int total = 0;
- int w = 0;
+ u16 prev_upper_bound = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
INIT_LIST_HEAD(&res_table->uw_nh_entries);
@@ -1666,11 +1917,12 @@ static void nh_res_group_rebalance(struct nh_group *nhg,
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u16 upper_bound;
+ u64 btw;
w += nhge->weight;
- upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
- total);
+ btw = ((u64)res_table->num_nh_buckets) * w;
+ upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total);
nhge->res.wants_buckets = upper_bound - prev_upper_bound;
prev_upper_bound = upper_bound;
@@ -1736,8 +1988,8 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
static void nh_hthr_group_rebalance(struct nh_group *nhg)
{
- int total = 0;
- int w = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
for (i = 0; i < nhg->num_nh; ++i)
@@ -1745,7 +1997,7 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg)
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u32 upper_bound;
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
@@ -1798,6 +2050,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = true;
list_del(&nhges[i].nh_list);
+ new_nhges[j].stats = nhges[i].stats;
new_nhges[j].nh_parent = nhges[i].nh_parent;
new_nhges[j].nh = nhges[i].nh;
new_nhges[j].weight = nhges[i].weight;
@@ -1813,6 +2066,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
rcu_assign_pointer(nhp->nh_grp, newg);
list_del(&nhge->nh_list);
+ free_percpu(nhge->stats);
nexthop_put(nhge->nh);
/* Removal of a NH from a resilient group is notified through
@@ -1865,7 +2119,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
/* not called for nexthop replace */
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
- struct fib6_info *f6i, *tmp;
+ struct fib6_info *f6i;
bool do_flush = false;
struct fib_info *fi;
@@ -1876,13 +2130,24 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
if (do_flush)
fib_flush(net);
- /* ip6_del_rt removes the entry from this list hence the _safe */
- list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+ spin_lock_bh(&nh->lock);
+
+ nh->dead = true;
+
+ while (!list_empty(&nh->f6i_list)) {
+ f6i = list_first_entry(&nh->f6i_list, typeof(*f6i), nh_list);
+
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
+
+ spin_unlock_bh(&nh->lock);
ipv6_stub->ip6_del_rt(net, f6i,
!READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
+
+ spin_lock_bh(&nh->lock);
}
+
+ spin_unlock_bh(&nh->lock);
}
static void __remove_nexthop(struct net *net, struct nexthop *nh,
@@ -2439,9 +2704,6 @@ static struct nexthop *nexthop_create_group(struct net *net,
int err;
int i;
- if (WARN_ON(!num_nh))
- return ERR_PTR(-EINVAL);
-
nh = nexthop_alloc();
if (!nh)
return ERR_PTR(-ENOMEM);
@@ -2477,8 +2739,16 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (nhi->family == AF_INET)
nhg->has_v4 = true;
+ nhg->nh_entries[i].stats =
+ netdev_alloc_pcpu_stats(struct nh_grp_entry_stats);
+ if (!nhg->nh_entries[i].stats) {
+ err = -ENOMEM;
+ nexthop_put(nhe);
+ goto out_no_nh;
+ }
nhg->nh_entries[i].nh = nhe;
- nhg->nh_entries[i].weight = entry[i].weight + 1;
+ nhg->nh_entries[i].weight = nexthop_grp_weight(&entry[i]);
+
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
nhg->nh_entries[i].nh_parent = nh;
}
@@ -2509,6 +2779,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
if (cfg->nh_fdb)
nhg->fdb_nh = 1;
+ if (cfg->nh_hw_stats)
+ nhg->hw_stats = true;
+
rcu_assign_pointer(nh->nh_grp, nhg);
return nh;
@@ -2516,6 +2789,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
out_no_nh:
for (i--; i >= 0; --i) {
list_del(&nhg->nh_entries[i].nh_list);
+ free_percpu(nhg->nh_entries[i].stats);
nexthop_put(nhg->nh_entries[i].nh);
}
@@ -2663,11 +2937,6 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
struct nexthop *nh;
int err;
- if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
- NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
- return ERR_PTR(-EINVAL);
- }
-
if (!cfg->nh_id) {
cfg->nh_id = nh_find_unused_id(net);
if (!cfg->nh_id) {
@@ -2764,19 +3033,13 @@ static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
}
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nh_config *cfg,
+ struct nlmsghdr *nlh, struct nlattr **tb,
+ struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
- struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
int err;
- err = nlmsg_parse(nlh, sizeof(*nhm), tb,
- ARRAY_SIZE(rtm_nh_policy_new) - 1,
- rtm_nh_policy_new, extack);
- if (err < 0)
- return err;
-
err = -EINVAL;
if (nhm->resvd || nhm->nh_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
@@ -2841,7 +3104,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
NL_SET_ERR_MSG(extack, "Invalid group type");
goto out;
}
- err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
+
+ err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new),
cfg->nh_grp_type, extack);
if (err)
goto out;
@@ -2850,6 +3114,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
cfg, extack);
+ if (tb[NHA_HW_STATS_ENABLE])
+ cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]);
+
/* no other attributes should be set */
goto out;
}
@@ -2871,25 +3138,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
- if (!cfg->nh_fdb && tb[NHA_OIF]) {
- cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
- if (cfg->nh_ifindex)
- cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
-
- if (!cfg->dev) {
- NL_SET_ERR_MSG(extack, "Invalid device index");
- goto out;
- } else if (!(cfg->dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- } else if (!netif_carrier_ok(cfg->dev)) {
- NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
- err = -ENETDOWN;
- goto out;
- }
- }
-
err = -EINVAL;
if (tb[NHA_GATEWAY]) {
struct nlattr *gwa = tb[NHA_GATEWAY];
@@ -2941,34 +3189,92 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
+ if (tb[NHA_HW_STATS_ENABLE]) {
+ NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops");
+ goto out;
+ }
err = 0;
out:
return err;
}
+static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr **tb,
+ struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ if (tb[NHA_GROUP])
+ return nh_check_attr_group_rtnl(net, tb, extack);
+
+ if (tb[NHA_OIF]) {
+ cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+ if (cfg->nh_ifindex)
+ cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+
+ if (!cfg->dev) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ return -EINVAL;
+ }
+
+ if (!(cfg->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ return -ENETDOWN;
+ }
+
+ if (!netif_carrier_ok(cfg->dev)) {
+ NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+ return -ENETDOWN;
+ }
+ }
+
+ return 0;
+}
+
/* rtnl */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
struct net *net = sock_net(skb->sk);
struct nh_config cfg;
struct nexthop *nh;
int err;
- err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
- if (!err) {
- nh = nexthop_add(net, &cfg, extack);
- if (IS_ERR(nh))
- err = PTR_ERR(nh);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_new) - 1,
+ rtm_nh_policy_new, extack);
+ if (err < 0)
+ goto out;
+
+ err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack);
+ if (err)
+ goto out;
+
+ if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) {
+ NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
+ err = -EINVAL;
+ goto out;
}
+ rtnl_net_lock(net);
+
+ err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack);
+ if (err)
+ goto unlock;
+
+ nh = nexthop_add(net, &cfg, extack);
+ if (IS_ERR(nh))
+ err = PTR_ERR(nh);
+
+unlock:
+ rtnl_net_unlock(net);
+out:
return err;
}
-static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
- struct nlattr **tb, u32 *id,
- struct netlink_ext_ack *extack)
+static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u32 *id, u32 *op_flags,
+ struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
@@ -2988,28 +3294,17 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- return 0;
-}
-
-static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
- int err;
-
- err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
- ARRAY_SIZE(rtm_nh_policy_get) - 1,
- rtm_nh_policy_get, extack);
- if (err < 0)
- return err;
+ if (op_flags)
+ *op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
- return __nh_valid_get_del_req(nlh, tb, id, extack);
+ return 0;
}
/* rtnl */
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)];
struct net *net = sock_net(skb->sk);
struct nl_info nlinfo = {
.nlh = nlh,
@@ -3020,30 +3315,48 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack);
if (err)
return err;
+ rtnl_net_lock(net);
+
nh = nexthop_find_by_id(net, id);
- if (!nh)
- return -ENOENT;
+ if (nh)
+ remove_nexthop(net, nh, &nlinfo);
+ else
+ err = -ENOENT;
- remove_nexthop(net, nh, &nlinfo);
+ rtnl_net_unlock(net);
- return 0;
+ return err;
}
/* rtnl */
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
struct net *net = sock_net(in_skb->sk);
struct sk_buff *skb = NULL;
struct nexthop *nh;
+ u32 op_flags;
int err;
u32 id;
- err = nh_valid_get_del_req(nlh, &id, extack);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack);
if (err)
return err;
@@ -3058,7 +3371,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_free;
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ nlh->nlmsg_seq, 0, op_flags);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
goto errout_free;
@@ -3079,6 +3392,7 @@ struct nh_dump_filter {
bool group_filter;
bool fdb_filter;
u32 res_bucket_nh_id;
+ u32 op_flags;
};
static bool nh_dump_filtered(struct nexthop *nh,
@@ -3166,6 +3480,8 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
+ filter->op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
+
return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
}
@@ -3223,7 +3539,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags);
}
/* rtnl */
@@ -3241,10 +3557,6 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
&rtm_dump_nexthop_cb, &filter);
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3439,11 +3751,6 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
&rtm_dump_nexthop_bucket_cb, &dd);
}
- if (err < 0) {
- if (likely(skb->len))
- err = skb->len;
- }
-
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3483,7 +3790,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
- err = __nh_valid_get_del_req(nlh, tb, id, extack);
+ err = nh_valid_get_del_req(nlh, tb, id, NULL, extack);
if (err)
return err;
@@ -3631,17 +3938,24 @@ unlock:
}
EXPORT_SYMBOL(register_nexthop_notifier);
-int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
nb);
- if (err)
- goto unlock;
- nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
-unlock:
+ if (!err)
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(__unregister_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = __unregister_nexthop_notifier(net, nb);
rtnl_unlock();
return err;
}
@@ -3737,16 +4051,17 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
+static void __net_exit nexthop_net_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct net *net;
+ ASSERT_RTNL_NET(net);
+ flush_all_nexthops(net);
+}
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- flush_all_nexthops(net);
- kfree(net->nexthop.devhash);
- }
- rtnl_unlock();
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ kfree(net->nexthop.devhash);
+ net->nexthop.devhash = NULL;
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3764,7 +4079,27 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit_batch = nexthop_net_exit_batch,
+ .exit = nexthop_net_exit,
+ .exit_rtnl = nexthop_net_exit_rtnl,
+};
+
+static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
+ .dumpit = rtm_dump_nexthop},
+ {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
+ .dumpit = rtm_dump_nexthop_bucket},
+ {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
+ {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
};
static int __init nexthop_init(void)
@@ -3773,19 +4108,7 @@ static int __init nexthop_init(void)
register_netdevice_notifier(&nh_netdev_notifier);
- rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
- rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
- rtm_dump_nexthop_bucket, 0);
+ rtnl_register_many(nexthop_rtnl_msg_handlers);
return 0;
}