diff options
Diffstat (limited to 'net/ipv4/nexthop.c')
-rw-r--r-- | net/ipv4/nexthop.c | 449 |
1 files changed, 361 insertions, 88 deletions
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bbff68b5b5d4..09a3d73b45ba 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -26,6 +26,9 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, #define NH_DEV_HASHBITS 8 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) +#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \ + NHA_OP_FLAG_DUMP_HW_STATS) + static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_GROUP] = { .type = NLA_BINARY }, @@ -37,10 +40,17 @@ static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_FDB] = { .type = NLA_FLAG }, [NHA_RES_GROUP] = { .type = NLA_NESTED }, + [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true), }; static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAGS_DUMP_ALL), +}; + +static const struct nla_policy rtm_nh_policy_del[] = { + [NHA_ID] = { .type = NLA_U32 }, }; static const struct nla_policy rtm_nh_policy_dump[] = { @@ -48,6 +58,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAGS_DUMP_ALL), }; static const struct nla_policy rtm_nh_res_policy_new[] = { @@ -92,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, else if (nh_info->gw_family == AF_INET6) nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + nh_info->id = nhi->nh_parent->id; nh_info->is_reject = nhi->reject_nh; nh_info->is_fdb = nhi->fdb_nh; nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; @@ -131,13 +144,13 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, info->nh_grp->num_nh = num_nh; info->nh_grp->is_fdb = nhg->fdb_nh; + info->nh_grp->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; nhi = rtnl_dereference(nhge->nh->nh_info); - info->nh_grp->nh_entries[i].id = nhge->nh->id; info->nh_grp->nh_entries[i].weight = nhge->weight; __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, nhi); @@ -162,6 +175,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, return -ENOMEM; info->nh_res_table->num_nh_buckets = num_nh_buckets; + info->nh_res_table->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; @@ -393,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, struct nh_notifier_info info = { .net = net, .extack = extack, + .id = nh->id, }; struct nh_group *nhg; int err; @@ -474,6 +489,7 @@ static void nexthop_free_group(struct nexthop *nh) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; WARN_ON(!list_empty(&nhge->nh_list)); + free_percpu(nhge->stats); nexthop_put(nhge->nh); } @@ -654,14 +670,213 @@ nla_put_failure: return -EMSGSIZE; } -static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) +static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) +{ + struct nh_grp_entry_stats *cpu_stats; + + cpu_stats = get_cpu_ptr(nhge->stats); + u64_stats_update_begin(&cpu_stats->syncp); + u64_stats_inc(&cpu_stats->packets); + u64_stats_update_end(&cpu_stats->syncp); + put_cpu_ptr(cpu_stats); +} + +static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, + u64 *ret_packets) +{ + int i; + + *ret_packets = 0; + + for_each_possible_cpu(i) { + struct nh_grp_entry_stats *cpu_stats; + unsigned int start; + u64 packets; + + cpu_stats = per_cpu_ptr(nhge->stats, i); + do { + start = u64_stats_fetch_begin(&cpu_stats->syncp); + packets = u64_stats_read(&cpu_stats->packets); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + *ret_packets += packets; + } +} + +static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info, + const struct nexthop *nh) { + struct nh_group *nhg; + int i; + + ASSERT_RTNL(); + nhg = rtnl_dereference(nh->nh_grp); + + info->id = nh->id; + info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS; + info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats, + stats, nhg->num_nh), + GFP_KERNEL); + if (!info->nh_grp_hw_stats) + return -ENOMEM; + + info->nh_grp_hw_stats->num_nh = nhg->num_nh; + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp_hw_stats->stats[i].id = nhge->nh->id; + } + + return 0; +} + +static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp_hw_stats); +} + +void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, + unsigned int nh_idx, + u64 delta_packets) +{ + info->hw_stats_used = true; + info->stats[nh_idx].packets += delta_packets; +} +EXPORT_SYMBOL(nh_grp_hw_stats_report_delta); + +static void nh_grp_hw_stats_apply_update(struct nexthop *nh, + struct nh_notifier_info *info) +{ + struct nh_group *nhg; + int i; + + ASSERT_RTNL(); + nhg = rtnl_dereference(nh->nh_grp); + + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets; + } +} + +static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used) +{ + struct nh_notifier_info info = { + .net = nh->net, + }; + struct net *net = nh->net; + int err; + + if (nexthop_notifiers_is_empty(net)) { + *hw_stats_used = false; + return 0; + } + + err = nh_notifier_grp_hw_stats_init(&info, nh); + if (err) + return err; + + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, + NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, + &info); + + /* Cache whatever we got, even if there was an error, otherwise the + * successful stats retrievals would get lost. + */ + nh_grp_hw_stats_apply_update(nh, &info); + *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used; + + nh_notifier_grp_hw_stats_fini(&info); + return notifier_to_errno(err); +} + +static int nla_put_nh_group_stats_entry(struct sk_buff *skb, + struct nh_grp_entry *nhge, + u32 op_flags) +{ + struct nlattr *nest; + u64 packets; + + nh_grp_entry_stats_read(nhge, &packets); + + nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, + packets + nhge->packets_hw)) + goto nla_put_failure; + + if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW, + nhge->packets_hw)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh, + u32 op_flags) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + struct nlattr *nest; + bool hw_stats_used; + int err; + int i; + + if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats)) + goto err_out; + + if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && + nhg->hw_stats) { + err = nh_grp_hw_stats_update(nh, &hw_stats_used); + if (err) + goto out; + + if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used)) + goto err_out; + } + + nest = nla_nest_start(skb, NHA_GROUP_STATS); + if (!nest) + goto err_out; + + for (i = 0; i < nhg->num_nh; i++) + if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i], + op_flags)) + goto cancel_out; + + nla_nest_end(skb, nest); + return 0; + +cancel_out: + nla_nest_cancel(skb, nest); +err_out: + err = -EMSGSIZE; +out: + return err; +} + +static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, + u32 op_flags, u32 *resp_op_flags) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nexthop_grp *p; size_t len = nhg->num_nh * sizeof(*p); struct nlattr *nla; u16 group_type = 0; + u16 weight; int i; + *resp_op_flags |= NHA_OP_FLAG_RESP_GRP_RESVD_0; + if (nhg->hash_threshold) group_type = NEXTHOP_GRP_TYPE_MPATH; else if (nhg->resilient) @@ -676,14 +891,23 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) p = nla_data(nla); for (i = 0; i < nhg->num_nh; ++i) { - p->id = nhg->nh_entries[i].nh->id; - p->weight = nhg->nh_entries[i].weight - 1; - p += 1; + weight = nhg->nh_entries[i].weight - 1; + + *p++ = (struct nexthop_grp) { + .id = nhg->nh_entries[i].nh->id, + .weight = weight, + .weight_high = weight >> 8, + }; } if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) goto nla_put_failure; + if (op_flags & NHA_OP_FLAG_DUMP_STATS && + (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) || + nla_put_nh_group_stats(skb, nh, op_flags))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -691,7 +915,8 @@ nla_put_failure: } static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, - int event, u32 portid, u32 seq, unsigned int nlflags) + int event, u32 portid, u32 seq, unsigned int nlflags, + u32 op_flags) { struct fib6_nh *fib6_nh; struct fib_nh *fib_nh; @@ -715,10 +940,12 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + u32 resp_op_flags = 0; if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) goto nla_put_failure; - if (nla_put_nh_group(skb, nhg)) + if (nla_put_nh_group(skb, nh, op_flags, &resp_op_flags) || + nla_put_u32(skb, NHA_OP_FLAGS, resp_op_flags)) goto nla_put_failure; goto out; } @@ -831,7 +1058,9 @@ static size_t nh_nlmsg_size(struct nexthop *nh) sz += nla_total_size(4); /* NHA_ID */ if (nh->is_group) - sz += nh_nlmsg_size_grp(nh); + sz += nh_nlmsg_size_grp(nh) + + nla_total_size(4) + /* NHA_OP_FLAGS */ + 0; else sz += nh_nlmsg_size_single(nh); @@ -849,7 +1078,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) if (!skb) goto errout; - err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); + err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0); if (err < 0) { /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -861,8 +1090,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) info->nlh, gfp_any()); return; errout: - if (err < 0) - rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); + rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); } static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket) @@ -982,8 +1210,7 @@ static void nexthop_bucket_notify(struct nh_res_table *res_table, rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL); return; errout: - if (err < 0) - rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err); + rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err); } static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, @@ -1061,11 +1288,14 @@ static int nh_check_attr_group(struct net *net, nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { - if (nhg[i].resvd1 || nhg[i].resvd2) { - NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); + if (nhg[i].resvd2) { + NL_SET_ERR_MSG(extack, "Reserved field in nexthop_grp must be 0"); return -EINVAL; } - if (nhg[i].weight > 254) { + if (nexthop_grp_weight(&nhg[i]) == 0) { + /* 0xffff got passed in, representing weight of 0x10000, + * which is too heavy. + */ NL_SET_ERR_MSG(extack, "Invalid value for weight"); return -EINVAL; } @@ -1104,6 +1334,7 @@ static int nh_check_attr_group(struct net *net, if (!tb[i]) continue; switch (i) { + case NHA_HW_STATS_ENABLE: case NHA_FDB: continue; case NHA_RES_GROUP: @@ -1176,6 +1407,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; + nh_grp_entry_stats_inc(nhge); return nhge->nh; } @@ -1185,7 +1417,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) { - struct nexthop *rc = NULL; + struct nh_grp_entry *nhge0 = NULL; int i; if (nhg->fdb_nh) @@ -1200,16 +1432,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) if (!nexthop_is_good_nh(nhge->nh)) continue; - if (!rc) - rc = nhge->nh; + if (!nhge0) + nhge0 = nhge; if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; + nh_grp_entry_stats_inc(nhge); return nhge->nh; } - return rc ? : nhg->nh_entries[0].nh; + if (!nhge0) + nhge0 = &nhg->nh_entries[0]; + nh_grp_entry_stats_inc(nhge0); + return nhge0->nh; } static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) @@ -1225,6 +1461,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) bucket = &res_table->nh_buckets[bucket_index]; nh_res_bucket_set_busy(bucket); nhge = rcu_dereference(bucket->nh_entry); + nh_grp_entry_stats_inc(nhge); return nhge->nh; } @@ -1654,9 +1891,9 @@ static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table) static void nh_res_group_rebalance(struct nh_group *nhg, struct nh_res_table *res_table) { - int prev_upper_bound = 0; - int total = 0; - int w = 0; + u16 prev_upper_bound = 0; + u32 total = 0; + u32 w = 0; int i; INIT_LIST_HEAD(&res_table->uw_nh_entries); @@ -1666,11 +1903,12 @@ static void nh_res_group_rebalance(struct nh_group *nhg, for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; - int upper_bound; + u16 upper_bound; + u64 btw; w += nhge->weight; - upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w, - total); + btw = ((u64)res_table->num_nh_buckets) * w; + upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total); nhge->res.wants_buckets = upper_bound - prev_upper_bound; prev_upper_bound = upper_bound; @@ -1736,8 +1974,8 @@ static void replace_nexthop_grp_res(struct nh_group *oldg, static void nh_hthr_group_rebalance(struct nh_group *nhg) { - int total = 0; - int w = 0; + u32 total = 0; + u32 w = 0; int i; for (i = 0; i < nhg->num_nh; ++i) @@ -1745,7 +1983,7 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg) for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; - int upper_bound; + u32 upper_bound; w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; @@ -1798,6 +2036,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = true; list_del(&nhges[i].nh_list); + new_nhges[j].stats = nhges[i].stats; new_nhges[j].nh_parent = nhges[i].nh_parent; new_nhges[j].nh = nhges[i].nh; new_nhges[j].weight = nhges[i].weight; @@ -1813,6 +2052,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); + free_percpu(nhge->stats); nexthop_put(nhge->nh); /* Removal of a NH from a resilient group is notified through @@ -2477,8 +2717,16 @@ static struct nexthop *nexthop_create_group(struct net *net, if (nhi->family == AF_INET) nhg->has_v4 = true; + nhg->nh_entries[i].stats = + netdev_alloc_pcpu_stats(struct nh_grp_entry_stats); + if (!nhg->nh_entries[i].stats) { + err = -ENOMEM; + nexthop_put(nhe); + goto out_no_nh; + } nhg->nh_entries[i].nh = nhe; - nhg->nh_entries[i].weight = entry[i].weight + 1; + nhg->nh_entries[i].weight = nexthop_grp_weight(&entry[i]); + list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); nhg->nh_entries[i].nh_parent = nh; } @@ -2509,6 +2757,9 @@ static struct nexthop *nexthop_create_group(struct net *net, if (cfg->nh_fdb) nhg->fdb_nh = 1; + if (cfg->nh_hw_stats) + nhg->hw_stats = true; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -2516,6 +2767,7 @@ static struct nexthop *nexthop_create_group(struct net *net, out_no_nh: for (i--; i >= 0; --i) { list_del(&nhg->nh_entries[i].nh_list); + free_percpu(nhg->nh_entries[i].stats); nexthop_put(nhg->nh_entries[i].nh); } @@ -2850,6 +3102,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP], cfg, extack); + if (tb[NHA_HW_STATS_ENABLE]) + cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]); + /* no other attributes should be set */ goto out; } @@ -2941,6 +3196,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } + if (tb[NHA_HW_STATS_ENABLE]) { + NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops"); + goto out; + } err = 0; out: @@ -2966,9 +3225,9 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, - struct nlattr **tb, u32 *id, - struct netlink_ext_ack *extack) +static int nh_valid_get_del_req(const struct nlmsghdr *nlh, + struct nlattr **tb, u32 *id, u32 *op_flags, + struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); @@ -2988,28 +3247,17 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, return -EINVAL; } - return 0; -} - -static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; - int err; - - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, - ARRAY_SIZE(rtm_nh_policy_get) - 1, - rtm_nh_policy_get, extack); - if (err < 0) - return err; + if (op_flags) + *op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0); - return __nh_valid_get_del_req(nlh, tb, id, extack); + return 0; } /* rtnl */ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)]; struct net *net = sock_net(skb->sk); struct nl_info nlinfo = { .nlh = nlh, @@ -3020,7 +3268,13 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 id; - err = nh_valid_get_del_req(nlh, &id, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del, + extack); + if (err < 0) + return err; + + err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack); if (err) return err; @@ -3037,13 +3291,21 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; struct net *net = sock_net(in_skb->sk); struct sk_buff *skb = NULL; struct nexthop *nh; + u32 op_flags; int err; u32 id; - err = nh_valid_get_del_req(nlh, &id, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get, + extack); + if (err < 0) + return err; + + err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); if (err) return err; @@ -3058,7 +3320,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_free; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + nlh->nlmsg_seq, 0, op_flags); if (err < 0) { WARN_ON(err == -EMSGSIZE); goto errout_free; @@ -3079,6 +3341,7 @@ struct nh_dump_filter { bool group_filter; bool fdb_filter; u32 res_bucket_nh_id; + u32 op_flags; }; static bool nh_dump_filtered(struct nexthop *nh, @@ -3166,6 +3429,8 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, if (err < 0) return err; + filter->op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0); + return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } @@ -3223,7 +3488,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); + cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags); } /* rtnl */ @@ -3241,10 +3506,6 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &rtm_dump_nexthop_cb, &filter); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; - } cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -3439,11 +3700,6 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb, &rtm_dump_nexthop_bucket_cb, &dd); } - if (err < 0) { - if (likely(skb->len)) - err = skb->len; - } - cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); return err; @@ -3483,7 +3739,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, if (err < 0) return err; - err = __nh_valid_get_del_req(nlh, tb, id, extack); + err = nh_valid_get_del_req(nlh, tb, id, NULL, extack); if (err) return err; @@ -3631,17 +3887,24 @@ unlock: } EXPORT_SYMBOL(register_nexthop_notifier); -int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; - rtnl_lock(); err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, nb); - if (err) - goto unlock; - nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); -unlock: + if (!err) + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); + return err; +} +EXPORT_SYMBOL(__unregister_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = __unregister_nexthop_notifier(net, nb); rtnl_unlock(); return err; } @@ -3737,16 +4000,20 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) +static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { + ASSERT_RTNL(); + list_for_each_entry(net, net_list, exit_list) flush_all_nexthops(net); - kfree(net->nexthop.devhash); - } - rtnl_unlock(); +} + +static void __net_exit nexthop_net_exit(struct net *net) +{ + kfree(net->nexthop.devhash); + net->nexthop.devhash = NULL; } static int __net_init nexthop_net_init(struct net *net) @@ -3764,7 +4031,25 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, - .exit_batch = nexthop_net_exit_batch, + .exit = nexthop_net_exit, + .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, +}; + +static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop}, + {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop}, + {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop, + .dumpit = rtm_dump_nexthop}, + {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket, + .dumpit = rtm_dump_nexthop_bucket}, + {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, }; static int __init nexthop_init(void) @@ -3773,19 +4058,7 @@ static int __init nexthop_init(void) register_netdevice_notifier(&nh_netdev_notifier); - rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, - rtm_dump_nexthop, 0); - - rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, - rtm_dump_nexthop_bucket, 0); + rtnl_register_many(nexthop_rtnl_msg_handlers); return 0; } |