diff options
Diffstat (limited to 'net/core/dst.c')
| -rw-r--r-- | net/core/dst.c | 181 |
1 files changed, 94 insertions, 87 deletions
diff --git a/net/core/dst.c b/net/core/dst.c index a263309df115..e9d35f49c9e7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * net/core/dst.c Protocol independent destination cache. * @@ -26,23 +27,6 @@ #include <net/dst.h> #include <net/dst_metadata.h> -/* - * Theory of operations: - * 1) We use a list, protected by a spinlock, to add - * new entries from both BH and non-BH context. - * 2) In order to keep spinlock held for a small delay, - * we use a second list where are stored long lived - * entries, that are handled by the garbage collect thread - * fired by a workqueue. - * 3) This list is guarded by a mutex, - * so that the gc_task and dst_dev_event() can be synchronized. - */ - -/* - * We want to keep lock & list close together - * to dirty as few cache lines as possible in __dst_free(). - * As this is not a very strong hint, we dont force an alignment on SMP. - */ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); @@ -61,12 +45,11 @@ const struct dst_metrics dst_default_metrics = { EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; - if (dev) - dev_hold(dev); + netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -83,7 +66,8 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - atomic_set(&dst->__refcnt, initial_ref); + rcuref_init(&dst->__rcuref, 1); + INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; @@ -93,30 +77,26 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) + int initial_obsolete, unsigned short flags) { struct dst_entry *dst; - if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) { - printk_ratelimited(KERN_NOTICE "Route cache is full: " - "consider increasing sysctl " - "net.ipv[4|6].route.max_size.\n"); - return NULL; - } - } + if (ops->gc && + !(flags & DST_NOCOUNT) && + dst_entries_get_fast(ops) > ops->gc_thresh) + ops->gc(ops); dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } EXPORT_SYMBOL(dst_alloc); -struct dst_entry *dst_destroy(struct dst_entry * dst) +static void dst_destroy(struct dst_entry *dst) { struct dst_entry *child = NULL; @@ -129,13 +109,9 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) child = xdst->child; } #endif - if (!(dst->flags & DST_NOCOUNT)) - dst_entries_add(dst->ops, -1); - if (dst->ops->destroy) dst->ops->destroy(dst); - if (dst->dev) - dev_put(dst->dev); + netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); @@ -147,20 +123,18 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) dst = child; if (dst) dst_release_immediate(dst); - return NULL; } -EXPORT_SYMBOL(dst_destroy); static void dst_destroy_rcu(struct rcu_head *head) { struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst = dst_destroy(dst); + dst_destroy(dst); } /* Operations to mark dst as DEAD and clean up the net device referenced * by dst: - * 1. put the dst under loopback interface and discard all tx/rx packets + * 1. put the dst under blackhole interface and discard all tx/rx packets * on this route. * 2. release the net_device * This function should be called when removing routes from the fib tree @@ -171,43 +145,45 @@ void dst_dev_put(struct dst_entry *dst) { struct net_device *dev = dst->dev; - dst->obsolete = DST_OBSOLETE_DEAD; + WRITE_ONCE(dst->obsolete, DST_OBSOLETE_DEAD); if (dst->ops->ifdown) - dst->ops->ifdown(dst, dev, true); - dst->input = dst_discard; - dst->output = dst_discard_out; - dst->dev = dev_net(dst->dev)->loopback_dev; - dev_hold(dst->dev); - dev_put(dev); + dst->ops->ifdown(dst, dev); + WRITE_ONCE(dst->input, dst_discard); + WRITE_ONCE(dst->output, dst_discard_out); + rcu_assign_pointer(dst->dev_rcu, blackhole_netdev); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); } EXPORT_SYMBOL(dst_dev_put); +static void dst_count_dec(struct dst_entry *dst) +{ + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); +} + void dst_release(struct dst_entry *dst) { - if (dst) { - int newrefcnt; - - newrefcnt = atomic_dec_return(&dst->__refcnt); - if (unlikely(newrefcnt < 0)) - net_warn_ratelimited("%s: dst:%p refcnt:%d\n", - __func__, dst, newrefcnt); - if (!newrefcnt) - call_rcu(&dst->rcu_head, dst_destroy_rcu); + if (dst && rcuref_put(&dst->__rcuref)) { +#ifdef CONFIG_DST_CACHE + if (dst->flags & DST_METADATA) { + struct metadata_dst *md_dst = (struct metadata_dst *)dst; + + if (md_dst->type == METADATA_IP_TUNNEL) + dst_cache_reset_now(&md_dst->u.tun_info.dst_cache); + } +#endif + dst_count_dec(dst); + call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); void dst_release_immediate(struct dst_entry *dst) { - if (dst) { - int newrefcnt; - - newrefcnt = atomic_dec_return(&dst->__refcnt); - if (unlikely(newrefcnt < 0)) - net_warn_ratelimited("%s: dst:%p refcnt:%d\n", - __func__, dst, newrefcnt); - if (!newrefcnt) - dst_destroy(dst); + if (dst && rcuref_put(&dst->__rcuref)) { + dst_count_dec(dst); + dst_destroy(dst); } } EXPORT_SYMBOL(dst_release_immediate); @@ -253,37 +229,62 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); -static struct dst_ops md_dst_ops = { - .family = AF_UNSPEC, -}; +struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} -static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) +u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old) { - WARN_ONCE(1, "Attempting to call output on metadata dst\n"); - kfree_skb(skb); - return 0; + return NULL; } -static int dst_md_discard(struct sk_buff *skb) +struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return NULL; +} + +void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) +{ +} +EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu); + +void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { - WARN_ONCE(1, "Attempting to call input on metadata dst\n"); - kfree_skb(skb); - return 0; } +EXPORT_SYMBOL_GPL(dst_blackhole_redirect); + +unsigned int dst_blackhole_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_dev(dst)->mtu; +} +EXPORT_SYMBOL_GPL(dst_blackhole_mtu); + +static struct dst_ops dst_blackhole_ops = { + .family = AF_UNSPEC, + .neigh_lookup = dst_blackhole_neigh_lookup, + .check = dst_blackhole_check, + .cow_metrics = dst_blackhole_cow_metrics, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, +}; static void __metadata_dst_init(struct metadata_dst *md_dst, enum metadata_type type, u8 optslen) - { struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); - - dst->input = dst_md_discard; - dst->output = dst_md_discard_out; - memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; } @@ -293,7 +294,8 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, { struct metadata_dst *md_dst; - md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); + md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen), + flags); if (!md_dst) return NULL; @@ -309,6 +311,8 @@ void metadata_dst_free(struct metadata_dst *md_dst) if (md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&md_dst->u.tun_info.dst_cache); #endif + if (md_dst->type == METADATA_XFRM) + dst_release(md_dst->u.xfrm_info.dst_orig); kfree(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free); @@ -319,7 +323,8 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) int cpu; struct metadata_dst __percpu *md_dst; - md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen, + md_dst = __alloc_percpu_gfp(struct_size(md_dst, u.tun_info.options, + optslen), __alignof__(struct metadata_dst), flags); if (!md_dst) return NULL; @@ -333,16 +338,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) { -#ifdef CONFIG_DST_CACHE int cpu; for_each_possible_cpu(cpu) { struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); +#ifdef CONFIG_DST_CACHE if (one_md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); - } #endif + if (one_md_dst->type == METADATA_XFRM) + dst_release(one_md_dst->u.xfrm_info.dst_orig); + } free_percpu(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); |
