summaryrefslogtreecommitdiff
path: root/net/core/neighbour.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/neighbour.c')
-rw-r--r--net/core/neighbour.c1820
1 files changed, 1024 insertions, 796 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4230400b9a30..96a3b1a93252 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic address resolution entity
*
@@ -5,11 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
* Harald Welte Add neighbour cache statistics like rtstat
@@ -18,7 +14,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
-#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -31,7 +26,9 @@
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
+#include <net/arp.h>
#include <net/dst.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
@@ -42,7 +39,8 @@
#include <linux/inetdevice.h>
#include <net/addrconf.h>
-#define DEBUG
+#include <trace/events/neigh.h>
+
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...) \
do { \
@@ -56,15 +54,34 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm);
#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+ int i;
+
+ switch (family) {
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ fallthrough; /* to avoid panic by null-ptr-deref */
+ case AF_INET:
+ i = NEIGH_ARP_TABLE;
+ break;
+ case AF_INET6:
+ i = NEIGH_ND_TABLE;
+ break;
+ }
+
+ return &dev->neighbours[i];
+}
+
/*
- Neighbour hash table buckets are protected with rwlock tbl->lock.
+ Neighbour hash table buckets are protected with tbl->lock.
- All the scans/updates to hash buckets MUST be made under this lock.
- NOTHING clever should be made under this lock: no callbacks
@@ -99,9 +116,7 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
- if (neigh->parms->neigh_cleanup)
- neigh->parms->neigh_cleanup(neigh);
-
+ trace_neigh_cleanup_and_release(neigh, 0);
__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
@@ -115,7 +130,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return base ? (prandom_u32() % base) + (base >> 1) : 0;
+ return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
@@ -126,20 +141,25 @@ static void neigh_mark_dead(struct neighbour *n)
list_del_init(&n->gc_list);
atomic_dec(&n->tbl->gc_entries);
}
+ if (!list_empty(&n->managed_list))
+ list_del_init(&n->managed_list);
}
static void neigh_update_gc_list(struct neighbour *n)
{
bool on_gc_list, exempt_from_gc;
- write_lock_bh(&n->tbl->lock);
+ spin_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
+ if (n->dead)
+ goto out;
- /* remove from the gc list if new state is permanent or if neighbor
- * is externally learned; otherwise entry should be on the gc list
+ /* remove from the gc list if new state is permanent or if neighbor is
+ * externally learned / validated; otherwise entry should be on the gc
+ * list
*/
exempt_from_gc = n->nud_state & NUD_PERMANENT ||
- n->flags & NTF_EXT_LEARNED;
+ n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
on_gc_list = !list_empty(&n->gc_list);
if (exempt_from_gc && on_gc_list) {
@@ -150,45 +170,78 @@ static void neigh_update_gc_list(struct neighbour *n)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
atomic_inc(&n->tbl->gc_entries);
}
+out:
+ write_unlock(&n->lock);
+ spin_unlock_bh(&n->tbl->lock);
+}
+
+static void neigh_update_managed_list(struct neighbour *n)
+{
+ bool on_managed_list, add_to_managed;
+
+ spin_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+ add_to_managed = n->flags & NTF_MANAGED;
+ on_managed_list = !list_empty(&n->managed_list);
+
+ if (!add_to_managed && on_managed_list)
+ list_del_init(&n->managed_list);
+ else if (add_to_managed && !on_managed_list)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
+out:
write_unlock(&n->lock);
- write_unlock_bh(&n->tbl->lock);
+ spin_unlock_bh(&n->tbl->lock);
}
-static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
- int *notify)
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+ bool *gc_update, bool *managed_update)
{
- bool rc = false;
- u8 ndm_flags;
+ u32 ndm_flags, old_flags = neigh->flags;
if (!(flags & NEIGH_UPDATE_F_ADMIN))
- return rc;
+ return;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
- ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
- if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
neigh->flags |= NTF_EXT_LEARNED;
else
neigh->flags &= ~NTF_EXT_LEARNED;
- rc = true;
*notify = 1;
+ *gc_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+ if (ndm_flags & NTF_MANAGED)
+ neigh->flags |= NTF_MANAGED;
+ else
+ neigh->flags &= ~NTF_MANAGED;
+ *notify = 1;
+ *managed_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ neigh->flags |= NTF_EXT_VALIDATED;
+ else
+ neigh->flags &= ~NTF_EXT_VALIDATED;
+ *notify = 1;
+ *gc_update = true;
}
-
- return rc;
}
-static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
- struct neigh_table *tbl)
+bool neigh_remove_one(struct neighbour *n)
{
bool retval = false;
write_lock(&n->lock);
if (refcount_read(&n->refcnt) == 1) {
- struct neighbour *neigh;
-
- neigh = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(*np, neigh);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
retval = true;
}
@@ -198,39 +251,19 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
return retval;
}
-bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
-{
- struct neigh_hash_table *nht;
- void *pkey = ndel->primary_key;
- u32 hash_val;
- struct neighbour *n;
- struct neighbour __rcu **np;
-
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
- hash_val = hash_val >> (32 - nht->hash_shift);
-
- np = &nht->hash_buckets[hash_val];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock)))) {
- if (n == ndel)
- return neigh_del(n, np, tbl);
- np = &n->next;
- }
- return false;
-}
-
static int neigh_forced_gc(struct neigh_table *tbl)
{
- int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
+ int max_clean = atomic_read(&tbl->gc_entries) -
+ READ_ONCE(tbl->gc_thresh2);
+ u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
unsigned long tref = jiffies - 5 * HZ;
struct neighbour *n, *tmp;
int shrunk = 0;
+ int loop = 0;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
if (refcount_read(&n->refcnt) == 1) {
@@ -238,27 +271,45 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if ((n->nud_state == NUD_FAILED) ||
- time_after(tref, n->updated))
+ (n->nud_state == NUD_NOARP) ||
+ (tbl->is_multicast &&
+ tbl->is_multicast(n->primary_key)) ||
+ !time_in_range(n->updated, tref, jiffies))
remove = true;
write_unlock(&n->lock);
- if (remove && neigh_remove_one(n, tbl))
+ if (remove && neigh_remove_one(n))
shrunk++;
if (shrunk >= max_clean)
break;
+ if (++loop == 16) {
+ if (ktime_get_ns() > tmax)
+ goto unlock;
+ loop = 0;
+ }
}
}
- tbl->last_flush = jiffies;
-
- write_unlock_bh(&tbl->lock);
+ WRITE_ONCE(tbl->last_flush, jiffies);
+unlock:
+ spin_unlock_bh(&tbl->lock);
return shrunk;
}
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Use safe distance from the jiffies - LONG_MAX point while timer
+ * is running in DELAY/PROBE state but still show to user space
+ * large times in the past.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -269,94 +320,163 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when)
static int neigh_del_timer(struct neighbour *n)
{
if ((n->nud_state & NUD_IN_TIMER) &&
- del_timer(&n->timer)) {
+ timer_delete(&n->timer)) {
neigh_release(n);
return 1;
}
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list)
+static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
+ int family)
{
+ switch (family) {
+ case AF_INET:
+ return __in_dev_arp_parms_get_rcu(dev);
+ case AF_INET6:
+ return __in6_dev_nd_parms_get_rcu(dev);
+ }
+ return NULL;
+}
+
+static void neigh_parms_qlen_dec(struct net_device *dev, int family)
+{
+ struct neigh_parms *p;
+
+ rcu_read_lock();
+ p = neigh_get_dev_parms_rcu(dev, family);
+ if (p)
+ p->qlen--;
+ rcu_read_unlock();
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
+ int family)
+{
+ struct sk_buff_head tmp;
+ unsigned long flags;
struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) {
+ skb_queue_head_init(&tmp);
+ spin_lock_irqsave(&list->lock, flags);
+ skb = skb_peek(list);
+ while (skb != NULL) {
+ struct sk_buff *skb_next = skb_peek_next(skb, list);
+ struct net_device *dev = skb->dev;
+
+ if (net == NULL || net_eq(dev_net(dev), net)) {
+ neigh_parms_qlen_dec(dev, family);
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&tmp, skb);
+ }
+ skb = skb_next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
dev_put(skb->dev);
kfree_skb(skb);
}
}
+static void neigh_flush_one(struct neighbour *n)
+{
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+
+ write_lock(&n->lock);
+
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+
+ neigh_dbg(2, "neigh %p is stray\n", n);
+ }
+
+ write_unlock(&n->lock);
+
+ neigh_cleanup_and_release(n);
+}
+
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
- int i;
+ struct hlist_head *dev_head;
+ struct hlist_node *tmp;
+ struct neighbour *n;
+
+ dev_head = neigh_get_dev_table(dev, tbl->family);
+
+ hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+ if (skip_perm &&
+ (n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_VALIDATED))
+ continue;
+
+ neigh_flush_one(n);
+ }
+}
+
+static void neigh_flush_table(struct neigh_table *tbl)
+{
struct neigh_hash_table *nht;
+ int i;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (i = 0; i < (1 << nht->hash_shift); i++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
- continue;
- }
- if (skip_perm && n->nud_state & NUD_PERMANENT) {
- np = &n->next;
- continue;
- }
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- n->output = neigh_blackhole;
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
- }
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
+ neigh_flush_one(n);
}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, false);
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
- write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev, skip_perm);
- pneigh_ifdown_and_unlock(tbl, dev);
+ spin_lock_bh(&tbl->lock);
+ if (likely(dev)) {
+ neigh_flush_dev(tbl, dev, skip_perm);
+ } else {
+ DEBUG_NET_WARN_ON_ONCE(skip_perm);
+ neigh_flush_table(tbl);
+ }
+ spin_unlock_bh(&tbl->lock);
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_ifdown(tbl, dev, skip_perm);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
+ tbl->family);
+ if (skb_queue_empty_lockless(&tbl->proxy_queue))
+ timer_delete_sync(&tbl->proxy_timer);
return 0;
}
@@ -376,21 +496,21 @@ EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
struct net_device *dev,
- bool exempt_from_gc)
+ u32 flags, bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
- int entries;
+ int entries, gc_thresh3;
if (exempt_from_gc)
goto do_alloc;
entries = atomic_inc_return(&tbl->gc_entries) - 1;
- if (entries >= tbl->gc_thresh3 ||
- (entries >= tbl->gc_thresh2 &&
- time_after(now, tbl->last_flush + 5 * HZ))) {
- if (!neigh_forced_gc(tbl) &&
- entries >= tbl->gc_thresh3) {
+ gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
+ if (entries >= gc_thresh3 ||
+ (entries >= READ_ONCE(tbl->gc_thresh2) &&
+ time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
+ if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
net_info_ratelimited("%s: neighbor table overflow!\n",
tbl->id);
NEIGH_CACHE_STAT_INC(tbl, table_fulls);
@@ -409,6 +529,7 @@ do_alloc:
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ n->flags = flags;
seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
timer_setup(&n->timer, neigh_timer_handler, 0);
@@ -418,6 +539,7 @@ do_alloc:
refcount_set(&n->refcnt, 1);
n->dead = 1;
INIT_LIST_HEAD(&n->gc_list);
+ INIT_LIST_HEAD(&n->managed_list);
atomic_inc(&tbl->entries);
out:
@@ -436,27 +558,21 @@ static void neigh_get_hash_rnd(u32 *x)
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = (1 << shift) * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct hlist_head);
+ struct hlist_head *hash_heads;
struct neigh_hash_table *ret;
- struct neighbour __rcu **buckets;
int i;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
- if (size <= PAGE_SIZE) {
- buckets = kzalloc(size, GFP_ATOMIC);
- } else {
- buckets = (struct neighbour __rcu **)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(size));
- kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
- }
- if (!buckets) {
+
+ hash_heads = kzalloc(size, GFP_ATOMIC);
+ if (!hash_heads) {
kfree(ret);
return NULL;
}
- ret->hash_buckets = buckets;
+ ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -468,15 +584,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets = nht->hash_buckets;
- if (size <= PAGE_SIZE) {
- kfree(buckets);
- } else {
- kmemleak_free(buckets);
- free_pages((unsigned long)buckets, get_order(size));
- }
+ kfree(nht->hash_heads);
kfree(nht);
}
@@ -495,24 +604,17 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
return old_nht;
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
- struct neighbour *n, *next;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
- lockdep_is_held(&tbl->lock));
- n != NULL;
- n = next) {
+ neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
hash >>= (32 - new_nht->hash_shift);
- next = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(
- new_nht->hash_buckets[hash],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
}
@@ -528,7 +630,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
NEIGH_CACHE_STAT_INC(tbl, lookups);
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __neigh_lookup_noref(tbl, pkey, dev);
if (n) {
if (!refcount_inc_not_zero(&n->refcnt))
@@ -536,53 +638,23 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
NEIGH_CACHE_STAT_INC(tbl, hits);
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, u32 flags,
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val;
+ u32 hash_val, key_len = tbl->key_len;
+ struct neighbour *n1, *rc, *n;
struct neigh_hash_table *nht;
-
- NEIGH_CACHE_STAT_INC(tbl, lookups);
-
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!memcmp(n->primary_key, pkey, key_len) &&
- net_eq(dev_net(n->dev), net)) {
- if (!refcount_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
- }
-
- rcu_read_unlock_bh();
- return n;
-}
-EXPORT_SYMBOL(neigh_lookup_nodev);
-
-static struct neighbour *___neigh_create(struct neigh_table *tbl,
- const void *pkey,
- struct net_device *dev,
- bool exempt_from_gc, bool want_ref)
-{
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
- u32 hash_val;
- unsigned int key_len = tbl->key_len;
int error;
- struct neigh_hash_table *nht;
+ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
+ trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
@@ -590,7 +662,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
- dev_hold(dev);
+ netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
@@ -615,7 +687,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
@@ -629,11 +701,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
goto out_tbl_unlock;
}
- for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
- n1 != NULL;
- n1 = rcu_dereference_protected(n1->next,
- lockdep_is_held(&tbl->lock))) {
+ neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
@@ -645,21 +713,25 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
n->dead = 0;
if (!exempt_from_gc)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
-
+ if (n->flags & NTF_MANAGED)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(nht->hash_buckets[hash_val], n);
- write_unlock_bh(&tbl->lock);
+ hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ hlist_add_head_rcu(&n->dev_list,
+ neigh_get_dev_table(dev, tbl->family));
+
+ spin_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
out:
return rc;
out_tbl_unlock:
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
out_neigh_release:
+ if (!exempt_from_gc)
+ atomic_dec(&tbl->gc_entries);
neigh_release(n);
goto out;
}
@@ -667,7 +739,9 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, false, want_ref);
+ bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
+
+ return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -681,143 +755,160 @@ static u32 pneigh_hash(const void *pkey, unsigned int key_len)
return hash_val;
}
-static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
- struct net *net,
- const void *pkey,
- unsigned int key_len,
- struct net_device *dev)
+struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey,
+ struct net_device *dev)
{
+ struct pneigh_entry *n;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(tbl->phash_buckets[hash_val],
+ lockdep_is_held(&tbl->phash_lock));
+
while (n) {
if (!memcmp(n->key, pkey, key_len) &&
net_eq(pneigh_net(n), net) &&
(n->dev == dev || !n->dev))
return n;
- n = n->next;
- }
- return NULL;
-}
-struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey, struct net_device *dev)
-{
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
+ }
- return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(__pneigh_lookup);
+EXPORT_IPV6_MOD(pneigh_lookup);
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey,
- struct net_device *dev, int creat)
+int pneigh_create(struct neigh_table *tbl, struct net *net,
+ const void *pkey, struct net_device *dev,
+ u32 flags, u8 protocol, bool permanent)
{
struct pneigh_entry *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
-
- read_lock_bh(&tbl->lock);
- n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
- read_unlock_bh(&tbl->lock);
+ unsigned int key_len;
+ u32 hash_val;
+ int err = 0;
- if (n || !creat)
- goto out;
+ mutex_lock(&tbl->phash_lock);
- ASSERT_RTNL();
+ n = pneigh_lookup(tbl, net, pkey, dev);
+ if (n)
+ goto update;
- n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
- if (!n)
+ key_len = tbl->key_len;
+ n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (!n) {
+ err = -ENOBUFS;
goto out;
+ }
- n->protocol = 0;
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- if (dev)
- dev_hold(dev);
+ netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- if (dev)
- dev_put(dev);
+ netdev_put(dev, &n->dev_tracker);
kfree(n);
- n = NULL;
+ err = -ENOBUFS;
goto out;
}
- write_lock_bh(&tbl->lock);
+ hash_val = pneigh_hash(pkey, key_len);
n->next = tbl->phash_buckets[hash_val];
- tbl->phash_buckets[hash_val] = n;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
+update:
+ WRITE_ONCE(n->flags, flags);
+ n->permanent = permanent;
+ WRITE_ONCE(n->protocol, protocol);
out:
- return n;
+ mutex_unlock(&tbl->phash_lock);
+ return err;
}
-EXPORT_SYMBOL(pneigh_lookup);
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+ struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+ netdev_put(n->dev, &n->dev_tracker);
+ kfree(n);
+}
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
- struct pneigh_entry *n, **np;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ struct pneigh_entry *n, __rcu **np;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+
+ mutex_lock(&tbl->phash_lock);
- write_lock_bh(&tbl->lock);
- for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ for (np = &tbl->phash_buckets[hash_val];
+ (n = rcu_dereference_protected(*np, 1)) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
net_eq(pneigh_net(n), net)) {
- *np = n->next;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(*np, n->next);
+
+ mutex_unlock(&tbl->phash_lock);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
return -ENOENT;
}
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
- struct pneigh_entry *n, **np, *freelist = NULL;
+ struct pneigh_entry *n, __rcu **np;
+ LIST_HEAD(head);
u32 h;
+ mutex_lock(&tbl->phash_lock);
+
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
+ if (skip_perm && n->permanent)
+ goto skip;
if (!dev || n->dev == dev) {
- *np = n->next;
- n->next = freelist;
- freelist = n;
+ rcu_assign_pointer(*np, n->next);
+ list_add(&n->free_node, &head);
continue;
}
+skip:
np = &n->next;
}
}
- write_unlock_bh(&tbl->lock);
- while ((n = freelist)) {
- freelist = n->next;
- n->next = NULL;
+
+ mutex_unlock(&tbl->phash_lock);
+
+ while (!list_empty(&head)) {
+ n = list_first_entry(&head, typeof(*n), free_node);
+ list_del(&n->free_node);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
}
- return -ENOENT;
}
-static void neigh_parms_destroy(struct neigh_parms *parms);
-
static inline void neigh_parms_put(struct neigh_parms *parms)
{
if (refcount_dec_and_test(&parms->refcnt))
- neigh_parms_destroy(parms);
+ kfree(parms);
}
/*
@@ -847,7 +938,7 @@ void neigh_destroy(struct neighbour *neigh)
if (dev->netdev_ops->ndo_neigh_destroy)
dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
- dev_put(dev);
+ netdev_put(dev, &neigh->dev_tracker);
neigh_parms_put(neigh->parms);
neigh_dbg(2, "neigh %p is destroyed\n", neigh);
@@ -866,7 +957,7 @@ static void neigh_suspect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->output = neigh->ops->output;
+ WRITE_ONCE(neigh->output, neigh->ops->output);
}
/* Neighbour state is OK;
@@ -878,20 +969,20 @@ static void neigh_connect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is connected\n", neigh);
- neigh->output = neigh->ops->connected_output;
+ WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+ struct neigh_hash_table *nht;
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
unsigned int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
@@ -901,55 +992,53 @@ static void neigh_periodic_work(struct work_struct *work)
if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
struct neigh_parms *p;
- tbl->last_rand = jiffies;
+
+ WRITE_ONCE(tbl->last_rand, jiffies);
list_for_each_entry(p, &tbl->parms_list, list)
- p->reachable_time =
- neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+ neigh_set_reach_time(p);
}
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
goto out;
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
- np = &nht->hash_buckets[i];
-
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
write_lock(&n->lock);
state = n->nud_state;
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
- (n->flags & NTF_EXT_LEARNED)) {
+ (n->flags &
+ (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
write_unlock(&n->lock);
- goto next_elt;
+ continue;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- *np = n->next;
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/*
* It's fine to release lock here, even if hash table
* grows while we are preempted.
*/
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
cond_resched();
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
}
@@ -960,7 +1049,7 @@ out:
*/
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -1015,7 +1104,7 @@ static void neigh_probe(struct neighbour *neigh)
static void neigh_timer_handler(struct timer_list *t)
{
unsigned long now, next;
- struct neighbour *neigh = from_timer(neigh, t, timer);
+ struct neighbour *neigh = timer_container_of(neigh, t, timer);
unsigned int state;
int notify = 0;
@@ -1037,13 +1126,13 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->nud_state = NUD_STALE;
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
@@ -1053,35 +1142,42 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
- neigh->nud_state = NUD_REACHABLE;
+ WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
- neigh->nud_state = NUD_PROBE;
+ WRITE_ONCE(neigh->nud_state, NUD_PROBE);
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
- next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
+ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+ HZ/100);
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
- next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
+ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- neigh->nud_state = NUD_FAILED;
+ if (neigh->nud_state == NUD_PROBE &&
+ neigh->flags & NTF_EXT_VALIDATED) {
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
+ neigh->updated = jiffies;
+ } else {
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ neigh_invalidate(neigh);
+ }
notify = 1;
- neigh_invalidate(neigh);
goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
- if (time_before(next, jiffies + HZ/2))
- next = jiffies + HZ/2;
+ if (time_before(next, jiffies + HZ/100))
+ next = jiffies + HZ/100;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
@@ -1095,10 +1191,13 @@ out:
if (notify)
neigh_update_notify(neigh, 0);
+ trace_neigh_timer_handler(neigh, 0);
+
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -1118,23 +1217,30 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
- neigh->nud_state = NUD_INCOMPLETE;
+ neigh_del_timer(neigh);
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
neigh->updated = now;
- next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
- HZ/2);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ neigh_del_timer(neigh);
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
@@ -1150,7 +1256,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
@@ -1165,13 +1271,15 @@ out_unlock_bh:
else
write_unlock(&neigh->lock);
local_bh_enable();
+ trace_neigh_event_send_done(neigh, rc);
return rc;
out_dead:
if (neigh->nud_state & NUD_STALE)
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
+ trace_neigh_event_send_dead(neigh, 1);
return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1187,7 +1295,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
if (update) {
hh = &neigh->hh;
- if (hh->hh_len) {
+ if (READ_ONCE(hh->hh_len)) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1195,8 +1303,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
}
}
-
-
/* Generic update routine.
-- lladdr is new lladdr or NULL, if it is not supplied.
-- new is new state.
@@ -1207,25 +1313,28 @@ static void neigh_update_hhs(struct neighbour *neigh)
lladdr instead of overriding it
if it is different.
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
-
+ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
a router.
+ NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
+ or invalidated.
Caller MUST hold reference count on the entry.
*/
-
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags, u32 nlmsg_pid,
struct netlink_ext_ack *extack)
{
- bool ext_learn_change = false;
- u8 old;
- int err;
- int notify = 0;
- struct net_device *dev;
+ bool gc_update = false, managed_update = false;
int update_isrouter = 0;
+ struct net_device *dev;
+ int err, notify = 0;
+ u8 old;
+
+ trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
write_lock_bh(&neigh->lock);
@@ -1233,21 +1342,28 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
old = neigh->nud_state;
err = -EPERM;
- if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
- (old & (NUD_NOARP | NUD_PERMANENT)))
- goto out;
if (neigh->dead) {
NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
+ new = old;
goto out;
}
+ if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
+ (old & (NUD_NOARP | NUD_PERMANENT)))
+ goto out;
- ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
+ neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+ if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
+ new = old & ~NUD_PERMANENT;
+ WRITE_ONCE(neigh->nud_state, new);
+ err = 0;
+ goto out;
+ }
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1326,7 +1442,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
notify = 1;
}
@@ -1367,12 +1483,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst) {
+ if (dst &&
+ READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
}
- n1->output(n1, skb);
+ READ_ONCE(n1->output)(n1, skb);
if (n2)
neigh_release(n2);
rcu_read_unlock();
@@ -1386,13 +1503,13 @@ out:
if (update_isrouter)
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
-
- if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ if (((new ^ old) & NUD_PERMANENT) || gc_update)
neigh_update_gc_list(neigh);
-
+ if (managed_update)
+ neigh_update_managed_list(neigh);
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
-
+ trace_neigh_update_done(neigh, err);
return err;
}
@@ -1413,10 +1530,11 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
- jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
+ jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+ HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
@@ -1462,7 +1580,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache && !neigh->hh.hh_len)
+ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
neigh_hh_init(neigh);
do {
@@ -1481,7 +1599,7 @@ out:
return rc;
out_kfree_skb:
rc = -EINVAL;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
@@ -1505,7 +1623,7 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
err = dev_queue_xmit(skb);
else {
err = -EINVAL;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
}
return err;
}
@@ -1517,9 +1635,23 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_direct_output);
+static void neigh_managed_work(struct work_struct *work)
+{
+ struct neigh_table *tbl = container_of(work, struct neigh_table,
+ managed_work.work);
+ struct neighbour *neigh;
+
+ spin_lock_bh(&tbl->lock);
+ list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+ neigh_event_send_probe(neigh, NULL, false);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+ NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
+ spin_unlock_bh(&tbl->lock);
+}
+
static void neigh_proxy_process(struct timer_list *t)
{
- struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
+ struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
long sched_next = 0;
unsigned long now = jiffies;
struct sk_buff *skb, *n;
@@ -1532,7 +1664,9 @@ static void neigh_proxy_process(struct timer_list *t)
if (tdif <= 0) {
struct net_device *dev = skb->dev;
+ neigh_parms_qlen_dec(dev, tbl->family);
__skb_unlink(skb, &tbl->proxy_queue);
+
if (tbl->proxy_redo && netif_running(dev)) {
rcu_read_lock();
tbl->proxy_redo(skb);
@@ -1545,21 +1679,29 @@ static void neigh_proxy_process(struct timer_list *t)
} else if (!sched_next || tdif < sched_next)
sched_next = tdif;
}
- del_timer(&tbl->proxy_timer);
+ timer_delete(&tbl->proxy_timer);
if (sched_next)
mod_timer(&tbl->proxy_timer, jiffies + sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
+static unsigned long neigh_proxy_delay(struct neigh_parms *p)
+{
+ /* If proxy_delay is zero, do not call get_random_u32_below()
+ * as it is undefined behavior.
+ */
+ unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
+
+ return proxy_delay ?
+ jiffies + get_random_u32_below(proxy_delay) : jiffies;
+}
+
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
- unsigned long now = jiffies;
-
- unsigned long sched_next = now + (prandom_u32() %
- NEIGH_VAR(p, PROXY_DELAY));
+ unsigned long sched_next = neigh_proxy_delay(p);
- if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
+ if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
return;
}
@@ -1568,13 +1710,14 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
spin_lock(&tbl->proxy_queue.lock);
- if (del_timer(&tbl->proxy_timer)) {
+ if (timer_delete(&tbl->proxy_timer)) {
if (time_before(tbl->proxy_timer.expires, sched_next))
sched_next = tbl->proxy_timer.expires;
}
skb_dst_drop(skb);
dev_hold(skb->dev);
__skb_queue_tail(&tbl->proxy_queue, skb);
+ p->qlen++;
mod_timer(&tbl->proxy_timer, sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
@@ -1605,22 +1748,22 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
if (p) {
p->tbl = tbl;
refcount_set(&p->refcnt, 1);
- p->reachable_time =
- neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
- dev_hold(dev);
+ neigh_set_reach_time(p);
+ p->qlen = 0;
+ netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
p->dev = dev;
write_pnet(&p->net, net);
p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
- dev_put(dev);
+ netdev_put(dev, &p->dev_tracker);
kfree(p);
return NULL;
}
- write_lock_bh(&tbl->lock);
- list_add(&p->list, &tbl->parms.list);
- write_unlock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
+ list_add_rcu(&p->list, &tbl->parms.list);
+ spin_unlock_bh(&tbl->lock);
neigh_parms_data_state_cleanall(p);
}
@@ -1640,24 +1783,20 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
if (!parms || parms == &tbl->parms)
return;
- write_lock_bh(&tbl->lock);
- list_del(&parms->list);
+
+ spin_lock_bh(&tbl->lock);
+ list_del_rcu(&parms->list);
parms->dead = 1;
- write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
+ spin_unlock_bh(&tbl->lock);
+
+ netdev_put(parms->dev, &parms->dev_tracker);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
-static void neigh_parms_destroy(struct neigh_parms *parms)
-{
- kfree(parms);
-}
-
static struct lock_class_key neigh_table_proxy_queue_class;
-static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
+static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
@@ -1666,11 +1805,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
INIT_LIST_HEAD(&tbl->parms_list);
INIT_LIST_HEAD(&tbl->gc_list);
+ INIT_LIST_HEAD(&tbl->managed_list);
+
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
- tbl->parms.reachable_time =
- neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
+ neigh_set_reach_time(&tbl->parms);
+ tbl->parms.qlen = 0;
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
@@ -1696,10 +1837,15 @@ void neigh_table_init(int index, struct neigh_table *tbl)
else
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
- rwlock_init(&tbl->lock);
+ spin_lock_init(&tbl->lock);
+ mutex_init(&tbl->phash_lock);
+
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
tbl->parms.reachable_time);
+ INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
@@ -1707,17 +1853,24 @@ void neigh_table_init(int index, struct neigh_table *tbl)
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
- neigh_tables[index] = tbl;
+ rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
+/*
+ * Only called from ndisc_cleanup(), which means this is dead code
+ * because we no longer can unload IPv6 module.
+ */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
- neigh_tables[index] = NULL;
+ RCU_INIT_POINTER(neigh_tables[index], NULL);
+ synchronize_rcu();
+
/* It is not clean... Fix it to unload IPv6 module safely */
+ cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ timer_delete_sync(&tbl->proxy_timer);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
@@ -1744,13 +1897,10 @@ static struct neigh_table *neigh_find_table(int family)
switch (family) {
case AF_INET:
- tbl = neigh_tables[NEIGH_ARP_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
break;
case AF_INET6:
- tbl = neigh_tables[NEIGH_ND_TABLE];
- break;
- case AF_DECnet:
- tbl = neigh_tables[NEIGH_DN_TABLE];
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
break;
}
@@ -1758,6 +1908,7 @@ static struct neigh_table *neigh_find_table(int family)
}
const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
[NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
@@ -1768,6 +1919,9 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_IFINDEX] = { .type = NLA_U32 },
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
+ [NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
+ [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1826,10 +1980,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
NETLINK_CB(skb).portid, extack);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
neigh_release(neigh);
- neigh_remove_one(neigh, tbl);
- write_unlock_bh(&tbl->lock);
+ neigh_remove_one(neigh);
+ spin_unlock_bh(&tbl->lock);
out:
return err;
@@ -1839,7 +1993,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1848,10 +2002,12 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct neighbour *neigh;
void *dst, *lladdr;
u8 protocol = 0;
+ u32 ndm_flags;
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+ nda_policy, extack);
if (err < 0)
goto out;
@@ -1862,6 +2018,15 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
ndm = nlmsg_data(nlh);
+ ndm_flags = ndm->ndm_flags;
+ if (tb[NDA_FLAGS_EXT]) {
+ u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+ BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
+ (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
+ hweight32(NTF_EXT_MASK)));
+ ndm_flags |= (ext << NTF_EXT_SHIFT);
+ }
if (ndm->ndm_ifindex) {
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
@@ -1889,18 +2054,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
-
- if (ndm->ndm_flags & NTF_PROXY) {
- struct pneigh_entry *pn;
-
- err = -ENOBUFS;
- pn = pneigh_lookup(tbl, net, dst, dev, 1);
- if (pn) {
- pn->flags = ndm->ndm_flags;
- if (protocol)
- pn->protocol = protocol;
- err = 0;
+ if (ndm_flags & NTF_PROXY) {
+ if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
+ goto out;
}
+
+ err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
+ !!(ndm->ndm_state & NUD_PERMANENT));
goto out;
}
@@ -1909,18 +2070,49 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
+ err = -EINVAL;
+ goto out;
+ }
+
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
- bool exempt_from_gc;
+ bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
+ bool exempt_from_gc = ndm_permanent ||
+ ndm_flags & (NTF_EXT_LEARNED |
+ NTF_EXT_VALIDATED);
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
+ if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
+ err = -EINVAL;
+ goto out;
+ }
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
- exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
- ndm->ndm_flags & NTF_EXT_LEARNED;
- neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
+ /* NTF_USE and NTF_MANAGED will result in the neighbor
+ * being created with an invalid state (NUD_NONE).
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = NUD_NONE;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot create externally validated neighbor with an invalid state");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ neigh = ___neigh_create(tbl, dst, dev,
+ ndm_flags &
+ (NTF_EXT_LEARNED | NTF_MANAGED |
+ NTF_EXT_VALIDATED),
+ exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1931,30 +2123,48 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh_release(neigh);
goto out;
}
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED do not update the existing
+ * state other than clearing it if it was
+ * NUD_PERMANENT.
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot mark neighbor as externally validated with an invalid state");
+ err = -EINVAL;
+ neigh_release(neigh);
+ goto out;
+ }
+ }
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
}
- if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ if (protocol)
+ neigh->protocol = protocol;
+ if (ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-
- if (ndm->ndm_flags & NTF_ROUTER)
+ if (ndm_flags & NTF_ROUTER)
flags |= NEIGH_UPDATE_F_ISROUTER;
-
- if (ndm->ndm_flags & NTF_USE) {
+ if (ndm_flags & NTF_MANAGED)
+ flags |= NEIGH_UPDATE_F_MANAGED;
+ if (ndm_flags & NTF_USE)
+ flags |= NEIGH_UPDATE_F_USE;
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
+
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
neigh_event_send(neigh, NULL);
- err = 0;
- } else
- err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid, extack);
-
- if (protocol)
- neigh->protocol = protocol;
-
neigh_release(neigh);
-
out:
return err;
}
@@ -1963,12 +2173,12 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
struct nlattr *nest;
- nest = nla_nest_start(skb, NDTA_PARMS);
+ nest = nla_nest_start_noflag(skb, NDTA_PARMS);
if (nest == NULL)
return -ENOBUFS;
if ((parms->dev &&
- nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
+ nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) ||
nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
@@ -1983,7 +2193,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
NEIGH_VAR(parms, MCAST_PROBES)) ||
nla_put_u32(skb, NDTPA_MCAST_REPROBES,
NEIGH_VAR(parms, MCAST_REPROBES)) ||
- nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
+ nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time),
NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
@@ -1998,7 +2208,9 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
nla_put_msecs(skb, NDTPA_PROXY_DELAY,
NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_LOCKTIME,
- NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
+ NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
+ NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -2018,22 +2230,21 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
return -EMSGSIZE;
ndtmsg = nlmsg_data(nlh);
-
- read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
- nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
- nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
- nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
- nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
+ nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
+ NDTA_PAD) ||
+ nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
+ nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
+ nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
goto nla_put_failure;
{
unsigned long now = jiffies;
- unsigned int flush_delta = now - tbl->last_flush;
- unsigned int rand_delta = now - tbl->last_rand;
+ long flush_delta = now - READ_ONCE(tbl->last_flush);
+ long rand_delta = now - READ_ONCE(tbl->last_rand);
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
@@ -2041,14 +2252,12 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
+ .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen),
};
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ nht = rcu_dereference(tbl->nht);
ndc.ndtc_hash_rnd = nht->hash_rnd[0];
ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
- rcu_read_unlock_bh();
if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
goto nla_put_failure;
@@ -2064,17 +2273,17 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
struct neigh_statistics *st;
st = per_cpu_ptr(tbl->stats, cpu);
- ndst.ndts_allocs += st->allocs;
- ndst.ndts_destroys += st->destroys;
- ndst.ndts_hash_grows += st->hash_grows;
- ndst.ndts_res_failed += st->res_failed;
- ndst.ndts_lookups += st->lookups;
- ndst.ndts_hits += st->hits;
- ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
- ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
- ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
- ndst.ndts_forced_gc_runs += st->forced_gc_runs;
- ndst.ndts_table_fulls += st->table_fulls;
+ ndst.ndts_allocs += READ_ONCE(st->allocs);
+ ndst.ndts_destroys += READ_ONCE(st->destroys);
+ ndst.ndts_hash_grows += READ_ONCE(st->hash_grows);
+ ndst.ndts_res_failed += READ_ONCE(st->res_failed);
+ ndst.ndts_lookups += READ_ONCE(st->lookups);
+ ndst.ndts_hits += READ_ONCE(st->hits);
+ ndst.ndts_rcv_probes_mcast += READ_ONCE(st->rcv_probes_mcast);
+ ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast);
+ ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs);
+ ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs);
+ ndst.ndts_table_fulls += READ_ONCE(st->table_fulls);
}
if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
@@ -2086,12 +2295,10 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
if (neightbl_fill_parms(skb, &tbl->parms) < 0)
goto nla_put_failure;
- read_unlock_bh(&tbl->lock);
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
- read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -2110,8 +2317,6 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
return -EMSGSIZE;
ndtmsg = nlmsg_data(nlh);
-
- read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
@@ -2120,11 +2325,9 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
neightbl_fill_parms(skb, parms) < 0)
goto errout;
- read_unlock_bh(&tbl->lock);
nlmsg_end(skb, nlh);
return 0;
errout:
- read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -2141,6 +2344,7 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_IFINDEX] = { .type = NLA_U32 },
[NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
+ [NDTPA_QUEUE_LENBYTES] = { .type = NLA_U32 },
[NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
[NDTPA_APP_PROBES] = { .type = NLA_U32 },
[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
@@ -2153,20 +2357,21 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
+ [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDTA_MAX + 1];
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
- struct nlattr *tb[NDTA_MAX+1];
bool found = false;
int err, tidx;
- err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
- nl_neightbl_policy, extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+ nl_neightbl_policy, extack);
if (err < 0)
goto errout;
@@ -2177,34 +2382,42 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
ndtmsg = nlmsg_data(nlh);
+ rcu_read_lock();
+
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference(neigh_tables[tidx]);
if (!tbl)
continue;
+
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
continue;
+
if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
found = true;
break;
}
}
- if (!found)
- return -ENOENT;
+ if (!found) {
+ rcu_read_unlock();
+ err = -ENOENT;
+ goto errout;
+ }
/*
* We acquire tbl->lock to be nice to the periodic timers and
* make sure they always see a consistent set of values.
*/
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
if (tb[NDTA_PARMS]) {
struct nlattr *tbp[NDTPA_MAX+1];
struct neigh_parms *p;
int i, ifindex = 0;
- err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
- nl_ntbl_parm_policy, extack);
+ err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
+ tb[NDTA_PARMS],
+ nl_ntbl_parm_policy, extack);
if (err < 0)
goto errout_tbl_lock;
@@ -2258,8 +2471,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
* only be effective after the next time neigh_periodic_work
* decides to recompute it (can be multiple minutes)
*/
- p->reachable_time =
- neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+ neigh_set_reach_time(p);
break;
case NDTPA_GC_STALETIME:
NEIGH_VAR_SET(p, GC_STALETIME,
@@ -2270,6 +2482,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_get_msecs(tbp[i]));
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
+ case NDTPA_INTERVAL_PROBE_TIME_MS:
+ NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
+ nla_get_msecs(tbp[i]));
+ break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
nla_get_msecs(tbp[i]));
@@ -2297,21 +2513,22 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout_tbl_lock;
if (tb[NDTA_THRESH1])
- tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+ WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
if (tb[NDTA_THRESH2])
- tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+ WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
if (tb[NDTA_THRESH3])
- tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+ WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
if (tb[NDTA_GC_INTERVAL])
- tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+ WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
err = 0;
errout_tbl_lock:
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
errout:
return err;
}
@@ -2321,12 +2538,12 @@ static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
{
struct ndtmsg *ndtm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
+ ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
+ if (!ndtm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
return -EINVAL;
}
- ndtm = nlmsg_data(nlh);
if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
return -EINVAL;
@@ -2358,10 +2575,12 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
+ rcu_read_lock();
+
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
- tbl = neigh_tables[tidx];
+ tbl = rcu_dereference(neigh_tables[tidx]);
if (!tbl)
continue;
@@ -2375,7 +2594,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
nidx = 0;
p = list_next_entry(&tbl->parms, list);
- list_for_each_entry_from(p, &tbl->parms_list, list) {
+ list_for_each_entry_from_rcu(p, &tbl->parms_list, list) {
if (!net_eq(neigh_parms_net(p), net))
continue;
@@ -2395,6 +2614,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
neigh_skip = 0;
}
out:
+ rcu_read_unlock();
+
cb->args[0] = tidx;
cb->args[1] = nidx;
@@ -2404,6 +2625,7 @@ out:
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
u32 pid, u32 seq, int type, unsigned int flags)
{
+ u32 neigh_flags, neigh_flags_ext;
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -2413,11 +2635,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+ neigh_flags = neigh->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = neigh->flags;
+ ndm->ndm_flags = neigh_flags;
ndm->ndm_type = neigh->type;
ndm->ndm_ifindex = neigh->dev->ifindex;
@@ -2448,6 +2673,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2461,18 +2688,24 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 pid, u32 seq, int type, unsigned int flags,
struct neigh_table *tbl)
{
+ u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
+ u8 protocol;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags = READ_ONCE(pn->flags);
+ neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
+ neigh_flags &= NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = pn->flags | NTF_PROXY;
+ ndm->ndm_flags = neigh_flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
@@ -2480,7 +2713,10 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ protocol = READ_ONCE(pn->protocol);
+ if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
+ goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -2504,7 +2740,14 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
if (!master_idx)
return false;
- master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+ master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
+
+ /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
+ * invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
@@ -2530,7 +2773,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct net *net = sock_net(skb->sk);
struct neighbour *n;
- int rc, h, s_h = cb->args[1];
+ int err = 0, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
unsigned int flags = NLM_F_MULTI;
@@ -2538,37 +2781,31 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ nht = rcu_dereference(tbl->nht);
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
+ idx = 0;
+ neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- flags) < 0) {
- rc = -1;
+ err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- rc = skb->len;
out:
- rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
- return rc;
+ return err;
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
@@ -2577,43 +2814,38 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
{
struct pneigh_entry *n;
struct net *net = sock_net(skb->sk);
- int rc, h, s_h = cb->args[3];
+ int err = 0, h, s_h = cb->args[3];
int idx, s_idx = idx = cb->args[4];
unsigned int flags = NLM_F_MULTI;
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- read_lock_bh(&tbl->lock);
-
for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
if (h > s_h)
s_idx = 0;
- for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
+ n;
+ n = rcu_dereference(n->next)) {
if (idx < s_idx || pneigh_net(n) != net)
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH, flags, tbl) < 0) {
- read_unlock_bh(&tbl->lock);
- rc = -1;
+ err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags, tbl);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
- rc = skb->len;
out:
cb->args[3] = h;
cb->args[4] = idx;
- return rc;
-
+ return err;
}
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
@@ -2627,12 +2859,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
if (strict_check) {
struct ndmsg *ndm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
return -EINVAL;
}
- ndm = nlmsg_data(nlh);
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
ndm->ndm_state || ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
@@ -2644,11 +2876,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
- nda_policy, extack);
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
+ tb, NDA_MAX, nda_policy,
+ extack);
} else {
- err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
- nda_policy, extack);
+ err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
+ NDA_MAX, nda_policy, extack);
}
if (err < 0)
return err;
@@ -2697,11 +2930,13 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
if (err < 0 && cb->strict_check)
return err;
+ err = 0;
s_t = cb->args[0];
+ rcu_read_lock();
for (t = 0; t < NEIGH_NR_TABLES; t++) {
- tbl = neigh_tables[t];
+ tbl = rcu_dereference(neigh_tables[t]);
if (!tbl)
continue;
@@ -2717,69 +2952,64 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
break;
}
+ rcu_read_unlock();
cb->args[0] = t;
- return skb->len;
+ return err;
}
-static int neigh_valid_get_req(const struct nlmsghdr *nlh,
- struct neigh_table **tbl,
- void **dst, int *dev_idx, u8 *ndm_flags,
- struct netlink_ext_ack *extack)
+static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *tb[NDA_MAX + 1];
struct ndmsg *ndm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
- ndm = nlmsg_data(nlh);
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if (ndm->ndm_flags & ~NTF_PROXY) {
NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
- err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
- nda_policy, extack);
- if (err < 0)
- return err;
-
- *ndm_flags = ndm->ndm_flags;
- *dev_idx = ndm->ndm_ifindex;
- *tbl = neigh_find_table(ndm->ndm_family);
- if (*tbl == NULL) {
- NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
- return -EAFNOSUPPORT;
+ if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return ERR_PTR(-EINVAL);
}
- for (i = 0; i <= NDA_MAX; ++i) {
- if (!tb[i])
- continue;
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
+ NDA_MAX, nda_policy, extack);
+ if (err < 0)
+ return ERR_PTR(err);
+ for (i = 0; i <= NDA_MAX; ++i) {
switch (i) {
case NDA_DST:
- if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
- NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
- return -EINVAL;
+ if (!tb[i]) {
+ NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
+ return ERR_PTR(-EINVAL);
}
- *dst = nla_data(tb[i]);
break;
default:
+ if (!tb[i])
+ continue;
+
NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
}
- return 0;
+ return ndm;
}
static inline size_t neigh_nlmsg_size(void)
@@ -2789,116 +3019,103 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
+ nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int neigh_get_reply(struct net *net, struct neighbour *neigh,
- u32 pid, u32 seq)
-{
- struct sk_buff *skb;
- int err = 0;
-
- skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
-
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
-
static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
- u32 pid, u32 seq, struct neigh_table *tbl)
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(in_skb->sk);
+ u32 pid = NETLINK_CB(in_skb).portid;
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev = NULL;
+ u32 seq = nlh->nlmsg_seq;
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
struct sk_buff *skb;
- int err = 0;
+ struct ndmsg *ndm;
+ void *dst;
+ int err;
- skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+ ndm = neigh_valid_get_req(nlh, tb, extack);
+ if (IS_ERR(ndm))
+ return PTR_ERR(ndm);
+
+ if (ndm->ndm_flags & NTF_PROXY)
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+ else
+ skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
+ rcu_read_lock();
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (!tbl) {
+ NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+ err = -EAFNOSUPPORT;
+ goto err_unlock;
+ }
-static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct net_device *dev = NULL;
- struct neigh_table *tbl = NULL;
- struct neighbour *neigh;
- void *dst = NULL;
- u8 ndm_flags = 0;
- int dev_idx = 0;
- int err;
+ if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+ err = -EINVAL;
+ goto err_unlock;
+ }
- err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
- extack);
- if (err < 0)
- return err;
+ dst = nla_data(tb[NDA_DST]);
- if (dev_idx) {
- dev = __dev_get_by_index(net, dev_idx);
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
if (!dev) {
NL_SET_ERR_MSG(extack, "Unknown device ifindex");
- return -ENODEV;
+ err = -ENODEV;
+ goto err_unlock;
}
}
- if (!dst) {
- NL_SET_ERR_MSG(extack, "Network address not specified");
- return -EINVAL;
- }
-
- if (ndm_flags & NTF_PROXY) {
+ if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
- pn = pneigh_lookup(tbl, net, dst, dev, 0);
+ pn = pneigh_lookup(tbl, net, dst, dev);
if (!pn) {
NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
- return -ENOENT;
+ err = -ENOENT;
+ goto err_unlock;
}
- return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, tbl);
- }
- if (!dev) {
- NL_SET_ERR_MSG(extack, "No device specified");
- return -EINVAL;
- }
+ err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
+ if (err)
+ goto err_unlock;
+ } else {
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (!neigh) {
+ NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+ err = -ENOENT;
+ goto err_unlock;
+ }
- neigh = neigh_lookup(tbl, dst, dev);
- if (!neigh) {
- NL_SET_ERR_MSG(extack, "Neighbour entry not found");
- return -ENOENT;
+ err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ neigh_release(neigh);
+ if (err)
+ goto err_unlock;
}
- err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq);
-
- neigh_release(neigh);
+ rcu_read_unlock();
+ return rtnl_unicast(skb, net, pid);
+err_unlock:
+ rcu_read_unlock();
+ kfree_skb(skb);
return err;
}
@@ -2907,20 +3124,18 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
int chain;
struct neigh_hash_table *nht;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
- read_lock(&tbl->lock); /* avoid resizes */
+ spin_lock_bh(&tbl->lock); /* avoid resizes */
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
- for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
- n != NULL;
- n = rcu_dereference_bh(n->next))
+ neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
cb(n, cookie);
}
- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2928,29 +3143,25 @@ EXPORT_SYMBOL(neigh_for_each);
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
- int chain;
struct neigh_hash_table *nht;
+ int chain;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[chain];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
- } else
- np = &n->next;
+ }
write_unlock(&n->lock);
if (release)
neigh_cleanup_and_release(n);
@@ -2963,24 +3174,32 @@ int neigh_xmit(int index, struct net_device *dev,
const void *addr, struct sk_buff *skb)
{
int err = -EAFNOSUPPORT;
+
if (likely(index < NEIGH_NR_TABLES)) {
struct neigh_table *tbl;
struct neighbour *neigh;
- tbl = neigh_tables[index];
+ rcu_read_lock();
+ tbl = rcu_dereference(neigh_tables[index]);
if (!tbl)
- goto out;
- rcu_read_lock_bh();
- neigh = __neigh_lookup_noref(tbl, addr, dev);
+ goto out_unlock;
+ if (index == NEIGH_ARP_TABLE) {
+ u32 key = *((u32 *)addr);
+
+ neigh = __ipv4_neigh_lookup_noref(dev, key);
+ } else {
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ }
if (!neigh)
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
if (IS_ERR(neigh)) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out_kfree_skb;
}
- err = neigh->output(neigh, skb);
- rcu_read_unlock_bh();
+ err = READ_ONCE(neigh->output)(neigh, skb);
+out_unlock:
+ rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
@@ -2999,43 +3218,53 @@ EXPORT_SYMBOL(neigh_xmit);
#ifdef CONFIG_PROC_FS
-static struct neighbour *neigh_get_first(struct seq_file *seq)
+static struct neighbour *neigh_get_valid(struct seq_file *seq,
+ struct neighbour *n,
+ loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
+
+ if (!net_eq(dev_net(n->dev), net))
+ return NULL;
+
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+
+ v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
+ if (!v)
+ return NULL;
+ if (pos)
+ return v;
+ }
+
+ if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+ return n;
+
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
+ return n;
+
+ return NULL;
+}
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+ struct neigh_seq_state *state = seq->private;
struct neigh_hash_table *nht = state->nht;
- struct neighbour *n = NULL;
- int bucket = state->bucket;
+ struct neighbour *n, *tmp;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference_bh(nht->hash_buckets[bucket]);
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- loff_t fakep = 0;
- void *v;
-
- v = state->neigh_sub_iter(state, n, &fakep);
- if (!v)
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (n->nud_state & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference_bh(n->next);
+ while (++state->bucket < (1 << nht->hash_shift)) {
+ neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
+ tmp = neigh_get_valid(seq, n, NULL);
+ if (tmp)
+ return tmp;
}
-
- if (n)
- break;
}
- state->bucket = bucket;
- return n;
+ return NULL;
}
static struct neighbour *neigh_get_next(struct seq_file *seq,
@@ -3043,46 +3272,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = seq_file_net(seq);
- struct neigh_hash_table *nht = state->nht;
+ struct neighbour *tmp;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
+
if (v)
return n;
}
- n = rcu_dereference_bh(n->next);
-
- while (1) {
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- void *v = state->neigh_sub_iter(state, n, pos);
- if (v)
- return n;
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (n->nud_state & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference_bh(n->next);
+ hlist_for_each_entry_continue(n, hash) {
+ tmp = neigh_get_valid(seq, n, pos);
+ if (tmp) {
+ n = tmp;
+ goto out;
}
-
- if (n)
- break;
-
- if (++state->bucket >= (1 << nht->hash_shift))
- break;
-
- n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}
+ n = neigh_get_first(seq);
+out:
if (n && pos)
--(*pos);
+
return n;
}
@@ -3107,13 +3318,14 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
struct pneigh_entry *pn = NULL;
- int bucket = state->bucket;
+ int bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket];
+ pn = rcu_dereference(tbl->phash_buckets[bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3131,15 +3343,17 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct neigh_table *tbl = state->tbl;
do {
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
} while (pn && !net_eq(pneigh_net(pn), net));
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
- pn = tbl->phash_buckets[state->bucket];
+
+ pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3179,16 +3393,18 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
- __acquires(rcu_bh)
+ __acquires(tbl->lock)
+ __acquires(rcu)
{
struct neigh_seq_state *state = seq->private;
state->tbl = tbl;
- state->bucket = 0;
+ state->bucket = -1;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- rcu_read_lock_bh();
- state->nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ state->nht = rcu_dereference(tbl->nht);
+ spin_lock_bh(&tbl->lock);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -3222,9 +3438,14 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
- __releases(rcu_bh)
+ __releases(tbl->lock)
+ __releases(rcu)
{
- rcu_read_unlock_bh();
+ struct neigh_seq_state *state = seq->private;
+ struct neigh_table *tbl = state->tbl;
+
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);
@@ -3232,7 +3453,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -3249,7 +3470,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -3258,6 +3479,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
@@ -3268,16 +3490,17 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+ seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
- seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx %08lx\n",
+ seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+ "%08lx %08lx %08lx "
+ "%08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -3312,10 +3535,12 @@ static const struct seq_operations neigh_stat_seq_ops = {
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid)
{
- struct net *net = dev_net(n->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ struct net *net;
+ rcu_read_lock();
+ net = dev_net_rcu(n->dev);
skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
@@ -3328,10 +3553,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
- return;
+ goto out;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+out:
+ rcu_read_unlock();
}
void neigh_app_ns(struct neighbour *n)
@@ -3341,17 +3567,15 @@ void neigh_app_ns(struct neighbour *n)
EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
-static int zero;
-static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
-static int proc_unres_qlen(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_unres_qlen(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int size, ret;
struct ctl_table tmp = *ctl;
- tmp.extra1 = &zero;
+ tmp.extra1 = SYSCTL_ZERO;
tmp.extra2 = &unres_qlen_max;
tmp.data = &size;
@@ -3363,18 +3587,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
-static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
- int family)
-{
- switch (family) {
- case AF_INET:
- return __in_dev_arp_parms_get_rcu(dev);
- case AF_INET6:
- return __in6_dev_nd_parms_get_rcu(dev);
- }
- return NULL;
-}
-
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
int index)
{
@@ -3392,7 +3604,7 @@ static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
rcu_read_unlock();
}
-static void neigh_proc_update(struct ctl_table *ctl, int write)
+static void neigh_proc_update(const struct ctl_table *ctl, int write)
{
struct net_device *dev = ctl->extra1;
struct neigh_parms *p = ctl->extra2;
@@ -3409,23 +3621,39 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
neigh_copy_dflt_parms(net, p, index);
}
-static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
{
struct ctl_table tmp = *ctl;
int ret;
- tmp.extra1 = &zero;
- tmp.extra2 = &int_max;
+ tmp.extra1 = SYSCTL_ZERO;
+ tmp.extra2 = SYSCTL_INT_MAX;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
neigh_proc_update(ctl, write);
return ret;
}
-int neigh_proc_dointvec(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tmp = *ctl;
+ int ret;
+
+ int min = msecs_to_jiffies(1);
+
+ tmp.extra1 = &min;
+ tmp.extra2 = NULL;
+
+ ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
+int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
@@ -3434,8 +3662,7 @@ int neigh_proc_dointvec(struct ctl_table *ctl, int write,
}
EXPORT_SYMBOL(neigh_proc_dointvec);
-int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
- void __user *buffer,
+int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3445,9 +3672,9 @@ int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
-static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
{
int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3455,9 +3682,8 @@ static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
return ret;
}
-int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3466,9 +3692,9 @@ int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
-static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
{
int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
@@ -3476,9 +3702,9 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
-static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
{
struct neigh_parms *p = ctl->extra2;
int ret;
@@ -3495,8 +3721,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
* only be effective after the next time neigh_periodic_work
* decides to recompute it
*/
- p->reachable_time =
- neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+ neigh_set_reach_time(p);
}
return ret;
}
@@ -3522,8 +3747,8 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
-#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
- NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
+#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
@@ -3533,7 +3758,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
@@ -3543,6 +3768,8 @@ static struct neigh_sysctl_table {
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
+ NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
+ "interval_probe_time_ms"),
NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
@@ -3562,27 +3789,26 @@ static struct neigh_sysctl_table {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
- {},
},
};
@@ -3594,8 +3820,9 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
const char *dev_name_source;
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
+ size_t neigh_vars_size;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
@@ -3605,11 +3832,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[i].extra2 = p;
}
+ neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
- sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
+ neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
@@ -3643,10 +3870,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
neigh_proc_base_reachable_time;
}
- /* Don't export sysctls to unprivileged users */
- if (neigh_parms_net(p)->user_ns != &init_user_ns)
- t->neigh_vars[0].procname = NULL;
-
switch (neigh_parms_family(p)) {
case AF_INET:
p_name = "ipv4";
@@ -3660,8 +3883,9 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
p_name, dev_name_source);
- t->sysctl_header =
- register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
+ t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
+ neigh_path, t->neigh_vars,
+ neigh_vars_size);
if (!t->sysctl_header)
goto free;
@@ -3688,16 +3912,20 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif /* CONFIG_SYSCTL */
+static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
+ {.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
+ {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- 0);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
-
+ rtnl_register_many(neigh_rtnl_msg_handlers);
return 0;
}