summaryrefslogtreecommitdiff
path: root/net/core/neighbour.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/neighbour.c')
-rw-r--r--net/core/neighbour.c2951
1 files changed, 1921 insertions, 1030 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b7de821f98df..96a3b1a93252 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic address resolution entity
*
@@ -5,11 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
* Harald Welte Add neighbour cache statistics like rtstat
@@ -30,7 +26,9 @@
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
+#include <net/arp.h>
#include <net/dst.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
@@ -38,8 +36,11 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+
+#include <trace/events/neigh.h>
-#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...) \
do { \
@@ -49,18 +50,38 @@ do { \
#define PNEIGH_HASHMASK 0xF
-static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+static void neigh_timer_handler(struct timer_list *t);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm);
-static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
-static const struct file_operations neigh_stat_seq_fops;
+static const struct seq_operations neigh_stat_seq_ops;
#endif
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+ int i;
+
+ switch (family) {
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ fallthrough; /* to avoid panic by null-ptr-deref */
+ case AF_INET:
+ i = NEIGH_ARP_TABLE;
+ break;
+ case AF_INET6:
+ i = NEIGH_ND_TABLE;
+ break;
+ }
+
+ return &dev->neighbours[i];
+}
+
/*
- Neighbour hash table buckets are protected with rwlock tbl->lock.
+ Neighbour hash table buckets are protected with tbl->lock.
- All the scans/updates to hash buckets MUST be made under this lock.
- NOTHING clever should be made under this lock: no callbacks
@@ -85,13 +106,8 @@ static const struct file_operations neigh_stat_seq_fops;
the most complicated procedure, which we allow is dev->hard_header.
It is supposed, that dev->hard_header is simplistic and does
not make callbacks to neighbour tables.
-
- The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
- list of neighbour tables. This list is used only in process context,
*/
-static DEFINE_RWLOCK(neigh_tbl_lock);
-
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
kfree_skb(skb);
@@ -100,10 +116,9 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
- if (neigh->parms->neigh_cleanup)
- neigh->parms->neigh_cleanup(neigh);
-
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ trace_neigh_cleanup_and_release(neigh, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -115,60 +130,186 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return base ? (net_random() % base) + (base >> 1) : 0;
+ return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
+static void neigh_mark_dead(struct neighbour *n)
+{
+ n->dead = 1;
+ if (!list_empty(&n->gc_list)) {
+ list_del_init(&n->gc_list);
+ atomic_dec(&n->tbl->gc_entries);
+ }
+ if (!list_empty(&n->managed_list))
+ list_del_init(&n->managed_list);
+}
+
+static void neigh_update_gc_list(struct neighbour *n)
+{
+ bool on_gc_list, exempt_from_gc;
+
+ spin_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
+ /* remove from the gc list if new state is permanent or if neighbor is
+ * externally learned / validated; otherwise entry should be on the gc
+ * list
+ */
+ exempt_from_gc = n->nud_state & NUD_PERMANENT ||
+ n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
+ on_gc_list = !list_empty(&n->gc_list);
+
+ if (exempt_from_gc && on_gc_list) {
+ list_del_init(&n->gc_list);
+ atomic_dec(&n->tbl->gc_entries);
+ } else if (!exempt_from_gc && !on_gc_list) {
+ /* add entries to the tail; cleaning removes from the front */
+ list_add_tail(&n->gc_list, &n->tbl->gc_list);
+ atomic_inc(&n->tbl->gc_entries);
+ }
+out:
+ write_unlock(&n->lock);
+ spin_unlock_bh(&n->tbl->lock);
+}
+
+static void neigh_update_managed_list(struct neighbour *n)
+{
+ bool on_managed_list, add_to_managed;
+
+ spin_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
+ add_to_managed = n->flags & NTF_MANAGED;
+ on_managed_list = !list_empty(&n->managed_list);
+
+ if (!add_to_managed && on_managed_list)
+ list_del_init(&n->managed_list);
+ else if (add_to_managed && !on_managed_list)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
+out:
+ write_unlock(&n->lock);
+ spin_unlock_bh(&n->tbl->lock);
+}
+
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+ bool *gc_update, bool *managed_update)
+{
+ u32 ndm_flags, old_flags = neigh->flags;
+
+ if (!(flags & NEIGH_UPDATE_F_ADMIN))
+ return;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
+
+ if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if (ndm_flags & NTF_EXT_LEARNED)
+ neigh->flags |= NTF_EXT_LEARNED;
+ else
+ neigh->flags &= ~NTF_EXT_LEARNED;
+ *notify = 1;
+ *gc_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+ if (ndm_flags & NTF_MANAGED)
+ neigh->flags |= NTF_MANAGED;
+ else
+ neigh->flags &= ~NTF_MANAGED;
+ *notify = 1;
+ *managed_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ neigh->flags |= NTF_EXT_VALIDATED;
+ else
+ neigh->flags &= ~NTF_EXT_VALIDATED;
+ *notify = 1;
+ *gc_update = true;
+ }
+}
+
+bool neigh_remove_one(struct neighbour *n)
+{
+ bool retval = false;
+
+ write_lock(&n->lock);
+ if (refcount_read(&n->refcnt) == 1) {
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ neigh_mark_dead(n);
+ retval = true;
+ }
+ write_unlock(&n->lock);
+ if (retval)
+ neigh_cleanup_and_release(n);
+ return retval;
+}
static int neigh_forced_gc(struct neigh_table *tbl)
{
+ int max_clean = atomic_read(&tbl->gc_entries) -
+ READ_ONCE(tbl->gc_thresh2);
+ u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
+ unsigned long tref = jiffies - 5 * HZ;
+ struct neighbour *n, *tmp;
int shrunk = 0;
- int i;
- struct neigh_hash_table *nht;
+ int loop = 0;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
- write_lock_bh(&tbl->lock);
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
- struct neighbour __rcu **np;
-
- np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- /* Neighbour record may be discarded if:
- * - nobody refers to it.
- * - it is not permanent
- */
+ spin_lock_bh(&tbl->lock);
+
+ list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
+ if (refcount_read(&n->refcnt) == 1) {
+ bool remove = false;
+
write_lock(&n->lock);
- if (atomic_read(&n->refcnt) == 1 &&
- !(n->nud_state & NUD_PERMANENT)) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- n->dead = 1;
- shrunk = 1;
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
- continue;
- }
+ if ((n->nud_state == NUD_FAILED) ||
+ (n->nud_state == NUD_NOARP) ||
+ (tbl->is_multicast &&
+ tbl->is_multicast(n->primary_key)) ||
+ !time_in_range(n->updated, tref, jiffies))
+ remove = true;
write_unlock(&n->lock);
- np = &n->next;
+
+ if (remove && neigh_remove_one(n))
+ shrunk++;
+ if (shrunk >= max_clean)
+ break;
+ if (++loop == 16) {
+ if (ktime_get_ns() > tmax)
+ goto unlock;
+ loop = 0;
+ }
}
}
- tbl->last_flush = jiffies;
-
- write_unlock_bh(&tbl->lock);
+ WRITE_ONCE(tbl->last_flush, jiffies);
+unlock:
+ spin_unlock_bh(&tbl->lock);
return shrunk;
}
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Use safe distance from the jiffies - LONG_MAX point while timer
+ * is running in DELAY/PROBE state but still show to user space
+ * large times in the past.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -179,109 +320,205 @@ static void neigh_add_timer(struct neighbour *n, unsigned long when)
static int neigh_del_timer(struct neighbour *n)
{
if ((n->nud_state & NUD_IN_TIMER) &&
- del_timer(&n->timer)) {
+ timer_delete(&n->timer)) {
neigh_release(n);
return 1;
}
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list)
+static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
+ int family)
+{
+ switch (family) {
+ case AF_INET:
+ return __in_dev_arp_parms_get_rcu(dev);
+ case AF_INET6:
+ return __in6_dev_nd_parms_get_rcu(dev);
+ }
+ return NULL;
+}
+
+static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
+ struct neigh_parms *p;
+
+ rcu_read_lock();
+ p = neigh_get_dev_parms_rcu(dev, family);
+ if (p)
+ p->qlen--;
+ rcu_read_unlock();
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
+ int family)
+{
+ struct sk_buff_head tmp;
+ unsigned long flags;
struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) {
+ skb_queue_head_init(&tmp);
+ spin_lock_irqsave(&list->lock, flags);
+ skb = skb_peek(list);
+ while (skb != NULL) {
+ struct sk_buff *skb_next = skb_peek_next(skb, list);
+ struct net_device *dev = skb->dev;
+
+ if (net == NULL || net_eq(dev_net(dev), net)) {
+ neigh_parms_qlen_dec(dev, family);
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&tmp, skb);
+ }
+ skb = skb_next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
dev_put(skb->dev);
kfree_skb(skb);
}
}
-static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_one(struct neighbour *n)
+{
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+
+ write_lock(&n->lock);
+
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+
+ neigh_dbg(2, "neigh %p is stray\n", n);
+ }
+
+ write_unlock(&n->lock);
+
+ neigh_cleanup_and_release(n);
+}
+
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
+{
+ struct hlist_head *dev_head;
+ struct hlist_node *tmp;
+ struct neighbour *n;
+
+ dev_head = neigh_get_dev_table(dev, tbl->family);
+
+ hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+ if (skip_perm &&
+ (n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_VALIDATED))
+ continue;
+
+ neigh_flush_one(n);
+ }
+}
+
+static void neigh_flush_table(struct neigh_table *tbl)
{
- int i;
struct neigh_hash_table *nht;
+ int i;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (i = 0; i < (1 << nht->hash_shift); i++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
- continue;
- }
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- write_lock(&n->lock);
- neigh_del_timer(n);
- n->dead = 1;
-
- if (atomic_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- n->output = neigh_blackhole;
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
- }
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
+ neigh_flush_one(n);
}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
- write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev);
- write_unlock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
+ neigh_flush_dev(tbl, dev, false);
+ spin_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
+{
+ spin_lock_bh(&tbl->lock);
+ if (likely(dev)) {
+ neigh_flush_dev(tbl, dev, skip_perm);
+ } else {
+ DEBUG_NET_WARN_ON_ONCE(skip_perm);
+ neigh_flush_table(tbl);
+ }
+ spin_unlock_bh(&tbl->lock);
+
+ pneigh_ifdown(tbl, dev, skip_perm);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
+ tbl->family);
+ if (skb_queue_empty_lockless(&tbl->proxy_queue))
+ timer_delete_sync(&tbl->proxy_timer);
+ return 0;
+}
+
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
- write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev);
- pneigh_ifdown(tbl, dev);
- write_unlock_bh(&tbl->lock);
+ __neigh_ifdown(tbl, dev, true);
+ return 0;
+}
+EXPORT_SYMBOL(neigh_carrier_down);
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+ __neigh_ifdown(tbl, dev, false);
return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
-static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl,
+ struct net_device *dev,
+ u32 flags, bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
- int entries;
-
- entries = atomic_inc_return(&tbl->entries) - 1;
- if (entries >= tbl->gc_thresh3 ||
- (entries >= tbl->gc_thresh2 &&
- time_after(now, tbl->last_flush + 5 * HZ))) {
- if (!neigh_forced_gc(tbl) &&
- entries >= tbl->gc_thresh3)
+ int entries, gc_thresh3;
+
+ if (exempt_from_gc)
+ goto do_alloc;
+
+ entries = atomic_inc_return(&tbl->gc_entries) - 1;
+ gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
+ if (entries >= gc_thresh3 ||
+ (entries >= READ_ONCE(tbl->gc_thresh2) &&
+ time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
+ if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
+ net_info_ratelimited("%s: neighbor table overflow!\n",
+ tbl->id);
+ NEIGH_CACHE_STAT_INC(tbl, table_fulls);
goto out_entries;
+ }
}
+do_alloc:
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
if (!n)
goto out_entries;
@@ -292,49 +529,50 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ n->flags = flags;
seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
- setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
+ timer_setup(&n->timer, neigh_timer_handler, 0);
NEIGH_CACHE_STAT_INC(tbl, allocs);
n->tbl = tbl;
- atomic_set(&n->refcnt, 1);
+ refcount_set(&n->refcnt, 1);
n->dead = 1;
+ INIT_LIST_HEAD(&n->gc_list);
+ INIT_LIST_HEAD(&n->managed_list);
+
+ atomic_inc(&tbl->entries);
out:
return n;
out_entries:
- atomic_dec(&tbl->entries);
+ if (!exempt_from_gc)
+ atomic_dec(&tbl->gc_entries);
goto out;
}
static void neigh_get_hash_rnd(u32 *x)
{
- get_random_bytes(x, sizeof(*x));
- *x |= 1;
+ *x = get_random_u32() | 1;
}
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = (1 << shift) * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct hlist_head);
+ struct hlist_head *hash_heads;
struct neigh_hash_table *ret;
- struct neighbour __rcu **buckets;
int i;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
- if (size <= PAGE_SIZE)
- buckets = kzalloc(size, GFP_ATOMIC);
- else
- buckets = (struct neighbour __rcu **)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(size));
- if (!buckets) {
+
+ hash_heads = kzalloc(size, GFP_ATOMIC);
+ if (!hash_heads) {
kfree(ret);
return NULL;
}
- ret->hash_buckets = buckets;
+ ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -346,13 +584,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets = nht->hash_buckets;
- if (size <= PAGE_SIZE)
- kfree(buckets);
- else
- free_pages((unsigned long)buckets, get_order(size));
+ kfree(nht->hash_heads);
kfree(nht);
}
@@ -371,24 +604,17 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
return old_nht;
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
- struct neighbour *n, *next;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
- lockdep_is_held(&tbl->lock));
- n != NULL;
- n = next) {
+ neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
hash >>= (32 - new_nht->hash_shift);
- next = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(
- new_nht->hash_buckets[hash],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
}
@@ -401,72 +627,34 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
struct neighbour *n;
- int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- if (!atomic_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
+ rcu_read_lock();
+ n = __neigh_lookup_noref(tbl, pkey, dev);
+ if (n) {
+ if (!refcount_inc_not_zero(&n->refcnt))
+ n = NULL;
+ NEIGH_CACHE_STAT_INC(tbl, hits);
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, u32 flags,
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n;
- int key_len = tbl->key_len;
- u32 hash_val;
+ u32 hash_val, key_len = tbl->key_len;
+ struct neighbour *n1, *rc, *n;
struct neigh_hash_table *nht;
-
- NEIGH_CACHE_STAT_INC(tbl, lookups);
-
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!memcmp(n->primary_key, pkey, key_len) &&
- net_eq(dev_net(n->dev), net)) {
- if (!atomic_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
- }
-
- rcu_read_unlock_bh();
- return n;
-}
-EXPORT_SYMBOL(neigh_lookup_nodev);
-
-struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
- struct net_device *dev, bool want_ref)
-{
- u32 hash_val;
- int key_len = tbl->key_len;
int error;
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
- struct neigh_hash_table *nht;
+ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
+ trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
@@ -474,7 +662,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
- dev_hold(dev);
+ netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
@@ -483,7 +671,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
if (dev->netdev_ops->ndo_neigh_construct) {
- error = dev->netdev_ops->ndo_neigh_construct(n);
+ error = dev->netdev_ops->ndo_neigh_construct(dev, n);
if (error < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
@@ -497,28 +685,24 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_neigh_release;
}
- n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
+ n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
- for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
- n1 != NULL;
- n1 = rcu_dereference_protected(n1->next,
- lockdep_is_held(&tbl->lock))) {
- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+ neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
+ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1;
@@ -527,26 +711,41 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
n->dead = 0;
+ if (!exempt_from_gc)
+ list_add_tail(&n->gc_list, &n->tbl->gc_list);
+ if (n->flags & NTF_MANAGED)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(nht->hash_buckets[hash_val], n);
- write_unlock_bh(&tbl->lock);
+ hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ hlist_add_head_rcu(&n->dev_list,
+ neigh_get_dev_table(dev, tbl->family));
+
+ spin_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
out:
return rc;
out_tbl_unlock:
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
out_neigh_release:
+ if (!exempt_from_gc)
+ atomic_dec(&tbl->gc_entries);
neigh_release(n);
goto out;
}
+
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, bool want_ref)
+{
+ bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
+
+ return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
+}
EXPORT_SYMBOL(__neigh_create);
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
u32 hash_val = *(u32 *)(pkey + key_len - 4);
hash_val ^= (hash_val >> 16);
@@ -556,137 +755,160 @@ static u32 pneigh_hash(const void *pkey, int key_len)
return hash_val;
}
-static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
- struct net *net,
- const void *pkey,
- int key_len,
- struct net_device *dev)
+struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey,
+ struct net_device *dev)
{
+ struct pneigh_entry *n;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(tbl->phash_buckets[hash_val],
+ lockdep_is_held(&tbl->phash_lock));
+
while (n) {
if (!memcmp(n->key, pkey, key_len) &&
net_eq(pneigh_net(n), net) &&
(n->dev == dev || !n->dev))
return n;
- n = n->next;
- }
- return NULL;
-}
-struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey, struct net_device *dev)
-{
- int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
+ }
- return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(__pneigh_lookup);
+EXPORT_IPV6_MOD(pneigh_lookup);
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey,
- struct net_device *dev, int creat)
+int pneigh_create(struct neigh_table *tbl, struct net *net,
+ const void *pkey, struct net_device *dev,
+ u32 flags, u8 protocol, bool permanent)
{
struct pneigh_entry *n;
- int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
-
- read_lock_bh(&tbl->lock);
- n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
- read_unlock_bh(&tbl->lock);
+ unsigned int key_len;
+ u32 hash_val;
+ int err = 0;
- if (n || !creat)
- goto out;
+ mutex_lock(&tbl->phash_lock);
- ASSERT_RTNL();
+ n = pneigh_lookup(tbl, net, pkey, dev);
+ if (n)
+ goto update;
- n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
- if (!n)
+ key_len = tbl->key_len;
+ n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (!n) {
+ err = -ENOBUFS;
goto out;
+ }
- write_pnet(&n->net, hold_net(net));
+ write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- if (dev)
- dev_hold(dev);
+ netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- if (dev)
- dev_put(dev);
- release_net(net);
+ netdev_put(dev, &n->dev_tracker);
kfree(n);
- n = NULL;
+ err = -ENOBUFS;
goto out;
}
- write_lock_bh(&tbl->lock);
+ hash_val = pneigh_hash(pkey, key_len);
n->next = tbl->phash_buckets[hash_val];
- tbl->phash_buckets[hash_val] = n;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
+update:
+ WRITE_ONCE(n->flags, flags);
+ n->permanent = permanent;
+ WRITE_ONCE(n->protocol, protocol);
out:
- return n;
+ mutex_unlock(&tbl->phash_lock);
+ return err;
}
-EXPORT_SYMBOL(pneigh_lookup);
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+ struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+ netdev_put(n->dev, &n->dev_tracker);
+ kfree(n);
+}
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
- struct pneigh_entry *n, **np;
- int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ struct pneigh_entry *n, __rcu **np;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
- write_lock_bh(&tbl->lock);
- for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ mutex_lock(&tbl->phash_lock);
+
+ for (np = &tbl->phash_buckets[hash_val];
+ (n = rcu_dereference_protected(*np, 1)) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
net_eq(pneigh_net(n), net)) {
- *np = n->next;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(*np, n->next);
+
+ mutex_unlock(&tbl->phash_lock);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
- release_net(pneigh_net(n));
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
return -ENOENT;
}
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
- struct pneigh_entry *n, **np;
+ struct pneigh_entry *n, __rcu **np;
+ LIST_HEAD(head);
u32 h;
+ mutex_lock(&tbl->phash_lock);
+
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
+ if (skip_perm && n->permanent)
+ goto skip;
if (!dev || n->dev == dev) {
- *np = n->next;
- if (tbl->pdestructor)
- tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
- release_net(pneigh_net(n));
- kfree(n);
+ rcu_assign_pointer(*np, n->next);
+ list_add(&n->free_node, &head);
continue;
}
+skip:
np = &n->next;
}
}
- return -ENOENT;
-}
-static void neigh_parms_destroy(struct neigh_parms *parms);
+ mutex_unlock(&tbl->phash_lock);
+
+ while (!list_empty(&head)) {
+ n = list_first_entry(&head, typeof(*n), free_node);
+ list_del(&n->free_node);
+
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
+ }
+}
static inline void neigh_parms_put(struct neigh_parms *parms)
{
- if (atomic_dec_and_test(&parms->refcnt))
- neigh_parms_destroy(parms);
+ if (refcount_dec_and_test(&parms->refcnt))
+ kfree(parms);
}
/*
@@ -714,9 +936,9 @@ void neigh_destroy(struct neighbour *neigh)
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
- dev->netdev_ops->ndo_neigh_destroy(neigh);
+ dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
- dev_put(dev);
+ netdev_put(dev, &neigh->dev_tracker);
neigh_parms_put(neigh->parms);
neigh_dbg(2, "neigh %p is destroyed\n", neigh);
@@ -735,7 +957,7 @@ static void neigh_suspect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->output = neigh->ops->output;
+ WRITE_ONCE(neigh->output, neigh->ops->output);
}
/* Neighbour state is OK;
@@ -747,96 +969,95 @@ static void neigh_connect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is connected\n", neigh);
- neigh->output = neigh->ops->connected_output;
+ WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+ struct neigh_hash_table *nht;
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
unsigned int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
struct neigh_parms *p;
- tbl->last_rand = jiffies;
- for (p = &tbl->parms; p; p = p->next)
- p->reachable_time =
- neigh_rand_reach_time(p->base_reachable_time);
+
+ WRITE_ONCE(tbl->last_rand, jiffies);
+ list_for_each_entry(p, &tbl->parms_list, list)
+ neigh_set_reach_time(p);
}
- for (i = 0 ; i < (1 << nht->hash_shift); i++) {
- np = &nht->hash_buckets[i];
+ if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
+ goto out;
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ for (i = 0 ; i < (1 << nht->hash_shift); i++) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
write_lock(&n->lock);
state = n->nud_state;
- if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+ if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
+ (n->flags &
+ (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
write_unlock(&n->lock);
- goto next_elt;
+ continue;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
- if (atomic_read(&n->refcnt) == 1 &&
+ if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + n->parms->gc_staletime))) {
- *np = n->next;
- n->dead = 1;
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/*
* It's fine to release lock here, even if hash table
* grows while we are preempted.
*/
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
cond_resched();
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
}
out:
- /* Cycle through all hash buckets every base_reachable_time/2 ticks.
- * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
- * base_reachable_time.
+ /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
+ * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
+ * BASE_REACHABLE_TIME.
*/
- schedule_delayed_work(&tbl->gc_work,
- tbl->parms.base_reachable_time >> 1);
- write_unlock_bh(&tbl->lock);
+ queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
+ NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
+ spin_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE) ?
- p->ucast_probes :
- p->ucast_probes + p->app_probes + p->mcast_probes;
+ return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
+ (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
+ NEIGH_VAR(p, MCAST_PROBES));
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -867,22 +1088,23 @@ static void neigh_invalidate(struct neighbour *neigh)
static void neigh_probe(struct neighbour *neigh)
__releases(neigh->lock)
{
- struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+ struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
+ skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
+ if (neigh->ops->solicit)
+ neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
- kfree_skb(skb);
+ consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */
-static void neigh_timer_handler(unsigned long arg)
+static void neigh_timer_handler(struct timer_list *t)
{
unsigned long now, next;
- struct neighbour *neigh = (struct neighbour *)arg;
+ struct neighbour *neigh = timer_container_of(neigh, t, timer);
unsigned int state;
int notify = 0;
@@ -901,50 +1123,61 @@ static void neigh_timer_handler(unsigned long arg)
neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
- neigh->used + neigh->parms->delay_probe_time)) {
+ neigh->used +
+ NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_suspect(neigh);
- next = now + neigh->parms->delay_probe_time;
+ next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->nud_state = NUD_STALE;
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
}
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
- neigh->confirmed + neigh->parms->delay_probe_time)) {
+ neigh->confirmed +
+ NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
- neigh->nud_state = NUD_REACHABLE;
+ WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
- neigh->nud_state = NUD_PROBE;
+ WRITE_ONCE(neigh->nud_state, NUD_PROBE);
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
- next = now + neigh->parms->retrans_time;
+ notify = 1;
+ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+ HZ/100);
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
- next = now + neigh->parms->retrans_time;
+ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- neigh->nud_state = NUD_FAILED;
+ if (neigh->nud_state == NUD_PROBE &&
+ neigh->flags & NTF_EXT_VALIDATED) {
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
+ neigh->updated = jiffies;
+ } else {
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ neigh_invalidate(neigh);
+ }
notify = 1;
- neigh_invalidate(neigh);
+ goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
- if (time_before(next, jiffies + HZ/2))
- next = jiffies + HZ/2;
+ if (time_before(next, jiffies + HZ/100))
+ next = jiffies + HZ/100;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
@@ -956,12 +1189,15 @@ out:
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
+
+ trace_neigh_timer_handler(neigh, 0);
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -971,44 +1207,56 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
+ if (neigh->dead)
+ goto out_dead;
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
- if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
+ NEIGH_VAR(neigh->parms, APP_PROBES)) {
unsigned long next, now = jiffies;
- atomic_set(&neigh->probes, neigh->parms->ucast_probes);
- neigh->nud_state = NUD_INCOMPLETE;
+ atomic_set(&neigh->probes,
+ NEIGH_VAR(neigh->parms, UCAST_PROBES));
+ neigh_del_timer(neigh);
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
neigh->updated = now;
- next = now + max(neigh->parms->retrans_time, HZ/2);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ neigh_del_timer(neigh);
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
- neigh_add_timer(neigh,
- jiffies + neigh->parms->delay_probe_time);
+ neigh_add_timer(neigh, jiffies +
+ NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
}
if (neigh->nud_state == NUD_INCOMPLETE) {
if (skb) {
while (neigh->arp_queue_len_bytes + skb->truesize >
- neigh->parms->queue_len_bytes) {
+ NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
struct sk_buff *buff;
buff = __skb_dequeue(&neigh->arp_queue);
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
@@ -1023,7 +1271,16 @@ out_unlock_bh:
else
write_unlock(&neigh->lock);
local_bh_enable();
+ trace_neigh_event_send_done(neigh, rc);
return rc;
+
+out_dead:
+ if (neigh->nud_state & NUD_STALE)
+ goto out_unlock_bh;
+ write_unlock_bh(&neigh->lock);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
+ trace_neigh_event_send_dead(neigh, 1);
+ return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1038,7 +1295,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
if (update) {
hh = &neigh->hh;
- if (hh->hh_len) {
+ if (READ_ONCE(hh->hh_len)) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1046,8 +1303,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
}
}
-
-
/* Generic update routine.
-- lladdr is new lladdr or NULL, if it is not supplied.
-- new is new state.
@@ -1057,26 +1312,29 @@ static void neigh_update_hhs(struct neighbour *neigh)
NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
lladdr instead of overriding it
if it is different.
- It also allows to retain current state
- if lladdr is unchanged.
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
-
+ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
a router.
+ NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
+ or invalidated.
Caller MUST hold reference count on the entry.
*/
-
-int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
+ u8 new, u32 flags, u32 nlmsg_pid,
+ struct netlink_ext_ack *extack)
{
- u8 old;
- int err;
- int notify = 0;
- struct net_device *dev;
+ bool gc_update = false, managed_update = false;
int update_isrouter = 0;
+ struct net_device *dev;
+ int err, notify = 0;
+ u8 old;
+
+ trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
write_lock_bh(&neigh->lock);
@@ -1084,15 +1342,28 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
old = neigh->nud_state;
err = -EPERM;
+ if (neigh->dead) {
+ NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
+ new = old;
+ goto out;
+ }
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
+ neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+ if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
+ new = old & ~NUD_PERMANENT;
+ WRITE_ONCE(neigh->nud_state, new);
+ err = 0;
+ goto out;
+ }
+
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1121,14 +1392,18 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
use it, otherwise discard the request.
*/
err = -EINVAL;
- if (!(old & NUD_VALID))
+ if (!(old & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack, "No link layer address given");
goto out;
+ }
lladdr = neigh->ha;
}
+ /* Update confirmed timestamp for neighbour entry after we
+ * received ARP packet even if it doesn't change IP to MAC binding.
+ */
if (new & NUD_CONNECTED)
neigh->confirmed = jiffies;
- neigh->updated = jiffies;
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
@@ -1146,21 +1421,29 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
goto out;
} else {
if (lladdr == neigh->ha && new == NUD_STALE &&
- ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
- (old & NUD_CONNECTED))
- )
+ !(flags & NEIGH_UPDATE_F_ADMIN))
new = old;
}
}
+ /* Update timestamp only once we know we will make a change to the
+ * neighbour entry. Otherwise we risk to move the locktime window with
+ * noop updates and ignore relevant ARP updates.
+ */
+ if (new != old || lladdr != neigh->ha)
+ neigh->updated = jiffies;
+
if (new != old) {
neigh_del_timer(neigh);
+ if (new & NUD_PROBE)
+ atomic_set(&neigh->probes, 0);
if (new & NUD_IN_TIMER)
neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
+ notify = 1;
}
if (lladdr != neigh->ha) {
@@ -1170,7 +1453,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies -
- (neigh->parms->base_reachable_time << 1);
+ (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
notify = 1;
}
if (new == old)
@@ -1200,12 +1483,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst) {
+ if (dst &&
+ READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
}
- n1->output(n1, skb);
+ READ_ONCE(n1->output)(n1, skb);
if (n2)
neigh_release(n2);
rcu_read_unlock();
@@ -1216,20 +1500,44 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh->arp_queue_len_bytes = 0;
}
out:
- if (update_isrouter) {
- neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
- (neigh->flags | NTF_ROUTER) :
- (neigh->flags & ~NTF_ROUTER);
- }
+ if (update_isrouter)
+ neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
-
+ if (((new ^ old) & NUD_PERMANENT) || gc_update)
+ neigh_update_gc_list(neigh);
+ if (managed_update)
+ neigh_update_managed_list(neigh);
if (notify)
- neigh_update_notify(neigh);
-
+ neigh_update_notify(neigh, nlmsg_pid);
+ trace_neigh_update_done(neigh, err);
return err;
}
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+ u32 flags, u32 nlmsg_pid)
+{
+ return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
+}
EXPORT_SYMBOL(neigh_update);
+/* Update the neigh to listen temporarily for probe responses, even if it is
+ * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
+ */
+void __neigh_set_probe_once(struct neighbour *neigh)
+{
+ if (neigh->dead)
+ return;
+ neigh->updated = jiffies;
+ if (!(neigh->nud_state & NUD_FAILED))
+ return;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
+ atomic_set(&neigh->probes, neigh_max_probes(neigh));
+ neigh_add_timer(neigh,
+ jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+ HZ/100));
+}
+EXPORT_SYMBOL(__neigh_set_probe_once);
+
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
u8 *lladdr, void *saddr,
struct net_device *dev)
@@ -1238,16 +1546,16 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
+static void neigh_hh_init(struct neighbour *n)
{
- struct net_device *dev = dst->dev;
- __be16 prot = dst->ops->protocol;
+ struct net_device *dev = n->dev;
+ __be16 prot = n->tbl->protocol;
struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
@@ -1261,43 +1569,19 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
write_unlock_bh(&n->lock);
}
-/* This function can be used in contexts, where only old dev_queue_xmit
- * worked, f.e. if you want to override normal output path (eql, shaper),
- * but resolution is not made yet.
- */
-
-int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
-
- __skb_pull(skb, skb_network_offset(skb));
-
- if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
- skb->len) < 0 &&
- dev->header_ops->rebuild(skb))
- return 0;
-
- return dev_queue_xmit(skb);
-}
-EXPORT_SYMBOL(neigh_compat_output);
-
/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
int rc = 0;
- if (!dst)
- goto discard;
-
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache && !neigh->hh.hh_len)
- neigh_hh_init(neigh, dst);
+ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
+ neigh_hh_init(neigh);
do {
__skb_pull(skb, skb_network_offset(skb));
@@ -1313,11 +1597,9 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
}
out:
return rc;
-discard:
- neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
@@ -1341,7 +1623,7 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
err = dev_queue_xmit(skb);
else {
err = -EINVAL;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
}
return err;
}
@@ -1353,9 +1635,23 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_direct_output);
-static void neigh_proxy_process(unsigned long arg)
+static void neigh_managed_work(struct work_struct *work)
{
- struct neigh_table *tbl = (struct neigh_table *)arg;
+ struct neigh_table *tbl = container_of(work, struct neigh_table,
+ managed_work.work);
+ struct neighbour *neigh;
+
+ spin_lock_bh(&tbl->lock);
+ list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+ neigh_event_send_probe(neigh, NULL, false);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+ NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
+ spin_unlock_bh(&tbl->lock);
+}
+
+static void neigh_proxy_process(struct timer_list *t)
+{
+ struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
long sched_next = 0;
unsigned long now = jiffies;
struct sk_buff *skb, *n;
@@ -1368,7 +1664,9 @@ static void neigh_proxy_process(unsigned long arg)
if (tdif <= 0) {
struct net_device *dev = skb->dev;
+ neigh_parms_qlen_dec(dev, tbl->family);
__skb_unlink(skb, &tbl->proxy_queue);
+
if (tbl->proxy_redo && netif_running(dev)) {
rcu_read_lock();
tbl->proxy_redo(skb);
@@ -1381,19 +1679,29 @@ static void neigh_proxy_process(unsigned long arg)
} else if (!sched_next || tdif < sched_next)
sched_next = tdif;
}
- del_timer(&tbl->proxy_timer);
+ timer_delete(&tbl->proxy_timer);
if (sched_next)
mod_timer(&tbl->proxy_timer, jiffies + sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
+static unsigned long neigh_proxy_delay(struct neigh_parms *p)
+{
+ /* If proxy_delay is zero, do not call get_random_u32_below()
+ * as it is undefined behavior.
+ */
+ unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
+
+ return proxy_delay ?
+ jiffies + get_random_u32_below(proxy_delay) : jiffies;
+}
+
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
- unsigned long now = jiffies;
- unsigned long sched_next = now + (net_random() % p->proxy_delay);
+ unsigned long sched_next = neigh_proxy_delay(p);
- if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+ if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
return;
}
@@ -1402,13 +1710,14 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
spin_lock(&tbl->proxy_queue.lock);
- if (del_timer(&tbl->proxy_timer)) {
+ if (timer_delete(&tbl->proxy_timer)) {
if (time_before(tbl->proxy_timer.expires, sched_next))
sched_next = tbl->proxy_timer.expires;
}
skb_dst_drop(skb);
dev_hold(skb->dev);
__skb_queue_tail(&tbl->proxy_queue, skb);
+ p->qlen++;
mod_timer(&tbl->proxy_timer, sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
@@ -1419,7 +1728,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
{
struct neigh_parms *p;
- for (p = &tbl->parms; p; p = p->next) {
+ list_for_each_entry(p, &tbl->parms_list, list) {
if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
(!p->dev && !ifindex && net_eq(net, &init_net)))
return p;
@@ -1438,23 +1747,25 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
if (p) {
p->tbl = tbl;
- atomic_set(&p->refcnt, 1);
- p->reachable_time =
- neigh_rand_reach_time(p->base_reachable_time);
+ refcount_set(&p->refcnt, 1);
+ neigh_set_reach_time(p);
+ p->qlen = 0;
+ netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
+ p->dev = dev;
+ write_pnet(&p->net, net);
+ p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
+ netdev_put(dev, &p->dev_tracker);
kfree(p);
return NULL;
}
- dev_hold(dev);
- p->dev = dev;
- write_pnet(&p->net, hold_net(net));
- p->sysctl_table = NULL;
- write_lock_bh(&tbl->lock);
- p->next = tbl->parms.next;
- tbl->parms.next = p;
- write_unlock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
+ list_add_rcu(&p->list, &tbl->parms.list);
+ spin_unlock_bh(&tbl->lock);
+
+ neigh_parms_data_state_cleanall(p);
}
return p;
}
@@ -1470,52 +1781,45 @@ static void neigh_rcu_free_parms(struct rcu_head *head)
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
- struct neigh_parms **p;
-
if (!parms || parms == &tbl->parms)
return;
- write_lock_bh(&tbl->lock);
- for (p = &tbl->parms.next; *p; p = &(*p)->next) {
- if (*p == parms) {
- *p = parms->next;
- parms->dead = 1;
- write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
- call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
- return;
- }
- }
- write_unlock_bh(&tbl->lock);
- neigh_dbg(1, "%s: not found\n", __func__);
-}
-EXPORT_SYMBOL(neigh_parms_release);
-static void neigh_parms_destroy(struct neigh_parms *parms)
-{
- release_net(neigh_parms_net(parms));
- kfree(parms);
+ spin_lock_bh(&tbl->lock);
+ list_del_rcu(&parms->list);
+ parms->dead = 1;
+ spin_unlock_bh(&tbl->lock);
+
+ netdev_put(parms->dev, &parms->dev_tracker);
+ call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
+EXPORT_SYMBOL(neigh_parms_release);
static struct lock_class_key neigh_table_proxy_queue_class;
-static void neigh_table_init_no_netlink(struct neigh_table *tbl)
+static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
+
+void neigh_table_init(int index, struct neigh_table *tbl)
{
unsigned long now = jiffies;
unsigned long phsize;
+ INIT_LIST_HEAD(&tbl->parms_list);
+ INIT_LIST_HEAD(&tbl->gc_list);
+ INIT_LIST_HEAD(&tbl->managed_list);
+
+ list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
- atomic_set(&tbl->parms.refcnt, 1);
- tbl->parms.reachable_time =
- neigh_rand_reach_time(tbl->parms.base_reachable_time);
+ refcount_set(&tbl->parms.refcnt, 1);
+ neigh_set_reach_time(&tbl->parms);
+ tbl->parms.qlen = 0;
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
- if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
- &neigh_stat_seq_fops, tbl))
+ if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
+ &neigh_stat_seq_ops, tbl))
panic("cannot create neighbour proc dir entry");
#endif
@@ -1533,58 +1837,43 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
else
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
- rwlock_init(&tbl->lock);
+ spin_lock_init(&tbl->lock);
+ mutex_init(&tbl->phash_lock);
+
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
- schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
- setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
+ queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
+ tbl->parms.reachable_time);
+ INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
+ timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
-}
-
-void neigh_table_init(struct neigh_table *tbl)
-{
- struct neigh_table *tmp;
- neigh_table_init_no_netlink(tbl);
- write_lock(&neigh_tbl_lock);
- for (tmp = neigh_tables; tmp; tmp = tmp->next) {
- if (tmp->family == tbl->family)
- break;
- }
- tbl->next = neigh_tables;
- neigh_tables = tbl;
- write_unlock(&neigh_tbl_lock);
-
- if (unlikely(tmp)) {
- pr_err("Registering multiple tables for family %d\n",
- tbl->family);
- dump_stack();
- }
+ rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
-int neigh_table_clear(struct neigh_table *tbl)
+/*
+ * Only called from ndisc_cleanup(), which means this is dead code
+ * because we no longer can unload IPv6 module.
+ */
+int neigh_table_clear(int index, struct neigh_table *tbl)
{
- struct neigh_table **tp;
+ RCU_INIT_POINTER(neigh_tables[index], NULL);
+ synchronize_rcu();
/* It is not clean... Fix it to unload IPv6 module safely */
+ cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ timer_delete_sync(&tbl->proxy_timer);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
- write_lock(&neigh_tbl_lock);
- for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
- if (*tp == tbl) {
- *tp = tbl->next;
- break;
- }
- }
- write_unlock(&neigh_tbl_lock);
call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
neigh_hash_free_rcu);
@@ -1602,12 +1891,47 @@ int neigh_table_clear(struct neigh_table *tbl)
}
EXPORT_SYMBOL(neigh_table_clear);
-static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
+static struct neigh_table *neigh_find_table(int family)
+{
+ struct neigh_table *tbl = NULL;
+
+ switch (family) {
+ case AF_INET:
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
+ break;
+ case AF_INET6:
+ tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
+ break;
+ }
+
+ return tbl;
+}
+
+const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
+ [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NLA_U32 },
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_PORT] = { .type = NLA_U16 },
+ [NDA_VNI] = { .type = NLA_U32 },
+ [NDA_IFINDEX] = { .type = NLA_U32 },
+ [NDA_MASTER] = { .type = NLA_U32 },
+ [NDA_PROTOCOL] = { .type = NLA_U8 },
+ [NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
+ [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
+};
+
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *dst_attr;
struct neigh_table *tbl;
+ struct neighbour *neigh;
struct net_device *dev = NULL;
int err = -EINVAL;
@@ -1616,8 +1940,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
- if (dst_attr == NULL)
+ if (!dst_attr) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
@@ -1628,63 +1954,79 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- struct neighbour *neigh;
-
- if (tbl->family != ndm->ndm_family)
- continue;
- read_unlock(&neigh_tbl_lock);
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (tbl == NULL)
+ return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < tbl->key_len)
- goto out;
-
- if (ndm->ndm_flags & NTF_PROXY) {
- err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
- goto out;
- }
+ if (nla_len(dst_attr) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
+ goto out;
+ }
- if (dev == NULL)
- goto out;
+ if (ndm->ndm_flags & NTF_PROXY) {
+ err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
+ goto out;
+ }
- neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
- if (neigh == NULL) {
- err = -ENOENT;
- goto out;
- }
+ if (dev == NULL)
+ goto out;
- err = neigh_update(neigh, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
- neigh_release(neigh);
+ neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+ if (neigh == NULL) {
+ err = -ENOENT;
goto out;
}
- read_unlock(&neigh_tbl_lock);
- err = -EAFNOSUPPORT;
+
+ err = __neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid, extack);
+ spin_lock_bh(&tbl->lock);
+ neigh_release(neigh);
+ neigh_remove_one(neigh);
+ spin_unlock_bh(&tbl->lock);
out:
return err;
}
-static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
struct net_device *dev = NULL;
+ struct neighbour *neigh;
+ void *dst, *lladdr;
+ u8 protocol = 0;
+ u32 ndm_flags;
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+ nda_policy, extack);
if (err < 0)
goto out;
err = -EINVAL;
- if (tb[NDA_DST] == NULL)
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
+ ndm_flags = ndm->ndm_flags;
+ if (tb[NDA_FLAGS_EXT]) {
+ u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+ BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
+ (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
+ hweight32(NTF_EXT_MASK)));
+ ndm_flags |= (ext << NTF_EXT_SHIFT);
+ }
if (ndm->ndm_ifindex) {
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
@@ -1692,74 +2034,137 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
- if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+ if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Invalid link address");
goto out;
+ }
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
- struct neighbour *neigh;
- void *dst, *lladdr;
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (tbl == NULL)
+ return -EAFNOSUPPORT;
- if (tbl->family != ndm->ndm_family)
- continue;
- read_unlock(&neigh_tbl_lock);
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
+ goto out;
+ }
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ dst = nla_data(tb[NDA_DST]);
+ lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+
+ if (tb[NDA_PROTOCOL])
+ protocol = nla_get_u8(tb[NDA_PROTOCOL]);
+ if (ndm_flags & NTF_PROXY) {
+ if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
goto out;
- dst = nla_data(tb[NDA_DST]);
- lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+ }
- if (ndm->ndm_flags & NTF_PROXY) {
- struct pneigh_entry *pn;
+ err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
+ !!(ndm->ndm_state & NUD_PERMANENT));
+ goto out;
+ }
- err = -ENOBUFS;
- pn = pneigh_lookup(tbl, net, dst, dev, 1);
- if (pn) {
- pn->flags = ndm->ndm_flags;
- err = 0;
- }
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device not specified");
+ goto out;
+ }
+
+ if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (neigh == NULL) {
+ bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
+ bool exempt_from_gc = ndm_permanent ||
+ ndm_flags & (NTF_EXT_LEARNED |
+ NTF_EXT_VALIDATED);
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
goto out;
}
-
- if (dev == NULL)
+ if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
+ err = -EINVAL;
goto out;
+ }
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
- neigh = neigh_lookup(tbl, dst, dev);
- if (neigh == NULL) {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- err = -ENOENT;
- goto out;
- }
+ /* NTF_USE and NTF_MANAGED will result in the neighbor
+ * being created with an invalid state (NUD_NONE).
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = NUD_NONE;
- neigh = __neigh_lookup_errno(tbl, dst, dev);
- if (IS_ERR(neigh)) {
- err = PTR_ERR(neigh);
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot create externally validated neighbor with an invalid state");
+ err = -EINVAL;
goto out;
}
- } else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- err = -EEXIST;
+ }
+
+ neigh = ___neigh_create(tbl, dst, dev,
+ ndm_flags &
+ (NTF_EXT_LEARNED | NTF_MANAGED |
+ NTF_EXT_VALIDATED),
+ exempt_from_gc, true);
+ if (IS_ERR(neigh)) {
+ err = PTR_ERR(neigh);
+ goto out;
+ }
+ } else {
+ if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ err = -EEXIST;
+ neigh_release(neigh);
+ goto out;
+ }
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED do not update the existing
+ * state other than clearing it if it was
+ * NUD_PERMANENT.
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot mark neighbor as externally validated with an invalid state");
+ err = -EINVAL;
neigh_release(neigh);
goto out;
}
-
- if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
- flags &= ~NEIGH_UPDATE_F_OVERRIDE;
}
- if (ndm->ndm_flags & NTF_USE) {
- neigh_event_send(neigh, NULL);
- err = 0;
- } else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
- neigh_release(neigh);
- goto out;
- }
-
- read_unlock(&neigh_tbl_lock);
- err = -EAFNOSUPPORT;
+ if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+ flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ }
+
+ if (protocol)
+ neigh->protocol = protocol;
+ if (ndm_flags & NTF_EXT_LEARNED)
+ flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+ if (ndm_flags & NTF_ROUTER)
+ flags |= NEIGH_UPDATE_F_ISROUTER;
+ if (ndm_flags & NTF_MANAGED)
+ flags |= NEIGH_UPDATE_F_MANAGED;
+ if (ndm_flags & NTF_USE)
+ flags |= NEIGH_UPDATE_F_USE;
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
+
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
+ neigh_event_send(neigh, NULL);
+ neigh_release(neigh);
out:
return err;
}
@@ -1768,31 +2173,44 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
struct nlattr *nest;
- nest = nla_nest_start(skb, NDTA_PARMS);
+ nest = nla_nest_start_noflag(skb, NDTA_PARMS);
if (nest == NULL)
return -ENOBUFS;
if ((parms->dev &&
- nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
- nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
- nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
+ nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) ||
+ nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
+ nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
+ NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
/* approximative value for deprecated QUEUE_LEN (in packets) */
nla_put_u32(skb, NDTPA_QUEUE_LEN,
- parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
- nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
- nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
- nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
- nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
- nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
+ NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
+ nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
+ nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
+ nla_put_u32(skb, NDTPA_UCAST_PROBES,
+ NEIGH_VAR(parms, UCAST_PROBES)) ||
+ nla_put_u32(skb, NDTPA_MCAST_PROBES,
+ NEIGH_VAR(parms, MCAST_PROBES)) ||
+ nla_put_u32(skb, NDTPA_MCAST_REPROBES,
+ NEIGH_VAR(parms, MCAST_REPROBES)) ||
+ nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time),
+ NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
- parms->base_reachable_time) ||
- nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
+ NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_GC_STALETIME,
+ NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
- parms->delay_probe_time) ||
- nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
- nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
- nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
- nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
+ NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_RETRANS_TIME,
+ NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
+ NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_PROXY_DELAY,
+ NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_LOCKTIME,
+ NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
+ NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -1812,22 +2230,21 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
return -EMSGSIZE;
ndtmsg = nlmsg_data(nlh);
-
- read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
- nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
- nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
- nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
- nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
+ nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
+ NDTA_PAD) ||
+ nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
+ nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
+ nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
goto nla_put_failure;
{
unsigned long now = jiffies;
- unsigned int flush_delta = now - tbl->last_flush;
- unsigned int rand_delta = now - tbl->last_rand;
+ long flush_delta = now - READ_ONCE(tbl->last_flush);
+ long rand_delta = now - READ_ONCE(tbl->last_rand);
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
@@ -1835,14 +2252,12 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
+ .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen),
};
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ nht = rcu_dereference(tbl->nht);
ndc.ndtc_hash_rnd = nht->hash_rnd[0];
ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
- rcu_read_unlock_bh();
if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
goto nla_put_failure;
@@ -1858,19 +2273,21 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
struct neigh_statistics *st;
st = per_cpu_ptr(tbl->stats, cpu);
- ndst.ndts_allocs += st->allocs;
- ndst.ndts_destroys += st->destroys;
- ndst.ndts_hash_grows += st->hash_grows;
- ndst.ndts_res_failed += st->res_failed;
- ndst.ndts_lookups += st->lookups;
- ndst.ndts_hits += st->hits;
- ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
- ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
- ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
- ndst.ndts_forced_gc_runs += st->forced_gc_runs;
+ ndst.ndts_allocs += READ_ONCE(st->allocs);
+ ndst.ndts_destroys += READ_ONCE(st->destroys);
+ ndst.ndts_hash_grows += READ_ONCE(st->hash_grows);
+ ndst.ndts_res_failed += READ_ONCE(st->res_failed);
+ ndst.ndts_lookups += READ_ONCE(st->lookups);
+ ndst.ndts_hits += READ_ONCE(st->hits);
+ ndst.ndts_rcv_probes_mcast += READ_ONCE(st->rcv_probes_mcast);
+ ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast);
+ ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs);
+ ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs);
+ ndst.ndts_table_fulls += READ_ONCE(st->table_fulls);
}
- if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
+ if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
+ NDTA_PAD))
goto nla_put_failure;
}
@@ -1878,11 +2295,10 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
if (neightbl_fill_parms(skb, &tbl->parms) < 0)
goto nla_put_failure;
- read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
- read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -1901,8 +2317,6 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
return -EMSGSIZE;
ndtmsg = nlmsg_data(nlh);
-
- read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
@@ -1911,10 +2325,9 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
neightbl_fill_parms(skb, parms) < 0)
goto errout;
- read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
errout:
- read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -1931,10 +2344,12 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_IFINDEX] = { .type = NLA_U32 },
[NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
+ [NDTPA_QUEUE_LENBYTES] = { .type = NLA_U32 },
[NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
[NDTPA_APP_PROBES] = { .type = NLA_U32 },
[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
[NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
+ [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
[NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
[NDTPA_GC_STALETIME] = { .type = NLA_U64 },
[NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
@@ -1942,18 +2357,21 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
+ [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
};
-static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDTA_MAX + 1];
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
- struct nlattr *tb[NDTA_MAX+1];
- int err;
+ bool found = false;
+ int err, tidx;
- err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
- nl_neightbl_policy);
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+ nl_neightbl_policy, extack);
if (err < 0)
goto errout;
@@ -1963,33 +2381,43 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
}
ndtmsg = nlmsg_data(nlh);
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+
+ rcu_read_lock();
+
+ for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
+ tbl = rcu_dereference(neigh_tables[tidx]);
+ if (!tbl)
+ continue;
+
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
continue;
- if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
+ if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
+ found = true;
break;
+ }
}
- if (tbl == NULL) {
+ if (!found) {
+ rcu_read_unlock();
err = -ENOENT;
- goto errout_locked;
+ goto errout;
}
/*
* We acquire tbl->lock to be nice to the periodic timers and
* make sure they always see a consistent set of values.
*/
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
if (tb[NDTA_PARMS]) {
struct nlattr *tbp[NDTPA_MAX+1];
struct neigh_parms *p;
int i, ifindex = 0;
- err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
- nl_ntbl_parm_policy);
+ err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
+ tb[NDTA_PARMS],
+ nl_ntbl_parm_policy, extack);
if (err < 0)
goto errout_tbl_lock;
@@ -2008,44 +2436,71 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
switch (i) {
case NDTPA_QUEUE_LEN:
- p->queue_len_bytes = nla_get_u32(tbp[i]) *
- SKB_TRUESIZE(ETH_FRAME_LEN);
+ NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
+ nla_get_u32(tbp[i]) *
+ SKB_TRUESIZE(ETH_FRAME_LEN));
break;
case NDTPA_QUEUE_LENBYTES:
- p->queue_len_bytes = nla_get_u32(tbp[i]);
+ NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
+ nla_get_u32(tbp[i]));
break;
case NDTPA_PROXY_QLEN:
- p->proxy_qlen = nla_get_u32(tbp[i]);
+ NEIGH_VAR_SET(p, PROXY_QLEN,
+ nla_get_u32(tbp[i]));
break;
case NDTPA_APP_PROBES:
- p->app_probes = nla_get_u32(tbp[i]);
+ NEIGH_VAR_SET(p, APP_PROBES,
+ nla_get_u32(tbp[i]));
break;
case NDTPA_UCAST_PROBES:
- p->ucast_probes = nla_get_u32(tbp[i]);
+ NEIGH_VAR_SET(p, UCAST_PROBES,
+ nla_get_u32(tbp[i]));
break;
case NDTPA_MCAST_PROBES:
- p->mcast_probes = nla_get_u32(tbp[i]);
+ NEIGH_VAR_SET(p, MCAST_PROBES,
+ nla_get_u32(tbp[i]));
+ break;
+ case NDTPA_MCAST_REPROBES:
+ NEIGH_VAR_SET(p, MCAST_REPROBES,
+ nla_get_u32(tbp[i]));
break;
case NDTPA_BASE_REACHABLE_TIME:
- p->base_reachable_time = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
+ nla_get_msecs(tbp[i]));
+ /* update reachable_time as well, otherwise, the change will
+ * only be effective after the next time neigh_periodic_work
+ * decides to recompute it (can be multiple minutes)
+ */
+ neigh_set_reach_time(p);
break;
case NDTPA_GC_STALETIME:
- p->gc_staletime = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, GC_STALETIME,
+ nla_get_msecs(tbp[i]));
break;
case NDTPA_DELAY_PROBE_TIME:
- p->delay_probe_time = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
+ nla_get_msecs(tbp[i]));
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
+ break;
+ case NDTPA_INTERVAL_PROBE_TIME_MS:
+ NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
+ nla_get_msecs(tbp[i]));
break;
case NDTPA_RETRANS_TIME:
- p->retrans_time = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, RETRANS_TIME,
+ nla_get_msecs(tbp[i]));
break;
case NDTPA_ANYCAST_DELAY:
- p->anycast_delay = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, ANYCAST_DELAY,
+ nla_get_msecs(tbp[i]));
break;
case NDTPA_PROXY_DELAY:
- p->proxy_delay = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, PROXY_DELAY,
+ nla_get_msecs(tbp[i]));
break;
case NDTPA_LOCKTIME:
- p->locktime = nla_get_msecs(tbp[i]);
+ NEIGH_VAR_SET(p, LOCKTIME,
+ nla_get_msecs(tbp[i]));
break;
}
}
@@ -2058,50 +2513,88 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout_tbl_lock;
if (tb[NDTA_THRESH1])
- tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+ WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
if (tb[NDTA_THRESH2])
- tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+ WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
if (tb[NDTA_THRESH3])
- tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+ WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
if (tb[NDTA_GC_INTERVAL])
- tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+ WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
err = 0;
errout_tbl_lock:
- write_unlock_bh(&tbl->lock);
-errout_locked:
- read_unlock(&neigh_tbl_lock);
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
errout:
return err;
}
+static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct ndtmsg *ndtm;
+
+ ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
+ if (!ndtm) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
+ return -EINVAL;
+ }
+
+ if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
int neigh_skip = cb->args[1];
struct neigh_table *tbl;
- family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+ if (cb->strict_check) {
+ int err = neightbl_valid_dump_info(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
+ family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
+
+ rcu_read_lock();
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
+ for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
+ tbl = rcu_dereference(neigh_tables[tidx]);
+ if (!tbl)
+ continue;
+
if (tidx < tbl_skip || (family && tbl->family != family))
continue;
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+ NLM_F_MULTI) < 0)
break;
- for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+ nidx = 0;
+ p = list_next_entry(&tbl->parms, list);
+ list_for_each_entry_from_rcu(p, &tbl->parms_list, list) {
if (!net_eq(neigh_parms_net(p), net))
continue;
@@ -2110,9 +2603,9 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (neightbl_fill_param_info(skb, tbl, p,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ NLM_F_MULTI) < 0)
goto out;
next:
nidx++;
@@ -2121,7 +2614,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
neigh_skip = 0;
}
out:
- read_unlock(&neigh_tbl_lock);
+ rcu_read_unlock();
+
cb->args[0] = tidx;
cb->args[1] = nidx;
@@ -2131,6 +2625,7 @@ out:
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
u32 pid, u32 seq, int type, unsigned int flags)
{
+ u32 neigh_flags, neigh_flags_ext;
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -2140,11 +2635,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+ neigh_flags = neigh->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = neigh->flags;
+ ndm->ndm_flags = neigh_flags;
ndm->ndm_type = neigh->type;
ndm->ndm_ifindex = neigh->dev->ifindex;
@@ -2166,14 +2664,20 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
- ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
+ ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
read_unlock_bh(&neigh->lock);
if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
+ goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2184,156 +2688,435 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 pid, u32 seq, int type, unsigned int flags,
struct neigh_table *tbl)
{
+ u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
+ u8 protocol;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags = READ_ONCE(pn->flags);
+ neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
+ neigh_flags &= NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = pn->flags | NTF_PROXY;
- ndm->ndm_type = NDA_DST;
- ndm->ndm_ifindex = pn->dev->ifindex;
+ ndm->ndm_flags = neigh_flags | NTF_PROXY;
+ ndm->ndm_type = RTN_UNICAST;
+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ protocol = READ_ONCE(pn->protocol);
+ if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
+ goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
+static bool neigh_master_filtered(struct net_device *dev, int master_idx)
+{
+ struct net_device *master;
+
+ if (!master_idx)
+ return false;
+
+ master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
+
+ /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
+ * invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
+ if (!master || master->ifindex != master_idx)
+ return true;
+
+ return false;
+}
+
+static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
+{
+ if (filter_idx && (!dev || dev->ifindex != filter_idx))
+ return true;
+
+ return false;
+}
+
+struct neigh_dump_filter {
+ int master_idx;
+ int dev_idx;
+};
+
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct neigh_dump_filter *filter)
{
struct net *net = sock_net(skb->sk);
struct neighbour *n;
- int rc, h, s_h = cb->args[1];
+ int err = 0, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
+ unsigned int flags = NLM_F_MULTI;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ if (filter->dev_idx || filter->master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
+
+ nht = rcu_dereference(tbl->nht);
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!net_eq(dev_net(n->dev), net))
- continue;
- if (idx < s_idx)
+ idx = 0;
+ neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
+ if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI) <= 0) {
- rc = -1;
+ if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+ neigh_master_filtered(n->dev, filter->master_idx))
+ goto next;
+ err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- rc = skb->len;
out:
- rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
- return rc;
+ return err;
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct neigh_dump_filter *filter)
{
struct pneigh_entry *n;
struct net *net = sock_net(skb->sk);
- int rc, h, s_h = cb->args[3];
+ int err = 0, h, s_h = cb->args[3];
int idx, s_idx = idx = cb->args[4];
+ unsigned int flags = NLM_F_MULTI;
- read_lock_bh(&tbl->lock);
+ if (filter->dev_idx || filter->master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
if (h > s_h)
s_idx = 0;
- for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (dev_net(n->dev) != net)
- continue;
- if (idx < s_idx)
+ for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
+ n;
+ n = rcu_dereference(n->next)) {
+ if (idx < s_idx || pneigh_net(n) != net)
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI, tbl) <= 0) {
- read_unlock_bh(&tbl->lock);
- rc = -1;
+ if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+ neigh_master_filtered(n->dev, filter->master_idx))
+ goto next;
+ err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, flags, tbl);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
- rc = skb->len;
out:
cb->args[3] = h;
cb->args[4] = idx;
- return rc;
+ return err;
+}
+
+static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
+ bool strict_check,
+ struct neigh_dump_filter *filter,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[NDA_MAX + 1];
+ int err, i;
+
+ if (strict_check) {
+ struct ndmsg *ndm;
+
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
+ ndm->ndm_state || ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_flags & ~NTF_PROXY) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
+ tb, NDA_MAX, nda_policy,
+ extack);
+ } else {
+ err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
+ NDA_MAX, nda_policy, extack);
+ }
+ if (err < 0)
+ return err;
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ /* all new attributes should require strict_check */
+ switch (i) {
+ case NDA_IFINDEX:
+ filter->dev_idx = nla_get_u32(tb[i]);
+ break;
+ case NDA_MASTER:
+ filter->master_idx = nla_get_u32(tb[i]);
+ break;
+ default:
+ if (strict_check) {
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
}
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct neigh_dump_filter filter = {};
struct neigh_table *tbl;
int t, family, s_t;
int proxy = 0;
int err;
- read_lock(&neigh_tbl_lock);
- family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+ family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
/* check for full ndmsg structure presence, family member is
* the same for both structures
*/
- if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
- ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
+ if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
+ ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
proxy = 1;
+ err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
+ if (err < 0 && cb->strict_check)
+ return err;
+ err = 0;
+
s_t = cb->args[0];
- for (tbl = neigh_tables, t = 0; tbl;
- tbl = tbl->next, t++) {
+ rcu_read_lock();
+ for (t = 0; t < NEIGH_NR_TABLES; t++) {
+ tbl = rcu_dereference(neigh_tables[t]);
+
+ if (!tbl)
+ continue;
if (t < s_t || (family && tbl->family != family))
continue;
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args) -
sizeof(cb->args[0]));
if (proxy)
- err = pneigh_dump_table(tbl, skb, cb);
+ err = pneigh_dump_table(tbl, skb, cb, &filter);
else
- err = neigh_dump_table(tbl, skb, cb);
+ err = neigh_dump_table(tbl, skb, cb, &filter);
if (err < 0)
break;
}
- read_unlock(&neigh_tbl_lock);
+ rcu_read_unlock();
cb->args[0] = t;
- return skb->len;
+ return err;
+}
+
+static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct ndmsg *ndm;
+ int err, i;
+
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (ndm->ndm_flags & ~NTF_PROXY) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
+ NDA_MAX, nda_policy, extack);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ for (i = 0; i <= NDA_MAX; ++i) {
+ switch (i) {
+ case NDA_DST:
+ if (!tb[i]) {
+ NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ default:
+ if (!tb[i])
+ continue;
+
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return ndm;
+}
+
+static inline size_t neigh_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static inline size_t pneigh_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(in_skb->sk);
+ u32 pid = NETLINK_CB(in_skb).portid;
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev = NULL;
+ u32 seq = nlh->nlmsg_seq;
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
+ struct sk_buff *skb;
+ struct ndmsg *ndm;
+ void *dst;
+ int err;
+
+ ndm = neigh_valid_get_req(nlh, tb, extack);
+ if (IS_ERR(ndm))
+ return PTR_ERR(ndm);
+
+ if (ndm->ndm_flags & NTF_PROXY)
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+ else
+ skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ rcu_read_lock();
+
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (!tbl) {
+ NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+ err = -EAFNOSUPPORT;
+ goto err_unlock;
+ }
+
+ if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+ err = -EINVAL;
+ goto err_unlock;
+ }
+
+ dst = nla_data(tb[NDA_DST]);
+
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+ err = -ENODEV;
+ goto err_unlock;
+ }
+ }
+
+ if (ndm->ndm_flags & NTF_PROXY) {
+ struct pneigh_entry *pn;
+
+ pn = pneigh_lookup(tbl, net, dst, dev);
+ if (!pn) {
+ NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
+ if (err)
+ goto err_unlock;
+ } else {
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (!neigh) {
+ NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ neigh_release(neigh);
+ if (err)
+ goto err_unlock;
+ }
+
+ rcu_read_unlock();
+
+ return rtnl_unicast(skb, net, pid);
+err_unlock:
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return err;
}
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
@@ -2341,20 +3124,18 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
int chain;
struct neigh_hash_table *nht;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
- read_lock(&tbl->lock); /* avoid resizes */
+ spin_lock_bh(&tbl->lock); /* avoid resizes */
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
- for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
- n != NULL;
- n = rcu_dereference_bh(n->next))
+ neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
cb(n, cookie);
}
- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2362,29 +3143,25 @@ EXPORT_SYMBOL(neigh_for_each);
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
- int chain;
struct neigh_hash_table *nht;
+ int chain;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[chain];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- n->dead = 1;
- } else
- np = &n->next;
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ neigh_mark_dead(n);
+ }
write_unlock(&n->lock);
if (release)
neigh_cleanup_and_release(n);
@@ -2393,45 +3170,101 @@ void __neigh_for_each_release(struct neigh_table *tbl,
}
EXPORT_SYMBOL(__neigh_for_each_release);
+int neigh_xmit(int index, struct net_device *dev,
+ const void *addr, struct sk_buff *skb)
+{
+ int err = -EAFNOSUPPORT;
+
+ if (likely(index < NEIGH_NR_TABLES)) {
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
+
+ rcu_read_lock();
+ tbl = rcu_dereference(neigh_tables[index]);
+ if (!tbl)
+ goto out_unlock;
+ if (index == NEIGH_ARP_TABLE) {
+ u32 key = *((u32 *)addr);
+
+ neigh = __ipv4_neigh_lookup_noref(dev, key);
+ } else {
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ }
+ if (!neigh)
+ neigh = __neigh_create(tbl, addr, dev, false);
+ err = PTR_ERR(neigh);
+ if (IS_ERR(neigh)) {
+ rcu_read_unlock();
+ goto out_kfree_skb;
+ }
+ err = READ_ONCE(neigh->output)(neigh, skb);
+out_unlock:
+ rcu_read_unlock();
+ }
+ else if (index == NEIGH_LINK_TABLE) {
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ addr, NULL, skb->len);
+ if (err < 0)
+ goto out_kfree_skb;
+ err = dev_queue_xmit(skb);
+ }
+out:
+ return err;
+out_kfree_skb:
+ kfree_skb(skb);
+ goto out;
+}
+EXPORT_SYMBOL(neigh_xmit);
+
#ifdef CONFIG_PROC_FS
-static struct neighbour *neigh_get_first(struct seq_file *seq)
+static struct neighbour *neigh_get_valid(struct seq_file *seq,
+ struct neighbour *n,
+ loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
+
+ if (!net_eq(dev_net(n->dev), net))
+ return NULL;
+
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+
+ v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
+ if (!v)
+ return NULL;
+ if (pos)
+ return v;
+ }
+
+ if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+ return n;
+
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
+ return n;
+
+ return NULL;
+}
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+ struct neigh_seq_state *state = seq->private;
struct neigh_hash_table *nht = state->nht;
- struct neighbour *n = NULL;
- int bucket = state->bucket;
+ struct neighbour *n, *tmp;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference_bh(nht->hash_buckets[bucket]);
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- loff_t fakep = 0;
- void *v;
-
- v = state->neigh_sub_iter(state, n, &fakep);
- if (!v)
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (n->nud_state & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference_bh(n->next);
+ while (++state->bucket < (1 << nht->hash_shift)) {
+ neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
+ tmp = neigh_get_valid(seq, n, NULL);
+ if (tmp)
+ return tmp;
}
-
- if (n)
- break;
}
- state->bucket = bucket;
- return n;
+ return NULL;
}
static struct neighbour *neigh_get_next(struct seq_file *seq,
@@ -2439,46 +3272,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = seq_file_net(seq);
- struct neigh_hash_table *nht = state->nht;
+ struct neighbour *tmp;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
+
if (v)
return n;
}
- n = rcu_dereference_bh(n->next);
-
- while (1) {
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- void *v = state->neigh_sub_iter(state, n, pos);
- if (v)
- return n;
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (n->nud_state & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference_bh(n->next);
+ hlist_for_each_entry_continue(n, hash) {
+ tmp = neigh_get_valid(seq, n, pos);
+ if (tmp) {
+ n = tmp;
+ goto out;
}
-
- if (n)
- break;
-
- if (++state->bucket >= (1 << nht->hash_shift))
- break;
-
- n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}
+ n = neigh_get_first(seq);
+out:
if (n && pos)
--(*pos);
+
return n;
}
@@ -2503,13 +3318,14 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
struct pneigh_entry *pn = NULL;
- int bucket = state->bucket;
+ int bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket];
+ pn = rcu_dereference(tbl->phash_buckets[bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -2527,15 +3343,17 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct neigh_table *tbl = state->tbl;
do {
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
} while (pn && !net_eq(pneigh_net(pn), net));
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
- pn = tbl->phash_buckets[state->bucket];
+
+ pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -2575,16 +3393,18 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
- __acquires(rcu_bh)
+ __acquires(tbl->lock)
+ __acquires(rcu)
{
struct neigh_seq_state *state = seq->private;
state->tbl = tbl;
- state->bucket = 0;
+ state->bucket = -1;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- rcu_read_lock_bh();
- state->nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ state->nht = rcu_dereference(tbl->nht);
+ spin_lock_bh(&tbl->lock);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -2618,9 +3438,14 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
- __releases(rcu_bh)
+ __releases(tbl->lock)
+ __releases(rcu)
{
- rcu_read_unlock_bh();
+ struct neigh_seq_state *state = seq->private;
+ struct neigh_table *tbl = state->tbl;
+
+ spin_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);
@@ -2628,7 +3453,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -2645,7 +3470,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2654,6 +3479,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
@@ -2664,16 +3490,17 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
+ seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
- seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx\n",
+ seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+ "%08lx %08lx %08lx "
+ "%08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -2690,7 +3517,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
st->periodic_gc_runs,
st->forced_gc_runs,
- st->unres_discards
+ st->unres_discards,
+ st->table_fulls
);
return 0;
@@ -2702,48 +3530,22 @@ static const struct seq_operations neigh_stat_seq_ops = {
.stop = neigh_stat_seq_stop,
.show = neigh_stat_seq_show,
};
-
-static int neigh_stat_seq_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &neigh_stat_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- sf->private = PDE_DATA(inode);
- }
- return ret;
-};
-
-static const struct file_operations neigh_stat_seq_fops = {
- .owner = THIS_MODULE,
- .open = neigh_stat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif /* CONFIG_PROC_FS */
-static inline size_t neigh_nlmsg_size(void)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
- return NLMSG_ALIGN(sizeof(struct ndmsg))
- + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
- + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
- + nla_total_size(sizeof(struct nda_cacheinfo))
- + nla_total_size(4); /* NDA_PROBES */
-}
-
-static void __neigh_notify(struct neighbour *n, int type, int flags)
-{
- struct net *net = dev_net(n->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ struct net *net;
+ rcu_read_lock();
+ net = dev_net_rcu(n->dev);
skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2751,31 +3553,29 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
- return;
+ goto out;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+out:
+ rcu_read_unlock();
}
-#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
-#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
-static int zero;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
-static int proc_unres_qlen(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_unres_qlen(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int size, ret;
struct ctl_table tmp = *ctl;
- tmp.extra1 = &zero;
+ tmp.extra1 = SYSCTL_ZERO;
tmp.extra2 = &unres_qlen_max;
tmp.data = &size;
@@ -2787,125 +3587,198 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
-enum {
- NEIGH_VAR_MCAST_PROBE,
- NEIGH_VAR_UCAST_PROBE,
- NEIGH_VAR_APP_PROBE,
- NEIGH_VAR_RETRANS_TIME,
- NEIGH_VAR_BASE_REACHABLE_TIME,
- NEIGH_VAR_DELAY_PROBE_TIME,
- NEIGH_VAR_GC_STALETIME,
- NEIGH_VAR_QUEUE_LEN,
- NEIGH_VAR_QUEUE_LEN_BYTES,
- NEIGH_VAR_PROXY_QLEN,
- NEIGH_VAR_ANYCAST_DELAY,
- NEIGH_VAR_PROXY_DELAY,
- NEIGH_VAR_LOCKTIME,
- NEIGH_VAR_RETRANS_TIME_MS,
- NEIGH_VAR_BASE_REACHABLE_TIME_MS,
- NEIGH_VAR_GC_INTERVAL,
- NEIGH_VAR_GC_THRESH1,
- NEIGH_VAR_GC_THRESH2,
- NEIGH_VAR_GC_THRESH3,
- NEIGH_VAR_MAX
-};
+static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
+ int index)
+{
+ struct net_device *dev;
+ int family = neigh_parms_family(p);
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ struct neigh_parms *dst_p =
+ neigh_get_dev_parms_rcu(dev, family);
+
+ if (dst_p && !test_bit(index, dst_p->data_state))
+ dst_p->data[index] = p->data[index];
+ }
+ rcu_read_unlock();
+}
+
+static void neigh_proc_update(const struct ctl_table *ctl, int write)
+{
+ struct net_device *dev = ctl->extra1;
+ struct neigh_parms *p = ctl->extra2;
+ struct net *net = neigh_parms_net(p);
+ int index = (int *) ctl->data - p->data;
+
+ if (!write)
+ return;
+
+ set_bit(index, p->data_state);
+ if (index == NEIGH_VAR_DELAY_PROBE_TIME)
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
+ if (!dev) /* NULL dev means this is default value */
+ neigh_copy_dflt_parms(net, p, index);
+}
+
+static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct ctl_table tmp = *ctl;
+ int ret;
+
+ tmp.extra1 = SYSCTL_ZERO;
+ tmp.extra2 = SYSCTL_INT_MAX;
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
+static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tmp = *ctl;
+ int ret;
+
+ int min = msecs_to_jiffies(1);
+
+ tmp.extra1 = &min;
+ tmp.extra2 = NULL;
+
+ ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
+int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+EXPORT_SYMBOL(neigh_proc_dointvec);
+
+int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
+
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
+
+static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
+
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
+int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
+
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
+
+static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
+
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
+static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct neigh_parms *p = ctl->extra2;
+ int ret;
+
+ if (strcmp(ctl->procname, "base_reachable_time") == 0)
+ ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
+ else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
+ ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
+ else
+ ret = -1;
+
+ if (write && ret == 0) {
+ /* update reachable_time as well, otherwise, the change will
+ * only be effective after the next time neigh_periodic_work
+ * decides to recompute it
+ */
+ neigh_set_reach_time(p);
+ }
+ return ret;
+}
+
+#define NEIGH_PARMS_DATA_OFFSET(index) \
+ (&((struct neigh_parms *) 0)->data[index])
+
+#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
+ [NEIGH_VAR_ ## attr] = { \
+ .procname = name, \
+ .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
+ .maxlen = sizeof(int), \
+ .mode = mval, \
+ .proc_handler = proc, \
+ }
+
+#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
+
+#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
+
+#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
+
+#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
+
+#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
+
+#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
- [NEIGH_VAR_MCAST_PROBE] = {
- .procname = "mcast_solicit",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- [NEIGH_VAR_UCAST_PROBE] = {
- .procname = "ucast_solicit",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- [NEIGH_VAR_APP_PROBE] = {
- .procname = "app_solicit",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- [NEIGH_VAR_RETRANS_TIME] = {
- .procname = "retrans_time",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_userhz_jiffies,
- },
- [NEIGH_VAR_BASE_REACHABLE_TIME] = {
- .procname = "base_reachable_time",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NEIGH_VAR_DELAY_PROBE_TIME] = {
- .procname = "delay_first_probe_time",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NEIGH_VAR_GC_STALETIME] = {
- .procname = "gc_stale_time",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NEIGH_VAR_QUEUE_LEN] = {
- .procname = "unres_qlen",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_unres_qlen,
- },
- [NEIGH_VAR_QUEUE_LEN_BYTES] = {
- .procname = "unres_qlen_bytes",
- .maxlen = sizeof(int),
- .mode = 0644,
- .extra1 = &zero,
- .proc_handler = proc_dointvec_minmax,
- },
- [NEIGH_VAR_PROXY_QLEN] = {
- .procname = "proxy_qlen",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- [NEIGH_VAR_ANYCAST_DELAY] = {
- .procname = "anycast_delay",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_userhz_jiffies,
- },
- [NEIGH_VAR_PROXY_DELAY] = {
- .procname = "proxy_delay",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_userhz_jiffies,
- },
- [NEIGH_VAR_LOCKTIME] = {
- .procname = "locktime",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_userhz_jiffies,
- },
- [NEIGH_VAR_RETRANS_TIME_MS] = {
- .procname = "retrans_time_ms",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
- [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
- .procname = "base_reachable_time_ms",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
+ NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
+ NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
+ NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
+ NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
+ "interval_probe_time_ms"),
+ NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
+ NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
+ NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
+ NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
+ NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
+ NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
+ NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
+ NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
[NEIGH_VAR_GC_INTERVAL] = {
.procname = "gc_interval",
.maxlen = sizeof(int),
@@ -2916,88 +3789,103 @@ static struct neigh_sysctl_table {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
+ .proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
+ .proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
+ .proc_handler = proc_dointvec_minmax,
},
- {},
},
};
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
- char *p_name, proc_handler *handler)
+ proc_handler *handler)
{
+ int i;
struct neigh_sysctl_table *t;
- const char *dev_name_source = NULL;
+ const char *dev_name_source;
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
+ char *p_name;
+ size_t neigh_vars_size;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
- t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
- t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
- t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
- t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
- t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
- t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
- t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
- t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
- t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
- t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
- t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
- t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
- t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
- t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
- t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
+ for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
+ t->neigh_vars[i].data += (long) p;
+ t->neigh_vars[i].extra1 = dev;
+ t->neigh_vars[i].extra2 = p;
+ }
+ neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
- sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
+ neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
+ struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
- t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
- t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
- t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
- t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
}
-
if (handler) {
/* RetransTime */
t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
- t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
/* ReachableTime */
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
- t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
/* RetransTime (in milliseconds)*/
t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
- t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
/* ReachableTime (in milliseconds) */
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
- t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
+ } else {
+ /* Those handlers will update p->reachable_time after
+ * base_reachable_time(_ms) is set to ensure the new timer starts being
+ * applied after the next neighbour update instead of waiting for
+ * neigh_periodic_work to update its value (can be multiple minutes)
+ * So any handler that replaces them should do this as well
+ */
+ /* ReachableTime */
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
+ neigh_proc_base_reachable_time;
+ /* ReachableTime (in milliseconds) */
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
+ neigh_proc_base_reachable_time;
}
- /* Don't export sysctls to unprivileged users */
- if (neigh_parms_net(p)->user_ns != &init_user_ns)
- t->neigh_vars[0].procname = NULL;
+ switch (neigh_parms_family(p)) {
+ case AF_INET:
+ p_name = "ipv4";
+ break;
+ case AF_INET6:
+ p_name = "ipv6";
+ break;
+ default:
+ BUG();
+ }
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
p_name, dev_name_source);
- t->sysctl_header =
- register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
+ t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
+ neigh_path, t->neigh_vars,
+ neigh_vars_size);
if (!t->sysctl_header)
goto free;
@@ -3024,18 +3912,21 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif /* CONFIG_SYSCTL */
+static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
+ {.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
+ {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- NULL);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
-
+ rtnl_register_many(neigh_rtnl_msg_handlers);
return 0;
}
subsys_initcall(neigh_init);
-