diff options
Diffstat (limited to 'include/net/neighbour.h')
| -rw-r--r-- | include/net/neighbour.h | 244 |
1 files changed, 166 insertions, 78 deletions
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9816df225af3..2dfee6d4258a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H @@ -28,6 +29,7 @@ #include <linux/sysctl.h> #include <linux/workqueue.h> #include <net/rtnetlink.h> +#include <net/neighbour_tables.h> /* * NUD stands for "neighbor unreachability detection" @@ -47,6 +49,7 @@ enum { NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, + NEIGH_VAR_INTERVAL_PROBE_TIME_MS, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN, @@ -69,9 +72,9 @@ enum { struct neigh_parms { possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); - void (*neigh_cleanup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; @@ -81,6 +84,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; + u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; @@ -88,15 +92,17 @@ struct neigh_parms { static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); - p->data[index] = val; + WRITE_ONCE(p->data[index], val); } -#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define __NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define NEIGH_VAR(p, attr) READ_ONCE(__NEIGH_VAR(p, attr)) +#define NEIGH_VAR_PTR(p, attr) (&(__NEIGH_VAR(p, attr))) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. */ -#define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) +#define NEIGH_VAR_INIT(p, attr, val) (__NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) @@ -132,30 +138,35 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour __rcu *next; + struct hlist_node hash; + struct hlist_node dev_list; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; unsigned long updated; rwlock_t lock; refcount_t refcnt; - struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; + struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; - __u8 flags; - __u8 nud_state; - __u8 type; - __u8 dead; + u8 nud_state; + u8 type; + u8 dead; + u8 protocol; + u32 flags; seqlock_t ha_lock; - unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; + unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; + struct list_head gc_list; + struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; - u8 primary_key[0]; + netdevice_tracker dev_tracker; + u8 primary_key[]; } __randomize_layout; struct neigh_ops { @@ -167,11 +178,18 @@ struct neigh_ops { }; struct pneigh_entry { - struct pneigh_entry *next; + struct pneigh_entry __rcu *next; possible_net_t net; struct net_device *dev; - u8 flags; - u8 key[0]; + netdevice_tracker dev_tracker; + union { + struct list_head free_node; + struct rcu_head rcu; + }; + u32 flags; + u8 protocol; + bool permanent; + u32 key[]; }; /* @@ -181,7 +199,7 @@ struct pneigh_entry { #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { - struct neighbour __rcu **hash_buckets; + struct hlist_head *hash_heads; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; @@ -190,8 +208,8 @@ struct neigh_hash_table { struct neigh_table { int family; - int entry_size; - int key_len; + unsigned int entry_size; + unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, @@ -201,6 +219,9 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + int (*is_multicast)(const void *pkey); + bool (*allow_add)(const struct net_device *dev, + struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; @@ -210,22 +231,19 @@ struct neigh_table { int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; + struct delayed_work managed_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; - rwlock_t lock; + atomic_t gc_entries; + struct list_head gc_list; + struct list_head managed_list; + spinlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; - struct pneigh_entry **phash_buckets; -}; - -enum { - NEIGH_ARP_TABLE = 0, - NEIGH_ND_TABLE = 1, - NEIGH_DN_TABLE = 2, - NEIGH_NR_TABLES, - NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ + struct mutex phash_lock; + struct pneigh_entry __rcu **phash_buckets; }; static inline int neigh_parms_family(struct neigh_parms *p) @@ -242,17 +260,31 @@ static inline void *neighbour_priv(const struct neighbour *n) } /* flags for neigh_update() */ -#define NEIGH_UPDATE_F_OVERRIDE 0x00000001 -#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 -#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 -#define NEIGH_UPDATE_F_ISROUTER 0x40000000 -#define NEIGH_UPDATE_F_ADMIN 0x80000000 - - -static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) -{ - return *(const u16 *)n->primary_key == *(const u16 *)pkey; -} +#define NEIGH_UPDATE_F_OVERRIDE BIT(0) +#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1) +#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2) +#define NEIGH_UPDATE_F_USE BIT(3) +#define NEIGH_UPDATE_F_MANAGED BIT(4) +#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5) +#define NEIGH_UPDATE_F_ISROUTER BIT(6) +#define NEIGH_UPDATE_F_ADMIN BIT(7) +#define NEIGH_UPDATE_F_EXT_VALIDATED BIT(8) + +/* In-kernel representation for NDA_FLAGS_EXT flags: */ +#define NTF_OLD_MASK 0xff +#define NTF_EXT_SHIFT 8 +#define NTF_EXT_MASK (NTF_EXT_MANAGED | NTF_EXT_EXT_VALIDATED) + +#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT) +#define NTF_EXT_VALIDATED (NTF_EXT_EXT_VALIDATED << NTF_EXT_SHIFT) + +extern const struct nla_policy nda_policy[]; + +#define neigh_for_each_in_bucket(pos, head) hlist_for_each_entry(pos, head, hash) +#define neigh_for_each_in_bucket_rcu(pos, head) \ + hlist_for_each_entry_rcu(pos, head, hash) +#define neigh_for_each_in_bucket_safe(pos, tmp, head) \ + hlist_for_each_entry_safe(pos, tmp, head, hash) static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { @@ -277,17 +309,14 @@ static inline struct neighbour *___neigh_lookup_noref( const void *pkey, struct net_device *dev) { - struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht); + struct neigh_hash_table *nht = rcu_dereference(tbl->nht); struct neighbour *n; u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { + neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[hash_val]) if (n->dev == dev && key_eq(n, pkey)) return n; - } return NULL; } @@ -299,12 +328,21 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } +static inline void neigh_confirm(struct neighbour *n) +{ + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + } +} + void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey); struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref); static inline struct neighbour *neigh_create(struct neigh_table *tbl, @@ -314,13 +352,15 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); -bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); +bool neigh_remove_one(struct neighbour *ndel); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); @@ -340,13 +380,20 @@ struct net *neigh_parms_net(const struct neigh_parms *parms) unsigned long neigh_rand_reach_time(unsigned long base); +static inline void neigh_set_reach_time(struct neigh_parms *p) +{ + unsigned long base = NEIGH_VAR(p, BASE_REACHABLE_TIME); + + WRITE_ONCE(p->reachable_time, neigh_rand_reach_time(base)); +} + void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, - const void *key, struct net_device *dev, - int creat); -struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, - const void *key, struct net_device *dev); + const void *key, struct net_device *dev); +int pneigh_create(struct neigh_table *tbl, struct net *net, const void *key, + struct net_device *dev, u32 flags, u8 protocol, + bool permanent); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); @@ -361,8 +408,6 @@ void neigh_for_each(struct neigh_table *tbl, void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *); -void pneigh_for_each(struct neigh_table *tbl, - void (*cb)(struct pneigh_entry *)); struct neigh_seq_state { struct seq_net_private p; @@ -381,14 +426,13 @@ void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); -int neigh_proc_dointvec(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); -int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, - void __user *buffer, +int neigh_proc_dointvec(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos); -int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *proc_handler); @@ -424,17 +468,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - - if (neigh->used != now) - neigh->used = now; - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); + if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { @@ -451,34 +502,58 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + hh_alen = HH_DATA_MOD; + + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } } else { - unsigned int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) +static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, + bool skip_cache) { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + /* n->nud_state and hh->hh_len could be changed under us. + * neigh_hh_output() is taking care of the race later. + */ + if (!skip_cache && + (READ_ONCE(n->nud_state) & NUD_CONNECTED) && + READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - else - return n->output(n, skb); + + return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * @@ -525,5 +600,18 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } - +static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags = 0; + + ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; + if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { + if (ndm_flags & NTF_ROUTER) + neigh->flags |= NTF_ROUTER; + else + neigh->flags &= ~NTF_ROUTER; + *notify = 1; + } +} #endif |
