diff options
Diffstat (limited to 'include/net/net_namespace.h')
| -rw-r--r-- | include/net/net_namespace.h | 415 |
1 files changed, 324 insertions, 91 deletions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 84e37b1ca9e1..cb664f6e3558 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Operations on the network namespace */ @@ -5,24 +6,43 @@ #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> +#include <linux/uidgid.h> +#include <net/flow.h> #include <net/netns/core.h> #include <net/netns/mib.h> #include <net/netns/unix.h> #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> +#include <net/netns/nexthop.h> +#include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> -#include <net/netns/dccp.h> #include <net/netns/netfilter.h> -#include <net/netns/x_tables.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) +#include <net/netns/flow_table.h> +#endif +#include <net/netns/nftables.h> #include <net/netns/xfrm.h> +#include <net/netns/mpls.h> +#include <net/netns/can.h> +#include <net/netns/xdp.h> +#include <net/netns/smc.h> +#include <net/netns/bpf.h> +#include <net/netns/mctp.h> +#include <net/net_trackers.h> +#include <linux/ns_common.h> +#include <linux/idr.h> +#include <linux/skbuff.h> +#include <linux/notifier.h> +#include <linux/xarray.h> struct user_namespace; struct proc_dir_entry; @@ -30,35 +50,55 @@ struct net_device; struct sock; struct ctl_table_header; struct net_generic; -struct sock; +struct uevent_sock; struct netns_ipvs; +struct bpf_prog; #define NETDEV_HASHBITS 8 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - atomic_t passive; /* To decided when the network + /* First cache line can be often dirtied. + * Do not place here read-mostly fields. + */ + refcount_t passive; /* To decide when the network * namespace should be freed. */ - atomic_t count; /* To decided when the network - * namespace should be shut down. - */ -#ifdef NETNS_REFCNT_DEBUG - atomic_t use_count; /* To track references we - * destroy on demand - */ -#endif spinlock_t rules_mod_lock; - struct list_head list; /* list of network namespaces */ - struct list_head cleanup_list; /* namespaces on death row */ - struct list_head exit_list; /* Use only net_mutex */ + unsigned int dev_base_seq; /* protected by rtnl_mutex */ + u32 ifindex; - struct user_namespace *user_ns; /* Owning user namespace */ + spinlock_t nsid_lock; + atomic_t fnhe_genid; + + struct list_head list; /* list of network namespaces */ + struct list_head exit_list; /* To linked to call pernet exit + * methods on dead net ( + * pernet_ops_rwsem read locked), + * or to unregister pernet ops + * (pernet_ops_rwsem write locked). + */ + struct llist_node defer_free_list; + struct llist_node cleanup_list; /* namespaces on death row */ - unsigned int proc_inum; + struct list_head ptype_all; + struct list_head ptype_specific; +#ifdef CONFIG_KEYS + struct key_tag *key_domain; /* Key domain of operation tag */ +#endif + struct user_namespace *user_ns; /* Owning user namespace */ + struct ucounts *ucounts; + struct idr netns_ids; + + struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; + struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not + * refcounted against netns + */ + struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -69,67 +109,94 @@ struct net { struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; - struct list_head dev_base_head; + struct uevent_sock *uevent_sock; /* uevent socket */ + struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ - int ifindex; + struct xarray dev_by_index; + struct raw_notifier_head netdev_chain; + + /* Note that @hash_mix can be read millions times per second, + * it is critical that it is on a read_mostly cache line. + */ + u32 hash_mix; + + struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; - - struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; +#if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; +#endif + struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) + struct netns_ieee802154_lowpan ieee802154_lowpan; +#endif #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) - struct netns_dccp dccp; -#endif #ifdef CONFIG_NETFILTER struct netns_nf nf; - struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - struct netns_nf_frag nf_frag; +#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) + struct netns_nftables nft; +#endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + struct netns_ft ft; #endif - struct sock *nfnl; - struct sock *nfnl_stash; #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic __rcu *gen; + /* Used to store attached BPF programs */ + struct netns_bpf bpf; + /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif + + u64 net_cookie; /* written once */ + #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; #endif +#if IS_ENABLED(CONFIG_MPLS) + struct netns_mpls mpls; +#endif +#if IS_ENABLED(CONFIG_CAN) + struct netns_can can; +#endif +#ifdef CONFIG_XDP_SOCKETS + struct netns_xdp xdp; +#endif +#if IS_ENABLED(CONFIG_MCTP) + struct netns_mctp mctp; +#endif +#if IS_ENABLED(CONFIG_CRYPTO_USER) + struct sock *crypto_nlsk; +#endif struct sock *diag_nlsk; - atomic_t rt_genid; - atomic_t fnhe_genid; -}; - -/* - * ifindex generation is per-net namespace, and loopback is - * always the 1st device in ns (see net_dev_init), thus any - * loopback device should get ifindex 1 - */ - -#define LOOPBACK_IFINDEX 1 +#if IS_ENABLED(CONFIG_SMC) + struct netns_smc smc; +#endif +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + /* Move to a better place when the config guard is removed. */ + struct mutex rtnl_mutex; +#endif +} __randomize_layout; #include <linux/seq_file_net.h> @@ -137,33 +204,73 @@ struct net { extern struct net init_net; #ifdef CONFIG_NET_NS -extern struct net *copy_net_ns(unsigned long flags, - struct user_namespace *user_ns, struct net *old_net); +struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, + struct net *old_net); + +void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); + +void net_ns_barrier(void); + +struct ns_common *get_net_ns(struct ns_common *ns); +struct net *get_net_ns_by_fd(int fd); +extern struct task_struct *cleanup_net_task; #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> -static inline struct net *copy_net_ns(unsigned long flags, +static inline struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_get_ownership(const struct net *net, + kuid_t *uid, kgid_t *gid) +{ + *uid = GLOBAL_ROOT_UID; + *gid = GLOBAL_ROOT_GID; +} + +static inline void net_ns_barrier(void) {} + +static inline struct ns_common *get_net_ns(struct ns_common *ns) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; -extern struct net *get_net_ns_by_pid(pid_t pid); -extern struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_pid(pid_t pid); + +#ifdef CONFIG_SYSCTL +void ipx_register_sysctl(void); +void ipx_unregister_sysctl(void); +#else +#define ipx_register_sysctl() +#define ipx_unregister_sysctl() +#endif #ifdef CONFIG_NET_NS -extern void __put_net(struct net *net); +void __put_net(struct net *net); + +static inline struct net *to_net_ns(struct ns_common *ns) +{ + return container_of(ns, struct net, ns); +} +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { - atomic_inc(&net->count); + ns_ref_inc(net); return net; } @@ -174,14 +281,15 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. */ - if (!atomic_inc_not_zero(&net->count)) + if (!ns_ref_get(net)) net = NULL; return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { - if (atomic_dec_and_test(&net->count)) + if (ns_ref_put(net)) __put_net(net); } @@ -191,7 +299,13 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } -extern void net_drop_ns(void *); +static inline int check_net(const struct net *net) +{ + return ns_ref_read(net) != 0; +} + +void net_drop_ns(void *); +void net_passive_dec(struct net *net); #else @@ -215,56 +329,108 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL + +static inline void net_passive_dec(struct net *net) +{ + refcount_dec(&net->passive); +} #endif +static inline void net_passive_inc(struct net *net) +{ + refcount_inc(&net->passive); +} -#ifdef NETNS_REFCNT_DEBUG -static inline struct net *hold_net(struct net *net) +/* Returns true if the netns initialization is completed successfully */ +static inline bool net_initialized(const struct net *net) { - if (net) - atomic_inc(&net->use_count); - return net; + return READ_ONCE(net->list.next); } -static inline void release_net(struct net *net) +static inline void __netns_tracker_alloc(struct net *net, + netns_tracker *tracker, + bool refcounted, + gfp_t gfp) { - if (net) - atomic_dec(&net->use_count); +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, + tracker, gfp); +#endif } -#else -static inline struct net *hold_net(struct net *net) + +static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, + gfp_t gfp) { + __netns_tracker_alloc(net, tracker, true, gfp); +} + +static inline void __netns_tracker_free(struct net *net, + netns_tracker *tracker, + bool refcounted) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, tracker); +#endif +} + +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); return net; } -static inline void release_net(struct net *net) +static inline void put_net_track(struct net *net, netns_tracker *tracker) { + __netns_tracker_free(net, tracker, true); + put_net(net); } -#endif +typedef struct { #ifdef CONFIG_NET_NS + struct net __rcu *net; +#endif +} possible_net_t; -static inline void write_pnet(struct net **pnet, struct net *net) +static inline void write_pnet(possible_net_t *pnet, struct net *net) { - *pnet = net; +#ifdef CONFIG_NET_NS + rcu_assign_pointer(pnet->net, net); +#endif } -static inline struct net *read_pnet(struct net * const *pnet) +static inline struct net *read_pnet(const possible_net_t *pnet) { - return *pnet; +#ifdef CONFIG_NET_NS + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif } +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else - -#define write_pnet(pnet, net) do { (void)(net);} while (0) -#define read_pnet(pnet) (&init_net) - + return &init_net; #endif +} +/* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) - +#define for_each_net_continue_reverse(VAR) \ + list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) @@ -275,18 +441,49 @@ static inline struct net *read_pnet(struct net * const *pnet) #define __net_initconst #else #define __net_init __init -#define __net_exit __exit_refok +#define __net_exit __ref #define __net_initdata __initdata #define __net_initconst __initconst #endif +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); +int peernet2id(const struct net *net, struct net *peer); +bool peernet_has_id(const struct net *net, struct net *peer); +struct net *get_net_ns_by_id(const struct net *net, int id); + struct pernet_operations { struct list_head list; + /* + * Below methods are called without any exclusive locks. + * More than one net may be constructed and destructed + * in parallel on several cpus. Every pernet_operations + * have to keep in mind all other pernet_operations and + * to introduce a locking, if they share common resources. + * + * The only time they are called with exclusive lock is + * from register_pernet_subsys(), unregister_pernet_subsys() + * register_pernet_device() and unregister_pernet_device(). + * + * Exit methods using blocking RCU primitives, such as + * synchronize_rcu(), should be implemented via exit_batch. + * Then, destruction of a group of net requires single + * synchronize_rcu() related to these pernet_operations, + * instead of separate synchronize_rcu() for every net. + * Please, avoid synchronize_rcu() at all, where it's possible. + * + * Note that a combination of pre_exit() and exit() can + * be used, since a synchronize_rcu() is guaranteed between + * the calls. + */ int (*init)(struct net *net); + void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); - int *id; - size_t size; + /* Following method is called with RTNL held. */ + void (*exit_rtnl)(struct net *net, + struct list_head *dev_kill_list); + unsigned int * const id; + const size_t size; }; /* @@ -308,23 +505,24 @@ struct pernet_operations { * device which caused kernel oops, and panics during network * namespace cleanup. So please don't get this wrong. */ -extern int register_pernet_subsys(struct pernet_operations *); -extern void unregister_pernet_subsys(struct pernet_operations *); -extern int register_pernet_device(struct pernet_operations *); -extern void unregister_pernet_device(struct pernet_operations *); +int register_pernet_subsys(struct pernet_operations *); +void unregister_pernet_subsys(struct pernet_operations *); +int register_pernet_device(struct pernet_operations *); +void unregister_pernet_device(struct pernet_operations *); struct ctl_table; -struct ctl_table_header; +#define register_net_sysctl(net, path, table) \ + register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table)) #ifdef CONFIG_SYSCTL -extern int net_sysctl_init(void); -extern struct ctl_table_header *register_net_sysctl(struct net *net, - const char *path, struct ctl_table *table); -extern void unregister_net_sysctl_table(struct ctl_table_header *header); +int net_sysctl_init(void); +struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, + struct ctl_table *table, size_t table_size); +void unregister_net_sysctl_table(struct ctl_table_header *header); #else static inline int net_sysctl_init(void) { return 0; } -static inline struct ctl_table_header *register_net_sysctl(struct net *net, - const char *path, struct ctl_table *table) +static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net, + const char *path, struct ctl_table *table, size_t table_size) { return NULL; } @@ -333,17 +531,46 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header) } #endif -static inline int rt_genid(struct net *net) +static inline int rt_genid_ipv4(const struct net *net) +{ + return atomic_read(&net->ipv4.rt_genid); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline int rt_genid_ipv6(const struct net *net) +{ + return atomic_read(&net->ipv6.fib6_sernum); +} +#endif + +static inline void rt_genid_bump_ipv4(struct net *net) { - return atomic_read(&net->rt_genid); + atomic_inc(&net->ipv4.rt_genid); } -static inline void rt_genid_bump(struct net *net) +extern void (*__fib6_flush_trees)(struct net *net); +static inline void rt_genid_bump_ipv6(struct net *net) { - atomic_inc(&net->rt_genid); + if (__fib6_flush_trees) + __fib6_flush_trees(net); } -static inline int fnhe_genid(struct net *net) +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) +static inline struct netns_ieee802154_lowpan * +net_ieee802154_lowpan(struct net *net) +{ + return &net->ieee802154_lowpan; +} +#endif + +/* For callers who don't really care about whether it's IPv4 or IPv6 */ +static inline void rt_genid_bump_all(struct net *net) +{ + rt_genid_bump_ipv4(net); + rt_genid_bump_ipv6(net); +} + +static inline int fnhe_genid(const struct net *net) { return atomic_read(&net->fnhe_genid); } @@ -353,4 +580,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ |
