diff options
Diffstat (limited to 'include/net/net_namespace.h')
| -rw-r--r-- | include/net/net_namespace.h | 309 |
1 files changed, 253 insertions, 56 deletions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1c401bd4c2e0..cb664f6e3558 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Operations on the network namespace */ @@ -9,6 +10,7 @@ #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> +#include <linux/uidgid.h> #include <net/flow.h> #include <net/netns/core.h> @@ -17,21 +19,30 @@ #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> +#include <net/netns/nexthop.h> #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> -#include <net/netns/dccp.h> #include <net/netns/netfilter.h> -#include <net/netns/x_tables.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) +#include <net/netns/flow_table.h> +#endif #include <net/netns/nftables.h> #include <net/netns/xfrm.h> #include <net/netns/mpls.h> #include <net/netns/can.h> +#include <net/netns/xdp.h> +#include <net/netns/smc.h> +#include <net/netns/bpf.h> +#include <net/netns/mctp.h> +#include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> +#include <linux/notifier.h> +#include <linux/xarray.h> struct user_namespace; struct proc_dir_entry; @@ -39,35 +50,55 @@ struct net_device; struct sock; struct ctl_table_header; struct net_generic; -struct sock; +struct uevent_sock; struct netns_ipvs; +struct bpf_prog; #define NETDEV_HASHBITS 8 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - refcount_t passive; /* To decided when the network + /* First cache line can be often dirtied. + * Do not place here read-mostly fields. + */ + refcount_t passive; /* To decide when the network * namespace should be freed. */ - atomic_t count; /* To decided when the network - * namespace should be shut down. - */ spinlock_t rules_mod_lock; - atomic64_t cookie_gen; + unsigned int dev_base_seq; /* protected by rtnl_mutex */ + u32 ifindex; + + spinlock_t nsid_lock; + atomic_t fnhe_genid; struct list_head list; /* list of network namespaces */ - struct list_head cleanup_list; /* namespaces on death row */ - struct list_head exit_list; /* Use only net_mutex */ + struct list_head exit_list; /* To linked to call pernet exit + * methods on dead net ( + * pernet_ops_rwsem read locked), + * or to unregister pernet ops + * (pernet_ops_rwsem write locked). + */ + struct llist_node defer_free_list; + struct llist_node cleanup_list; /* namespaces on death row */ + + struct list_head ptype_all; + struct list_head ptype_specific; +#ifdef CONFIG_KEYS + struct key_tag *key_domain; /* Key domain of operation tag */ +#endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; - spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; - + struct ref_tracker_dir refcnt_tracker; + struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not + * refcounted against netns + */ + struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -78,22 +109,30 @@ struct net { struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; - struct list_head dev_base_head; + struct uevent_sock *uevent_sock; /* uevent socket */ + struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ - int ifindex; - unsigned int dev_unreg_count; + struct xarray dev_by_index; + struct raw_notifier_head netdev_chain; + + /* Note that @hash_mix can be read millions times per second, + * it is critical that it is on a read_mostly cache line. + */ + u32 hash_mix; + + struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; - - struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; +#if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; +#endif + struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; @@ -104,28 +143,16 @@ struct net { #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) - struct netns_dccp dccp; -#endif #ifdef CONFIG_NETFILTER struct netns_nf nf; - struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) struct netns_nftables nft; #endif -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - struct netns_nf_frag nf_frag; -#endif - struct sock *nfnl; - struct sock *nfnl_stash; -#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) - struct list_head nfnl_acct_list; -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) - struct list_head nfct_timeout_list; +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + struct netns_ft ft; #endif #endif #ifdef CONFIG_WEXT_CORE @@ -133,10 +160,16 @@ struct net { #endif struct net_generic __rcu *gen; + /* Used to store attached BPF programs */ + struct netns_bpf bpf; + /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif + + u64 net_cookie; /* written once */ + #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; #endif @@ -146,8 +179,23 @@ struct net { #if IS_ENABLED(CONFIG_CAN) struct netns_can can; #endif +#ifdef CONFIG_XDP_SOCKETS + struct netns_xdp xdp; +#endif +#if IS_ENABLED(CONFIG_MCTP) + struct netns_mctp mctp; +#endif +#if IS_ENABLED(CONFIG_CRYPTO_USER) + struct sock *crypto_nlsk; +#endif struct sock *diag_nlsk; - atomic_t fnhe_genid; +#if IS_ENABLED(CONFIG_SMC) + struct netns_smc smc; +#endif +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + /* Move to a better place when the config guard is removed. */ + struct mutex rtnl_mutex; +#endif } __randomize_layout; #include <linux/seq_file_net.h> @@ -156,14 +204,21 @@ struct net { extern struct net init_net; #ifdef CONFIG_NET_NS -struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, +struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); + void net_ns_barrier(void); + +struct ns_common *get_net_ns(struct ns_common *ns); +struct net *get_net_ns_by_fd(int fd); +extern struct task_struct *cleanup_net_task; + #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> -static inline struct net *copy_net_ns(unsigned long flags, +static inline struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) @@ -171,14 +226,30 @@ static inline struct net *copy_net_ns(unsigned long flags, return old_net; } +static inline void net_ns_get_ownership(const struct net *net, + kuid_t *uid, kgid_t *gid) +{ + *uid = GLOBAL_ROOT_UID; + *gid = GLOBAL_ROOT_GID; +} + static inline void net_ns_barrier(void) {} + +static inline struct ns_common *get_net_ns(struct ns_common *ns) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); @@ -191,9 +262,15 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +static inline struct net *to_net_ns(struct ns_common *ns) +{ + return container_of(ns, struct net, ns); +} + +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { - atomic_inc(&net->count); + ns_ref_inc(net); return net; } @@ -204,14 +281,15 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. */ - if (!atomic_inc_not_zero(&net->count)) + if (!ns_ref_get(net)) net = NULL; return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { - if (atomic_dec_and_test(&net->count)) + if (ns_ref_put(net)) __put_net(net); } @@ -221,7 +299,13 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return ns_ref_read(net) != 0; +} + void net_drop_ns(void *); +void net_passive_dec(struct net *net); #else @@ -245,35 +329,108 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL + +static inline void net_passive_dec(struct net *net) +{ + refcount_dec(&net->passive); +} +#endif + +static inline void net_passive_inc(struct net *net) +{ + refcount_inc(&net->passive); +} + +/* Returns true if the netns initialization is completed successfully */ +static inline bool net_initialized(const struct net *net) +{ + return READ_ONCE(net->list.next); +} + +static inline void __netns_tracker_alloc(struct net *net, + netns_tracker *tracker, + bool refcounted, + gfp_t gfp) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, + tracker, gfp); +#endif +} + +static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, + gfp_t gfp) +{ + __netns_tracker_alloc(net, tracker, true, gfp); +} + +static inline void __netns_tracker_free(struct net *net, + netns_tracker *tracker, + bool refcounted) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, tracker); #endif +} +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); + return net; +} + +static inline void put_net_track(struct net *net, netns_tracker *tracker) +{ + __netns_tracker_free(net, tracker, true); + put_net(net); +} typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif } +/* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) - +#define for_each_net_continue_reverse(VAR) \ + list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) @@ -289,18 +446,44 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif -int peernet2id_alloc(struct net *net, struct net *peer); -int peernet2id(struct net *net, struct net *peer); -bool peernet_has_id(struct net *net, struct net *peer); -struct net *get_net_ns_by_id(struct net *net, int id); +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); +int peernet2id(const struct net *net, struct net *peer); +bool peernet_has_id(const struct net *net, struct net *peer); +struct net *get_net_ns_by_id(const struct net *net, int id); struct pernet_operations { struct list_head list; + /* + * Below methods are called without any exclusive locks. + * More than one net may be constructed and destructed + * in parallel on several cpus. Every pernet_operations + * have to keep in mind all other pernet_operations and + * to introduce a locking, if they share common resources. + * + * The only time they are called with exclusive lock is + * from register_pernet_subsys(), unregister_pernet_subsys() + * register_pernet_device() and unregister_pernet_device(). + * + * Exit methods using blocking RCU primitives, such as + * synchronize_rcu(), should be implemented via exit_batch. + * Then, destruction of a group of net requires single + * synchronize_rcu() related to these pernet_operations, + * instead of separate synchronize_rcu() for every net. + * Please, avoid synchronize_rcu() at all, where it's possible. + * + * Note that a combination of pre_exit() and exit() can + * be used, since a synchronize_rcu() is guaranteed between + * the calls. + */ int (*init)(struct net *net); + void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); - unsigned int *id; - size_t size; + /* Following method is called with RTNL held. */ + void (*exit_rtnl)(struct net *net, + struct list_head *dev_kill_list); + unsigned int * const id; + const size_t size; }; /* @@ -328,17 +511,18 @@ int register_pernet_device(struct pernet_operations *); void unregister_pernet_device(struct pernet_operations *); struct ctl_table; -struct ctl_table_header; +#define register_net_sysctl(net, path, table) \ + register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table)) #ifdef CONFIG_SYSCTL int net_sysctl_init(void); -struct ctl_table_header *register_net_sysctl(struct net *net, const char *path, - struct ctl_table *table); +struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, + struct ctl_table *table, size_t table_size); void unregister_net_sysctl_table(struct ctl_table_header *header); #else static inline int net_sysctl_init(void) { return 0; } -static inline struct ctl_table_header *register_net_sysctl(struct net *net, - const char *path, struct ctl_table *table) +static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net, + const char *path, struct ctl_table *table, size_t table_size) { return NULL; } @@ -347,11 +531,18 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header) } #endif -static inline int rt_genid_ipv4(struct net *net) +static inline int rt_genid_ipv4(const struct net *net) { return atomic_read(&net->ipv4.rt_genid); } +#if IS_ENABLED(CONFIG_IPV6) +static inline int rt_genid_ipv6(const struct net *net) +{ + return atomic_read(&net->ipv6.fib6_sernum); +} +#endif + static inline void rt_genid_bump_ipv4(struct net *net) { atomic_inc(&net->ipv4.rt_genid); @@ -379,7 +570,7 @@ static inline void rt_genid_bump_all(struct net *net) rt_genid_bump_ipv6(net); } -static inline int fnhe_genid(struct net *net) +static inline int fnhe_genid(const struct net *net) { return atomic_read(&net->fnhe_genid); } @@ -389,4 +580,10 @@ static inline void fnhe_genid_bump(struct net *net) atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ |
