summary refs log tree commit diff
path: root/include/net/net_namespace.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/net_namespace.h')
-rw-r--r-- include/net/net_namespace.h 415
1 files changed, 324 insertions, 91 deletions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 84e37b1ca9e1..cb664f6e3558 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Operations on the network namespace
*/
@@ -5,24 +6,43 @@
#define __NET_NET_NAMESPACE_H
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/sysctl.h>
+#include <linux/uidgid.h>
+#include <net/flow.h>
#include <net/netns/core.h>
#include <net/netns/mib.h>
#include <net/netns/unix.h>
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
+#include <net/netns/nexthop.h>
+#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
-#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
-#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+#include <net/netns/flow_table.h>
+#endif
+#include <net/netns/nftables.h>
#include <net/netns/xfrm.h>
+#include <net/netns/mpls.h>
+#include <net/netns/can.h>
+#include <net/netns/xdp.h>
+#include <net/netns/smc.h>
+#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
+#include <net/net_trackers.h>
+#include <linux/ns_common.h>
+#include <linux/idr.h>
+#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/xarray.h>
struct user_namespace;
struct proc_dir_entry;
@@ -30,35 +50,55 @@ struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
-struct sock;
+struct uevent_sock;
struct netns_ipvs;
+struct bpf_prog;
#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net {
- atomic_t passive; /* To decided when the network
+ /* The first cache line can be dirtied often.
+ * Do not place read-mostly fields here.
+ */
+ refcount_t passive; /* To decide when the network
* namespace should be freed.
*/
- atomic_t count; /* To decided when the network
- * namespace should be shut down.
- */
-#ifdef NETNS_REFCNT_DEBUG
- atomic_t use_count; /* To track references we
- * destroy on demand
- */
-#endif
spinlock_t rules_mod_lock;
- struct list_head list; /* list of network namespaces */
- struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
+ unsigned int dev_base_seq; /* protected by rtnl_mutex */
+ u32 ifindex;
- struct user_namespace *user_ns; /* Owning user namespace */
+ spinlock_t nsid_lock;
+ atomic_t fnhe_genid;
+
+ struct list_head list; /* list of network namespaces */
+ struct list_head exit_list; /* Linked to call pernet exit
+ * methods on dead net (
+ * pernet_ops_rwsem read locked),
+ * or to unregister pernet ops
+ * (pernet_ops_rwsem write locked).
+ */
+ struct llist_node defer_free_list;
+ struct llist_node cleanup_list; /* namespaces on death row */
- unsigned int proc_inum;
+ struct list_head ptype_all;
+ struct list_head ptype_specific;
+#ifdef CONFIG_KEYS
+ struct key_tag *key_domain; /* Key domain of operation tag */
+#endif
+ struct user_namespace *user_ns; /* Owning user namespace */
+ struct ucounts *ucounts;
+ struct idr netns_ids;
+
+ struct ns_common ns;
+ struct ref_tracker_dir refcnt_tracker;
+ struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not
+ * refcounted against netns
+ */
+ struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
@@ -69,67 +109,94 @@ struct net {
struct sock *rtnl; /* rtnetlink socket */
struct sock *genl_sock;
- struct list_head dev_base_head;
+ struct uevent_sock *uevent_sock; /* uevent socket */
+
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
- unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
+ struct xarray dev_by_index;
+ struct raw_notifier_head netdev_chain;
+
+ /* Note that @hash_mix can be read millions of times per second,
+ * it is critical that it is on a read_mostly cache line.
+ */
+ u32 hash_mix;
+
+ struct net_device *loopback_dev; /* The loopback */
/* core fib_rules */
struct list_head rules_ops;
-
- struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
+#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
+#endif
+ struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
struct netns_ipv6 ipv6;
#endif
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ struct netns_ieee802154_lowpan ieee802154_lowpan;
+#endif
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
struct netns_sctp sctp;
#endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
- struct netns_dccp dccp;
-#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
- struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- struct netns_nf_frag nf_frag;
+#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
+ struct netns_nftables nft;
+#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ struct netns_ft ft;
#endif
- struct sock *nfnl;
- struct sock *nfnl_stash;
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
#endif
struct net_generic __rcu *gen;
+ /* Used to store attached BPF programs */
+ struct netns_bpf bpf;
+
/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
+
+ u64 net_cookie; /* written once */
+
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ struct netns_mpls mpls;
+#endif
+#if IS_ENABLED(CONFIG_CAN)
+ struct netns_can can;
+#endif
+#ifdef CONFIG_XDP_SOCKETS
+ struct netns_xdp xdp;
+#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_USER)
+ struct sock *crypto_nlsk;
+#endif
struct sock *diag_nlsk;
- atomic_t rt_genid;
- atomic_t fnhe_genid;
-};
-
-/*
- * ifindex generation is per-net namespace, and loopback is
- * always the 1st device in ns (see net_dev_init), thus any
- * loopback device should get ifindex 1
- */
-
-#define LOOPBACK_IFINDEX 1
+#if IS_ENABLED(CONFIG_SMC)
+ struct netns_smc smc;
+#endif
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ /* Move to a better place when the config guard is removed. */
+ struct mutex rtnl_mutex;
+#endif
+} __randomize_layout;
#include <linux/seq_file_net.h>
@@ -137,33 +204,73 @@ struct net {
extern struct net init_net;
#ifdef CONFIG_NET_NS
-extern struct net *copy_net_ns(unsigned long flags,
- struct user_namespace *user_ns, struct net *old_net);
+struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns,
+ struct net *old_net);
+
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
+void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
+extern struct task_struct *cleanup_net_task;
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
-static inline struct net *copy_net_ns(unsigned long flags,
+static inline struct net *copy_net_ns(u64 flags,
struct user_namespace *user_ns, struct net *old_net)
{
if (flags & CLONE_NEWNET)
return ERR_PTR(-EINVAL);
return old_net;
}
+
+static inline void net_ns_get_ownership(const struct net *net,
+ kuid_t *uid, kgid_t *gid)
+{
+ *uid = GLOBAL_ROOT_UID;
+ *gid = GLOBAL_ROOT_GID;
+}
+
+static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif /* CONFIG_NET_NS */
extern struct list_head net_namespace_list;
-extern struct net *get_net_ns_by_pid(pid_t pid);
-extern struct net *get_net_ns_by_fd(int pid);
+struct net *get_net_ns_by_pid(pid_t pid);
+
+#ifdef CONFIG_SYSCTL
+void ipx_register_sysctl(void);
+void ipx_unregister_sysctl(void);
+#else
+#define ipx_register_sysctl()
+#define ipx_unregister_sysctl()
+#endif
#ifdef CONFIG_NET_NS
-extern void __put_net(struct net *net);
+void __put_net(struct net *net);
+
+static inline struct net *to_net_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct net, ns);
+}
+/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- atomic_inc(&net->count);
+ ns_ref_inc(net);
return net;
}
@@ -174,14 +281,15 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!atomic_inc_not_zero(&net->count))
+ if (!ns_ref_get(net))
net = NULL;
return net;
}
+/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (atomic_dec_and_test(&net->count))
+ if (ns_ref_put(net))
__put_net(net);
}
@@ -191,7 +299,13 @@ int net_eq(const struct net *net1, const struct net *net2)
return net1 == net2;
}
-extern void net_drop_ns(void *);
+static inline int check_net(const struct net *net)
+{
+ return ns_ref_read(net) != 0;
+}
+
+void net_drop_ns(void *);
+void net_passive_dec(struct net *net);
#else
@@ -215,56 +329,108 @@ int net_eq(const struct net *net1, const struct net *net2)
return 1;
}
+static inline int check_net(const struct net *net)
+{
+ return 1;
+}
+
#define net_drop_ns NULL
+
+static inline void net_passive_dec(struct net *net)
+{
+ refcount_dec(&net->passive);
+}
#endif
+static inline void net_passive_inc(struct net *net)
+{
+ refcount_inc(&net->passive);
+}
-#ifdef NETNS_REFCNT_DEBUG
-static inline struct net *hold_net(struct net *net)
+/* Returns true if the netns initialization is completed successfully */
+static inline bool net_initialized(const struct net *net)
{
- if (net)
- atomic_inc(&net->use_count);
- return net;
+ return READ_ONCE(net->list.next);
}
-static inline void release_net(struct net *net)
+static inline void __netns_tracker_alloc(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted,
+ gfp_t gfp)
{
- if (net)
- atomic_dec(&net->use_count);
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_alloc(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker,
+ tracker, gfp);
+#endif
}
-#else
-static inline struct net *hold_net(struct net *net)
+
+static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker,
+ gfp_t gfp)
{
+ __netns_tracker_alloc(net, tracker, true, gfp);
+}
+
+static inline void __netns_tracker_free(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_free(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker, tracker);
+#endif
+}
+
+static inline struct net *get_net_track(struct net *net,
+ netns_tracker *tracker, gfp_t gfp)
+{
+ get_net(net);
+ netns_tracker_alloc(net, tracker, gfp);
return net;
}
-static inline void release_net(struct net *net)
+static inline void put_net_track(struct net *net, netns_tracker *tracker)
{
+ __netns_tracker_free(net, tracker, true);
+ put_net(net);
}
-#endif
+typedef struct {
#ifdef CONFIG_NET_NS
+ struct net __rcu *net;
+#endif
+} possible_net_t;
-static inline void write_pnet(struct net **pnet, struct net *net)
+static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
- *pnet = net;
+#ifdef CONFIG_NET_NS
+ rcu_assign_pointer(pnet->net, net);
+#endif
}
-static inline struct net *read_pnet(struct net * const *pnet)
+static inline struct net *read_pnet(const possible_net_t *pnet)
{
- return *pnet;
+#ifdef CONFIG_NET_NS
+ return rcu_dereference_protected(pnet->net, true);
+#else
+ return &init_net;
+#endif
}
+static inline struct net *read_pnet_rcu(const possible_net_t *pnet)
+{
+#ifdef CONFIG_NET_NS
+ return rcu_dereference(pnet->net);
#else
-
-#define write_pnet(pnet, net) do { (void)(net);} while (0)
-#define read_pnet(pnet) (&init_net)
-
+ return &init_net;
#endif
+}
+/* Protected by net_rwsem */
#define for_each_net(VAR) \
list_for_each_entry(VAR, &net_namespace_list, list)
-
+#define for_each_net_continue_reverse(VAR) \
+ list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
#define for_each_net_rcu(VAR) \
list_for_each_entry_rcu(VAR, &net_namespace_list, list)
@@ -275,18 +441,49 @@ static inline struct net *read_pnet(struct net * const *pnet)
#define __net_initconst
#else
#define __net_init __init
-#define __net_exit __exit_refok
+#define __net_exit __ref
#define __net_initdata __initdata
#define __net_initconst __initconst
#endif
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
+int peernet2id(const struct net *net, struct net *peer);
+bool peernet_has_id(const struct net *net, struct net *peer);
+struct net *get_net_ns_by_id(const struct net *net, int id);
+
struct pernet_operations {
struct list_head list;
+ /*
+ * Below methods are called without any exclusive locks.
+ * More than one net may be constructed and destructed
+ * in parallel on several cpus. Every pernet_operations
+ * has to keep in mind all other pernet_operations and
+ * introduce locking if they share common resources.
+ *
+ * The only time they are called with exclusive lock is
+ * from register_pernet_subsys(), unregister_pernet_subsys()
+ * register_pernet_device() and unregister_pernet_device().
+ *
+ * Exit methods using blocking RCU primitives, such as
+ * synchronize_rcu(), should be implemented via exit_batch.
+ * Then, destruction of a group of net requires single
+ * synchronize_rcu() related to these pernet_operations,
+ * instead of separate synchronize_rcu() for every net.
+ * Please avoid synchronize_rcu() altogether wherever possible.
+ *
+ * Note that a combination of pre_exit() and exit() can
+ * be used, since a synchronize_rcu() is guaranteed between
+ * the calls.
+ */
int (*init)(struct net *net);
+ void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
- int *id;
- size_t size;
+ /* Following method is called with RTNL held. */
+ void (*exit_rtnl)(struct net *net,
+ struct list_head *dev_kill_list);
+ unsigned int * const id;
+ const size_t size;
};
/*
@@ -308,23 +505,24 @@ struct pernet_operations {
* device which caused kernel oops, and panics during network
* namespace cleanup. So please don't get this wrong.
*/
-extern int register_pernet_subsys(struct pernet_operations *);
-extern void unregister_pernet_subsys(struct pernet_operations *);
-extern int register_pernet_device(struct pernet_operations *);
-extern void unregister_pernet_device(struct pernet_operations *);
+int register_pernet_subsys(struct pernet_operations *);
+void unregister_pernet_subsys(struct pernet_operations *);
+int register_pernet_device(struct pernet_operations *);
+void unregister_pernet_device(struct pernet_operations *);
struct ctl_table;
-struct ctl_table_header;
+#define register_net_sysctl(net, path, table) \
+ register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
-extern int net_sysctl_init(void);
-extern struct ctl_table_header *register_net_sysctl(struct net *net,
- const char *path, struct ctl_table *table);
-extern void unregister_net_sysctl_table(struct ctl_table_header *header);
+int net_sysctl_init(void);
+struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path,
+ struct ctl_table *table, size_t table_size);
+void unregister_net_sysctl_table(struct ctl_table_header *header);
#else
static inline int net_sysctl_init(void) { return 0; }
-static inline struct ctl_table_header *register_net_sysctl(struct net *net,
- const char *path, struct ctl_table *table)
+static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net,
+ const char *path, struct ctl_table *table, size_t table_size)
{
return NULL;
}
@@ -333,17 +531,46 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
}
#endif
-static inline int rt_genid(struct net *net)
+static inline int rt_genid_ipv4(const struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(const struct net *net)
+{
+ return atomic_read(&net->ipv6.fib6_sernum);
+}
+#endif
+
+static inline void rt_genid_bump_ipv4(struct net *net)
{
- return atomic_read(&net->rt_genid);
+ atomic_inc(&net->ipv4.rt_genid);
}
-static inline void rt_genid_bump(struct net *net)
+extern void (*__fib6_flush_trees)(struct net *net);
+static inline void rt_genid_bump_ipv6(struct net *net)
{
- atomic_inc(&net->rt_genid);
+ if (__fib6_flush_trees)
+ __fib6_flush_trees(net);
}
-static inline int fnhe_genid(struct net *net)
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+static inline struct netns_ieee802154_lowpan *
+net_ieee802154_lowpan(struct net *net)
+{
+ return &net->ieee802154_lowpan;
+}
+#endif
+
+/* For callers who don't really care about whether it's IPv4 or IPv6 */
+static inline void rt_genid_bump_all(struct net *net)
+{
+ rt_genid_bump_ipv4(net);
+ rt_genid_bump_ipv6(net);
+}
+
+static inline int fnhe_genid(const struct net *net)
{
return atomic_read(&net->fnhe_genid);
}
@@ -353,4 +580,10 @@ static inline void fnhe_genid_bump(struct net *net)
atomic_inc(&net->fnhe_genid);
}
+#ifdef CONFIG_NET
+void net_ns_init(void);
+#else
+static inline void net_ns_init(void) {}
+#endif
+
#endif /* __NET_NET_NAMESPACE_H */