summaryrefslogtreecommitdiff
path: root/include/net/net_namespace.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/net_namespace.h')
-rw-r--r--include/net/net_namespace.h309
1 files changed, 253 insertions, 56 deletions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd4c2e0..cb664f6e3558 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Operations on the network namespace
*/
@@ -9,6 +10,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/sysctl.h>
+#include <linux/uidgid.h>
#include <net/flow.h>
#include <net/netns/core.h>
@@ -17,21 +19,30 @@
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
+#include <net/netns/nexthop.h>
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
-#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
-#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+#include <net/netns/flow_table.h>
+#endif
#include <net/netns/nftables.h>
#include <net/netns/xfrm.h>
#include <net/netns/mpls.h>
#include <net/netns/can.h>
+#include <net/netns/xdp.h>
+#include <net/netns/smc.h>
+#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
+#include <net/net_trackers.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/xarray.h>
struct user_namespace;
struct proc_dir_entry;
@@ -39,35 +50,55 @@ struct net_device;
struct sock;
struct ctl_table_header;
struct net_generic;
-struct sock;
+struct uevent_sock;
struct netns_ipvs;
+struct bpf_prog;
#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net {
- refcount_t passive; /* To decided when the network
+ /* First cache line can be often dirtied.
+ * Do not place here read-mostly fields.
+ */
+ refcount_t passive; /* To decide when the network
* namespace should be freed.
*/
- atomic_t count; /* To decided when the network
- * namespace should be shut down.
- */
spinlock_t rules_mod_lock;
- atomic64_t cookie_gen;
+ unsigned int dev_base_seq; /* protected by rtnl_mutex */
+ u32 ifindex;
+
+ spinlock_t nsid_lock;
+ atomic_t fnhe_genid;
struct list_head list; /* list of network namespaces */
- struct list_head cleanup_list; /* namespaces on death row */
- struct list_head exit_list; /* Use only net_mutex */
+ struct list_head exit_list; /* To linked to call pernet exit
+ * methods on dead net (
+ * pernet_ops_rwsem read locked),
+ * or to unregister pernet ops
+ * (pernet_ops_rwsem write locked).
+ */
+ struct llist_node defer_free_list;
+ struct llist_node cleanup_list; /* namespaces on death row */
+
+ struct list_head ptype_all;
+ struct list_head ptype_specific;
+#ifdef CONFIG_KEYS
+ struct key_tag *key_domain; /* Key domain of operation tag */
+#endif
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
- spinlock_t nsid_lock;
struct idr netns_ids;
struct ns_common ns;
-
+ struct ref_tracker_dir refcnt_tracker;
+ struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not
+ * refcounted against netns
+ */
+ struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
@@ -78,22 +109,30 @@ struct net {
struct sock *rtnl; /* rtnetlink socket */
struct sock *genl_sock;
- struct list_head dev_base_head;
+ struct uevent_sock *uevent_sock; /* uevent socket */
+
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
- unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
- unsigned int dev_unreg_count;
+ struct xarray dev_by_index;
+ struct raw_notifier_head netdev_chain;
+
+ /* Note that @hash_mix can be read millions times per second,
+ * it is critical that it is on a read_mostly cache line.
+ */
+ u32 hash_mix;
+
+ struct net_device *loopback_dev; /* The loopback */
/* core fib_rules */
struct list_head rules_ops;
-
- struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
+#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
+#endif
+ struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
struct netns_ipv6 ipv6;
@@ -104,28 +143,16 @@ struct net {
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
struct netns_sctp sctp;
#endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
- struct netns_dccp dccp;
-#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
- struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
struct netns_nftables nft;
#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- struct netns_nf_frag nf_frag;
-#endif
- struct sock *nfnl;
- struct sock *nfnl_stash;
-#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
- struct list_head nfnl_acct_list;
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
- struct list_head nfct_timeout_list;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ struct netns_ft ft;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
@@ -133,10 +160,16 @@ struct net {
#endif
struct net_generic __rcu *gen;
+ /* Used to store attached BPF programs */
+ struct netns_bpf bpf;
+
/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
+
+ u64 net_cookie; /* written once */
+
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
@@ -146,8 +179,23 @@ struct net {
#if IS_ENABLED(CONFIG_CAN)
struct netns_can can;
#endif
+#ifdef CONFIG_XDP_SOCKETS
+ struct netns_xdp xdp;
+#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_USER)
+ struct sock *crypto_nlsk;
+#endif
struct sock *diag_nlsk;
- atomic_t fnhe_genid;
+#if IS_ENABLED(CONFIG_SMC)
+ struct netns_smc smc;
+#endif
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ /* Move to a better place when the config guard is removed. */
+ struct mutex rtnl_mutex;
+#endif
} __randomize_layout;
#include <linux/seq_file_net.h>
@@ -156,14 +204,21 @@ struct net {
extern struct net init_net;
#ifdef CONFIG_NET_NS
-struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
+struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns,
struct net *old_net);
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
+extern struct task_struct *cleanup_net_task;
+
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
-static inline struct net *copy_net_ns(unsigned long flags,
+static inline struct net *copy_net_ns(u64 flags,
struct user_namespace *user_ns, struct net *old_net)
{
if (flags & CLONE_NEWNET)
@@ -171,14 +226,30 @@ static inline struct net *copy_net_ns(unsigned long flags,
return old_net;
}
+static inline void net_ns_get_ownership(const struct net *net,
+ kuid_t *uid, kgid_t *gid)
+{
+ *uid = GLOBAL_ROOT_UID;
+ *gid = GLOBAL_ROOT_GID;
+}
+
static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif /* CONFIG_NET_NS */
extern struct list_head net_namespace_list;
struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int fd);
#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
@@ -191,9 +262,15 @@ void ipx_unregister_sysctl(void);
#ifdef CONFIG_NET_NS
void __put_net(struct net *net);
+static inline struct net *to_net_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct net, ns);
+}
+
+/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- atomic_inc(&net->count);
+ ns_ref_inc(net);
return net;
}
@@ -204,14 +281,15 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!atomic_inc_not_zero(&net->count))
+ if (!ns_ref_get(net))
net = NULL;
return net;
}
+/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (atomic_dec_and_test(&net->count))
+ if (ns_ref_put(net))
__put_net(net);
}
@@ -221,7 +299,13 @@ int net_eq(const struct net *net1, const struct net *net2)
return net1 == net2;
}
+static inline int check_net(const struct net *net)
+{
+ return ns_ref_read(net) != 0;
+}
+
void net_drop_ns(void *);
+void net_passive_dec(struct net *net);
#else
@@ -245,35 +329,108 @@ int net_eq(const struct net *net1, const struct net *net2)
return 1;
}
+static inline int check_net(const struct net *net)
+{
+ return 1;
+}
+
#define net_drop_ns NULL
+
+static inline void net_passive_dec(struct net *net)
+{
+ refcount_dec(&net->passive);
+}
+#endif
+
+static inline void net_passive_inc(struct net *net)
+{
+ refcount_inc(&net->passive);
+}
+
+/* Returns true if the netns initialization is completed successfully */
+static inline bool net_initialized(const struct net *net)
+{
+ return READ_ONCE(net->list.next);
+}
+
+static inline void __netns_tracker_alloc(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted,
+ gfp_t gfp)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_alloc(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker,
+ tracker, gfp);
+#endif
+}
+
+static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker,
+ gfp_t gfp)
+{
+ __netns_tracker_alloc(net, tracker, true, gfp);
+}
+
+static inline void __netns_tracker_free(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted)
+{
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+ ref_tracker_free(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker, tracker);
#endif
+}
+static inline struct net *get_net_track(struct net *net,
+ netns_tracker *tracker, gfp_t gfp)
+{
+ get_net(net);
+ netns_tracker_alloc(net, tracker, gfp);
+ return net;
+}
+
+static inline void put_net_track(struct net *net, netns_tracker *tracker)
+{
+ __netns_tracker_free(net, tracker, true);
+ put_net(net);
+}
typedef struct {
#ifdef CONFIG_NET_NS
- struct net *net;
+ struct net __rcu *net;
#endif
} possible_net_t;
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
- pnet->net = net;
+ rcu_assign_pointer(pnet->net, net);
#endif
}
static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
- return pnet->net;
+ return rcu_dereference_protected(pnet->net, true);
+#else
+ return &init_net;
+#endif
+}
+
+static inline struct net *read_pnet_rcu(const possible_net_t *pnet)
+{
+#ifdef CONFIG_NET_NS
+ return rcu_dereference(pnet->net);
#else
return &init_net;
#endif
}
+/* Protected by net_rwsem */
#define for_each_net(VAR) \
list_for_each_entry(VAR, &net_namespace_list, list)
-
+#define for_each_net_continue_reverse(VAR) \
+ list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
#define for_each_net_rcu(VAR) \
list_for_each_entry_rcu(VAR, &net_namespace_list, list)
@@ -289,18 +446,44 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif
-int peernet2id_alloc(struct net *net, struct net *peer);
-int peernet2id(struct net *net, struct net *peer);
-bool peernet_has_id(struct net *net, struct net *peer);
-struct net *get_net_ns_by_id(struct net *net, int id);
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
+int peernet2id(const struct net *net, struct net *peer);
+bool peernet_has_id(const struct net *net, struct net *peer);
+struct net *get_net_ns_by_id(const struct net *net, int id);
struct pernet_operations {
struct list_head list;
+ /*
+ * Below methods are called without any exclusive locks.
+ * More than one net may be constructed and destructed
+ * in parallel on several cpus. Every pernet_operations
+ * have to keep in mind all other pernet_operations and
+ * to introduce a locking, if they share common resources.
+ *
+ * The only time they are called with exclusive lock is
+ * from register_pernet_subsys(), unregister_pernet_subsys()
+ * register_pernet_device() and unregister_pernet_device().
+ *
+ * Exit methods using blocking RCU primitives, such as
+ * synchronize_rcu(), should be implemented via exit_batch.
+ * Then, destruction of a group of net requires single
+ * synchronize_rcu() related to these pernet_operations,
+ * instead of separate synchronize_rcu() for every net.
+ * Please, avoid synchronize_rcu() at all, where it's possible.
+ *
+ * Note that a combination of pre_exit() and exit() can
+ * be used, since a synchronize_rcu() is guaranteed between
+ * the calls.
+ */
int (*init)(struct net *net);
+ void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
- unsigned int *id;
- size_t size;
+ /* Following method is called with RTNL held. */
+ void (*exit_rtnl)(struct net *net,
+ struct list_head *dev_kill_list);
+ unsigned int * const id;
+ const size_t size;
};
/*
@@ -328,17 +511,18 @@ int register_pernet_device(struct pernet_operations *);
void unregister_pernet_device(struct pernet_operations *);
struct ctl_table;
-struct ctl_table_header;
+#define register_net_sysctl(net, path, table) \
+ register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
-struct ctl_table_header *register_net_sysctl(struct net *net, const char *path,
- struct ctl_table *table);
+struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path,
+ struct ctl_table *table, size_t table_size);
void unregister_net_sysctl_table(struct ctl_table_header *header);
#else
static inline int net_sysctl_init(void) { return 0; }
-static inline struct ctl_table_header *register_net_sysctl(struct net *net,
- const char *path, struct ctl_table *table)
+static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net,
+ const char *path, struct ctl_table *table, size_t table_size)
{
return NULL;
}
@@ -347,11 +531,18 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
}
#endif
-static inline int rt_genid_ipv4(struct net *net)
+static inline int rt_genid_ipv4(const struct net *net)
{
return atomic_read(&net->ipv4.rt_genid);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(const struct net *net)
+{
+ return atomic_read(&net->ipv6.fib6_sernum);
+}
+#endif
+
static inline void rt_genid_bump_ipv4(struct net *net)
{
atomic_inc(&net->ipv4.rt_genid);
@@ -379,7 +570,7 @@ static inline void rt_genid_bump_all(struct net *net)
rt_genid_bump_ipv6(net);
}
-static inline int fnhe_genid(struct net *net)
+static inline int fnhe_genid(const struct net *net)
{
return atomic_read(&net->fnhe_genid);
}
@@ -389,4 +580,10 @@ static inline void fnhe_genid_bump(struct net *net)
atomic_inc(&net->fnhe_genid);
}
+#ifdef CONFIG_NET
+void net_ns_init(void);
+#else
+static inline void net_ns_init(void) {}
+#endif
+
#endif /* __NET_NET_NAMESPACE_H */