diff options
Diffstat (limited to 'net/l2tp/l2tp_core.c')
| -rw-r--r-- | net/l2tp/l2tp_core.c | 1968 |
1 files changed, 943 insertions, 1025 deletions
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index feae495a0a30..687c1366a4d0 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1,5 +1,5 @@ -/* - * L2TP core. +// SPDX-License-Identifier: GPL-2.0-only +/* L2TP core. * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd * @@ -12,10 +12,6 @@ * Michal Ostrowski <mostrows@speakeasy.net> * Arnaldo Carvalho de Melo <acme@xconectiva.com.br> * David S. Miller (davem@redhat.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -43,7 +39,6 @@ #include <linux/ip.h> #include <linux/udp.h> #include <linux/l2tp.h> -#include <linux/hash.h> #include <linux/sort.h> #include <linux/file.h> #include <linux/nsproxy.h> @@ -52,6 +47,7 @@ #include <net/dst.h> #include <net/ip.h> #include <net/udp.h> +#include <net/udp_tunnel.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/protocol.h> @@ -65,6 +61,9 @@ #include "l2tp_core.h" +#define CREATE_TRACE_POINTS +#include "trace.h" + #define L2TP_DRV_VERSION "V2.0" /* L2TP header constants */ @@ -82,12 +81,16 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -97,270 +100,527 @@ struct l2tp_skb_cb { unsigned long expires; }; -#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) +#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)]) -static atomic_t l2tp_tunnel_count; -static atomic_t l2tp_session_count; static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; struct l2tp_net { - struct list_head l2tp_tunnel_list; - spinlock_t l2tp_tunnel_list_lock; - struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; - spinlock_t l2tp_session_hlist_lock; + /* Lock for write access to l2tp_tunnel_idr */ + spinlock_t l2tp_tunnel_idr_lock; + struct idr l2tp_tunnel_idr; + /* Lock for write access to l2tp_v[23]_session_idr/htable */ + spinlock_t l2tp_session_idr_lock; + struct idr l2tp_v2_session_idr; + struct idr l2tp_v3_session_idr; + struct hlist_head l2tp_v3_session_htable[16]; }; -static void l2tp_session_set_header_len(struct l2tp_session *session, int version); -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id) +{ + return ((u32)tunnel_id) << 16 | session_id; +} + +static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id) +{ + return ((unsigned long)sk) + session_id; +} -static inline struct l2tp_net *l2tp_pernet(struct net *net) +#if IS_ENABLED(CONFIG_IPV6) +static bool l2tp_sk_is_v6(struct sock *sk) { - BUG_ON(!net); + return sk->sk_family == PF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr); +} +#endif +static struct l2tp_net *l2tp_pernet(const struct net *net) +{ return net_generic(net, l2tp_net_id); } -/* Tunnel reference counts. Incremented per session that is added to - * the tunnel. - */ -static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel) +static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - atomic_inc(&tunnel->ref_count); + struct sock *sk = tunnel->sock; + + trace_free_tunnel(tunnel); + + if (sk) { + /* Disable udp encapsulation */ + switch (tunnel->encap) { + case L2TP_ENCAPTYPE_UDP: + /* No longer an encapsulation socket. See net/ipv4/udp.c */ + WRITE_ONCE(udp_sk(sk)->encap_type, 0); + udp_sk(sk)->encap_rcv = NULL; + udp_sk(sk)->encap_destroy = NULL; + break; + case L2TP_ENCAPTYPE_IP: + break; + } + + tunnel->sock = NULL; + sock_put(sk); + } + + kfree_rcu(tunnel, rcu); } -static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) +static void l2tp_session_free(struct l2tp_session *session) { - if (atomic_dec_and_test(&tunnel->ref_count)) - l2tp_tunnel_free(tunnel); + trace_free_session(session); + if (session->tunnel) + l2tp_tunnel_put(session->tunnel); + kfree_rcu(session, rcu); } -#ifdef L2TP_REFCNT_DEBUG -#define l2tp_tunnel_inc_refcount(_t) \ -do { \ - pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - atomic_read(&_t->ref_count)); \ - l2tp_tunnel_inc_refcount_1(_t); \ -} while (0) -#define l2tp_tunnel_dec_refcount(_t) -do { \ - pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - atomic_read(&_t->ref_count)); \ - l2tp_tunnel_dec_refcount_1(_t); \ -} while (0) -#else -#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t) -#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t) -#endif -/* Session hash global list for L2TPv3. - * The session_id SHOULD be random according to RFC3931, but several - * L2TP implementations use incrementing session_ids. So we do a real - * hash on the session_id, rather than a simple bitmask. - */ -static inline struct hlist_head * -l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) +struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk) { - return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)]; + const struct net *net = sock_net(sk); + unsigned long tunnel_id, tmp; + struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; + rcu_read_lock_bh(); + pn = l2tp_pernet(net); + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel && + tunnel->sock == sk && + refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; + } + } + rcu_read_unlock_bh(); + + return NULL; } +EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel); -/* Lookup the tunnel socket, possibly involving the fs code if the socket is - * owned by userspace. A struct sock returned from this function must be - * released using l2tp_tunnel_sock_put once you're done with it. - */ -struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_put(struct l2tp_tunnel *tunnel) { - int err = 0; - struct socket *sock = NULL; - struct sock *sk = NULL; + if (refcount_dec_and_test(&tunnel->ref_count)) + l2tp_tunnel_free(tunnel); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_put); - if (!tunnel) - goto out; +void l2tp_session_put(struct l2tp_session *session) +{ + if (refcount_dec_and_test(&session->ref_count)) + l2tp_session_free(session); +} +EXPORT_SYMBOL_GPL(l2tp_session_put); - if (tunnel->fd >= 0) { - /* Socket is owned by userspace, who might be in the process - * of closing it. Look the socket up using the fd to ensure - * consistency. - */ - sock = sockfd_lookup(tunnel->fd, &err); - if (sock) - sk = sock->sk; - } else { - /* Socket is owned by kernelspace */ - sk = tunnel->sock; - sock_hold(sk); +/* Lookup a tunnel. A new reference is held on the returned tunnel. */ +struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) +{ + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel; + + rcu_read_lock_bh(); + tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id); + if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; } + rcu_read_unlock_bh(); -out: - return sk; + return NULL; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); +EXPORT_SYMBOL_GPL(l2tp_tunnel_get); -/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ -void l2tp_tunnel_sock_put(struct sock *sk) +struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; + + rcu_read_lock_bh(); +again: + tunnel = idr_get_next_ul(&pn->l2tp_tunnel_idr, key); if (tunnel) { - if (tunnel->fd >= 0) { - /* Socket is owned by userspace */ - sockfd_put(sk->sk_socket); + if (refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; } - sock_put(sk); + (*key)++; + goto again; } - sock_put(sk); + rcu_read_unlock_bh(); + + return NULL; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); +EXPORT_SYMBOL_GPL(l2tp_tunnel_get_next); -/* Lookup a session by id in the global session list - */ -static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) +struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id) { - struct l2tp_net *pn = l2tp_pernet(net); - struct hlist_head *session_list = - l2tp_session_id_hash_2(pn, session_id); + const struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, global_hlist) { - if (session->session_id == session_id) { - rcu_read_unlock_bh(); - return session; + session = idr_find(&pn->l2tp_v3_session_idr, session_id); + if (session && !hash_hashed(&session->hlist) && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } + + /* If we get here and session is non-NULL, the session_id + * collides with one in another tunnel. If sk is non-NULL, + * find the session matching sk. + */ + if (session && sk) { + unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id); + + hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, + hlist, key) { + /* session->tunnel may be NULL if another thread is in + * l2tp_session_register and has added an item to + * l2tp_v3_session_htable but hasn't yet added the + * session to its tunnel's session_list. + */ + struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); + + if (session->session_id == session_id && + tunnel && tunnel->sock == sk && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } } } rcu_read_unlock_bh(); return NULL; } +EXPORT_SYMBOL_GPL(l2tp_v3_session_get); -/* Session hash list. - * The session_id SHOULD be random according to RFC2661, but several - * L2TP implementations (Cisco and Microsoft) use incrementing - * session_ids. So we do a real hash on the session_id, rather than a - * simple bitmask. - */ -static inline struct hlist_head * -l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) +struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id) { - return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; + u32 session_key = l2tp_v2_session_key(tunnel_id, session_id); + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_session *session; + + rcu_read_lock_bh(); + session = idr_find(&pn->l2tp_v2_session_idr, session_key); + if (session && refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } + rcu_read_unlock_bh(); + + return NULL; } +EXPORT_SYMBOL_GPL(l2tp_v2_session_get); -/* Lookup a session by id - */ -struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id) +struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver, + u32 tunnel_id, u32 session_id) { - struct hlist_head *session_list; - struct l2tp_session *session; + if (pver == L2TP_HDR_VER_2) + return l2tp_v2_session_get(net, tunnel_id, session_id); + else + return l2tp_v3_session_get(net, sk, session_id); +} +EXPORT_SYMBOL_GPL(l2tp_session_get); - /* In L2TPv3, session_ids are unique over all tunnels and we - * sometimes need to look them up before we know the - * tunnel. - */ - if (tunnel == NULL) - return l2tp_session_find_2(net, session_id); - - session_list = l2tp_session_id_hash(tunnel, session_id); - read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, session_list, hlist) { - if (session->session_id == session_id) { - read_unlock_bh(&tunnel->hlist_lock); +static struct l2tp_session *l2tp_v2_session_get_next(const struct net *net, + u16 tid, + unsigned long *key) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_session *session = NULL; + + /* Start searching within the range of the tid */ + if (*key == 0) + *key = l2tp_v2_session_key(tid, 0); + + rcu_read_lock_bh(); +again: + session = idr_get_next_ul(&pn->l2tp_v2_session_idr, key); + if (session) { + struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); + + /* ignore sessions with id 0 as they are internal for pppol2tp */ + if (session->session_id == 0) { + (*key)++; + goto again; + } + + if (tunnel->tunnel_id == tid && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); return session; } + + (*key)++; + if (tunnel->tunnel_id == tid) + goto again; } - read_unlock_bh(&tunnel->hlist_lock); + rcu_read_unlock_bh(); return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +static struct l2tp_session *l2tp_v3_session_get_next(const struct net *net, + u32 tid, struct sock *sk, + unsigned long *key) { - int hash; - struct l2tp_session *session; - int count = 0; + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_session *session = NULL; - read_lock_bh(&tunnel->hlist_lock); - for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { - if (++count > nth) { - read_unlock_bh(&tunnel->hlist_lock); + rcu_read_lock_bh(); +again: + session = idr_get_next_ul(&pn->l2tp_v3_session_idr, key); + if (session && !hash_hashed(&session->hlist)) { + struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); + + if (tunnel && tunnel->tunnel_id == tid && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } + + (*key)++; + goto again; + } + + /* If we get here and session is non-NULL, the IDR entry may be one + * where the session_id collides with one in another tunnel. Check + * session_htable for a match. There can only be one session of a given + * ID per tunnel so we can return as soon as a match is found. + */ + if (session && hash_hashed(&session->hlist)) { + unsigned long hkey = l2tp_v3_session_hashkey(sk, session->session_id); + u32 sid = session->session_id; + + hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, + hlist, hkey) { + struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); + + if (session->session_id == sid && + tunnel && tunnel->tunnel_id == tid && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); return session; } } + + /* If no match found, the colliding session ID isn't in our + * tunnel so try the next session ID. + */ + (*key)++; + goto again; } - read_unlock_bh(&tunnel->hlist_lock); + rcu_read_unlock_bh(); return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); + +struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver, + u32 tunnel_id, unsigned long *key) +{ + if (pver == L2TP_HDR_VER_2) + return l2tp_v2_session_get_next(net, tunnel_id, key); + else + return l2tp_v3_session_get_next(net, tunnel_id, sk, key); +} +EXPORT_SYMBOL_GPL(l2tp_session_get_next); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, + const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); - int hash; + unsigned long tunnel_id, tmp; struct l2tp_session *session; + struct l2tp_tunnel *tunnel; rcu_read_lock_bh(); - for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { - if (!strcmp(session->ifname, ifname)) { - rcu_read_unlock_bh(); - return session; + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel) { + list_for_each_entry_rcu(session, &tunnel->session_list, list) { + if (!strcmp(session->ifname, ifname)) { + refcount_inc(&session->ref_count); + rcu_read_unlock_bh(); + + return session; + } } } } - rcu_read_unlock_bh(); return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); -/* Lookup a tunnel by id - */ -struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) +static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist, + struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; - struct l2tp_net *pn = l2tp_pernet(net); + refcount_inc(&session->ref_count); + WARN_ON_ONCE(session->coll_list); + session->coll_list = clist; + spin_lock(&clist->lock); + list_add(&session->clist, &clist->list); + spin_unlock(&clist->lock); +} - rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - rcu_read_unlock_bh(); - return tunnel; - } +static int l2tp_session_collision_add(struct l2tp_net *pn, + struct l2tp_session *session1, + struct l2tp_session *session2) +{ + struct l2tp_session_coll_list *clist; + + lockdep_assert_held(&pn->l2tp_session_idr_lock); + + if (!session2) + return -EEXIST; + + /* If existing session is in IP-encap tunnel, refuse new session */ + if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP) + return -EEXIST; + + clist = session2->coll_list; + if (!clist) { + /* First collision. Allocate list to manage the collided sessions + * and add the existing session to the list. + */ + clist = kmalloc(sizeof(*clist), GFP_ATOMIC); + if (!clist) + return -ENOMEM; + + spin_lock_init(&clist->lock); + INIT_LIST_HEAD(&clist->list); + refcount_set(&clist->ref_count, 1); + l2tp_session_coll_list_add(clist, session2); } - rcu_read_unlock_bh(); - return NULL; + /* If existing session isn't already in the session hlist, add it. */ + if (!hash_hashed(&session2->hlist)) + hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist, + session2->hlist_key); + + /* Add new session to the hlist and collision list */ + hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist, + session1->hlist_key); + refcount_inc(&clist->ref_count); + l2tp_session_coll_list_add(clist, session1); + + return 0; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_find); -struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) +static void l2tp_session_collision_del(struct l2tp_net *pn, + struct l2tp_session *session) { - struct l2tp_net *pn = l2tp_pernet(net); - struct l2tp_tunnel *tunnel; - int count = 0; + struct l2tp_session_coll_list *clist = session->coll_list; + unsigned long session_key = session->session_id; + struct l2tp_session *session2; - rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - rcu_read_unlock_bh(); - return tunnel; + lockdep_assert_held(&pn->l2tp_session_idr_lock); + + hash_del_rcu(&session->hlist); + + if (clist) { + /* Remove session from its collision list. If there + * are other sessions with the same ID, replace this + * session's IDR entry with that session, otherwise + * remove the IDR entry. If this is the last session, + * the collision list data is freed. + */ + spin_lock(&clist->lock); + list_del_init(&session->clist); + session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist); + if (session2) { + void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key); + + WARN_ON_ONCE(IS_ERR_VALUE(old)); + } else { + void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key); + + WARN_ON_ONCE(removed != session); } + session->coll_list = NULL; + spin_unlock(&clist->lock); + if (refcount_dec_and_test(&clist->ref_count)) + kfree(clist); + l2tp_session_put(session); } +} - rcu_read_unlock_bh(); +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel) +{ + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + struct l2tp_session *other_session = NULL; + void *old = NULL; + u32 session_key; + int err; - return NULL; + spin_lock_bh(&tunnel->list_lock); + spin_lock_bh(&pn->l2tp_session_idr_lock); + + if (!tunnel->acpt_newsess) { + err = -ENODEV; + goto out; + } + + if (tunnel->version == L2TP_HDR_VER_3) { + session_key = session->session_id; + err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL, + &session_key, session_key, GFP_ATOMIC); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. This isn't per the RFC, which says that + * sessions are identified only by the session ID, but is to + * support existing userspace which depends on it. + */ + if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) { + other_session = idr_find(&pn->l2tp_v3_session_idr, + session_key); + err = l2tp_session_collision_add(pn, session, + other_session); + } + } else { + session_key = l2tp_v2_session_key(tunnel->tunnel_id, + session->session_id); + err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL, + &session_key, session_key, GFP_ATOMIC); + } + + if (err) { + if (err == -ENOSPC) + err = -EEXIST; + goto out; + } + + refcount_inc(&tunnel->ref_count); + WRITE_ONCE(session->tunnel, tunnel); + list_add_rcu(&session->list, &tunnel->session_list); + + /* this makes session available to lockless getters */ + if (tunnel->version == L2TP_HDR_VER_3) { + if (!other_session) + old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key); + } else { + old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key); + } + + /* old should be NULL, unless something removed or modified + * the IDR entry after our idr_alloc_32 above (which shouldn't + * happen). + */ + WARN_ON_ONCE(old); +out: + spin_unlock_bh(&pn->l2tp_session_idr_lock); + spin_unlock_bh(&tunnel->list_lock); + + if (!err) + trace_register_session(session); + + return err; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_register); /***************************************************************************** * Receive data handling @@ -379,10 +639,6 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", - session->name, ns, L2TP_SKB_CB(skbp)->ns, - skb_queue_len(&session->reorder_q)); atomic_long_inc(&session->stats.rx_oos_packets); goto out; } @@ -415,19 +671,14 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * /* Bump our Nr */ session->nr++; session->nr &= session->nr_max; - - l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n", - session->name, session->nr); + trace_session_seqnum_update(session); } /* call private receive handler */ - if (session->recv_skb != NULL) + if (session->recv_skb) (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); else kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); } /* Dequeue skbs from the session's reorder_q, subject to packet order. @@ -445,39 +696,27 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) start: spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skb, tmp) { - if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { + struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb); + + /* If the packet has been pending on the queue for too long, discard it */ + if (time_after(jiffies, cb->expires)) { atomic_long_inc(&session->stats.rx_seq_discards); atomic_long_inc(&session->stats.rx_errors); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); + trace_session_pkt_expired(session, cb->ns); session->reorder_skip = 1; __skb_unlink(skb, &session->reorder_q); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); continue; } - if (L2TP_SKB_CB(skb)->has_seq) { + if (cb->has_seq) { if (session->reorder_skip) { - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: advancing nr to next pkt: %u -> %u", - session->name, session->nr, - L2TP_SKB_CB(skb)->ns); session->reorder_skip = 0; - session->nr = L2TP_SKB_CB(skb)->ns; + session->nr = cb->ns; + trace_session_seqnum_reset(session); } - if (L2TP_SKB_CB(skb)->ns != session->nr) { - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: holding oos pkt %u len %d, waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); + if (cb->ns != session->nr) goto out; - } } __skb_unlink(skb, &session->reorder_q); @@ -493,52 +732,6 @@ out: spin_unlock_bh(&session->reorder_q.lock); } -static inline int l2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - __wsum psum; - - if (sk->sk_no_check || skb_csum_unnecessary(skb)) - return 0; - -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { - if (!uh->check) { - LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); - return 1; - } - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, ulen, - IPPROTO_UDP, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, IPPROTO_UDP, - 0)); - } else -#endif - { - struct inet_sock *inet; - if (!uh->check) - return 0; - inet = inet_sk(sk); - psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, - ulen, IPPROTO_UDP, 0); - - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - skb->csum = psum; - } - - return __skb_checksum_complete(skb); -} - static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) { u32 nws; @@ -556,14 +749,13 @@ static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) */ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) { - if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) { + struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb); + + if (!l2tp_seq_check_rx_window(session, cb->ns)) { /* Packet sequence number is outside allowed window. * Discard it. */ - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: pkt %u len %d discarded, outside window, nr=%u\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr); + trace_session_pkt_outside_rx_window(session, cb->ns); goto discard; } @@ -580,10 +772,10 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) * is seen. After nr_oos_count_max in-sequence packets, reset the * sequence number to re-enable packet reception. */ - if (L2TP_SKB_CB(skb)->ns == session->nr) { + if (cb->ns == session->nr) { skb_queue_tail(&session->reorder_q, skb); } else { - u32 nr_oos = L2TP_SKB_CB(skb)->ns; + u32 nr_oos = cb->ns; u32 nr_next = (session->nr_oos + 1) & session->nr_max; if (nr_oos == nr_next) @@ -594,17 +786,10 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) session->nr_oos = nr_oos; if (session->nr_oos_count > session->nr_oos_count_max) { session->reorder_skip = 1; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: %d oos packets received. Resetting sequence numbers\n", - session->name, session->nr_oos_count); } if (!session->reorder_skip) { atomic_long_inc(&session->stats.rx_seq_discards); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); + trace_session_pkt_oos(session, cb->ns); goto discard; } skb_queue_tail(&session->reorder_q, skb); @@ -667,10 +852,9 @@ discard: * |x|S|x|x|x|x|x|x| Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * - * Cookie value, sublayer format and offset (pad) are negotiated with - * the peer when the session is set up. Unlike L2TPv2, we do not need - * to parse the packet header to determine if optional fields are - * present. + * Cookie value and sublayer format are negotiated with the peer when + * the session is set up. Unlike L2TPv2, we do not need to parse the + * packet header to determine if optional fields are present. * * Caller must already have parsed the frame and determined that it is * a data (not control) frame before coming here. Fields up to the @@ -679,27 +863,17 @@ discard: */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, - int length, int (*payload_hook)(struct sk_buff *skb)) + int length) { struct l2tp_tunnel *tunnel = session->tunnel; int offset; - u32 ns, nr; - - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { - l2tp_info(tunnel, L2TP_MSG_DATA, - "%s: cookie mismatch (%u/%u). Discarding.\n", - tunnel->name, tunnel->tunnel_id, - session->session_id); + pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", + tunnel->name, tunnel->tunnel_id, + session->session_id); atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } @@ -713,62 +887,46 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * the control of the LNS. If no sequence numbers present but * we were expecting them, discard frame. */ - ns = nr = 0; L2TP_SKB_CB(skb)->has_seq = 0; if (tunnel->version == L2TP_HDR_VER_2) { if (hdrflags & L2TP_HDRFLAG_S) { - ns = ntohs(*(__be16 *) ptr); - ptr += 2; - nr = ntohs(*(__be16 *) ptr); - ptr += 2; - /* Store L2TP info in the skb */ - L2TP_SKB_CB(skb)->ns = ns; + L2TP_SKB_CB(skb)->ns = ntohs(*(__be16 *)ptr); L2TP_SKB_CB(skb)->has_seq = 1; + ptr += 2; + /* Skip past nr in the header */ + ptr += 2; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: recv data ns=%u, nr=%u, session nr=%u\n", - session->name, ns, nr, session->nr); } } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = ntohl(*(__be32 *) ptr); + u32 l2h = ntohl(*(__be32 *)ptr); if (l2h & 0x40000000) { - ns = l2h & 0x00ffffff; - /* Store L2TP info in the skb */ - L2TP_SKB_CB(skb)->ns = ns; + L2TP_SKB_CB(skb)->ns = l2h & 0x00ffffff; L2TP_SKB_CB(skb)->has_seq = 1; - - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: recv data ns=%u, session nr=%u\n", - session->name, ns, session->nr); } + ptr += 4; } - /* Advance past L2-specific header, if present */ - ptr += session->l2specific_len; - if (L2TP_SKB_CB(skb)->has_seq) { - /* Received a packet with sequence numbers. If we're the LNS, + /* Received a packet with sequence numbers. If we're the LAC, * check if we sre sending sequence numbers and if not, * configure it so. */ - if ((!session->lns_mode) && (!session->send_seq)) { - l2tp_info(session, L2TP_MSG_SEQ, - "%s: requested to enable seq numbers by LNS\n", - session->name); - session->send_seq = -1; - l2tp_session_set_header_len(session, tunnel->version); + if (!session->lns_mode && !session->send_seq) { + trace_session_seqnum_lns_enable(session); + session->send_seq = 1; + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } } else { /* No sequence numbers. * If user has configured mandatory sequence numbers, discard. */ if (session->recv_seq) { - l2tp_warn(session, L2TP_MSG_SEQ, - "%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -778,25 +936,21 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * If we're the LNS and we're sending sequence numbers, the * LAC is broken. Discard the frame. */ - if ((!session->lns_mode) && (session->send_seq)) { - l2tp_info(session, L2TP_MSG_SEQ, - "%s: requested to disable seq numbers by LNS\n", - session->name); + if (!session->lns_mode && session->send_seq) { + trace_session_seqnum_lns_disable(session); session->send_seq = 0; - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } else if (session->send_seq) { - l2tp_warn(session, L2TP_MSG_SEQ, - "%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } - /* Session data offset is handled differently for L2TPv2 and - * L2TPv3. For L2TPv2, there is an optional 16-bit value in - * the header. For L2TPv3, the offset is negotiated using AVPs - * in the session setup control protocol. + /* Session data offset is defined only for L2TPv2 and is + * indicated by an optional 16-bit value in the header. */ if (tunnel->version == L2TP_HDR_VER_2) { /* If offset bit set, skip it. */ @@ -804,8 +958,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, offset = ntohs(*(__be16 *)ptr); ptr += 2 + offset; } - } else - ptr += session->offset; + } offset = ptr - optr; if (!pskb_may_pull(skb, offset)) @@ -813,13 +966,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, __skb_pull(skb, offset); - /* If caller wants to process the payload before we queue the - * packet, do so now. - */ - if (payload_hook) - if ((*payload_hook)(skb)) - goto discard; - /* Prepare skb for adding to the session's reorder_q. Hold * packets for max reorder_timeout or 1 second if not * reordering. @@ -845,182 +991,143 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); - - l2tp_session_dec_refcount(session); } -EXPORT_SYMBOL(l2tp_recv_common); +EXPORT_SYMBOL_GPL(l2tp_recv_common); /* Drop skbs from the session's reorder_q */ -int l2tp_session_queue_purge(struct l2tp_session *session) +static void l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; - BUG_ON(!session); - BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); } - return 0; } -EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); -/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame - * here. The skb is not on a list when we get here. - * Returns 0 if the packet was a data packet and was successfully passed on. - * Returns 1 if the packet was not a good data packet and could not be - * forwarded. All such packets are passed up to userspace to deal with. - */ -static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, - int (*payload_hook)(struct sk_buff *skb)) +/* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */ +int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_session *session = NULL; + struct l2tp_tunnel *tunnel = NULL; + struct net *net = sock_net(sk); unsigned char *ptr, *optr; u16 hdrflags; - u32 tunnel_id, session_id; u16 version; int length; - if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) - goto discard_bad_csum; + /* UDP has verified checksum */ /* UDP always verifies the packet length. */ __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { - l2tp_info(tunnel, L2TP_MSG_DATA, - "%s: recv short packet (len=%d)\n", - tunnel->name, skb->len); - goto error; - } - - /* Trace packet contents, if enabled */ - if (tunnel->debug & L2TP_MSG_DATA) { - length = min(32u, skb->len); - if (!pskb_may_pull(skb, length)) - goto error; - - pr_debug("%s: recv\n", tunnel->name); - print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length); - } + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) + goto pass; /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; /* Get L2TP header flags */ - hdrflags = ntohs(*(__be16 *) ptr); + hdrflags = ntohs(*(__be16 *)ptr); - /* Check protocol version */ + /* Get protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; - if (version != tunnel->version) { - l2tp_info(tunnel, L2TP_MSG_DATA, - "%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto error; - } /* Get length of L2TP packet */ length = skb->len; /* If type is control packet, it is handled by userspace. */ - if (hdrflags & L2TP_HDRFLAG_T) { - l2tp_dbg(tunnel, L2TP_MSG_DATA, - "%s: recv control packet, len=%d\n", - tunnel->name, length); - goto error; - } + if (hdrflags & L2TP_HDRFLAG_T) + goto pass; /* Skip flags */ ptr += 2; - if (tunnel->version == L2TP_HDR_VER_2) { + if (version == L2TP_HDR_VER_2) { + u16 tunnel_id, session_id; + /* If length is present, skip it */ if (hdrflags & L2TP_HDRFLAG_L) ptr += 2; /* Extract tunnel and session ID */ - tunnel_id = ntohs(*(__be16 *) ptr); + tunnel_id = ntohs(*(__be16 *)ptr); ptr += 2; - session_id = ntohs(*(__be16 *) ptr); + session_id = ntohs(*(__be16 *)ptr); ptr += 2; + + session = l2tp_v2_session_get(net, tunnel_id, session_id); } else { + u32 session_id; + ptr += 2; /* skip reserved bits */ - tunnel_id = tunnel->tunnel_id; - session_id = ntohl(*(__be32 *) ptr); + session_id = ntohl(*(__be32 *)ptr); ptr += 4; + + session = l2tp_v3_session_get(net, sk, session_id); } - /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); if (!session || !session->recv_skb) { + if (session) + l2tp_session_put(session); + /* Not found? Pass to userspace to deal with */ - l2tp_info(tunnel, L2TP_MSG_DATA, - "%s: no session found (%u/%u). Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; + goto pass; } - l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + tunnel = session->tunnel; - return 0; + /* Check protocol version */ + if (version != tunnel->version) + goto invalid; -discard_bad_csum: - LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); - UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - atomic_long_inc(&tunnel->stats.rx_errors); - kfree_skb(skb); + if (version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { + l2tp_session_put(session); + goto invalid; + } + + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); + l2tp_session_put(session); return 0; -error: +invalid: + atomic_long_inc(&tunnel->stats.rx_invalid); + +pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); return 1; } +EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); -/* UDP encapsulation receive handler. See net/ipv4/udp.c. - * Return codes: - * 0 : success. - * <0: error - * >0: skb should be passed up to userspace as UDP. - */ -int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +/* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */ +static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) { - struct l2tp_tunnel *tunnel; - - tunnel = l2tp_sock_to_tunnel(sk); - if (tunnel == NULL) - goto pass_up; - - l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n", - tunnel->name, skb->len); + sk->sk_err = err; + sk_error_report(sk); - if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook)) - goto pass_up_put; - - sock_put(sk); - return 0; - -pass_up_put: - sock_put(sk); -pass_up: - return 1; + if (ip_hdr(skb)->version == IPVERSION) { + if (inet_test_bit(RECVERR, sk)) + return ip_icmp_error(sk, skb, err, port, info, payload); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (inet6_test_bit(RECVERR6, sk)) + return ipv6_icmp_error(sk, skb, err, port, info, payload); +#endif + } } -EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); /************************************************************************ * Transmit handling @@ -1049,8 +1156,7 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf) *bufp++ = 0; session->ns++; session->ns &= 0xffff; - l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated ns to %u\n", - session->name, session->ns); + trace_session_seqnum_update(session); } return bufp - optr; @@ -1067,180 +1173,112 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) */ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { u16 flags = L2TP_HDR_VER_3; - *((__be16 *) bufp) = htons(flags); + *((__be16 *)bufp) = htons(flags); bufp += 2; - *((__be16 *) bufp) = 0; + *((__be16 *)bufp) = 0; bufp += 2; } - *((__be32 *) bufp) = htonl(session->peer_session_id); + *((__be32 *)bufp) = htonl(session->peer_session_id); bufp += 4; if (session->cookie_len) { memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } - if (session->l2specific_len) { - if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = 0; - if (session->send_seq) { - l2h = 0x40000000 | session->ns; - session->ns++; - session->ns &= 0xffffff; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: updated ns to %u\n", - session->name, session->ns); - } - - *((__be32 *) bufp) = htonl(l2h); + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; + + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + trace_session_seqnum_update(session); } - bufp += session->l2specific_len; + + *((__be32 *)bufp) = htonl(l2h); + bufp += 4; } - if (session->offset) - bufp += session->offset; return bufp - optr; } -static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, - struct flowi *fl, size_t data_len) +/* Queue the packet to IP for output: tunnel socket lock must be held */ +static int l2tp_xmit_queue(struct l2tp_tunnel *tunnel, struct sk_buff *skb, struct flowi *fl) { - struct l2tp_tunnel *tunnel = session->tunnel; - unsigned int len = skb->len; - int error; - - /* Debug */ - if (session->send_seq) - l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes, ns=%u\n", - session->name, data_len, session->ns - 1); - else - l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes\n", - session->name, data_len); - - if (session->debug & L2TP_MSG_DATA) { - int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; - unsigned char *datap = skb->data + uhlen; - - pr_debug("%s: xmit\n", session->name); - print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, - datap, min_t(size_t, 32, len - uhlen)); - } + int err; - /* Queue the packet to IP for output */ - skb->local_df = 1; + skb->ignore_df = 1; + skb_dst_drop(skb); #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) - error = inet6_csk_xmit(skb, NULL); + if (l2tp_sk_is_v6(tunnel->sock)) + err = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif - error = ip_queue_xmit(skb, fl); + err = ip_queue_xmit(tunnel->sock, skb, fl); - /* Update stats */ - if (error >= 0) { - atomic_long_inc(&tunnel->stats.tx_packets); - atomic_long_add(len, &tunnel->stats.tx_bytes); - atomic_long_inc(&session->stats.tx_packets); - atomic_long_add(len, &session->stats.tx_bytes); - } else { - atomic_long_inc(&tunnel->stats.tx_errors); - atomic_long_inc(&session->stats.tx_errors); - } - - return 0; -} - -/* Automatically called when the skb is freed. - */ -static void l2tp_sock_wfree(struct sk_buff *skb) -{ - sock_put(skb->sk); + return err >= 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } -/* For data skbs that we transmit, we associate with the tunnel socket - * but don't do accounting. - */ -static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, unsigned int *len) { - sock_hold(sk); - skb->sk = sk; - skb->destructor = l2tp_sock_wfree; -} - -#if IS_ENABLED(CONFIG_IPV6) -static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, - int udp_len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct udphdr *uh = udp_hdr(skb); - - if (!skb_dst(skb) || !skb_dst(skb)->dev || - !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - __wsum csum = skb_checksum(skb, 0, udp_len, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len, - IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, - udp_len, IPPROTO_UDP, 0); - } -} -#endif - -/* If caller requires the skb to have a ppp header, the header must be - * inserted in the skb data before calling this function. - */ -int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len) -{ - int data_len = skb->len; struct l2tp_tunnel *tunnel = session->tunnel; + unsigned int data_len = skb->len; struct sock *sk = tunnel->sock; - struct flowi *fl; - struct udphdr *uh; - struct inet_sock *inet; - __wsum csum; - int headroom; - int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; - int udp_len; + int headroom, uhlen, udp_len; int ret = NET_XMIT_SUCCESS; + struct inet_sock *inet; + struct udphdr *uh; /* Check that there's enough headroom in the skb to insert IP, * UDP and L2TP headers. If not enough, expand it to * make room. Adjust truesize. */ - headroom = NET_SKB_PAD + sizeof(struct iphdr) + - uhlen + hdr_len; + uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(*uh) : 0; + headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len; if (skb_cow_head(skb, headroom)) { kfree_skb(skb); return NET_XMIT_DROP; } - skb_orphan(skb); /* Setup L2TP header */ - session->build_header(session, __skb_push(skb, hdr_len)); + if (tunnel->version == L2TP_HDR_VER_2) + l2tp_build_l2tpv2_header(session, __skb_push(skb, session->hdr_len)); + else + l2tp_build_l2tpv3_header(session, __skb_push(skb, session->hdr_len)); + + /* Reset control buffer */ + memset(skb->cb, 0, sizeof(skb->cb)); - /* Reset skb netfilter state */ - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - nf_reset(skb); + nf_reset_ct(skb); + + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); - bh_lock_sock(sk); if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; goto out_unlock; } - /* Get routing info from the tunnel socket */ - skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); + /* The user-space may change the connection status for the user-space + * provided socket at run time: we must check it under the socket lock + */ + if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) { + kfree_skb(skb); + ret = NET_XMIT_DROP; + goto out_unlock; + } + + /* Report transmitted length before we add encap header, which keeps + * statistics consistent for both UDP and IP encap tx/rx paths. + */ + *len = skb->len; inet = inet_sk(sk); - fl = &inet->cork.fl; switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* Setup UDP header */ @@ -1249,47 +1287,51 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len uh = udp_hdr(skb); uh->source = inet->inet_sport; uh->dest = inet->inet_dport; - udp_len = uhlen + hdr_len + data_len; + udp_len = uhlen + session->hdr_len + data_len; uh->len = htons(udp_len); - uh->check = 0; /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) - l2tp_xmit_ipv6_csum(sk, skb, udp_len); + if (l2tp_sk_is_v6(sk)) + udp6_set_csum(udp_get_no_check6_tx(sk), + skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, udp_len); else #endif - if (sk->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } + udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, + inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: break; } - l2tp_skb_set_owner_w(skb, sk); + ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); - l2tp_xmit_core(session, skb, fl, data_len); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); + + return ret; +} + +/* If caller requires the skb to have a ppp header, the header must be + * inserted in the skb data before calling this function. + */ +int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb) +{ + unsigned int len = 0; + int ret; + ret = l2tp_xmit_core(session, skb, &len); + if (ret == NET_XMIT_SUCCESS) { + atomic_long_inc(&session->tunnel->stats.tx_packets); + atomic_long_add(len, &session->tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); + } else { + atomic_long_inc(&session->tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); + } return ret; } EXPORT_SYMBOL_GPL(l2tp_xmit_skb); @@ -1298,163 +1340,101 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); * Tinnel and session create/destroy. *****************************************************************************/ -/* Tunnel socket destruct hook. - * The tunnel context is deleted only when all session sockets have been - * closed. - */ -static void l2tp_tunnel_destruct(struct sock *sk) +/* Remove an l2tp session from l2tp_core's lists. */ +static void l2tp_session_unhash(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; - struct l2tp_net *pn; - - tunnel = sk->sk_user_data; - if (tunnel == NULL) - goto end; - - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); + struct l2tp_tunnel *tunnel = session->tunnel; + if (tunnel) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + struct l2tp_session *removed = session; + + spin_lock_bh(&tunnel->list_lock); + spin_lock_bh(&pn->l2tp_session_idr_lock); + + /* Remove from the per-tunnel list */ + list_del_init(&session->list); + + /* Remove from per-net IDR */ + if (tunnel->version == L2TP_HDR_VER_3) { + if (hash_hashed(&session->hlist)) + l2tp_session_collision_del(pn, session); + else + removed = idr_remove(&pn->l2tp_v3_session_idr, + session->session_id); + } else { + u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id, + session->session_id); + removed = idr_remove(&pn->l2tp_v2_session_idr, + session_key); + } + WARN_ON_ONCE(removed && removed != session); - /* Disable udp encapsulation */ - switch (tunnel->encap) { - case L2TP_ENCAPTYPE_UDP: - /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; - (udp_sk(sk))->encap_destroy = NULL; - break; - case L2TP_ENCAPTYPE_IP: - break; + spin_unlock_bh(&pn->l2tp_session_idr_lock); + spin_unlock_bh(&tunnel->list_lock); } - - /* Remove hooks into tunnel socket */ - sk->sk_destruct = tunnel->old_sk_destruct; - sk->sk_user_data = NULL; - tunnel->sock = NULL; - - /* Remove the tunnel struct from the tunnel list */ - pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - atomic_dec(&l2tp_tunnel_count); - - l2tp_tunnel_closeall(tunnel); - l2tp_tunnel_dec_refcount(tunnel); - - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); -end: - return; } /* When the tunnel is closed, all the attached sessions need to go too. */ -void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { - int hash; - struct hlist_node *walk; - struct hlist_node *tmp; struct l2tp_session *session; - BUG_ON(tunnel == NULL); - - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n", - tunnel->name); - - write_lock_bh(&tunnel->hlist_lock); - for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { -again: - hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { - session = hlist_entry(walk, struct l2tp_session, hlist); - - l2tp_info(session, L2TP_MSG_CONTROL, - "%s: closing session\n", session->name); - - hlist_del_init(&session->hlist); - - if (session->ref != NULL) - (*session->ref)(session); - - write_unlock_bh(&tunnel->hlist_lock); - - __l2tp_session_unhash(session); - l2tp_session_queue_purge(session); - - if (session->session_close != NULL) - (*session->session_close)(session); - - if (session->deref != NULL) - (*session->deref)(session); - - l2tp_session_dec_refcount(session); - - write_lock_bh(&tunnel->hlist_lock); - - /* Now restart from the beginning of this hash - * chain. We always remove a session from the - * list so we are guaranteed to make forward - * progress. - */ - goto again; - } - } - write_unlock_bh(&tunnel->hlist_lock); + spin_lock_bh(&tunnel->list_lock); + tunnel->acpt_newsess = false; + list_for_each_entry(session, &tunnel->session_list, list) + l2tp_session_delete(session); + spin_unlock_bh(&tunnel->list_lock); } -EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_tunnel *tunnel; + + tunnel = l2tp_sk_to_tunnel(sk); if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); + l2tp_tunnel_delete(tunnel); + l2tp_tunnel_put(tunnel); } } -/* Really kill the tunnel. - * Come here only when all sessions have been cleared from the tunnel. - */ -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) +static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel) { - BUG_ON(atomic_read(&tunnel->ref_count) != 0); - BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - kfree_rcu(tunnel, rcu); + struct l2tp_net *pn = l2tp_pernet(net); + + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); } /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { - struct l2tp_tunnel *tunnel = NULL; - struct socket *sock = NULL; - struct sock *sk = NULL; - - tunnel = container_of(work, struct l2tp_tunnel, del_work); - sk = l2tp_tunnel_sock_lookup(tunnel); - if (!sk) - return; + struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel, + del_work); + struct sock *sk = tunnel->sock; + struct socket *sock = sk->sk_socket; - sock = sk->sk_socket; + l2tp_tunnel_closeall(tunnel); - /* If the tunnel socket was created by userspace, then go through the - * inet layer to shut the socket down, and let userspace close it. - * Otherwise, if we created the socket directly within the kernel, use + /* If the tunnel socket was created within the kernel, use * the sk API to release it here. - * In either case the tunnel resources are freed in the socket - * destructor when the tunnel socket goes away. */ - if (tunnel->fd >= 0) { - if (sock) - inet_shutdown(sock, 2); - } else { - if (sock) + if (tunnel->fd < 0) { + if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + sock_release(sock); + } } - l2tp_tunnel_sock_put(sk); + l2tp_tunnel_remove(tunnel->l2tp_net, tunnel); + /* drop initial ref */ + l2tp_tunnel_put(tunnel); + + /* drop workqueue ref */ + l2tp_tunnel_put(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1467,95 +1447,63 @@ static void l2tp_tunnel_del_work(struct work_struct *work) * exit hook. */ static int l2tp_tunnel_sock_create(struct net *net, - u32 tunnel_id, - u32 peer_tunnel_id, - struct l2tp_tunnel_cfg *cfg, - struct socket **sockp) + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; struct socket *sock = NULL; - struct sockaddr_in udp_addr = {0}; - struct sockaddr_l2tpip ip_addr = {0}; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr = {0}; - struct sockaddr_l2tpip6 ip6_addr = {0}; -#endif + struct udp_port_cfg udp_conf; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: + memset(&udp_conf, 0, sizeof(udp_conf)); + #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); - if (err < 0) - goto out; - - sk_change_net(sock->sk, net); - - udp6_addr.sin6_family = AF_INET6; - memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, - sizeof(udp6_addr.sin6_addr)); - udp6_addr.sin6_port = htons(cfg->local_udp_port); - err = kernel_bind(sock, (struct sockaddr *) &udp6_addr, - sizeof(udp6_addr)); - if (err < 0) - goto out; - - udp6_addr.sin6_family = AF_INET6; - memcpy(&udp6_addr.sin6_addr, cfg->peer_ip6, - sizeof(udp6_addr.sin6_addr)); - udp6_addr.sin6_port = htons(cfg->peer_udp_port); - err = kernel_connect(sock, - (struct sockaddr *) &udp6_addr, - sizeof(udp6_addr), 0); - if (err < 0) - goto out; + udp_conf.family = AF_INET6; + memcpy(&udp_conf.local_ip6, cfg->local_ip6, + sizeof(udp_conf.local_ip6)); + memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, + sizeof(udp_conf.peer_ip6)); + udp_conf.use_udp6_tx_checksums = + !cfg->udp6_zero_tx_checksums; + udp_conf.use_udp6_rx_checksums = + !cfg->udp6_zero_rx_checksums; } else #endif { - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); - if (err < 0) - goto out; - - sk_change_net(sock->sk, net); - - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->local_ip; - udp_addr.sin_port = htons(cfg->local_udp_port); - err = kernel_bind(sock, (struct sockaddr *) &udp_addr, - sizeof(udp_addr)); - if (err < 0) - goto out; - - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->peer_ip; - udp_addr.sin_port = htons(cfg->peer_udp_port); - err = kernel_connect(sock, - (struct sockaddr *) &udp_addr, - sizeof(udp_addr), 0); - if (err < 0) - goto out; + udp_conf.family = AF_INET; + udp_conf.local_ip = cfg->local_ip; + udp_conf.peer_ip = cfg->peer_ip; + udp_conf.use_udp_checksums = cfg->use_udp_checksums; } - if (!cfg->use_udp_checksums) - sock->sk->sk_no_check = UDP_CSUM_NOXMIT; + udp_conf.local_udp_port = htons(cfg->local_udp_port); + udp_conf.peer_udp_port = htons(cfg->peer_udp_port); + + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + goto out; break; case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create_kern(AF_INET6, SOCK_DGRAM, - IPPROTO_L2TP, &sock); + struct sockaddr_l2tpip6 ip6_addr = {0}; + + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *) &ip6_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr)); if (err < 0) goto out; @@ -1565,24 +1513,24 @@ static int l2tp_tunnel_sock_create(struct net *net, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = peer_tunnel_id; err = kernel_connect(sock, - (struct sockaddr *) &ip6_addr, + (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr), 0); if (err < 0) goto out; } else #endif { - err = sock_create_kern(AF_INET, SOCK_DGRAM, - IPPROTO_L2TP, &sock); + struct sockaddr_l2tpip ip_addr = {0}; + + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *) &ip_addr, + err = kernel_bind(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr)); if (err < 0) goto out; @@ -1590,7 +1538,7 @@ static int l2tp_tunnel_sock_create(struct net *net, ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->peer_ip; ip_addr.l2tp_conn_id = peer_tunnel_id; - err = kernel_connect(sock, (struct sockaddr *) &ip_addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr), 0); if (err < 0) goto out; @@ -1603,87 +1551,27 @@ static int l2tp_tunnel_sock_create(struct net *net, out: *sockp = sock; - if ((err < 0) && sock) { + if (err < 0 && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); *sockp = NULL; } return err; } -static struct lock_class_key l2tp_socket_class; - -int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) +int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; int err; - struct socket *sock = NULL; - struct sock *sk = NULL; - struct l2tp_net *pn; enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; - /* Get the tunnel socket from the fd, which was opened by - * the userspace L2TP daemon. If not specified, create a - * kernel socket. - */ - if (fd < 0) { - err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, - cfg, &sock); - if (err < 0) - goto err; - } else { - sock = sockfd_lookup(fd, &err); - if (!sock) { - pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", - tunnel_id, fd, err); - err = -EBADF; - goto err; - } - - /* Reject namespace mismatches */ - if (!net_eq(sock_net(sock->sk), net)) { - pr_err("tunl %u: netns mismatch\n", tunnel_id); - err = -EINVAL; - goto err; - } - } - - sk = sock->sk; - - if (cfg != NULL) + if (cfg) encap = cfg->encap; - /* Quick sanity checks */ - switch (encap) { - case L2TP_ENCAPTYPE_UDP: - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_UDP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); - goto err; - } - break; - case L2TP_ENCAPTYPE_IP: - err = -EPROTONOSUPPORT; - if (sk->sk_protocol != IPPROTO_L2TP) { - pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", - tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); - goto err; - } - break; - } - - /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; - if (tunnel != NULL) { - /* This socket has already been prepped */ - err = -EBUSY; - goto err; - } - - tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); - if (tunnel == NULL) { + tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL); + if (!tunnel) { err = -ENOMEM; goto err; } @@ -1691,180 +1579,205 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->version = version; tunnel->tunnel_id = tunnel_id; tunnel->peer_tunnel_id = peer_tunnel_id; - tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS; - tunnel->magic = L2TP_TUNNEL_MAGIC; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); - rwlock_init(&tunnel->hlist_lock); - - /* The net we belong to */ - tunnel->l2tp_net = net; - pn = l2tp_pernet(net); - - if (cfg != NULL) - tunnel->debug = cfg->debug; + spin_lock_init(&tunnel->list_lock); + tunnel->acpt_newsess = true; + INIT_LIST_HEAD(&tunnel->session_list); - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; - if (encap == L2TP_ENCAPTYPE_UDP) { - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; - udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; - udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) - udpv6_encap_enable(); - else -#endif - udp_encap_enable(); - } - sk->sk_user_data = tunnel; - - /* Hook on the tunnel socket destructor so that we can cleanup - * if the tunnel socket goes away. - */ - tunnel->old_sk_destruct = sk->sk_destruct; - sk->sk_destruct = &l2tp_tunnel_destruct; - tunnel->sock = sk; + refcount_set(&tunnel->ref_count, 1); tunnel->fd = fd; - lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); - - sk->sk_allocation = GFP_ATOMIC; /* Init delete workqueue struct */ INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); - /* Add tunnel to our list */ - INIT_LIST_HEAD(&tunnel->list); - atomic_inc(&l2tp_tunnel_count); - - /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. Must be done before list insertion - */ - l2tp_tunnel_inc_refcount(tunnel); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - err = 0; err: if (tunnelp) *tunnelp = tunnel; - /* If tunnel's socket was created by the kernel, it doesn't - * have a file. - */ - if (sock && sock->file) - sockfd_put(sock); - return err; } EXPORT_SYMBOL_GPL(l2tp_tunnel_create); -/* This function is used by the netlink TUNNEL_DELETE command. - */ -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) +static int l2tp_validate_socket(const struct sock *sk, const struct net *net, + enum l2tp_encap_type encap) { - l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + struct l2tp_tunnel *tunnel; + + if (!net_eq(sock_net(sk), net)) + return -EINVAL; + + if (sk->sk_type != SOCK_DGRAM) + return -EPROTONOSUPPORT; + + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + return -EPROTONOSUPPORT; + + if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) || + (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP)) + return -EPROTONOSUPPORT; + + if (encap == L2TP_ENCAPTYPE_UDP && sk->sk_user_data) + return -EBUSY; + + tunnel = l2tp_sk_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_put(tunnel); + return -EBUSY; + } + + return 0; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -/* Really kill the session. - */ -void l2tp_session_free(struct l2tp_session *session) +int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + struct l2tp_tunnel_cfg *cfg) { - struct l2tp_tunnel *tunnel = session->tunnel; + struct l2tp_net *pn = l2tp_pernet(net); + u32 tunnel_id = tunnel->tunnel_id; + struct socket *sock; + struct sock *sk; + int ret; + + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id, + GFP_ATOMIC); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); + if (ret) + return ret == -ENOSPC ? -EEXIST : ret; + + if (tunnel->fd < 0) { + ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id, + tunnel->peer_tunnel_id, cfg, + &sock); + if (ret < 0) + goto err; + } else { + sock = sockfd_lookup(tunnel->fd, &ret); + if (!sock) + goto err; + } - BUG_ON(atomic_read(&session->ref_count) != 0); + sk = sock->sk; + lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_inval_sock; + write_unlock_bh(&sk->sk_callback_lock); - if (tunnel) { - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - sock_put(tunnel->sock); - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + struct udp_tunnel_sock_cfg udp_cfg = { + .encap_type = UDP_ENCAP_L2TPINUDP, + .encap_rcv = l2tp_udp_encap_recv, + .encap_err_rcv = l2tp_udp_encap_err_recv, + .encap_destroy = l2tp_udp_encap_destroy, + }; + + setup_udp_tunnel_sock(net, sock, &udp_cfg); } - kfree(session); + sk->sk_allocation = GFP_ATOMIC; + release_sock(sk); - return; + sock_hold(sk); + tunnel->sock = sk; + tunnel->l2tp_net = net; + + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); + + trace_register_tunnel(tunnel); + + if (tunnel->fd >= 0) + sockfd_put(sock); + + return 0; + +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); + + if (tunnel->fd < 0) + sock_release(sock); + else + sockfd_put(sock); +err: + l2tp_tunnel_remove(net, tunnel); + return ret; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(l2tp_tunnel_register); -/* Remove an l2tp session from l2tp_core's hash lists. - * Provides a tidyup interface for pseudowire code which can't just route all - * shutdown via. l2tp_session_delete and a pseudowire-specific session_close - * callback. +/* This function is used by the netlink TUNNEL_DELETE command. */ -void __l2tp_session_unhash(struct l2tp_session *session) +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - struct l2tp_tunnel *tunnel = session->tunnel; + if (!test_and_set_bit(0, &tunnel->dead)) { + trace_delete_tunnel(tunnel); + refcount_inc(&tunnel->ref_count); + queue_work(l2tp_wq, &tunnel->del_work); + } +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); - /* Remove the session from core hashes */ - if (tunnel) { - /* Remove from the per-tunnel hash */ - write_lock_bh(&tunnel->hlist_lock); - hlist_del_init(&session->hlist); - write_unlock_bh(&tunnel->hlist_lock); - - /* For L2TPv3 we have a per-net hash: remove from there, too */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } +void l2tp_session_delete(struct l2tp_session *session) +{ + if (!test_and_set_bit(0, &session->dead)) { + trace_delete_session(session); + refcount_inc(&session->ref_count); + queue_work(l2tp_wq, &session->del_work); } } -EXPORT_SYMBOL_GPL(__l2tp_session_unhash); +EXPORT_SYMBOL_GPL(l2tp_session_delete); -/* This function is used by the netlink SESSION_DELETE command and by - pseudowire modules. - */ -int l2tp_session_delete(struct l2tp_session *session) +/* Workqueue session deletion function */ +static void l2tp_session_del_work(struct work_struct *work) { - if (session->ref) - (*session->ref)(session); - __l2tp_session_unhash(session); + struct l2tp_session *session = container_of(work, struct l2tp_session, + del_work); + + l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) + if (session->session_close) (*session->session_close)(session); - if (session->deref) - (*session->deref)(session); - l2tp_session_dec_refcount(session); - return 0; + + /* drop initial ref */ + l2tp_session_put(session); + + /* drop workqueue ref */ + l2tp_session_put(session); } -EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or - * l2specific_len parameters are set. + * l2specific_type parameters are set. */ -static void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version, + enum l2tp_encap_type encap) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; - if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) + session->hdr_len = 4 + session->cookie_len; + session->hdr_len += l2tp_get_l2specific_len(session); + if (encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } - } +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); -struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, + u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; - session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); - if (session != NULL) { + session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL); + if (session) { session->magic = L2TP_SESSION_MAGIC; - session->tunnel = tunnel; session->session_id = session_id; session->peer_session_id = peer_session_id; @@ -1884,68 +1797,33 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn skb_queue_head_init(&session->reorder_q); + session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id); INIT_HLIST_NODE(&session->hlist); - INIT_HLIST_NODE(&session->global_hlist); - - /* Inherit debug options from tunnel */ - session->debug = tunnel->debug; + INIT_LIST_HEAD(&session->clist); + INIT_LIST_HEAD(&session->list); + INIT_WORK(&session->del_work, l2tp_session_del_work); if (cfg) { session->pwtype = cfg->pw_type; - session->debug = cfg->debug; - session->mtu = cfg->mtu; - session->mru = cfg->mru; session->send_seq = cfg->send_seq; session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; - session->offset = cfg->offset; session->l2specific_type = cfg->l2specific_type; - session->l2specific_len = cfg->l2specific_len; session->cookie_len = cfg->cookie_len; memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len); session->peer_cookie_len = cfg->peer_cookie_len; memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } - if (tunnel->version == L2TP_HDR_VER_2) - session->build_header = l2tp_build_l2tpv2_header; - else - session->build_header = l2tp_build_l2tpv3_header; + l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); - l2tp_session_set_header_len(session, tunnel->version); - - /* Bump the reference count. The session context is deleted - * only when this drops to zero. - */ - l2tp_session_inc_refcount(session); - l2tp_tunnel_inc_refcount(tunnel); - - /* Ensure tunnel socket isn't deleted */ - sock_hold(tunnel->sock); - - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } + refcount_set(&session->ref_count, 1); - /* Ignore management session in session count value */ - if (session->session_id != 0) - atomic_inc(&l2tp_session_count); + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); @@ -1956,34 +1834,74 @@ EXPORT_SYMBOL_GPL(l2tp_session_create); static __net_init int l2tp_init_net(struct net *net) { struct l2tp_net *pn = net_generic(net, l2tp_net_id); - int hash; - - INIT_LIST_HEAD(&pn->l2tp_tunnel_list); - spin_lock_init(&pn->l2tp_tunnel_list_lock); - for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) - INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); + idr_init(&pn->l2tp_tunnel_idr); + spin_lock_init(&pn->l2tp_tunnel_idr_lock); - spin_lock_init(&pn->l2tp_session_hlist_lock); + idr_init(&pn->l2tp_v2_session_idr); + idr_init(&pn->l2tp_v3_session_idr); + spin_lock_init(&pn->l2tp_session_idr_lock); return 0; } -static __net_exit void l2tp_exit_net(struct net *net) +static __net_exit void l2tp_pre_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; + unsigned long tunnel_id, tmp; rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - (void)l2tp_tunnel_delete(tunnel); + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel) + l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); + + if (l2tp_wq) { + /* Run all TUNNEL_DELETE work items just queued. */ + __flush_workqueue(l2tp_wq); + + /* Each TUNNEL_DELETE work item will queue a SESSION_DELETE + * work item for each session in the tunnel. Flush the + * workqueue again to process these. + */ + __flush_workqueue(l2tp_wq); + } +} + +static int l2tp_idr_item_unexpected(int id, void *p, void *data) +{ + const char *idr_name = data; + + pr_err("l2tp: %s IDR not empty at net %d exit\n", idr_name, id); + WARN_ON_ONCE(1); + return 1; +} + +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); + + /* Our per-net IDRs should be empty. Check that is so, to + * help catch cleanup races or refcnt leaks. + */ + idr_for_each(&pn->l2tp_v2_session_idr, l2tp_idr_item_unexpected, + "v2_session"); + idr_for_each(&pn->l2tp_v3_session_idr, l2tp_idr_item_unexpected, + "v3_session"); + idr_for_each(&pn->l2tp_tunnel_idr, l2tp_idr_item_unexpected, + "tunnel"); + + idr_destroy(&pn->l2tp_v2_session_idr); + idr_destroy(&pn->l2tp_v3_session_idr); + idr_destroy(&pn->l2tp_tunnel_idr); } static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, .exit = l2tp_exit_net, + .pre_exit = l2tp_pre_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; @@ -1996,9 +1914,10 @@ static int __init l2tp_init(void) if (rc) goto out; - l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); + unregister_pernet_device(&l2tp_net_ops); rc = -ENOMEM; goto out; } @@ -2025,4 +1944,3 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP core"); MODULE_LICENSE("GPL"); MODULE_VERSION(L2TP_DRV_VERSION); - |
