summaryrefslogtreecommitdiff
path: root/net/xfrm/xfrm_policy.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xfrm/xfrm_policy.c')
-rw-r--r--net/xfrm/xfrm_policy.c1167
1 files changed, 820 insertions, 347 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 934492bad8e0..62486f866975 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* xfrm_policy.c
*
@@ -27,13 +28,24 @@
#include <linux/cpu.h>
#include <linux/audit.h>
#include <linux/rhashtable.h>
+#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/gre.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/mip6.h>
+#endif
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
+#ifdef CONFIG_XFRM_ESPINTCP
+#include <net/espintcp.h>
+#endif
+#include <net/inet_dscp.h>
#include "xfrm_hash.h"
@@ -98,7 +110,11 @@ struct xfrm_pol_inexact_node {
* 4. saddr:any list from saddr tree
*
* This result set then needs to be searched for the policy with
- * the lowest priority. If two results have same prio, youngest one wins.
+ * the lowest priority. If two candidates have the same priority, the
+ * struct xfrm_policy pos member with the lower number is used.
+ *
+ * This replicates previous single-list-search algorithm which would
+ * return first matching policy in the (ordered-by-priority) list.
*/
struct xfrm_pol_inexact_key {
@@ -114,7 +130,7 @@ struct xfrm_pol_inexact_bin {
/* list containing '*:*' policies */
struct hlist_head hhead;
- seqcount_t count;
+ seqcount_spinlock_t count;
/* tree sorted by daddr/prefix */
struct rb_root root_d;
@@ -139,6 +155,21 @@ struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
+struct xfrm_flow_keys {
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_control control;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ } addrs;
+ struct flow_dissector_key_ip ip;
+ struct flow_dissector_key_icmp icmp;
+ struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_keyid gre;
+};
+
+static struct flow_dissector xfrm_session_dissector __ro_after_init;
+
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
@@ -147,7 +178,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_t xfrm_policy_hash_generation;
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -171,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
@@ -242,10 +270,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void)
return rcu_dereference(xfrm_if_cb);
}
-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- int family, u32 mark)
+struct dst_entry *__xfrm_dst_lookup(int family,
+ const struct xfrm_dst_lookup_params *params)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -254,7 +280,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
+ dst = afinfo->dst_lookup(params);
rcu_read_unlock();
@@ -263,11 +289,12 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
EXPORT_SYMBOL(__xfrm_dst_lookup);
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
- int tos, int oif,
+ dscp_t dscp, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
int family, u32 mark)
{
+ struct xfrm_dst_lookup_params params;
struct net *net = xs_net(x);
xfrm_address_t *saddr = &x->props.saddr;
xfrm_address_t *daddr = &x->id.daddr;
@@ -282,7 +309,29 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
+ params.net = net;
+ params.saddr = saddr;
+ params.daddr = daddr;
+ params.dscp = dscp;
+ params.oif = oif;
+ params.mark = mark;
+ params.ipproto = x->id.proto;
+ if (x->encap) {
+ switch (x->encap->encap_type) {
+ case UDP_ENCAP_ESPINUDP:
+ params.ipproto = IPPROTO_UDP;
+ params.uli.ports.sport = x->encap->encap_sport;
+ params.uli.ports.dport = x->encap->encap_dport;
+ break;
+ case TCP_ENCAP_ESPINTCP:
+ params.ipproto = IPPROTO_TCP;
+ params.uli.ports.sport = x->encap->encap_sport;
+ params.uli.ports.dport = x->encap->encap_dport;
+ break;
+ }
+ }
+
+ dst = __xfrm_dst_lookup(family, &params);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
@@ -304,7 +353,7 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_policy_timer(struct timer_list *t)
{
- struct xfrm_policy *xp = from_timer(xp, t, timer);
+ struct xfrm_policy *xp = timer_container_of(xp, t, timer);
time64_t now = ktime_get_real_seconds();
time64_t next = TIME64_MAX;
int warn = 0;
@@ -327,7 +376,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.hard_use_expires_seconds) {
time64_t tmo = xp->lft.hard_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -345,7 +394,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.soft_use_expires_seconds) {
time64_t tmo = xp->lft.soft_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
tmo = XFRM_KM_TIMEOUT;
@@ -385,7 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
- INIT_HLIST_NODE(&policy->bydst_inexact_list);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -413,9 +462,10 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
{
BUG_ON(!policy->walk.dead);
- if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
+ if (timer_delete(&policy->timer) || timer_delete(&policy->polq.hold_timer))
BUG();
+ xfrm_dev_policy_free(policy);
call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
@@ -426,17 +476,31 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
+ xfrm_dev_policy_delete(policy);
+
+ write_lock_bh(&policy->lock);
policy->walk.dead = 1;
+ write_unlock_bh(&policy->lock);
atomic_inc(&policy->genid);
- if (del_timer(&policy->polq.hold_timer))
+ if (timer_delete(&policy->polq.hold_timer))
xfrm_pol_put(policy);
skb_queue_purge(&policy->polq.hold_queue);
- if (del_timer(&policy->timer))
+ if (timer_delete(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -524,7 +588,7 @@ redo:
__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
pol->family, nhashmask, dbits, sbits);
- if (!entry0) {
+ if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
hlist_del_rcu(&pol->bydst);
hlist_add_head_rcu(&pol->bydst, ndsttable + h);
h0 = h;
@@ -575,10 +639,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
return;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- write_seqcount_begin(&xfrm_policy_hash_generation);
-
- odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
- lockdep_is_held(&net->xfrm.xfrm_policy_lock));
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -589,7 +650,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
net->xfrm.policy_bydst[dir].hmask = nhashmask;
- write_seqcount_end(&xfrm_policy_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
synchronize_rcu();
@@ -597,7 +658,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
-static void xfrm_byidx_resize(struct net *net, int total)
+static void xfrm_byidx_resize(struct net *net)
{
unsigned int hmask = net->xfrm.policy_idx_hmask;
unsigned int nhashmask = xfrm_new_hash_mask(hmask);
@@ -675,23 +736,13 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_bydst_resize(net, dir);
}
if (xfrm_byidx_should_resize(net, total))
- xfrm_byidx_resize(net, total);
+ xfrm_byidx_resize(net);
mutex_unlock(&hash_resize_mutex);
}
-static void xfrm_hash_reset_inexact_table(struct net *net)
-{
- struct xfrm_pol_inexact_bin *b;
-
- lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
-
- list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)
- INIT_HLIST_HEAD(&b->hhead);
-}
-
/* Make sure *pol can be inserted into fastbin.
- * Useful to check that later insert requests will be sucessful
+ * Useful to check that later insert requests will be successful
* (provided xfrm_policy_lock is held throughout).
*/
static struct xfrm_pol_inexact_bin *
@@ -722,7 +773,7 @@ xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
INIT_HLIST_HEAD(&bin->hhead);
bin->root_d = RB_ROOT;
bin->root_s = RB_ROOT;
- seqcount_init(&bin->count);
+ seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);
prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
&bin->k, &bin->head,
@@ -796,15 +847,22 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a,
const xfrm_address_t *b,
u8 prefixlen, u16 family)
{
+ u32 ma, mb, mask;
unsigned int pdw, pbi;
int delta = 0;
switch (family) {
case AF_INET:
- if (sizeof(long) == 4 && prefixlen == 0)
- return ntohl(a->a4) - ntohl(b->a4);
- return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
- (ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
+ if (prefixlen == 0)
+ return 0;
+ mask = ~0U << (32 - prefixlen);
+ ma = ntohl(a->a4) & mask;
+ mb = ntohl(b->a4) & mask;
+ if (ma < mb)
+ delta = -1;
+ else if (ma > mb)
+ delta = 1;
+ break;
case AF_INET6:
pdw = prefixlen >> 5;
pbi = prefixlen & 0x1f;
@@ -815,10 +873,13 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a,
return delta;
}
if (pbi) {
- u32 mask = ~0u << (32 - pbi);
-
- delta = (ntohl(a->a6[pdw]) & mask) -
- (ntohl(b->a6[pdw]) & mask);
+ mask = ~0U << (32 - pbi);
+ ma = ntohl(a->a6[pdw]) & mask;
+ mb = ntohl(b->a6[pdw]) & mask;
+ if (ma < mb)
+ delta = -1;
+ else if (ma > mb)
+ delta = 1;
}
break;
default:
@@ -833,32 +894,35 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
u16 family)
{
unsigned int matched_s, matched_d;
- struct hlist_node *newpos = NULL;
struct xfrm_policy *policy, *p;
matched_s = 0;
matched_d = 0;
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
policy->bydst_reinsert = false;
hlist_for_each_entry(p, &n->hhead, bydst) {
- if (policy->priority >= p->priority)
+ if (policy->priority > p->priority)
+ newpos = &p->bydst;
+ else if (policy->priority == p->priority &&
+ policy->pos > p->pos)
newpos = &p->bydst;
else
break;
}
- if (newpos)
- hlist_add_behind(&policy->bydst, newpos);
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
+ hlist_add_behind_rcu(&policy->bydst, newpos);
else
- hlist_add_head(&policy->bydst, &n->hhead);
+ hlist_add_head_rcu(&policy->bydst, &n->hhead);
/* paranoia checks follow.
* Check that the reinserted policy matches at least
@@ -893,12 +957,13 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
struct rb_root *new,
u16 family)
{
- struct rb_node **p, *parent = NULL;
struct xfrm_pol_inexact_node *node;
+ struct rb_node **p, *parent;
/* we should not have another subtree here */
WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
-
+restart:
+ parent = NULL;
p = &new->rb_node;
while (*p) {
u8 prefixlen;
@@ -916,17 +981,19 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
} else if (delta > 0) {
p = &parent->rb_right;
} else {
+ bool same_prefixlen = node->prefixlen == n->prefixlen;
struct xfrm_policy *tmp;
- hlist_for_each_entry(tmp, &node->hhead, bydst)
- tmp->bydst_reinsert = true;
- hlist_for_each_entry(tmp, &n->hhead, bydst)
+ hlist_for_each_entry(tmp, &n->hhead, bydst) {
tmp->bydst_reinsert = true;
+ hlist_del_rcu(&tmp->bydst);
+ }
+
+ node->prefixlen = prefixlen;
- INIT_HLIST_HEAD(&node->hhead);
xfrm_policy_inexact_list_reinsert(net, node, family);
- if (node->prefixlen == n->prefixlen) {
+ if (same_prefixlen) {
kfree_rcu(n, rcu);
return;
}
@@ -934,9 +1001,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
rb_erase(*p, new);
kfree_rcu(n, rcu);
n = node;
- n->prefixlen = prefixlen;
- *p = new->rb_node;
- parent = NULL;
+ goto restart;
}
}
@@ -965,12 +1030,11 @@ static void xfrm_policy_inexact_node_merge(struct net *net,
family);
}
- hlist_for_each_entry(tmp, &v->hhead, bydst)
- tmp->bydst_reinsert = true;
- hlist_for_each_entry(tmp, &n->hhead, bydst)
+ hlist_for_each_entry(tmp, &v->hhead, bydst) {
tmp->bydst_reinsert = true;
+ hlist_del_rcu(&tmp->bydst);
+ }
- INIT_HLIST_HEAD(&n->hhead);
xfrm_policy_inexact_list_reinsert(net, n, family);
}
@@ -1198,26 +1262,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}
- chain = &net->xfrm.policy_inexact[dir];
- xfrm_policy_insert_inexact_list(chain, policy);
-
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
+static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
+{
+ int dir;
+
+ if (policy->walk.dead)
+ return true;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ return dir >= XFRM_POLICY_MAX;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
- unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
- struct hlist_head *odst;
struct hlist_node *newpos;
- int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
@@ -1235,6 +1304,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
/* make sure that we can insert the indirect policies again before
* we start with destructive action.
@@ -1243,10 +1313,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
- dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
+ dir = xfrm_policy_id2dir(policy->index);
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
if (policy->family == AF_INET) {
dbits = rbits4;
@@ -1277,15 +1347,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}
- /* reset the bydst and inexact table in all directions */
- xfrm_hash_reset_inexact_table(net);
-
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
- hmask = net->xfrm.policy_bydst[dir].hmask;
- odst = net->xfrm.policy_bydst[dir].table;
- for (i = hmask; i >= 0; i--)
- INIT_HLIST_HEAD(odst + i);
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -1303,16 +1365,16 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
- dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX) {
- /* skip socket policies */
- continue;
- }
+
+ hlist_del_rcu(&policy->bydst);
+
newpos = NULL;
+ dir = xfrm_policy_id2dir(policy->index);
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
+
if (!chain) {
void *p = xfrm_policy_inexact_insert(policy, dir, 0);
@@ -1326,7 +1388,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
else
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst, newpos);
else
hlist_add_head_rcu(&policy->bydst, chain);
@@ -1334,6 +1396,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
out_unlock:
__xfrm_policy_inexact_flush(net);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
@@ -1349,8 +1412,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
* of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
- static u32 idx_generator;
-
for (;;) {
struct hlist_head *list;
struct xfrm_policy *p;
@@ -1358,8 +1419,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
int found;
if (!index) {
- idx = (idx_generator | dir);
- idx_generator += 8;
+ idx = (net->xfrm.idx_generator | dir);
+ net->xfrm.idx_generator += 8;
} else {
idx = index;
index = 0;
@@ -1408,7 +1469,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice_init(&pq->hold_queue, &list);
- if (del_timer(&pq->hold_timer))
+ if (timer_delete(&pq->hold_timer))
xfrm_pol_put(old);
spin_unlock_bh(&pq->hold_queue.lock);
@@ -1422,19 +1483,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_unlock_bh(&pq->hold_queue.lock);
}
-static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
- struct xfrm_policy *pol)
+static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+ struct xfrm_policy *pol)
{
- u32 mark = policy->mark.v & policy->mark.m;
-
- if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
- return true;
-
- if ((mark & pol->mark.m) == pol->mark.v &&
- policy->priority == pol->priority)
- return true;
-
- return false;
+ return mark->v == pol->mark.v && mark->m == pol->mark.m;
}
static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
@@ -1486,42 +1538,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy)
-{
- struct xfrm_policy *pol, *delpol = NULL;
- struct hlist_node *newpos = NULL;
- int i = 0;
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if (pol->type == policy->type &&
- pol->if_id == policy->if_id &&
- !selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
- xfrm_sec_ctx_match(pol->security, policy->security) &&
- !WARN_ON(delpol)) {
- delpol = pol;
- if (policy->priority > pol->priority)
- continue;
- } else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst_inexact_list;
- continue;
- }
- if (delpol)
- break;
- }
-
- if (newpos)
- hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
- else
- hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- pol->pos = i;
- i++;
- }
-}
-
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
@@ -1532,7 +1548,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl)
@@ -1548,9 +1564,12 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
else
+ /* Packet offload policies enter to the head
+ * to speed-up lookups.
+ */
hlist_add_head_rcu(&policy->bydst, chain);
return delpol;
@@ -1562,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
+ /* Sanitize mark before store */
+ policy->mark.v &= policy->mark.m;
+
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
if (chain)
@@ -1604,9 +1626,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
EXPORT_SYMBOL(xfrm_policy_insert);
static struct xfrm_policy *
-__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx)
{
struct xfrm_policy *pol;
@@ -1617,7 +1638,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
+ xfrm_policy_mark_match(mark, pol) &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security))
return pol;
@@ -1626,11 +1647,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
return NULL;
}
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
- struct xfrm_sec_ctx *ctx, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx, int delete, int *err)
{
struct xfrm_pol_inexact_bin *bin = NULL;
struct xfrm_policy *pol, *ret = NULL;
@@ -1697,9 +1717,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir, u32 id, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete, int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -1714,8 +1734,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v) {
+ pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
xfrm_pol_hold(pol);
if (delete) {
*err = security_xfrm_policy_delete(
@@ -1759,12 +1778,41 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
}
return err;
}
+
+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
+ struct net_device *dev,
+ bool task_valid)
+{
+ struct xfrm_policy *pol;
+ int err = 0;
+
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead ||
+ xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
+ pol->xdo.dev != dev)
+ continue;
+
+ err = security_xfrm_policy_delete(pol->security);
+ if (err) {
+ xfrm_audit_policy_delete(pol, 0, task_valid);
+ return err;
+ }
+ }
+ return err;
+}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
return 0;
}
+
+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
+ struct net_device *dev,
+ bool task_valid)
+{
+ return 0;
+}
#endif
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
@@ -1780,9 +1828,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1804,6 +1854,46 @@ out:
}
EXPORT_SYMBOL(xfrm_policy_flush);
+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
+ bool task_valid)
+{
+ int dir, err = 0, cnt = 0;
+ struct xfrm_policy *pol;
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+
+ err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
+ if (err)
+ goto out;
+
+again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
+ dir = xfrm_policy_id2dir(pol->index);
+ if (dir >= XFRM_POLICY_MAX ||
+ pol->xdo.dev != dev)
+ continue;
+
+ __xfrm_policy_unlink(pol, dir);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ cnt++;
+ xfrm_audit_policy_delete(pol, 1, task_valid);
+ xfrm_policy_kill(pol);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ goto again;
+ }
+ if (cnt)
+ __xfrm_policy_inexact_flush(net);
+ else
+ err = -ESRCH;
+out:
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_dev_policy_flush);
+
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
int (*func)(struct xfrm_policy *, int, int, void*),
void *data)
@@ -1879,7 +1969,7 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
*/
static int xfrm_policy_match(const struct xfrm_policy *pol,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
const struct xfrm_selector *sel = &pol->selector;
int ret = -ESRCH;
@@ -1893,14 +1983,13 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
match = xfrm_selector_match(sel, fl, family);
if (match)
- ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
- dir);
+ ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid);
return ret;
}
static struct xfrm_pol_inexact_node *
xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
- seqcount_t *count,
+ seqcount_spinlock_t *count,
const xfrm_address_t *addr, u16 family)
{
const struct rb_node *parent;
@@ -2005,7 +2094,7 @@ static struct xfrm_policy *
__xfrm_policy_eval_candidates(struct hlist_head *chain,
struct xfrm_policy *prefer,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
u32 priority = prefer ? prefer->priority : ~0u;
struct xfrm_policy *pol;
@@ -2019,7 +2108,7 @@ __xfrm_policy_eval_candidates(struct hlist_head *chain,
if (pol->priority > priority)
break;
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ err = xfrm_policy_match(pol, fl, type, family, if_id);
if (err) {
if (err != -ESRCH)
return ERR_PTR(err);
@@ -2044,7 +2133,7 @@ static struct xfrm_policy *
xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
struct xfrm_policy *prefer,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
struct xfrm_policy *tmp;
int i;
@@ -2052,8 +2141,7 @@ xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
tmp = __xfrm_policy_eval_candidates(cand->res[i],
prefer,
- fl, type, family, dir,
- if_id);
+ fl, type, family, if_id);
if (!tmp)
continue;
@@ -2086,13 +2174,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
rcu_read_lock();
retry:
do {
- sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
chain = policy_hash_direct(net, daddr, saddr, family, dir);
- } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+ } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ err = xfrm_policy_match(pol, fl, type, family, if_id);
if (err) {
if (err == -ESRCH)
continue;
@@ -2105,13 +2193,16 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
break;
}
}
+ if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET)
+ goto skip_inexact;
+
bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
daddr))
goto skip_inexact;
pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
- family, dir, if_id);
+ family, if_id);
if (pol) {
ret = pol;
if (IS_ERR(pol))
@@ -2119,7 +2210,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
}
skip_inexact:
- if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+ if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
goto retry;
if (ret && !xfrm_pol_hold_rcu(ret))
@@ -2166,14 +2257,13 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;
}
err = security_xfrm_policy_lookup(pol->security,
- fl->flowi_secid,
- dir);
+ fl->flowi_secid);
if (!err) {
if (!xfrm_pol_hold_rcu(pol))
goto again;
@@ -2190,10 +2280,52 @@ out:
return pol;
}
+static u32 xfrm_gen_pos_slow(struct net *net)
+{
+ struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* oldest entry is last in list */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ if (!xfrm_policy_is_dead_or_sk(policy))
+ policy->pos = ++i;
+ }
+
+ return i;
+}
+
+static u32 xfrm_gen_pos(struct net *net)
+{
+ const struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* most recently added policy is at the head of the list */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ if (xfrm_policy_is_dead_or_sk(policy))
+ continue;
+
+ if (policy->pos == UINT_MAX)
+ return xfrm_gen_pos_slow(net);
+
+ i = policy->pos + 1;
+ break;
+ }
+
+ return i;
+}
+
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ case XFRM_POLICY_FWD:
+ case XFRM_POLICY_OUT:
+ pol->pos = xfrm_gen_pos(net);
+ break;
+ }
+
list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
@@ -2210,7 +2342,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
- hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -2336,15 +2467,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
}
static int
-xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
- xfrm_address_t *remote, unsigned short family, u32 mark)
+xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
int err;
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EINVAL;
- err = afinfo->get_saddr(net, oif, local, remote, mark);
+ err = afinfo->get_saddr(saddr, params);
rcu_read_unlock();
return err;
}
@@ -2369,13 +2500,19 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
if (tmpl->mode == XFRM_MODE_TUNNEL ||
+ tmpl->mode == XFRM_MODE_IPTFS ||
tmpl->mode == XFRM_MODE_BEET) {
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
if (xfrm_addr_any(local, tmpl->encap_family)) {
- error = xfrm_get_saddr(net, fl->flowi_oif,
- &tmp, remote,
- tmpl->encap_family, 0);
+ struct xfrm_dst_lookup_params params;
+
+ memset(&params, 0, sizeof(params));
+ params.net = net;
+ params.oif = fl->flowi_oif;
+ params.daddr = remote;
+ error = xfrm_get_saddr(tmpl->encap_family, &tmp,
+ &params);
if (error)
goto fail;
local = &tmp;
@@ -2384,6 +2521,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
family, policy->if_id);
+ if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
+ xfrm_state_put(x);
+ error = -EINVAL;
+ goto fail;
+ }
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@@ -2448,20 +2591,12 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
}
-static int xfrm_get_tos(const struct flowi *fl, int family)
+static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
{
- const struct xfrm_policy_afinfo *afinfo;
- int tos;
-
- afinfo = xfrm_policy_get_afinfo(family);
- if (!afinfo)
- return 0;
+ if (family == AF_INET)
+ return fl->u.ip4.flowi4_dscp;
- tos = afinfo->get_tos(fl);
-
- rcu_read_unlock();
-
- return tos;
+ return 0;
}
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
@@ -2485,12 +2620,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
default:
BUG();
}
- xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+ xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0);
if (likely(xdst)) {
- struct dst_entry *dst = &xdst->u.dst;
-
- memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
+ memset_after(xdst, 0, u.dst);
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -2499,21 +2632,13 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
return xdst;
}
-static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
- int nfheader_len)
+static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ int nfheader_len)
{
- const struct xfrm_policy_afinfo *afinfo =
- xfrm_policy_get_afinfo(dst->ops->family);
- int err;
-
- if (!afinfo)
- return -EINVAL;
-
- err = afinfo->init_path(path, dst, nfheader_len);
-
- rcu_read_unlock();
-
- return err;
+ if (dst->ops->family == AF_INET6) {
+ path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
+ path->u.rt6.rt6i_nfheader_len = nfheader_len;
+ }
}
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
@@ -2545,10 +2670,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
const struct flowi *fl,
struct dst_entry *dst)
{
+ const struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_mode *inner_mode;
struct net *net = xp_net(policy);
unsigned long now = jiffies;
struct net_device *dev;
- struct xfrm_mode *inner_mode;
struct xfrm_dst *xdst_prev = NULL;
struct xfrm_dst *xdst0 = NULL;
int i = 0;
@@ -2556,13 +2682,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
int header_len = 0;
int nfheader_len = 0;
int trailer_len = 0;
- int tos;
int family = policy->selector.family;
xfrm_address_t saddr, daddr;
+ dscp_t dscp;
xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
- tos = xfrm_get_tos(fl, family);
+ dscp = xfrm_get_dscp(fl, family);
dst_hold(dst);
@@ -2594,17 +2720,24 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
} else
- inner_mode = xfrm[i]->inner_mode;
+ inner_mode = &xfrm[i]->inner_mode;
xdst->route = dst;
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
- __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+ __u32 mark = 0;
+ int oif;
+
+ if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
+ mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+
+ if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ family = xfrm[i]->props.family;
- family = xfrm[i]->props.family;
- dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
- &saddr, &daddr, family, mark);
+ oif = fl->flowi_oif ? : fl->flowi_l3mdev;
+ dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr,
+ &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -2615,11 +2748,21 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
xdst->xfrm_genid = xfrm[i]->genid;
dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
- dst1->flags |= DST_HOST;
dst1->lastuse = now;
dst1->input = dst_discard;
- dst1->output = inner_mode->afinfo->output;
+
+ if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) {
+ dst1->output = xfrm[i]->mode_cbs->output;
+ } else {
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+ if (likely(afinfo))
+ dst1->output = afinfo->output;
+ else
+ dst1->output = dst_discard_out;
+ rcu_read_unlock();
+ }
xdst_prev = xdst;
@@ -2675,13 +2818,15 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
#ifdef CONFIG_XFRM_SUB_POLICY
- if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
+ if (pols[0]->action == XFRM_POLICY_ALLOW &&
pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
XFRM_POLICY_TYPE_MAIN,
@@ -2691,6 +2836,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
@@ -2752,11 +2898,12 @@ static void xfrm_policy_queue_process(struct timer_list *t)
struct sk_buff *skb;
struct sock *sk;
struct dst_entry *dst;
- struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
+ struct xfrm_policy *pol = timer_container_of(pol, t, polq.hold_timer);
struct net *net = xp_net(pol);
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
struct sk_buff_head list;
+ __u32 skb_mark;
spin_lock(&pq->hold_queue.lock);
skb = skb_peek(&pq->hold_queue);
@@ -2766,7 +2913,12 @@ static void xfrm_policy_queue_process(struct timer_list *t)
}
dst = skb_dst(skb);
sk = skb->sk;
- xfrm_decode_session(skb, &fl, dst->ops->family);
+
+ /* Fixup the mark to support VTI. */
+ skb_mark = skb->mark;
+ skb->mark = pol->mark.v;
+ xfrm_decode_session(net, skb, &fl, dst->ops->family);
+ skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
dst_hold(xfrm_dst_path(dst));
@@ -2798,7 +2950,12 @@ static void xfrm_policy_queue_process(struct timer_list *t)
while (!skb_queue_empty(&list)) {
skb = __skb_dequeue(&list);
- xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+ /* Fixup the mark to support VTI. */
+ skb_mark = skb->mark;
+ skb->mark = pol->mark.v;
+ xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
+ skb->mark = skb_mark;
+
dst_hold(xfrm_dst_path(skb_dst(skb)));
dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
if (IS_ERR(dst)) {
@@ -2806,11 +2963,11 @@ static void xfrm_policy_queue_process(struct timer_list *t)
continue;
}
- nf_reset(skb);
+ nf_reset_ct(skb);
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- dst_output(net, skb->sk, skb);
+ dst_output(net, skb_to_full_sk(skb), skb);
}
out:
@@ -2850,7 +3007,7 @@ static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *s
sched_next = jiffies + pq->timeout;
- if (del_timer(&pq->hold_timer)) {
+ if (timer_delete(&pq->hold_timer)) {
if (time_before(pq->hold_timer.expires, sched_next))
sched_next = pq->hold_timer.expires;
xfrm_pol_put(pol);
@@ -2894,7 +3051,7 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
dst_copy_metrics(dst1, dst);
dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
- dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
+ dst1->flags |= DST_XFRM_QUEUE;
dst1->lastuse = jiffies;
dst1->input = dst_discard;
@@ -3073,8 +3230,8 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
xflo.flags = flags;
/* To accelerate a bit... */
- if ((dst_orig->flags & DST_NOXFRM) ||
- !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT]))
goto nopol;
xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
@@ -3122,7 +3279,7 @@ no_transform:
}
for (i = 0; i < num_pols; i++)
- pols[i]->curlft.use_time = ktime_get_real_seconds();
+ WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds());
if (num_xfrms < 0) {
/* Prohibit the flow */
@@ -3137,14 +3294,21 @@ no_transform:
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
- if (dst && dst->xfrm &&
- dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
+ if (dst->xfrm &&
+ (dst->xfrm->props.mode == XFRM_MODE_TUNNEL ||
+ dst->xfrm->props.mode == XFRM_MODE_IPTFS))
dst->flags |= DST_XFRM_TUNNEL;
return dst;
nopol:
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
+ net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ err = -EPERM;
+ goto error;
+ }
if (!(flags & XFRM_LOOKUP_ICMP)) {
dst = dst_orig;
goto ok;
@@ -3184,7 +3348,7 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
flags | XFRM_LOOKUP_QUEUE |
XFRM_LOOKUP_KEEP_DST_REF);
- if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ if (PTR_ERR(dst) == -EREMOTE)
return make_blackhole(net, dst_orig->ops->family, dst_orig);
if (IS_ERR(dst))
@@ -3216,7 +3380,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
if (xfrm_state_kern(x))
return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
@@ -3227,19 +3391,20 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
!(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
!(x->props.mode != XFRM_MODE_TRANSPORT &&
- xfrm_state_addr_cmp(tmpl, x, family));
+ xfrm_state_addr_cmp(tmpl, x, family)) &&
+ (if_id == 0 || if_id == x->if_id);
}
/*
* 0 or more than 0 is returned when validation is succeeded (either bypass
- * because of optional transport mode, or next index of the mathced secpath
+ * because of optional transport mode, or next index of the matched secpath
* state with the template.
* -1 is returned when no matching template is found.
* Otherwise "-2 - errored_index" is returned.
*/
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
int idx = start;
@@ -3249,9 +3414,16 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
} else
start = -1;
for (; idx < sp->len; idx++) {
- if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
+ if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
return ++idx;
if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+ if (idx < sp->verified_cnt) {
+ /* Secpath entry previously verified, consider optional and
+ * continue searching
+ */
+ continue;
+ }
+
if (start == -1)
start = -2-idx;
break;
@@ -3260,20 +3432,108 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
return start;
}
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static void
+decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
+{
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ memset(fl4, 0, sizeof(struct flowi4));
+
+ if (reverse) {
+ fl4->saddr = flkeys->addrs.ipv4.dst;
+ fl4->daddr = flkeys->addrs.ipv4.src;
+ fl4->fl4_sport = flkeys->ports.dst;
+ fl4->fl4_dport = flkeys->ports.src;
+ } else {
+ fl4->saddr = flkeys->addrs.ipv4.src;
+ fl4->daddr = flkeys->addrs.ipv4.dst;
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ }
+
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl4->fl4_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMP:
+ fl4->fl4_icmp_type = flkeys->icmp.type;
+ fl4->fl4_icmp_code = flkeys->icmp.code;
+ break;
+ }
+
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+ fl4->flowi4_dscp = inet_dsfield_to_dscp(flkeys->ip.tos);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void
+decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
+{
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ memset(fl6, 0, sizeof(struct flowi6));
+
+ if (reverse) {
+ fl6->saddr = flkeys->addrs.ipv6.dst;
+ fl6->daddr = flkeys->addrs.ipv6.src;
+ fl6->fl6_sport = flkeys->ports.dst;
+ fl6->fl6_dport = flkeys->ports.src;
+ } else {
+ fl6->saddr = flkeys->addrs.ipv6.src;
+ fl6->daddr = flkeys->addrs.ipv6.dst;
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ }
+
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl6->fl6_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMPV6:
+ fl6->fl6_icmp_type = flkeys->icmp.type;
+ fl6->fl6_icmp_code = flkeys->icmp.code;
+ break;
+ }
+
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+}
+#endif
+
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
- const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
- int err;
+ struct xfrm_flow_keys flkeys;
- if (unlikely(afinfo == NULL))
+ memset(&flkeys, 0, sizeof(flkeys));
+ __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
+ NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+ switch (family) {
+ case AF_INET:
+ decode_session4(&flkeys, fl, reverse);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ decode_session6(&flkeys, fl, reverse);
+ break;
+#endif
+ default:
return -EAFNOSUPPORT;
+ }
- afinfo->decode_session(skb, fl, reverse);
+ fl->flowi_mark = skb->mark;
+ if (reverse) {
+ fl->flowi_oif = skb->skb_iif;
+ } else {
+ int oif = 0;
- err = security_xfrm_decode_session(skb, &fl->flowi_secid);
- rcu_read_unlock();
- return err;
+ if (skb_dst(skb) && skb_dst(skb)->dev)
+ oif = skb_dst(skb)->dev->ifindex;
+
+ fl->flowi_oif = oif;
+ }
+
+ return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
@@ -3289,6 +3549,130 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
return 0;
}
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (family == AF_INET &&
+ fl4->flowi4_proto == IPPROTO_ICMP &&
+ (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+ fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6) {
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+ (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+ fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+ fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+ const struct flowi *fl, struct flowi *fl1)
+{
+ bool ret = true;
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) :
+ (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+ if (!newskb)
+ return true;
+
+ if (!pskb_pull(newskb, hl))
+ goto out;
+
+ skb_reset_network_header(newskb);
+
+ if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+ goto out;
+
+ fl1->flowi_oif = fl->flowi_oif;
+ fl1->flowi_mark = fl->flowi_mark;
+ fl1->flowi_dscp = fl->flowi_dscp;
+ nf_nat_decode_session(newskb, fl1, family);
+ ret = false;
+
+out:
+ consume_skb(newskb);
+ return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+ const struct xfrm_selector *sel,
+ const struct flowi *fl)
+{
+ bool ret = false;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return ret;
+
+ ret = xfrm_selector_match(sel, &fl1, family);
+ }
+
+ return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+ const struct flowi *fl, unsigned short family,
+ u32 if_id)
+{
+ struct xfrm_policy *pol = NULL;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+ struct net *net = dev_net(skb->dev);
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return pol;
+
+ pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL;
+ }
+
+ return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+ unsigned short family, struct dst_entry *dst)
+{
+ if (icmp_err_packet(fl, family)) {
+ struct net *net = dev_net(skb->dev);
+ struct dst_entry *dst2;
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return dst;
+
+ dst_hold(dst);
+
+ dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+ if (IS_ERR(dst2))
+ return dst;
+
+ if (dst2->xfrm) {
+ dst_release(dst);
+ dst = dst2;
+ } else {
+ dst_release(dst2);
+ }
+ }
+
+ return dst;
+}
+
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
unsigned short family)
{
@@ -3303,23 +3687,25 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int xerr_idx = -1;
const struct xfrm_if_cb *ifcb;
struct sec_path *sp;
- struct xfrm_if *xi;
u32 if_id = 0;
rcu_read_lock();
ifcb = xfrm_if_get_cb();
if (ifcb) {
- xi = ifcb->decode_session(skb);
- if (xi)
- if_id = xi->p.if_id;
+ struct xfrm_if_decode_session_result r;
+
+ if (ifcb->decode_session(skb, family, &r)) {
+ if_id = r.if_id;
+ net = r.net;
+ }
}
rcu_read_unlock();
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
- if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
+ if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
return 0;
}
@@ -3333,9 +3719,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
+ int ret = 0;
+
if (!xfrm_selector_match(&x->sel, &fl, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
- return 0;
+ ret = 1;
+ if (x->props.flags & XFRM_STATE_ICMP &&
+ xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+ ret = 0;
+ if (ret) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+ return 0;
+ }
}
}
}
@@ -3358,8 +3752,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 0;
}
+ if (!pol && dir == XFRM_POLICY_FWD)
+ pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
if (!pol) {
- if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
+ const bool is_crypto_offload = sp &&
+ (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO);
+
+ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+ return 0;
+ }
+
+ if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
@@ -3367,7 +3772,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = ktime_get_real_seconds();
+ /* This lockless write can happen from different cpus. */
+ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());
pols[0] = pol;
npols++;
@@ -3379,9 +3785,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
- pols[1]->curlft.use_time = ktime_get_real_seconds();
+ /* This write can happen from different cpus. */
+ WRITE_ONCE(pols[1]->curlft.use_time,
+ ktime_get_real_seconds());
npols++;
}
}
@@ -3413,8 +3822,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
tpp[ti++] = &pols[pi]->xfrm_vec[i];
}
xfrm_nr = ti;
+
if (npols > 1) {
- xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
+ xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
}
@@ -3423,9 +3833,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
* Order is _important_. Later we will implement
* some barriers, but at the moment barriers
* are implied between each two transformations.
+ * Upon success, marks secpath entries as having been
+ * verified to allow them to be skipped in future policy
+ * checks (e.g. nested tunnels).
*/
for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
- k = xfrm_policy_ok(tpp[i], sp, k, family);
+ k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
if (k < 0) {
if (k < -1)
/* "-2 - errored_index" returned */
@@ -3441,6 +3854,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_pols_put(pols, npols);
+ sp->verified_cnt = k;
+
return 1;
}
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
@@ -3460,22 +3875,32 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
struct dst_entry *dst;
int res = 1;
- if (xfrm_decode_session(skb, &fl, family) < 0) {
+ if (xfrm_decode_session(net, skb, &fl, family) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
skb_dst_force(skb);
- if (!skb_dst(skb)) {
+ dst = skb_dst(skb);
+ if (!dst) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
+ /* ignore return value from skb_dstref_steal, xfrm_lookup takes
+ * care of dropping the refcnt if needed.
+ */
+ skb_dstref_steal(skb);
+
+ dst = xfrm_lookup(net, dst, &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
}
+
+ if (dst && !dst->xfrm)
+ dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
skb_dst_set(skb, dst);
return res;
}
@@ -3506,7 +3931,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* This will force stale_bundle() to fail on any xdst bundle with
* this dst linked in it.
*/
- if (dst->obsolete < 0 && !stale_bundle(dst))
+ if (READ_ONCE(dst->obsolete) < 0 && !stale_bundle(dst))
return dst;
return NULL;
@@ -3520,7 +3945,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
- dst->dev = dev_net(dev)->loopback_dev;
+ dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}
@@ -3532,15 +3957,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
/* Impossible. Such dst must be popped before reaches point of failure. */
}
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst) {
- if (dst->obsolete) {
- dst_release(dst);
- dst = NULL;
- }
- }
- return dst;
+ if (READ_ONCE(dst->obsolete))
+ sk_dst_reset(sk);
}
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
@@ -3794,10 +4214,7 @@ static int __net_init xfrm_policy_init(struct net *net)
int dir, err;
if (net_eq(net, &init_net)) {
- xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
- sizeof(struct xfrm_dst),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL);
+ xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = rhashtable_init(&xfrm_policy_inexact_table,
&xfrm_pol_inexact_params);
BUG_ON(err);
@@ -3816,7 +4233,6 @@ static int __net_init xfrm_policy_init(struct net *net)
net->xfrm.policy_count[dir] = 0;
net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
- INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
htab = &net->xfrm.policy_bydst[dir];
htab->table = xfrm_hash_alloc(sz);
@@ -3870,8 +4286,6 @@ static void xfrm_policy_fini(struct net *net)
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy_hash *htab;
- WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
-
htab = &net->xfrm.policy_bydst[dir];
sz = (htab->hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(htab->table));
@@ -3895,7 +4309,11 @@ static int __net_init xfrm_net_init(struct net *net)
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
+ net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;
rv = xfrm_statistics_init(net);
if (rv < 0)
@@ -3910,8 +4328,14 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out_sysctl;
+ rv = xfrm_nat_keepalive_net_init(net);
+ if (rv < 0)
+ goto out_nat_keepalive;
+
return 0;
+out_nat_keepalive:
+ xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -3924,6 +4348,7 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
+ xfrm_nat_keepalive_net_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -3935,15 +4360,57 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
.exit = xfrm_net_exit,
};
+static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct xfrm_flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct xfrm_flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct xfrm_flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct xfrm_flow_keys, gre),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IP,
+ .offset = offsetof(struct xfrm_flow_keys, ip),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_ICMP,
+ .offset = offsetof(struct xfrm_flow_keys, icmp),
+ },
+};
+
void __init xfrm_init(void)
{
+ skb_flow_dissector_init(&xfrm_session_dissector,
+ xfrm_flow_dissector_keys,
+ ARRAY_SIZE(xfrm_flow_dissector_keys));
+
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
- seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
- RCU_INIT_POINTER(xfrm_if_cb, NULL);
- synchronize_rcu();
+#ifdef CONFIG_XFRM_ESPINTCP
+ espintcp_init();
+#endif
+
+ register_xfrm_state_bpf();
+ xfrm_nat_keepalive_init(AF_INET);
}
#ifdef CONFIG_AUDITSYSCALL
@@ -4012,61 +4479,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
-static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
- const struct xfrm_selector *sel_tgt)
-{
- if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
- if (sel_tgt->family == sel_cmp->family &&
- xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
- sel_cmp->family) &&
- xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
- sel_cmp->family) &&
- sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
- sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
- return true;
- }
- } else {
- if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
- return true;
- }
- }
- return false;
-}
-
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
- u8 dir, u8 type, struct net *net)
+ u8 dir, u8 type, struct net *net, u32 if_id)
{
- struct xfrm_policy *pol, *ret = NULL;
- struct hlist_head *chain;
- u32 priority = ~0U;
+ struct xfrm_policy *pol;
+ struct flowi fl;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
- hlist_for_each_entry(pol, chain, bydst) {
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
- priority = ret->priority;
- break;
- }
- }
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if ((pol->priority >= priority) && ret)
- break;
+ memset(&fl, 0, sizeof(fl));
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
+ fl.flowi_proto = sel->proto;
+
+ switch (sel->family) {
+ case AF_INET:
+ fl.u.ip4.saddr = sel->saddr.a4;
+ fl.u.ip4.daddr = sel->daddr.a4;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
break;
- }
+ fl.u.flowi4_oif = sel->ifindex;
+ fl.u.ip4.fl4_sport = sel->sport;
+ fl.u.ip4.fl4_dport = sel->dport;
+ break;
+ case AF_INET6:
+ fl.u.ip6.saddr = sel->saddr.in6;
+ fl.u.ip6.daddr = sel->daddr.in6;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
+ break;
+ fl.u.flowi6_oif = sel->ifindex;
+ fl.u.ip6.fl4_sport = sel->sport;
+ fl.u.ip6.fl4_dport = sel->dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
}
- xfrm_pol_hold(ret);
+ rcu_read_lock();
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
+ if (IS_ERR_OR_NULL(pol))
+ goto out_unlock;
- return ret;
+ if (!xfrm_pol_hold_rcu(pol))
+ pol = NULL;
+out_unlock:
+ rcu_read_unlock();
+ return pol;
}
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
@@ -4078,6 +4534,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
switch (t->mode) {
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
+ case XFRM_MODE_IPTFS:
if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
m->old_family) &&
xfrm_addr_equal(&t->saddr, &m->old_saddr,
@@ -4100,7 +4557,8 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
- struct xfrm_migrate *m, int num_migrate)
+ struct xfrm_migrate *m, int num_migrate,
+ struct netlink_ext_ack *extack)
{
struct xfrm_migrate *mp;
int i, j, n = 0;
@@ -4108,6 +4566,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
write_lock_bh(&pol->lock);
if (unlikely(pol->walk.dead)) {
/* target policy has been deleted */
+ NL_SET_ERR_MSG(extack, "Target policy not found");
write_unlock_bh(&pol->lock);
return -ENOENT;
}
@@ -4118,7 +4577,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
continue;
n++;
if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
- pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
+ pol->xfrm_vec[i].mode != XFRM_MODE_BEET &&
+ pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS)
continue;
/* update endpoints */
memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
@@ -4139,17 +4599,22 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
return 0;
}
-static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
+static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate,
+ struct netlink_ext_ack *extack)
{
int i, j;
- if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
+ if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) {
+ NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)");
return -EINVAL;
+ }
for (i = 0; i < num_migrate; i++) {
if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
- xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
+ xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) {
+ NL_SET_ERR_MSG(extack, "Addresses in the MIGRATE attribute's list cannot be null");
return -EINVAL;
+ }
/* check if there is any duplicated entry */
for (j = i + 1; j < num_migrate; j++) {
@@ -4160,8 +4625,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
m[i].proto == m[j].proto &&
m[i].mode == m[j].mode &&
m[i].reqid == m[j].reqid &&
- m[i].old_family == m[j].old_family)
+ m[i].old_family == m[j].old_family) {
+ NL_SET_ERR_MSG(extack, "Entries in the MIGRATE attribute's list must be unique");
return -EINVAL;
+ }
}
}
@@ -4171,7 +4638,8 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap)
+ struct xfrm_encap_tmpl *encap, u32 if_id,
+ struct netlink_ext_ack *extack, struct xfrm_user_offload *xuo)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -4181,26 +4649,30 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *mp;
/* Stage 0 - sanity checks */
- if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
+ err = xfrm_migrate_check(m, num_migrate, extack);
+ if (err < 0)
goto out;
if (dir >= XFRM_POLICY_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid policy direction");
err = -EINVAL;
goto out;
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
- err = -ENOENT;
+ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
+ if (IS_ERR_OR_NULL(pol)) {
+ NL_SET_ERR_MSG(extack, "Target policy not found");
+ err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}
/* Stage 2 - find and update state(s) */
for (i = 0, mp = m; i < num_migrate; i++, mp++) {
- if ((x = xfrm_migrate_state_find(mp, net))) {
+ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
x_cur[nx_cur] = x;
nx_cur++;
- xc = xfrm_state_migrate(x, mp, encap);
+ xc = xfrm_state_migrate(x, mp, encap, net, xuo, extack);
if (xc) {
x_new[nx_new] = xc;
nx_new++;
@@ -4212,7 +4684,8 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 3 - update policy */
- if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
+ err = xfrm_policy_migrate(pol, m, num_migrate, extack);
+ if (err < 0)
goto restore_state;
/* Stage 4 - delete old state(s) */