diff options
Diffstat (limited to 'net/xfrm/xfrm_policy.c')
-rw-r--r-- | net/xfrm/xfrm_policy.c | 517 |
1 files changed, 343 insertions, 174 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index da6ecc6b3e15..6551e588fe52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -29,6 +29,7 @@ #include <linux/audit.h> #include <linux/rhashtable.h> #include <linux/if_tunnel.h> +#include <linux/icmp.h> #include <net/dst.h> #include <net/flow.h> #include <net/inet_ecn.h> @@ -44,6 +45,7 @@ #ifdef CONFIG_XFRM_ESPINTCP #include <net/espintcp.h> #endif +#include <net/inet_dscp.h> #include "xfrm_hash.h" @@ -108,7 +110,11 @@ struct xfrm_pol_inexact_node { * 4. saddr:any list from saddr tree * * This result set then needs to be searched for the policy with - * the lowest priority. If two results have same prio, youngest one wins. + * the lowest priority. If two candidates have the same priority, the + * struct xfrm_policy pos member with the lower number is used. + * + * This replicates previous single-list-search algorithm which would + * return first matching policy in the (ordered-by-priority) list. */ struct xfrm_pol_inexact_key { @@ -195,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net, static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, @@ -266,10 +270,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void) return rcu_dereference(xfrm_if_cb); } -struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family, u32 mark) +struct dst_entry *__xfrm_dst_lookup(int family, + const struct xfrm_dst_lookup_params *params) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -278,7 +280,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); + dst = afinfo->dst_lookup(params); rcu_read_unlock(); @@ -287,11 +289,12 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, - int tos, int oif, + dscp_t dscp, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family, u32 mark) { + struct xfrm_dst_lookup_params params; struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; @@ -306,7 +309,29 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); + params.net = net; + params.saddr = saddr; + params.daddr = daddr; + params.dscp = dscp; + params.oif = oif; + params.mark = mark; + params.ipproto = x->id.proto; + if (x->encap) { + switch (x->encap->encap_type) { + case UDP_ENCAP_ESPINUDP: + params.ipproto = IPPROTO_UDP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + case TCP_ENCAP_ESPINTCP: + params.ipproto = IPPROTO_TCP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + } + } + + dst = __xfrm_dst_lookup(family, ¶ms); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -409,7 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); - INIT_HLIST_NODE(&policy->bydst_inexact_list); + INIT_HLIST_HEAD(&policy->state_cache_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -451,6 +476,11 @@ EXPORT_SYMBOL(xfrm_policy_destroy); static void xfrm_policy_kill(struct xfrm_policy *policy) { + struct net *net = xp_net(policy); + struct xfrm_state *x; + + xfrm_dev_policy_delete(policy); + write_lock_bh(&policy->lock); policy->walk.dead = 1; write_unlock_bh(&policy->lock); @@ -464,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) if (del_timer(&policy->timer)) xfrm_pol_put(policy); + /* XXX: Flush state cache */ + spin_lock_bh(&net->xfrm.xfrm_state_lock); + hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) { + hlist_del_init_rcu(&x->state_cache); + } + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + xfrm_pol_put(policy); } @@ -1225,26 +1262,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl) return ERR_PTR(-EEXIST); } - chain = &net->xfrm.policy_inexact[dir]; - xfrm_policy_insert_inexact_list(chain, policy); - if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } +static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy) +{ + int dir; + + if (policy->walk.dead) + return true; + + dir = xfrm_policy_id2dir(policy->index); + return dir >= XFRM_POLICY_MAX; +} + static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); - unsigned int hmask; struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; - struct hlist_head *odst; struct hlist_node *newpos; - int i; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; @@ -1271,13 +1313,10 @@ static void xfrm_hash_rebuild(struct work_struct *work) struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; - if (policy->walk.dead) + if (xfrm_policy_is_dead_or_sk(policy)) continue; dir = xfrm_policy_id2dir(policy->index); - if (dir >= XFRM_POLICY_MAX) - continue; - if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { if (policy->family == AF_INET) { dbits = rbits4; @@ -1308,23 +1347,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) goto out_unlock; } - /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - struct hlist_node *n; - - hlist_for_each_entry_safe(policy, n, - &net->xfrm.policy_inexact[dir], - bydst_inexact_list) { - hlist_del_rcu(&policy->bydst); - hlist_del_init(&policy->bydst_inexact_list); - } - - hmask = net->xfrm.policy_bydst[dir].hmask; - odst = net->xfrm.policy_bydst[dir].table; - for (i = hmask; i >= 0; i--) { - hlist_for_each_entry_safe(policy, n, odst + i, bydst) - hlist_del_rcu(&policy->bydst); - } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; @@ -1342,14 +1365,13 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (policy->walk.dead) - continue; - dir = xfrm_policy_id2dir(policy->index); - if (dir >= XFRM_POLICY_MAX) { - /* skip socket policies */ + if (xfrm_policy_is_dead_or_sk(policy)) continue; - } + + hlist_del_rcu(&policy->bydst); + newpos = NULL; + dir = xfrm_policy_id2dir(policy->index); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -1516,42 +1538,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = { .automatic_shrinking = true, }; -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy) -{ - struct xfrm_policy *pol, *delpol = NULL; - struct hlist_node *newpos = NULL; - int i = 0; - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if (pol->type == policy->type && - pol->if_id == policy->if_id && - !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(&policy->mark, pol) && - xfrm_sec_ctx_match(pol->security, policy->security) && - !WARN_ON(delpol)) { - delpol = pol; - if (policy->priority > pol->priority) - continue; - } else if (policy->priority >= pol->priority) { - newpos = &pol->bydst_inexact_list; - continue; - } - if (delpol) - break; - } - - if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) - hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos); - else - hlist_add_head_rcu(&policy->bydst_inexact_list, chain); - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - pol->pos = i; - i++; - } -} - static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) @@ -1849,7 +1835,6 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -1890,7 +1875,6 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -2293,10 +2277,52 @@ out: return pol; } +static u32 xfrm_gen_pos_slow(struct net *net) +{ + struct xfrm_policy *policy; + u32 i = 0; + + /* oldest entry is last in list */ + list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + if (!xfrm_policy_is_dead_or_sk(policy)) + policy->pos = ++i; + } + + return i; +} + +static u32 xfrm_gen_pos(struct net *net) +{ + const struct xfrm_policy *policy; + u32 i = 0; + + /* most recently added policy is at the head of the list */ + list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { + if (xfrm_policy_is_dead_or_sk(policy)) + continue; + + if (policy->pos == UINT_MAX) + return xfrm_gen_pos_slow(net); + + i = policy->pos + 1; + break; + } + + return i; +} + static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_FWD: + case XFRM_POLICY_OUT: + pol->pos = xfrm_gen_pos(net); + break; + } + list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); @@ -2313,7 +2339,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); - hlist_del_init(&pol->bydst_inexact_list); hlist_del(&pol->byidx); } @@ -2341,7 +2366,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) pol = __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { - xfrm_dev_policy_delete(pol); xfrm_policy_kill(pol); return 0; } @@ -2440,15 +2464,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) } static int -xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, - xfrm_address_t *remote, unsigned short family, u32 mark) +xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { int err; const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, oif, local, remote, mark); + err = afinfo->get_saddr(saddr, params); rcu_read_unlock(); return err; } @@ -2473,13 +2497,19 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_IPTFS || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); + struct xfrm_dst_lookup_params params; + + memset(¶ms, 0, sizeof(params)); + params.net = net; + params.oif = fl->flowi_oif; + params.daddr = remote; + error = xfrm_get_saddr(tmpl->encap_family, &tmp, + ¶ms); if (error) goto fail; local = &tmp; @@ -2488,6 +2518,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family, policy->if_id); + if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR); + xfrm_state_put(x); + error = -EINVAL; + goto fail; + } if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; @@ -2552,10 +2588,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, } -static int xfrm_get_tos(const struct flowi *fl, int family) +static dscp_t xfrm_get_dscp(const struct flowi *fl, int family) { if (family == AF_INET) - return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; + return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos); return 0; } @@ -2597,8 +2633,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); + path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } @@ -2644,13 +2679,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, int header_len = 0; int nfheader_len = 0; int trailer_len = 0; - int tos; int family = policy->selector.family; xfrm_address_t saddr, daddr; + dscp_t dscp; xfrm_flowi_addr_get(fl, &saddr, &daddr, family); - tos = xfrm_get_tos(fl, family); + dscp = xfrm_get_dscp(fl, family); dst_hold(dst); @@ -2698,8 +2733,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, family = xfrm[i]->props.family; oif = fl->flowi_oif ? : fl->flowi_l3mdev; - dst = xfrm_dst_lookup(xfrm[i], tos, oif, - &saddr, &daddr, family, mark); + dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr, + &daddr, family, mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -2714,13 +2749,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->input = dst_discard; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - dst1->output = afinfo->output; - else - dst1->output = dst_discard_out; - rcu_read_unlock(); + if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) { + dst1->output = xfrm[i]->mode_cbs->output; + } else { + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock(); + } xdst_prev = xdst; @@ -2925,7 +2964,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(net, skb->sk, skb); + dst_output(net, skb_to_full_sk(skb), skb); } out: @@ -3252,10 +3291,12 @@ no_transform: dst_release(dst); dst = dst_orig; } + ok: xfrm_pols_put(pols, drop_pols); if (dst && dst->xfrm && - dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || + dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; return dst; @@ -3505,6 +3546,130 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int return 0; } +static bool icmp_err_packet(const struct flowi *fl, unsigned short family) +{ + const struct flowi4 *fl4 = &fl->u.ip4; + + if (family == AF_INET && + fl4->flowi4_proto == IPPROTO_ICMP && + (fl4->fl4_icmp_type == ICMP_DEST_UNREACH || + fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) { + const struct flowi6 *fl6 = &fl->u.ip6; + + if (fl6->flowi6_proto == IPPROTO_ICMPV6 && + (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH || + fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG || + fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED)) + return true; + } +#endif + return false; +} + +static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family, + const struct flowi *fl, struct flowi *fl1) +{ + bool ret = true; + struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); + int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) : + (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)); + + if (!newskb) + return true; + + if (!pskb_pull(newskb, hl)) + goto out; + + skb_reset_network_header(newskb); + + if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0) + goto out; + + fl1->flowi_oif = fl->flowi_oif; + fl1->flowi_mark = fl->flowi_mark; + fl1->flowi_tos = fl->flowi_tos; + nf_nat_decode_session(newskb, fl1, family); + ret = false; + +out: + consume_skb(newskb); + return ret; +} + +static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family, + const struct xfrm_selector *sel, + const struct flowi *fl) +{ + bool ret = false; + + if (icmp_err_packet(fl, family)) { + struct flowi fl1; + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return ret; + + ret = xfrm_selector_match(sel, &fl1, family); + } + + return ret; +} + +static inline struct +xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, + const struct flowi *fl, unsigned short family, + u32 if_id) +{ + struct xfrm_policy *pol = NULL; + + if (icmp_err_packet(fl, family)) { + struct flowi fl1; + struct net *net = dev_net(skb->dev); + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return pol; + + pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); + if (IS_ERR(pol)) + pol = NULL; + } + + return pol; +} + +static inline struct +dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl, + unsigned short family, struct dst_entry *dst) +{ + if (icmp_err_packet(fl, family)) { + struct net *net = dev_net(skb->dev); + struct dst_entry *dst2; + struct flowi fl1; + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return dst; + + dst_hold(dst); + + dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP)); + + if (IS_ERR(dst2)) + return dst; + + if (dst2->xfrm) { + dst_release(dst); + dst = dst2; + } else { + dst_release(dst2); + } + } + + return dst; +} + int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { @@ -3551,9 +3716,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; + int ret = 0; + if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); - return 0; + ret = 1; + if (x->props.flags & XFRM_STATE_ICMP && + xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl)) + ret = 0; + if (ret) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); + return 0; + } } } } @@ -3576,13 +3749,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 0; } + if (!pol && dir == XFRM_POLICY_FWD) + pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); + if (!pol) { + const bool is_crypto_offload = sp && + (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO); + if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } - if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; @@ -3709,6 +3888,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) res = 0; dst = NULL; } + + if (dst && !dst->xfrm) + dst = xfrm_out_fwd_icmp(skb, &fl, family, dst); + skb_dst_set(skb, dst); return res; } @@ -3765,15 +3948,10 @@ static void xfrm_link_failure(struct sk_buff *skb) /* Impossible. Such dst must be popped before reaches point of failure. */ } -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; + if (dst->obsolete) + sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) @@ -4027,10 +4205,7 @@ static int __net_init xfrm_policy_init(struct net *net) int dir, err; if (net_eq(net, &init_net)) { - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", - sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); + xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = rhashtable_init(&xfrm_policy_inexact_table, &xfrm_pol_inexact_params); BUG_ON(err); @@ -4049,7 +4224,6 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[dir] = 0; net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; - INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); @@ -4103,8 +4277,6 @@ static void xfrm_policy_fini(struct net *net) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; - WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); - htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); @@ -4147,8 +4319,14 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out_sysctl; + rv = xfrm_nat_keepalive_net_init(net); + if (rv < 0) + goto out_nat_keepalive; + return 0; +out_nat_keepalive: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -4161,6 +4339,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -4222,6 +4401,7 @@ void __init xfrm_init(void) #endif register_xfrm_state_bpf(); + xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL @@ -4290,63 +4470,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, - const struct xfrm_selector *sel_tgt) -{ - if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { - if (sel_tgt->family == sel_cmp->family && - xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) && - xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) && - sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && - sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { - return true; - } - } else { - if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { - return true; - } - } - return false; -} - static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { - struct xfrm_policy *pol, *ret = NULL; - struct hlist_head *chain; - u32 priority = ~0U; + struct xfrm_policy *pol; + struct flowi fl; - spin_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, chain, bydst) { - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; - priority = ret->priority; - break; - } - } - chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if ((pol->priority >= priority) && ret) - break; + memset(&fl, 0, sizeof(fl)); - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; + fl.flowi_proto = sel->proto; + + switch (sel->family) { + case AF_INET: + fl.u.ip4.saddr = sel->saddr.a4; + fl.u.ip4.daddr = sel->daddr.a4; + if (sel->proto == IPSEC_ULPROTO_ANY) break; - } + fl.u.flowi4_oif = sel->ifindex; + fl.u.ip4.fl4_sport = sel->sport; + fl.u.ip4.fl4_dport = sel->dport; + break; + case AF_INET6: + fl.u.ip6.saddr = sel->saddr.in6; + fl.u.ip6.daddr = sel->daddr.in6; + if (sel->proto == IPSEC_ULPROTO_ANY) + break; + fl.u.flowi6_oif = sel->ifindex; + fl.u.ip6.fl4_sport = sel->sport; + fl.u.ip6.fl4_dport = sel->dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); } - xfrm_pol_hold(ret); + rcu_read_lock(); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); + if (IS_ERR_OR_NULL(pol)) + goto out_unlock; - return ret; + if (!xfrm_pol_hold_rcu(pol)) + pol = NULL; +out_unlock: + rcu_read_unlock(); + return pol; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) @@ -4358,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, @@ -4400,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, continue; n++; if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && - pol->xfrm_vec[i].mode != XFRM_MODE_BEET) + pol->xfrm_vec[i].mode != XFRM_MODE_BEET && + pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, @@ -4483,9 +4652,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); - if (!pol) { + if (IS_ERR_OR_NULL(pol)) { NL_SET_ERR_MSG(extack, "Target policy not found"); - err = -ENOENT; + err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; goto out; } |