summaryrefslogtreecommitdiff
path: root/include/net/xfrm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/xfrm.h')
-rw-r--r--include/net/xfrm.h219
1 files changed, 165 insertions, 54 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 57c743b7e4fe..39365fd2ea17 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,6 +19,7 @@
#include <net/sock.h>
#include <net/dst.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ipv6.h>
@@ -37,6 +38,7 @@
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
+#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
@@ -67,27 +69,27 @@
- instance of a transformer, struct xfrm_state (=SA)
- template to clone xfrm_state, struct xfrm_tmpl
- SPD is plain linear list of xfrm_policy rules, ordered by priority.
+ SPD is organized as a hash table (for policies that meet the minimum address
+ prefix length setting, net->xfrm.policy_hthresh). Other policies are stored in
+ lists, sorted into an rbtree ordered by destination and source address networks.
+ See net/xfrm/xfrm_policy.c for details.
+
(To be compatible with existing pfkeyv2 implementations,
many rules with priority of 0x7fffffff are allowed to exist and
such rules are ordered in an unpredictable way, thanks to bsd folks.)
- Lookup is plain linear search until the first match with selector.
-
If "action" is "block", then we prohibit the flow, otherwise:
if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
policy entry has list of up to XFRM_MAX_DEPTH transformations,
described by templates xfrm_tmpl. Each template is resolved
to a complete xfrm_state (see below) and we pack bundle of transformations
- to a dst_entry returned to requestor.
+ to a dst_entry returned to requester.
dst -. xfrm .-> xfrm_state #1
|---. child .-> dst -. xfrm .-> xfrm_state #2
|---. child .-> dst -. xfrm .-> xfrm_state #3
|---. child .-> NULL
- Bundles are cached at xrfm_policy struct (field ->bundles).
-
Resolution of xrfm_tmpl
-----------------------
@@ -178,13 +180,19 @@ struct xfrm_state {
struct hlist_node gclist;
struct hlist_node bydst;
};
- struct hlist_node bysrc;
+ union {
+ struct hlist_node dev_gclist;
+ struct hlist_node bysrc;
+ };
struct hlist_node byspi;
struct hlist_node byseq;
+ struct hlist_node state_cache;
+ struct hlist_node state_cache_input;
refcount_t refcnt;
spinlock_t lock;
+ u32 pcpu_num;
struct xfrm_id id;
struct xfrm_selector sel;
struct xfrm_mark mark;
@@ -206,6 +214,7 @@ struct xfrm_state {
u16 family;
xfrm_address_t saddr;
int header_len;
+ int enc_hdr_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
@@ -229,6 +238,10 @@ struct xfrm_state {
struct xfrm_encap_tmpl *encap;
struct sock __rcu *encap_sk;
+ /* NAT keepalive */
+ u32 nat_keepalive_interval; /* seconds */
+ time64_t nat_keepalive_expiration;
+
/* Data for care-of address */
xfrm_address_t *coaddr;
@@ -291,6 +304,10 @@ struct xfrm_state {
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
+ u8 dir;
+
+ const struct xfrm_mode_cbs *mode_cbs;
+ void *mode_data;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -341,20 +358,25 @@ struct xfrm_if_cb {
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
void xfrm_if_unregister_cb(void);
+struct xfrm_dst_lookup_params {
+ struct net *net;
+ dscp_t dscp;
+ int oif;
+ xfrm_address_t *saddr;
+ xfrm_address_t *daddr;
+ u32 mark;
+ __u8 ipproto;
+ union flowi_uli uli;
+};
+
struct net_device;
struct xfrm_type;
struct xfrm_dst;
struct xfrm_policy_afinfo {
struct dst_ops *dst_ops;
- struct dst_entry *(*dst_lookup)(struct net *net,
- int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark);
- int (*get_saddr)(struct net *net, int oif,
- xfrm_address_t *saddr,
- xfrm_address_t *daddr,
- u32 mark);
+ struct dst_entry *(*dst_lookup)(const struct xfrm_dst_lookup_params *params);
+ int (*get_saddr)(xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params);
int (*fill_dst)(struct xfrm_dst *xdst,
struct net_device *dev,
const struct flowi *fl);
@@ -442,6 +464,54 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
+void xfrm_set_type_offload(struct xfrm_state *x);
+static inline void xfrm_unset_type_offload(struct xfrm_state *x)
+{
+ if (!x->type_offload)
+ return;
+
+ module_put(x->type_offload->owner);
+ x->type_offload = NULL;
+}
+
+/**
+ * struct xfrm_mode_cbs - XFRM mode callbacks
+ * @owner: module owner or NULL
+ * @init_state: Add/init mode specific state in `xfrm_state *x`
+ * @clone_state: Copy mode specific values from `orig` to new state `x`
+ * @destroy_state: Cleanup mode specific state from `xfrm_state *x`
+ * @user_init: Process mode specific netlink attributes from user
+ * @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
+ * @sa_len: Return space required to store mode specific netlink attributes
+ * @get_inner_mtu: Return avail payload space after removing encap overhead
+ * @input: Process received packet from SA using mode
+ * @output: Output given packet using mode
+ * @prepare_output: Add mode specific encapsulation to packet in skb. On return
+ * `transport_header` should point at ESP header, `network_header` should
+ * point at outer IP header and `mac_header` should point at the
+ * protocol/nexthdr field of the outer IP.
+ *
+ * One should examine and understand the specific uses of these callbacks in
+ * xfrm for further detail on how and when these functions are called. RTSL.
+ */
+struct xfrm_mode_cbs {
+ struct module *owner;
+ int (*init_state)(struct xfrm_state *x);
+ int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
+ void (*destroy_state)(struct xfrm_state *x);
+ int (*user_init)(struct net *net, struct xfrm_state *x,
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack);
+ int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
+ unsigned int (*sa_len)(const struct xfrm_state *x);
+ u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
+ int (*input)(struct xfrm_state *x, struct sk_buff *skb);
+ int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
+ int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
+};
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
+void xfrm_unregister_mode_cbs(u8 mode);
static inline int xfrm_af2proto(unsigned int family)
{
@@ -518,11 +588,44 @@ struct xfrm_policy_queue {
unsigned long timeout;
};
+/**
+ * struct xfrm_policy - xfrm policy
+ * @xp_net: network namespace the policy lives in
+ * @bydst: hlist node for SPD hash table or rbtree list
+ * @byidx: hlist node for index hash table
+ * @state_cache_list: hlist head for policy cached xfrm states
+ * @lock: serialize changes to policy structure members
+ * @refcnt: reference count, freed once it reaches 0
+ * @pos: kernel internal tie-breaker to determine age of policy
+ * @timer: timer
+ * @genid: generation, used to invalidate old policies
+ * @priority: priority, set by userspace
+ * @index: policy index (autogenerated)
+ * @if_id: virtual xfrm interface id
+ * @mark: packet mark
+ * @selector: selector
+ * @lft: lifetime configuration data
+ * @curlft: lifetime state
+ * @walk: list head on pernet policy list
+ * @polq: queue to hold packets while acquire operation is in progress
+ * @bydst_reinsert: policy tree node needs to be merged
+ * @type: XFRM_POLICY_TYPE_MAIN or _SUB
+ * @action: XFRM_POLICY_ALLOW or _BLOCK
+ * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP
+ * @xfrm_nr: number of used templates in @xfrm_vec
+ * @family: protocol family
+ * @security: SELinux security label
+ * @xfrm_vec: array of templates to resolve state
+ * @rcu: rcu head, used to defer memory release
+ * @xdo: hardware offload state
+ */
struct xfrm_policy {
possible_net_t xp_net;
struct hlist_node bydst;
struct hlist_node byidx;
+ struct hlist_head state_cache_list;
+
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
@@ -547,7 +650,6 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
- struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
struct xfrm_dev_offload xdo;
@@ -1008,7 +1110,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
int link; /* ifindex of underlying L2 interface */
- u32 if_id; /* interface identifyer */
+ u32 if_id; /* interface identifier */
bool collect_md;
};
@@ -1049,6 +1151,9 @@ struct xfrm_offload {
#define CRYPTO_INVALID_PACKET_SYNTAX 64
#define CRYPTO_INVALID_PROTOCOL 128
+ /* Used to keep whole l2 header for transport mode GRO */
+ __u32 orig_mac_len;
+
__u8 proto;
__u8 inner_ipproto;
};
@@ -1172,9 +1277,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (xo) {
x = xfrm_input_state(skb);
- if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
- return (xo->flags & CRYPTO_DONE) &&
- (xo->status & CRYPTO_SUCCESS);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ bool check = (xo->flags & CRYPTO_DONE) &&
+ (xo->status & CRYPTO_SUCCESS);
+
+ /* The packets here are plain ones and secpath was
+ * needed to indicate that hardware already handled
+ * them and there is no need to do anything in addition.
+ *
+ * Consume secpath which was set by drivers.
+ */
+ secpath_reset(skb);
+ return check;
+ }
}
return __xfrm_check_nopolicy(net, skb, dir) ||
@@ -1584,7 +1699,7 @@ void xfrm_state_update_stats(struct net *net);
static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
- struct net_device *dev = xdo->dev;
+ struct net_device *dev = READ_ONCE(xdo->dev);
if (dev && dev->xfrmdev_ops &&
dev->xfrmdev_ops->xdo_dev_state_update_stats)
@@ -1600,6 +1715,10 @@ int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -1639,7 +1758,7 @@ struct xfrmk_spdinfo {
u32 spdhmcnt;
};
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1650,8 +1769,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
- struct netlink_ext_ack *extack);
+int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
@@ -1663,6 +1781,15 @@ int xfrm_trans_queue(struct sk_buff *skb,
struct sk_buff *));
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
+int xfrm4_tunnel_check_size(struct sk_buff *skb);
+#if IS_ENABLED(CONFIG_IPV6)
+int xfrm6_tunnel_check_size(struct sk_buff *skb);
+#else
+static inline int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ return -EMSGSIZE;
+}
+#endif
#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
@@ -1724,10 +1851,7 @@ static inline int xfrm_user_policy(struct sock *sk, int optname,
}
#endif
-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- int family, u32 mark);
+struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params);
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
@@ -1754,7 +1878,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
struct netlink_ext_ack *extack);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
- u8 mode, u32 reqid, u32 if_id, u8 proto,
+ u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
unsigned short family);
@@ -1942,13 +2066,16 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
struct xfrm_user_offload *xuo, u8 dir,
struct netlink_ext_ack *extack);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+void xfrm_dev_state_delete(struct xfrm_state *x);
+void xfrm_dev_state_free(struct xfrm_state *x);
static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
{
struct xfrm_dev_offload *xso = &x->xso;
+ struct net_device *dev = READ_ONCE(xso->dev);
- if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
- xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+ if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+ dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
}
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
@@ -1969,28 +2096,6 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
return false;
}
-static inline void xfrm_dev_state_delete(struct xfrm_state *x)
-{
- struct xfrm_dev_offload *xso = &x->xso;
-
- if (xso->dev)
- xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
-}
-
-static inline void xfrm_dev_state_free(struct xfrm_state *x)
-{
- struct xfrm_dev_offload *xso = &x->xso;
- struct net_device *dev = xso->dev;
-
- if (dev && dev->xfrmdev_ops) {
- if (dev->xfrmdev_ops->xdo_dev_state_free)
- dev->xfrmdev_ops->xdo_dev_state_free(x);
- xso->dev = NULL;
- xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
- netdev_put(dev, &xso->dev_tracker);
- }
-}
-
static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
{
struct xfrm_dev_offload *xdo = &x->xdo;
@@ -2199,4 +2304,10 @@ static inline int register_xfrm_state_bpf(void)
}
#endif
+int xfrm_nat_keepalive_init(unsigned short family);
+void xfrm_nat_keepalive_fini(unsigned short family);
+int xfrm_nat_keepalive_net_init(struct net *net);
+int xfrm_nat_keepalive_net_fini(struct net *net);
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);
+
#endif /* _NET_XFRM_H */