summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-05-18 14:05:48 -0700
committerJakub Kicinski <kuba@kernel.org>2023-05-18 14:05:49 -0700
commit1ecaf17d097c91a7bd2979c57f7c81c5eeaf526b (patch)
tree379efe7cfe3b5acbb01658200c423c15aa6d7d3f
parent02f8fc1a67c160b2faab2c9e9439026deb076971 (diff)
parente05b5362166b18a224c30502e81416e4d622d3e4 (diff)
Merge tag 'nf-next-2023-05-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says: ==================== Netfilter updates for net-next nftables updates: 1. Allow key existence checks with maps. At the moment the kernel requires userspace to pass a destination register for the associated value, make this optional so userspace can query if the key exists, just like with normal sets. 2. nftables maintains a counter per set that holds the number of elements. This counter gets decremented on element removal, but its only incremented if the set has a upper maximum value. Increment unconditionally, this will allow us to update the maximum value later on. 3. At DCCP option maching, from Jeremy Sowden. 4. use struct_size macro, from Christophe JAILLET. Conntrack: 5. Squash holes in struct nf_conntrack_expect, also Christophe JAILLET. 6. Allow clash resolution for GRE Protocol to avoid a packet drop, from Faicker Mo. Flowtable: Simplify route logic and split large functions into smaller chunks, from Pablo Neira Ayuso. * tag 'nf-next-2023-05-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: flowtable: split IPv6 datapath in helper functions netfilter: flowtable: split IPv4 datapath in helper functions netfilter: flowtable: simplify route logic netfilter: conntrack: allow insertion clash of gre protocol netfilter: nft_set_pipapo: Use struct_size() netfilter: Reorder fields in 'struct nf_conntrack_expect' netfilter: nft_exthdr: add boolean DCCP option matching netfilter: nf_tables: always increment set element count netfilter: nf_tables: relax set/map validation checks ==================== Link: https://lore.kernel.org/r/20230518100759.84858-1-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h18
-rw-r--r--include/net/netfilter/nf_flow_table.h4
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h2
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_flow_table_core.c24
-rw-r--r--net/netfilter/nf_flow_table_ip.c231
-rw-r--r--net/netfilter/nf_tables_api.c11
-rw-r--r--net/netfilter/nft_exthdr.c106
-rw-r--r--net/netfilter/nft_flow_offload.c12
-rw-r--r--net/netfilter/nft_lookup.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c6
11 files changed, 303 insertions, 135 deletions
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 0855b60fba17..cf0d81be5a96 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -26,6 +26,15 @@ struct nf_conntrack_expect {
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
+ /* Usage count. */
+ refcount_t use;
+
+ /* Flags */
+ unsigned int flags;
+
+ /* Expectation class */
+ unsigned int class;
+
/* Function to call after setup and insertion */
void (*expectfn)(struct nf_conn *new,
struct nf_conntrack_expect *this);
@@ -39,15 +48,6 @@ struct nf_conntrack_expect {
/* Timer function; deletes the expectation. */
struct timer_list timeout;
- /* Usage count. */
- refcount_t use;
-
- /* Flags */
- unsigned int flags;
-
- /* Expectation class */
- unsigned int class;
-
#if IS_ENABLED(CONFIG_NF_NAT)
union nf_inet_addr saved_addr;
/* This is the original per-proto part, used to map the
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ebb28ec5b6fa..546fc4a9b939 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -263,8 +263,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
up_write(&flow_table->flow_block_lock);
}
-int flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route);
+void flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c4d4d8e42dc8..e059dc2644df 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -859,12 +859,14 @@ enum nft_exthdr_flags {
* @NFT_EXTHDR_OP_TCP: match against tcp options
* @NFT_EXTHDR_OP_IPV4: match against ipv4 options
* @NFT_EXTHDR_OP_SCTP: match against sctp chunks
+ * @NFT_EXTHDR_OP_DCCP: match against dccp otions
*/
enum nft_exthdr_op {
NFT_EXTHDR_OP_IPV6,
NFT_EXTHDR_OP_TCPOPT,
NFT_EXTHDR_OP_IPV4,
NFT_EXTHDR_OP_SCTP,
+ NFT_EXTHDR_OP_DCCP,
__NFT_EXTHDR_OP_MAX
};
#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1)
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 728eeb0aea87..ad6f0ca40cd2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -296,6 +296,7 @@ void nf_conntrack_gre_init_net(struct net *net)
/* protocol helper struct */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
.l4proto = IPPROTO_GRE,
+ .allow_clash = true,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..b46dd897f2c5 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
- if (!dst_hold_safe(route->tuple[dir].dst))
- return -1;
-
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
@@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow,
dst_release(flow->tuplehash[dir].tuple.dst_cache);
}
-int flow_offload_route_init(struct flow_offload *flow,
+void flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route)
{
- int err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- if (err < 0)
- return err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
- if (err < 0)
- goto err_route_reply;
-
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
flow->type = NF_FLOW_OFFLOAD_ROUTE;
-
- return 0;
-
-err_route_reply:
- nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
-
- return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..d248763917ad 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -163,38 +163,43 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
}
}
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+struct nf_flowtable_ctx {
+ const struct net_device *in;
+ u32 offset;
+ u32 hdrsize;
+};
+
+static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
u8 ipproto;
- if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return -1;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
thoff = (iph->ihl * 4);
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
- thoff += offset;
+ thoff += ctx->offset;
ipproto = iph->protocol;
switch (ipproto) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -204,7 +209,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (iph->ttl <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (ipproto) {
@@ -224,13 +229,13 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
}
}
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
@@ -336,58 +341,56 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
return NF_STOLEN;
}
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table, struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
- enum flow_offload_tuple_dir dir;
- struct flow_offload *flow;
- struct net_device *outdev;
- u32 hdrsize, offset = 0;
- unsigned int thoff, mtu;
- struct rtable *rt;
- struct iphdr *iph;
- __be32 nexthop;
- int ret;
if (skb->protocol != htons(ETH_P_IP) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
- return NF_ACCEPT;
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ return NULL;
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
+ if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
+ return NULL;
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
+{
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ unsigned int thoff, mtu;
+ struct iphdr *iph;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
- thoff = (iph->ihl * 4) + offset;
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = (iph->ihl * 4) + ctx->offset;
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
flow_offload_refresh(flow_table, flow);
nf_flow_encap_pop(skb, tuplehash);
- thoff -= offset;
+ thoff -= ctx->offset;
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -398,6 +401,35 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rtable *rt;
+ __be32 nexthop;
+ int ret;
+
+ tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
+ if (!tuplehash)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
@@ -406,6 +438,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rtable *)tuplehash->tuple.dst_cache;
@@ -535,32 +570,31 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow,
}
}
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
u8 nexthdr;
- thoff = sizeof(*ip6h) + offset;
+ thoff = sizeof(*ip6h) + ctx->offset;
if (!pskb_may_pull(skb, thoff))
return -1;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
nexthdr = ip6h->nexthdr;
switch (nexthdr) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -570,7 +604,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (ip6h->hop_limit <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (nexthdr) {
@@ -590,65 +624,47 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
}
}
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
}
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
- struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
- const struct in6_addr *nexthop;
struct flow_offload *flow;
- struct net_device *outdev;
unsigned int thoff, mtu;
- u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
- struct rt6_info *rt;
- int ret;
-
- if (skb->protocol != htons(ETH_P_IPV6) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
- return NF_ACCEPT;
-
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
-
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- thoff = sizeof(*ip6h) + offset;
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = sizeof(*ip6h) + ctx->offset;
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
flow_offload_refresh(flow_table, flow);
@@ -663,6 +679,52 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct sk_buff *skb)
+{
+ struct flow_offload_tuple tuple = {};
+
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+ return NULL;
+
+ if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
+ return NULL;
+
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ const struct in6_addr *nexthop;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rt6_info *rt;
+ int ret;
+
+ tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
@@ -671,6 +733,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 59fb8320ab4d..7a61de80a8d1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6541,10 +6541,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
- err = -ENFILE;
- goto err_set_full;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
+
+ if (!atomic_add_unless(&set->nelems, 1, max)) {
+ err = -ENFILE;
+ goto err_set_full;
+ }
}
nft_trans_elem(trans) = elem;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a54a7f772cec..671474e59817 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,6 +10,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
@@ -406,6 +407,82 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int thoff, dataoff, optoff, optlen, i;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dccp_hdr *dh;
+ struct dccp_hdr _dh;
+
+ if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
+ goto err;
+
+ thoff = nft_thoff(pkt);
+
+ dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
+ if (!dh)
+ goto err;
+
+ dataoff = dh->dccph_doff * sizeof(u32);
+ optoff = __dccp_hdr_len(dh);
+ if (dataoff <= optoff)
+ goto err;
+
+ optlen = dataoff - optoff;
+
+ for (i = 0; i < optlen; ) {
+ /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
+ * the length; the remaining options are at least 2B long. In
+ * all cases, the first byte contains the option type. In
+ * multi-byte options, the second byte contains the option
+ * length, which must be at least two: 1 for the type plus 1 for
+ * the length plus 0-253 for any following option data. We
+ * aren't interested in the option data, only the type and the
+ * length, so we don't need to read more than two bytes at a
+ * time.
+ */
+ unsigned int buflen = optlen - i;
+ u8 buf[2], *bufp;
+ u8 type, len;
+
+ if (buflen > sizeof(buf))
+ buflen = sizeof(buf);
+
+ bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
+ &buf);
+ if (!bufp)
+ goto err;
+
+ type = bufp[0];
+
+ if (type == priv->type) {
+ *dest = 1;
+ return;
+ }
+
+ if (type <= DCCPO_MAX_RESERVED) {
+ i++;
+ continue;
+ }
+
+ if (buflen < 2)
+ goto err;
+
+ len = bufp[1];
+
+ if (len < 2)
+ goto err;
+
+ i += len;
+ }
+
+err:
+ *dest = 0;
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
+static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err = nft_exthdr_init(ctx, expr, tb);
+
+ if (err < 0)
+ return err;
+
+ if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
+static const struct nft_expr_ops nft_exthdr_dccp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_dccp_eval,
+ .init = nft_exthdr_dccp_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
+ case NFT_EXTHDR_OP_DCCP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_dccp_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e860d8fe0e5e..5ef9146e74ad 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
break;
}
+ if (!dst_hold_safe(this_dst))
+ return -ENOENT;
+
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst)
+ if (!other_dst) {
+ dst_release(this_dst);
return -ENOENT;
+ }
nft_default_forward_path(route, this_dst, dir);
nft_default_forward_path(route, other_dst, !dir);
@@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow)
goto err_flow_alloc;
- if (flow_offload_route_init(flow, &route) < 0)
- goto err_flow_add;
+ flow_offload_route_init(flow, &route);
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
@@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (ret < 0)
goto err_flow_add;
- dst_release(route.tuple[!dir].dst);
return;
err_flow_add:
flow_offload_free(flow);
err_flow_alloc:
+ dst_release(route.tuple[dir].dst);
dst_release(route.tuple[!dir].dst);
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 03ef4fdaa460..29ac48cdd6db 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -19,6 +19,7 @@ struct nft_lookup {
struct nft_set *set;
u8 sreg;
u8 dreg;
+ bool dreg_set;
bool invert;
struct nft_set_binding binding;
};
@@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
}
if (ext) {
- if (set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
@@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (flags & ~NFT_LOOKUP_F_INV)
return -EINVAL;
- if (flags & NFT_LOOKUP_F_INV) {
- if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
- }
}
if (tb[NFTA_LOOKUP_DREG] != NULL) {
@@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
set->dlen);
if (err < 0)
return err;
- } else if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ priv->dreg_set = true;
+ } else if (set->flags & NFT_SET_MAP) {
+ /* Map given, but user asks for lookup only (i.e. to
+ * ignore value assoicated with key).
+ *
+ * This makes no sense for anonymous maps since they are
+ * scoped to the rule, but for named sets this can be useful.
+ */
+ if (set->flags & NFT_SET_ANONYMOUS)
+ return -EINVAL;
+ }
priv->binding.flags = set->flags & NFT_SET_MAP;
@@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
- if (priv->set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 06d46d182634..34c684e121d3 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
struct nft_pipapo_match *new;
int i;
- new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
- GFP_KERNEL);
+ new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
@@ -2059,8 +2058,7 @@ static int nft_pipapo_init(const struct nft_set *set,
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
- m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
- GFP_KERNEL);
+ m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL);
if (!m)
return -ENOMEM;