summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig30
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_bpf_link.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c50
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c826
-rw-r--r--net/netfilter/nf_conntrack_standalone.c118
-rw-r--r--net/netfilter/nf_log.c26
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_nat_proto.c43
-rw-r--r--net/netfilter/nf_tables_api.c139
-rw-r--r--net/netfilter/nf_tables_trace.c3
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c5
-rw-r--r--net/netfilter/nfnetlink_hook.c76
-rw-r--r--net/netfilter/nft_chain_filter.c2
-rw-r--r--net/netfilter/nft_dynset.c10
-rw-r--r--net/netfilter/nft_exthdr.c8
-rw-r--r--net/netfilter/nft_lookup.c27
-rw-r--r--net/netfilter/nft_objref.c5
-rw-r--r--net/netfilter/nft_set_bitmap.c11
-rw-r--r--net/netfilter/nft_set_hash.c54
-rw-r--r--net/netfilter/nft_set_pipapo.c204
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c26
-rw-r--r--net/netfilter/nft_set_rbtree.c40
-rw-r--r--net/netfilter/x_tables.c16
-rw-r--r--net/netfilter/xt_nfacct.c4
30 files changed, 380 insertions, 1367 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2560416218d0..6cdc994fdc8a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -195,16 +195,6 @@ config NF_CONNTRACK_LABELS
config NF_CONNTRACK_OVS
bool
-config NF_CT_PROTO_DCCP
- bool 'DCCP protocol connection tracking support'
- depends on NETFILTER_ADVANCED
- default y
- help
- With this option enabled, the layer 3 independent connection
- tracking code will be able to do state tracking on DCCP connections.
-
- If unsure, say Y.
-
config NF_CT_PROTO_GRE
bool
@@ -516,6 +506,12 @@ config NFT_CT
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
+config NFT_EXTHDR_DCCP
+ bool "Netfilter nf_tables exthdr DCCP support (DEPRECATED)"
+ default n
+ help
+ This option adds support for matching on DCCP extension headers.
+
config NFT_FLOW_OFFLOAD
depends on NF_CONNTRACK && NF_FLOW_TABLE
tristate "Netfilter nf_tables hardware flow offload module"
@@ -762,6 +758,16 @@ config NETFILTER_XTABLES_COMPAT
If unsure, say N.
+config NETFILTER_XTABLES_LEGACY
+ bool "Netfilter legacy tables support"
+ depends on !PREEMPT_RT
+ help
+ Say Y here if you still require support for legacy tables. This is
+ required by the legacy tools (iptables-legacy) and is not needed if
+ you use iptables over nftables (iptables-nft).
+ Legacy support is not limited to IP, it also includes EBTABLES and
+ ARPTABLES.
+
comment "Xtables combined modules"
config NETFILTER_XT_MARK
@@ -1278,9 +1284,9 @@ config NETFILTER_XT_MATCH_CPU
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_DCCP
- tristate '"dccp" protocol match support'
+ tristate '"dccp" protocol match support (DEPRECATED)'
depends on NETFILTER_ADVANCED
- default IP_DCCP
+ default n
help
With this option enabled, you will be able to use the iptables
`dccp' match in order to match on DCCP source/destination ports
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f0aa4d7ef499..e43e20f529f8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -12,7 +12,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o
-nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
ifeq ($(CONFIG_NF_CONNTRACK),m)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 44b2ad695c15..965f3c8e5089 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -926,7 +926,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
/* Using mod_timer_pending will ensure the timer is not
- * modified after the final del_timer in ip_vs_conn_expire.
+ * modified after the final timer_delete in ip_vs_conn_expire.
*/
if (timer_pending(&cp->timer) &&
time_after(cp->timer.expires, jiffies))
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 014f07740369..95af252b2939 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -97,7 +97,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest)
if (!dest_dst)
return NULL;
dst = dest_dst->dst_cache;
- if (dst->obsolete &&
+ if (READ_ONCE(dst->obsolete) &&
dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
return NULL;
return dest_dst;
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 06b084844700..3e4fb9ddcd36 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -17,7 +17,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
.skb = skb,
};
- return bpf_prog_run(prog, &ctx);
+ return bpf_prog_run_pin_on_cpu(prog, &ctx);
}
struct bpf_nf_link {
@@ -225,7 +225,8 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (!link)
return -ENOMEM;
- bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog,
+ attr->link_create.attach_type);
link->hook_ops.hook = nf_hook_run_bpf;
link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 201d3c4ec623..344f88295976 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -136,8 +136,8 @@ static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
}
/* return true if we need to recompute hashes (in case hash table was resized) */
-static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
- unsigned int h2, unsigned int sequence)
+static bool nf_conntrack_double_lock(unsigned int h1, unsigned int h2,
+ unsigned int sequence)
{
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
@@ -329,9 +329,6 @@ nf_ct_get_tuple(const struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP:
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP:
-#endif
/* fallthrough */
return nf_ct_get_tuple_ports(skb, dataoff, tuple);
default:
@@ -616,7 +613,7 @@ static void __nf_ct_delete_from_lists(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
clean_from_lists(ct);
nf_conntrack_double_unlock(hash, reply_hash);
@@ -893,7 +890,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);
@@ -1124,6 +1121,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
&nf_conntrack_hash[repl_idx]);
+ /* confirmed bit must be set after hlist add, not before:
+ * loser_ct can still be visible to other cpu due to
+ * SLAB_TYPESAFE_BY_RCU.
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
@@ -1231,7 +1234,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
* connections for unconfirmed conns. But packet copies and
@@ -1260,8 +1263,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- ct->status |= IPS_CONFIRMED;
-
if (unlikely(nf_ct_is_dying(ct))) {
NF_CT_STAT_INC(net, insert_failed);
goto dying;
@@ -1293,7 +1294,7 @@ chaintoolong:
}
}
- /* Timer relative to confirmation time, not original
+ /* Timeout is relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
@@ -1301,11 +1302,21 @@ chaintoolong:
__nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
- * starting the timer and setting the CONFIRMED bit. The RCU barriers
- * guarantee that no other CPU can find the conntrack before the above
- * stores are visible.
+ * setting ct->timeout. The RCU barriers guarantee that no other CPU
+ * can find the conntrack before the above stores are visible.
*/
__nf_conntrack_hash_insert(ct, hash, reply_hash);
+
+ /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
+ * skip entries that lack this bit. This happens when a CPU is looking
+ * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
+ * or when another CPU encounters this entry right after the insertion
+ * but before the set-confirm-bit below. This bit must not be set until
+ * after __nf_conntrack_hash_insert().
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
@@ -1662,7 +1673,11 @@ __nf_conntrack_alloc(struct net *net,
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
atomic_dec(&cnet->count);
- net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
+ if (net == &init_net)
+ net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
+ else
+ net_warn_ratelimited("nf_conntrack: table full in netns %u, dropping packet\n",
+ net->ns.inum);
return ERR_PTR(-ENOMEM);
}
}
@@ -1982,11 +1997,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
return nf_conntrack_sctp_packet(ct, skb, dataoff,
ctinfo, state);
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP:
- return nf_conntrack_dccp_packet(ct, skb, dataoff,
- ctinfo, state);
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
return nf_conntrack_gre_packet(ct, skb, dataoff,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2cc0fde23344..486d52b45fe5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2036,7 +2036,6 @@ static void ctnetlink_change_mark(struct nf_conn *ct,
static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
[CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
- [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
[CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED },
};
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index f36727ed91e1..bc1d96686b9c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -100,9 +100,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
-#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
#endif
@@ -681,9 +678,6 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#if IS_ENABLED(CONFIG_IPV6)
nf_conntrack_icmpv6_init_net(net);
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- nf_conntrack_dccp_init_net(net);
-#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
nf_conntrack_sctp_init_net(net);
#endif
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
deleted file mode 100644
index ebc4f733bb2e..000000000000
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ /dev/null
@@ -1,826 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * DCCP connection tracking protocol helper
- *
- * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/dccp.h>
-#include <linux/slab.h>
-
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-#include <net/netfilter/nf_log.h>
-
-/* Timeouts are based on values from RFC4340:
- *
- * - REQUEST:
- *
- * 8.1.2. Client Request
- *
- * A client MAY give up on its DCCP-Requests after some time
- * (3 minutes, for example).
- *
- * - RESPOND:
- *
- * 8.1.3. Server Response
- *
- * It MAY also leave the RESPOND state for CLOSED after a timeout of
- * not less than 4MSL (8 minutes);
- *
- * - PARTOPEN:
- *
- * 8.1.5. Handshake Completion
- *
- * If the client remains in PARTOPEN for more than 4MSL (8 minutes),
- * it SHOULD reset the connection with Reset Code 2, "Aborted".
- *
- * - OPEN:
- *
- * The DCCP timestamp overflows after 11.9 hours. If the connection
- * stays idle this long the sequence number won't be recognized
- * as valid anymore.
- *
- * - CLOSEREQ/CLOSING:
- *
- * 8.3. Termination
- *
- * The retransmission timer should initially be set to go off in two
- * round-trip times and should back off to not less than once every
- * 64 seconds ...
- *
- * - TIMEWAIT:
- *
- * 4.3. States
- *
- * A server or client socket remains in this state for 2MSL (4 minutes)
- * after the connection has been town down, ...
- */
-
-#define DCCP_MSL (2 * 60 * HZ)
-
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-static const char * const dccp_state_names[] = {
- [CT_DCCP_NONE] = "NONE",
- [CT_DCCP_REQUEST] = "REQUEST",
- [CT_DCCP_RESPOND] = "RESPOND",
- [CT_DCCP_PARTOPEN] = "PARTOPEN",
- [CT_DCCP_OPEN] = "OPEN",
- [CT_DCCP_CLOSEREQ] = "CLOSEREQ",
- [CT_DCCP_CLOSING] = "CLOSING",
- [CT_DCCP_TIMEWAIT] = "TIMEWAIT",
- [CT_DCCP_IGNORE] = "IGNORE",
- [CT_DCCP_INVALID] = "INVALID",
-};
-#endif
-
-#define sNO CT_DCCP_NONE
-#define sRQ CT_DCCP_REQUEST
-#define sRS CT_DCCP_RESPOND
-#define sPO CT_DCCP_PARTOPEN
-#define sOP CT_DCCP_OPEN
-#define sCR CT_DCCP_CLOSEREQ
-#define sCG CT_DCCP_CLOSING
-#define sTW CT_DCCP_TIMEWAIT
-#define sIG CT_DCCP_IGNORE
-#define sIV CT_DCCP_INVALID
-
-/*
- * DCCP state transition table
- *
- * The assumption is the same as for TCP tracking:
- *
- * We are the man in the middle. All the packets go through us but might
- * get lost in transit to the destination. It is assumed that the destination
- * can't receive segments we haven't seen.
- *
- * The following states exist:
- *
- * NONE: Initial state, expecting Request
- * REQUEST: Request seen, waiting for Response from server
- * RESPOND: Response from server seen, waiting for Ack from client
- * PARTOPEN: Ack after Response seen, waiting for packet other than Response,
- * Reset or Sync from server
- * OPEN: Packet other than Response, Reset or Sync seen
- * CLOSEREQ: CloseReq from server seen, expecting Close from client
- * CLOSING: Close seen, expecting Reset
- * TIMEWAIT: Reset seen
- * IGNORE: Not determinable whether packet is valid
- *
- * Some states exist only on one side of the connection: REQUEST, RESPOND,
- * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to
- * the one it was in before.
- *
- * Packets are marked as ignored (sIG) if we don't know if they're valid
- * (for example a reincarnation of a connection we didn't notice is dead
- * already) and the server may send back a connection closing Reset or a
- * Response. They're also used for Sync/SyncAck packets, which we don't
- * care about.
- */
-static const u_int8_t
-dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = {
- [CT_DCCP_ROLE_CLIENT] = {
- [DCCP_PKT_REQUEST] = {
- /*
- * sNO -> sRQ Regular Request
- * sRQ -> sRQ Retransmitted Request or reincarnation
- * sRS -> sRS Retransmitted Request (apparently Response
- * got lost after we saw it) or reincarnation
- * sPO -> sIG Ignore, conntrack might be out of sync
- * sOP -> sIG Ignore, conntrack might be out of sync
- * sCR -> sIG Ignore, conntrack might be out of sync
- * sCG -> sIG Ignore, conntrack might be out of sync
- * sTW -> sRQ Reincarnation
- *
- * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */
- sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ,
- },
- [DCCP_PKT_RESPONSE] = {
- /*
- * sNO -> sIV Invalid
- * sRQ -> sIG Ignore, might be response to ignored Request
- * sRS -> sIG Ignore, might be response to ignored Request
- * sPO -> sIG Ignore, might be response to ignored Request
- * sOP -> sIG Ignore, might be response to ignored Request
- * sCR -> sIG Ignore, might be response to ignored Request
- * sCG -> sIG Ignore, might be response to ignored Request
- * sTW -> sIV Invalid, reincarnation in reverse direction
- * goes through sRQ
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV,
- },
- [DCCP_PKT_ACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
- * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN
- * sOP -> sOP Regular ACK, remain in OPEN
- * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
- },
- [DCCP_PKT_DATA] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.)
- * sOP -> sOP Regular Data packet
- * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV,
- },
- [DCCP_PKT_DATAACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
- * sPO -> sPO Remain in PARTOPEN state
- * sOP -> sOP Regular DataAck packet in OPEN state
- * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
- },
- [DCCP_PKT_CLOSEREQ] = {
- /*
- * CLOSEREQ may only be sent by the server.
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV
- },
- [DCCP_PKT_CLOSE] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sCG Client-initiated close
- * sOP -> sCG Client-initiated close
- * sCR -> sCG Close in response to CloseReq (8.3.)
- * sCG -> sCG Retransmit
- * sTW -> sIV Late retransmit, already in TIME_WAIT
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV
- },
- [DCCP_PKT_RESET] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.)
- * sRS -> sTW Response received without Request
- * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.)
- * sOP -> sTW Connection reset
- * sCR -> sTW Connection reset
- * sCG -> sTW Connection reset
- * sTW -> sIG Ignore (don't refresh timer)
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG
- },
- [DCCP_PKT_SYNC] = {
- /*
- * We currently ignore Sync packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- [DCCP_PKT_SYNCACK] = {
- /*
- * We currently ignore SyncAck packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- },
- [CT_DCCP_ROLE_SERVER] = {
- [DCCP_PKT_REQUEST] = {
- /*
- * sNO -> sIV Invalid
- * sRQ -> sIG Ignore, conntrack might be out of sync
- * sRS -> sIG Ignore, conntrack might be out of sync
- * sPO -> sIG Ignore, conntrack might be out of sync
- * sOP -> sIG Ignore, conntrack might be out of sync
- * sCR -> sIG Ignore, conntrack might be out of sync
- * sCG -> sIG Ignore, conntrack might be out of sync
- * sTW -> sRQ Reincarnation, must reverse roles
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ
- },
- [DCCP_PKT_RESPONSE] = {
- /*
- * sNO -> sIV Response without Request
- * sRQ -> sRS Response to clients Request
- * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT)
- * sPO -> sIG Response to an ignored Request or late retransmit
- * sOP -> sIG Ignore, might be response to ignored Request
- * sCR -> sIG Ignore, might be response to ignored Request
- * sCG -> sIG Ignore, might be response to ignored Request
- * sTW -> sIV Invalid, Request from client in sTW moves to sRQ
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV
- },
- [DCCP_PKT_ACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular Ack in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_DATA] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular Data packet in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_DATAACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular DataAck in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_CLOSEREQ] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.)
- * sOP -> sCR CloseReq in OPEN state
- * sCR -> sCR Retransmit
- * sCG -> sCR Simultaneous close, client sends another Close
- * sTW -> sIV Already closed
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV
- },
- [DCCP_PKT_CLOSE] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP -> sCG Move direcly to CLOSING
- * sOP -> sCG Move to CLOSING
- * sCR -> sIV Close after CloseReq is invalid
- * sCG -> sCG Retransmit
- * sTW -> sIV Already closed
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV
- },
- [DCCP_PKT_RESET] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sTW Reset in response to Request
- * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.)
- * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.)
- * sOP -> sTW
- * sCR -> sTW
- * sCG -> sTW
- * sTW -> sIG Ignore (don't refresh timer)
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */
- sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG
- },
- [DCCP_PKT_SYNC] = {
- /*
- * We currently ignore Sync packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- [DCCP_PKT_SYNCACK] = {
- /*
- * We currently ignore SyncAck packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- },
-};
-
-static noinline bool
-dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- const struct dccp_hdr *dh,
- const struct nf_hook_state *hook_state)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_dccp_net *dn;
- const char *msg;
- u_int8_t state;
-
- state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
- switch (state) {
- default:
- dn = nf_dccp_pernet(net);
- if (dn->dccp_loose == 0) {
- msg = "not picking up existing connection ";
- goto out_invalid;
- }
- break;
- case CT_DCCP_REQUEST:
- break;
- case CT_DCCP_INVALID:
- msg = "invalid state transition ";
- goto out_invalid;
- }
-
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.state = CT_DCCP_NONE;
- ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
- ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
- ct->proto.dccp.handshake_seq = 0;
- return true;
-
-out_invalid:
- nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
- return false;
-}
-
-static u64 dccp_ack_seq(const struct dccp_hdr *dh)
-{
- const struct dccp_hdr_ack_bits *dhack;
-
- dhack = (void *)dh + __dccp_basic_hdr_len(dh);
- return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) +
- ntohl(dhack->dccph_ack_nr_low);
-}
-
-static bool dccp_error(const struct dccp_hdr *dh,
- struct sk_buff *skb, unsigned int dataoff,
- const struct nf_hook_state *state)
-{
- static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST |
- 1 << DCCP_PKT_RESPONSE |
- 1 << DCCP_PKT_CLOSEREQ |
- 1 << DCCP_PKT_CLOSE |
- 1 << DCCP_PKT_RESET |
- 1 << DCCP_PKT_SYNC |
- 1 << DCCP_PKT_SYNCACK;
- unsigned int dccp_len = skb->len - dataoff;
- unsigned int cscov;
- const char *msg;
- u8 type;
-
- BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG);
-
- if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
- dh->dccph_doff * 4 > dccp_len) {
- msg = "nf_ct_dccp: truncated/malformed packet ";
- goto out_invalid;
- }
-
- cscov = dccp_len;
- if (dh->dccph_cscov) {
- cscov = (dh->dccph_cscov - 1) * 4;
- if (cscov > dccp_len) {
- msg = "nf_ct_dccp: bad checksum coverage ";
- goto out_invalid;
- }
- }
-
- if (state->hook == NF_INET_PRE_ROUTING &&
- state->net->ct.sysctl_checksum &&
- nf_checksum_partial(skb, state->hook, dataoff, cscov,
- IPPROTO_DCCP, state->pf)) {
- msg = "nf_ct_dccp: bad checksum ";
- goto out_invalid;
- }
-
- type = dh->dccph_type;
- if (type >= DCCP_PKT_INVALID) {
- msg = "nf_ct_dccp: reserved packet type ";
- goto out_invalid;
- }
-
- if (test_bit(type, &require_seq48) && !dh->dccph_x) {
- msg = "nf_ct_dccp: type lacks 48bit sequence numbers";
- goto out_invalid;
- }
-
- return false;
-out_invalid:
- nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
- return true;
-}
-
-struct nf_conntrack_dccp_buf {
- struct dccp_hdr dh; /* generic header part */
- struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */
- union { /* depends on header type */
- struct dccp_hdr_ack_bits ack;
- struct dccp_hdr_request req;
- struct dccp_hdr_response response;
- struct dccp_hdr_reset rst;
- } u;
-};
-
-static struct dccp_hdr *
-dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh,
- struct nf_conntrack_dccp_buf *buf)
-{
- unsigned int hdrlen = __dccp_hdr_len(dh);
-
- if (hdrlen > sizeof(*buf))
- return NULL;
-
- return skb_header_pointer(skb, offset, hdrlen, buf);
-}
-
-int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- struct nf_conntrack_dccp_buf _dh;
- u_int8_t type, old_state, new_state;
- enum ct_dccp_roles role;
- unsigned int *timeouts;
- struct dccp_hdr *dh;
-
- dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
- if (!dh)
- return -NF_ACCEPT;
-
- if (dccp_error(dh, skb, dataoff, state))
- return -NF_ACCEPT;
-
- /* pull again, including possible 48 bit sequences and subtype header */
- dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
- if (!dh)
- return -NF_ACCEPT;
-
- type = dh->dccph_type;
- if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
- return -NF_ACCEPT;
-
- if (type == DCCP_PKT_RESET &&
- !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- /* Tear down connection immediately if only reply is a RESET */
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_ACCEPT;
- }
-
- spin_lock_bh(&ct->lock);
-
- role = ct->proto.dccp.role[dir];
- old_state = ct->proto.dccp.state;
- new_state = dccp_state_table[role][type][old_state];
-
- switch (new_state) {
- case CT_DCCP_REQUEST:
- if (old_state == CT_DCCP_TIMEWAIT &&
- role == CT_DCCP_ROLE_SERVER) {
- /* Reincarnation in the reverse direction: reopen and
- * reverse client/server roles. */
- ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER;
- }
- break;
- case CT_DCCP_RESPOND:
- if (old_state == CT_DCCP_REQUEST)
- ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
- break;
- case CT_DCCP_PARTOPEN:
- if (old_state == CT_DCCP_RESPOND &&
- type == DCCP_PKT_ACK &&
- dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq)
- set_bit(IPS_ASSURED_BIT, &ct->status);
- break;
- case CT_DCCP_IGNORE:
- /*
- * Connection tracking might be out of sync, so we ignore
- * packets that might establish a new connection and resync
- * if the server responds with a valid Response.
- */
- if (ct->proto.dccp.last_dir == !dir &&
- ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST &&
- type == DCCP_PKT_RESPONSE) {
- ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
- new_state = CT_DCCP_RESPOND;
- break;
- }
- ct->proto.dccp.last_dir = dir;
- ct->proto.dccp.last_pkt = type;
-
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
- return NF_ACCEPT;
- case CT_DCCP_INVALID:
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
- return -NF_ACCEPT;
- }
-
- ct->proto.dccp.last_dir = dir;
- ct->proto.dccp.last_pkt = type;
- ct->proto.dccp.state = new_state;
- spin_unlock_bh(&ct->lock);
-
- if (new_state != old_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
-
- timeouts = nf_ct_timeout_lookup(ct);
- if (!timeouts)
- timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
- nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
-
- return NF_ACCEPT;
-}
-
-static bool dccp_can_early_drop(const struct nf_conn *ct)
-{
- switch (ct->proto.dccp.state) {
- case CT_DCCP_CLOSEREQ:
- case CT_DCCP_CLOSING:
- case CT_DCCP_TIMEWAIT:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
-{
- seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- struct nf_conn *ct, bool destroy)
-{
- struct nlattr *nest_parms;
-
- spin_lock_bh(&ct->lock);
- nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP);
- if (!nest_parms)
- goto nla_put_failure;
- if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state))
- goto nla_put_failure;
-
- if (destroy)
- goto skip_state;
-
- if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE,
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) ||
- nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
- cpu_to_be64(ct->proto.dccp.handshake_seq),
- CTA_PROTOINFO_DCCP_PAD))
- goto nla_put_failure;
-skip_state:
- nla_nest_end(skb, nest_parms);
- spin_unlock_bh(&ct->lock);
-
- return 0;
-
-nla_put_failure:
- spin_unlock_bh(&ct->lock);
- return -1;
-}
-
-static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
- [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
- [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC },
-};
-
-#define DCCP_NLATTR_SIZE ( \
- NLA_ALIGN(NLA_HDRLEN + 1) + \
- NLA_ALIGN(NLA_HDRLEN + 1) + \
- NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
- NLA_ALIGN(NLA_HDRLEN + 0))
-
-static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
-{
- struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
- struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1];
- int err;
-
- if (!attr)
- return 0;
-
- err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr,
- dccp_nla_policy, NULL);
- if (err < 0)
- return err;
-
- if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
- !tb[CTA_PROTOINFO_DCCP_ROLE] ||
- nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
- nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
- return -EINVAL;
- }
-
- spin_lock_bh(&ct->lock);
- ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
- if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
- } else {
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
- }
- if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
- ct->proto.dccp.handshake_seq =
- be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
- }
- spin_unlock_bh(&ct->lock);
- return 0;
-}
-#endif
-
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
- struct net *net, void *data)
-{
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
- unsigned int *timeouts = data;
- int i;
-
- if (!timeouts)
- timeouts = dn->dccp_timeout;
-
- /* set default DCCP timeouts. */
- for (i=0; i<CT_DCCP_MAX; i++)
- timeouts[i] = dn->dccp_timeout[i];
-
- /* there's a 1:1 mapping between attributes and protocol states. */
- for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
- if (tb[i]) {
- timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
- }
- }
-
- timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST];
- return 0;
-}
-
-static int
-dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
- const unsigned int *timeouts = data;
- int i;
-
- for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
- if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
- goto nla_put_failure;
- }
- return 0;
-
-nla_put_failure:
- return -ENOSPC;
-}
-
-static const struct nla_policy
-dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
- [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-
-void nf_conntrack_dccp_init_net(struct net *net)
-{
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
-
- /* default values */
- dn->dccp_loose = 1;
- dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
-
- /* timeouts[0] is unused, make it same as SYN_SENT so
- * ->timeouts[0] contains 'new' timeout, like udp or icmp.
- */
- dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
- .l4proto = IPPROTO_DCCP,
- .can_early_drop = dccp_can_early_drop,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
- .print_conntrack = dccp_print_conntrack,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_size = DCCP_NLATTR_SIZE,
- .to_nlattr = dccp_to_nlattr,
- .from_nlattr = nlattr_to_dccp,
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
- .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
- .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
- .nla_policy = dccp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-};
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6c4cff10357d..9b8b10a85233 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -14,6 +14,7 @@
#include <linux/sysctl.h>
#endif
+#include <net/netfilter/nf_log.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -67,11 +68,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
ntohs(tuple->dst.u.udp.port));
break;
- case IPPROTO_DCCP:
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
- break;
case IPPROTO_SCTP:
seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.sctp.port),
@@ -279,7 +275,6 @@ static const char* l4proto_name(u16 proto)
case IPPROTO_ICMP: return "icmp";
case IPPROTO_TCP: return "tcp";
case IPPROTO_UDP: return "udp";
- case IPPROTO_DCCP: return "dccp";
case IPPROTO_GRE: return "gre";
case IPPROTO_SCTP: return "sctp";
case IPPROTO_UDPLITE: return "udplite";
@@ -561,6 +556,29 @@ nf_conntrack_hash_sysctl(const struct ctl_table *table, int write,
return ret;
}
+static int
+nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, i;
+
+ ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ if (*(u8 *)table->data == 0)
+ return ret;
+
+ /* Load nf_log_syslog only if no logger is currently registered */
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ if (nf_log_is_registered(i))
+ return ret;
+ }
+ request_module("%s", "nf_log_syslog");
+
+ return ret;
+}
+
static struct ctl_table_header *nf_ct_netfilter_header;
enum nf_ct_sysctl_index {
@@ -612,16 +630,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT,
- NF_SYSCTL_CT_PROTO_DCCP_LOOSE,
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
@@ -667,7 +675,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
+ .proc_handler = nf_conntrack_log_invalid_sysctl,
},
[NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",
@@ -895,58 +903,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
- .procname = "nf_conntrack_dccp_timeout_request",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = {
- .procname = "nf_conntrack_dccp_timeout_respond",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = {
- .procname = "nf_conntrack_dccp_timeout_partopen",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = {
- .procname = "nf_conntrack_dccp_timeout_open",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = {
- .procname = "nf_conntrack_dccp_timeout_closereq",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = {
- .procname = "nf_conntrack_dccp_timeout_closing",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = {
- .procname = "nf_conntrack_dccp_timeout_timewait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
- .procname = "nf_conntrack_dccp_loose",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = {
.procname = "nf_conntrack_gre_timeout",
@@ -1032,29 +988,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
#endif
}
-static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net,
- struct ctl_table *table)
-{
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
-
-#define XASSIGN(XNAME, dn) \
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \
- &(dn)->dccp_timeout[CT_DCCP_ ## XNAME]
-
- XASSIGN(REQUEST, dn);
- XASSIGN(RESPOND, dn);
- XASSIGN(PARTOPEN, dn);
- XASSIGN(OPEN, dn);
- XASSIGN(CLOSEREQ, dn);
- XASSIGN(CLOSING, dn);
- XASSIGN(TIMEWAIT, dn);
-#undef XASSIGN
-
- table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose;
-#endif
-}
-
static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
struct ctl_table *table)
{
@@ -1100,7 +1033,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
nf_conntrack_standalone_init_tcp_sysctl(net, table);
nf_conntrack_standalone_init_sctp_sysctl(net, table);
- nf_conntrack_standalone_init_dccp_sysctl(net, table);
nf_conntrack_standalone_init_gre_sysctl(net, table);
/* Don't allow non-init_net ns to alter global sysctls */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 6dd0de33eebd..74cef8bf554c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -125,6 +125,32 @@ void nf_log_unregister(struct nf_logger *logger)
}
EXPORT_SYMBOL(nf_log_unregister);
+/**
+ * nf_log_is_registered - Check if any logger is registered for a given
+ * protocol family.
+ *
+ * @pf: Protocol family
+ *
+ * Returns: true if at least one logger is active for @pf, false otherwise.
+ */
+bool nf_log_is_registered(u_int8_t pf)
+{
+ int i;
+
+ if (pf >= NFPROTO_NUMPROTO) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (rcu_access_pointer(loggers[pf][i]))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(nf_log_is_registered);
+
int nf_log_bind_pf(struct net *net, u_int8_t pf,
const struct nf_logger *logger)
{
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f391cd267922..78a61dac4ade 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -69,7 +69,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_dport = t->dst.u.all;
}
@@ -81,7 +80,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_sport = t->src.u.all;
}
@@ -102,7 +100,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_dport = t->dst.u.all;
}
@@ -114,7 +111,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_sport = t->src.u.all;
}
@@ -432,7 +428,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
- case IPPROTO_DCCP:
case IPPROTO_SCTP:
if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
@@ -632,7 +627,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
- case IPPROTO_DCCP:
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.all;
else
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index dc450cc81222..b14a434b9561 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -180,46 +180,6 @@ tcp_manip_pkt(struct sk_buff *skb,
}
static bool
-dccp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- struct dccp_hdr *hdr;
- __be16 *portptr, oldport, newport;
- int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
- if (skb->len >= hdroff + sizeof(struct dccp_hdr))
- hdrsize = sizeof(struct dccp_hdr);
-
- if (skb_ensure_writable(skb, hdroff + hdrsize))
- return false;
-
- hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- newport = tuple->src.u.dccp.port;
- portptr = &hdr->dccph_sport;
- } else {
- newport = tuple->dst.u.dccp.port;
- portptr = &hdr->dccph_dport;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- false);
-#endif
- return true;
-}
-
-static bool
icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
@@ -338,9 +298,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb,
case IPPROTO_ICMPV6:
return icmpv6_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
- case IPPROTO_DCCP:
- return dccp_manip_pkt(skb, iphdroff, hdroff,
- tuple, maniptype);
case IPPROTO_GRE:
return gre_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24c71ecb2179..13d0ed9d1895 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1153,9 +1153,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -1165,7 +1165,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
NFTA_TABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELTABLE) {
+ if (event == NFT_MSG_DELTABLE ||
+ event == NFT_MSG_DESTROYTABLE) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -2016,9 +2017,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -2028,7 +2029,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
NFTA_CHAIN_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELCHAIN && !hook_list) {
+ if (!hook_list &&
+ (event == NFT_MSG_DELCHAIN ||
+ event == NFT_MSG_DESTROYCHAIN)) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -4039,7 +4042,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
/* can only be used if rule is no longer visible to dumps */
static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -4845,9 +4848,10 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
u32 seq = ctx->seq;
int i;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
- NFNETLINK_V0, nft_base_seq(ctx->net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, ctx->family, NFNETLINK_V0,
+ nft_base_seq(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -4859,7 +4863,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
NFTA_SET_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELSET) {
+ if (event == NFT_MSG_DELSET ||
+ event == NFT_MSG_DESTROYSET) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -5859,7 +5864,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
@@ -8338,25 +8343,26 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
NFTA_OBJ_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELOBJ) {
+ if (event == NFT_MSG_DELOBJ ||
+ event == NFT_MSG_DESTROYOBJ) {
nlmsg_end(skb, nlh);
return 0;
}
- if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
- nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
+ if (nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -8889,11 +8895,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
list_for_each_entry(hook, &flowtable_hook->list, list) {
list_for_each_entry(ops, &hook->ops_list, list) {
- ops->pf = NFPROTO_NETDEV;
- ops->hooknum = flowtable_hook->num;
- ops->priority = flowtable_hook->priority;
- ops->priv = &flowtable->data;
- ops->hook = flowtable->data.type->hook;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable_hook->num;
+ ops->priority = flowtable_hook->priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
}
}
@@ -9382,9 +9389,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
struct nft_hook *hook;
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -9394,7 +9401,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
NFTA_FLOWTABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELFLOWTABLE && !hook_list) {
+ if (!hook_list &&
+ (event == NFT_MSG_DELFLOWTABLE ||
+ event == NFT_MSG_DESTROYFLOWTABLE)) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -9686,64 +9695,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
}
EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu);
-static void
-nf_tables_device_notify(const struct nft_table *table, int attr,
- const char *name, const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- struct net *net = dev_net(dev);
- struct nlmsghdr *nlh;
- struct sk_buff *skb;
- u16 flags = 0;
-
- if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV))
- return;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- goto err;
-
- event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family,
- NFNETLINK_V0, nft_base_seq(net));
- if (!nlh)
- goto err;
-
- if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) ||
- nla_put_string(skb, attr, name) ||
- nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) ||
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
- goto err;
-
- nlmsg_end(skb, nlh);
- nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV,
- nlmsg_report(nlh), GFP_KERNEL);
- return;
-err:
- if (skb)
- kfree_skb(skb);
- nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS);
-}
-
-void
-nf_tables_chain_device_notify(const struct nft_chain *chain,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN,
- chain->name, hook, dev, event);
-}
-
-static void
-nf_tables_flowtable_device_notify(const struct nft_flowtable *ft,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE,
- ft->name, hook, dev, event);
-}
-
static int nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable, bool changename)
{
@@ -9777,12 +9728,13 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
if (!ops)
return 1;
- ops->pf = NFPROTO_NETDEV;
- ops->hooknum = flowtable->hooknum;
- ops->priority = flowtable->data.priority;
- ops->priv = &flowtable->data;
- ops->hook = flowtable->data.type->hook;
- ops->dev = dev;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable->hooknum;
+ ops->priority = flowtable->data.priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
+ ops->dev = dev;
if (nft_register_flowtable_ops(dev_net(dev),
flowtable, ops)) {
kfree(ops);
@@ -9791,7 +9743,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_flowtable_device_notify(flowtable, hook, dev, event);
break;
}
return 0;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index ae3fe87195ab..a88abae5a9de 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
return -1;
+ /* Kernel implementation detail, withhold this from userspace for now */
+ status &= ~IPS_NAT_CLASH;
+
if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
return -1;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ac77fc21632d..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
[NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
- [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES,
};
static struct nfnl_net *nfnl_pernet(struct net *net)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index eab4f476b47f..38d75484e531 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -461,11 +461,6 @@ static int cttimeout_default_get(struct sk_buff *skb,
case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(info->net)->timeouts;
break;
- case IPPROTO_DCCP:
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- timeouts = nf_dccp_pernet(info->net)->dccp_timeout;
-#endif
- break;
case IPPROTO_ICMPV6:
timeouts = &nf_icmpv6_pernet(info->net)->timeout;
break;
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index ade8ee1988b1..92d869317cba 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -109,13 +109,30 @@ cancel_nest:
return -EMSGSIZE;
}
+static int nfnl_hook_put_nft_info_desc(struct sk_buff *nlskb, const char *tname,
+ const char *name, u8 family)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
+ if (!nest ||
+ nla_put_string(nlskb, NFNLA_CHAIN_TABLE, tname) ||
+ nla_put_string(nlskb, NFNLA_CHAIN_NAME, name) ||
+ nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, family)) {
+ nla_nest_cancel(nlskb, nest);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(nlskb, nest);
+ return 0;
+}
+
static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
unsigned int seq,
struct nft_chain *chain)
{
struct net *net = sock_net(nlskb->sk);
- struct nlattr *nest, *nest2;
+ struct nlattr *nest;
int ret = 0;
if (WARN_ON_ONCE(!chain))
@@ -128,29 +145,47 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest)
return -EMSGSIZE;
- nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
- if (!nest2)
- goto cancel_nest;
+ ret = nfnl_hook_put_nft_info_desc(nlskb, chain->table->name,
+ chain->name, chain->table->family);
+ if (ret) {
+ nla_nest_cancel(nlskb, nest);
+ return ret;
+ }
- ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
- if (ret)
- goto cancel_nest;
+ nla_nest_end(nlskb, nest);
+ return 0;
+}
- ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
- if (ret)
- goto cancel_nest;
+static int nfnl_hook_put_nft_ft_info(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ unsigned int seq,
+ struct nf_flowtable *nf_ft)
+{
+ struct nft_flowtable *ft =
+ container_of(nf_ft, struct nft_flowtable, data);
+ struct net *net = sock_net(nlskb->sk);
+ struct nlattr *nest;
+ int ret = 0;
- ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
- if (ret)
- goto cancel_nest;
+ if (WARN_ON_ONCE(!nf_ft))
+ return 0;
- nla_nest_end(nlskb, nest2);
- nla_nest_end(nlskb, nest);
- return ret;
+ if (!nft_is_active(net, ft))
+ return 0;
-cancel_nest:
- nla_nest_cancel(nlskb, nest);
- return -EMSGSIZE;
+ nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFT_FLOWTABLE);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = nfnl_hook_put_nft_info_desc(nlskb, ft->table->name,
+ ft->name, ft->table->family);
+ if (ret) {
+ nla_nest_cancel(nlskb, nest);
+ return ret;
+ }
+
+ nla_nest_end(nlskb, nest);
+ return 0;
}
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
@@ -220,6 +255,9 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
case NF_HOOK_OP_BPF:
ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv);
break;
+ case NF_HOOK_OP_NFT_FT:
+ ret = nfnl_hook_put_nft_ft_info(nlskb, ctx, seq, ops->priv);
+ break;
case NF_HOOK_OP_UNDEFINED:
break;
default:
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 846d48ba8965..b16185e9a6dd 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_chain_device_notify(&basechain->chain,
- hook, dev, event);
break;
}
return 0;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 88922e0e8e83..7807d8129664 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,9 +44,9 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
-static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
- const struct nft_expr *expr,
- struct nft_regs *regs)
+struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
+ const struct nft_expr *expr,
+ struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
@@ -91,8 +91,8 @@ void nft_dynset_eval(const struct nft_expr *expr,
return;
}
- if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
- expr, regs, &ext)) {
+ ext = set->ops->update(set, &regs->data[priv->sreg_key], expr, regs);
+ if (ext) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c74012c99125..7eedf4e3ae9c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -407,6 +407,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -482,6 +483,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
err:
*dest = 0;
}
+#endif
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
@@ -634,6 +636,7 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -649,6 +652,7 @@ static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
return 0;
}
+#endif
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
@@ -779,6 +783,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static const struct nft_expr_ops nft_exthdr_dccp_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -787,6 +792,7 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = {
.dump = nft_exthdr_dump,
.reduce = nft_exthdr_reduce,
};
+#endif
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
@@ -822,10 +828,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
+#ifdef CONFIG_NFT_EXTHDR_DCCP
case NFT_EXTHDR_OP_DCCP:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_dccp_ops;
break;
+#endif
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 63ef832b8aa7..40c602ffbcba 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -25,32 +25,33 @@ struct nft_lookup {
};
#ifdef CONFIG_MITIGATION_RETPOLINE
-bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
if (set->ops == &nft_set_hash_fast_type.ops)
- return nft_hash_lookup_fast(net, set, key, ext);
+ return nft_hash_lookup_fast(net, set, key);
if (set->ops == &nft_set_hash_type.ops)
- return nft_hash_lookup(net, set, key, ext);
+ return nft_hash_lookup(net, set, key);
if (set->ops == &nft_set_rhash_type.ops)
- return nft_rhash_lookup(net, set, key, ext);
+ return nft_rhash_lookup(net, set, key);
if (set->ops == &nft_set_bitmap_type.ops)
- return nft_bitmap_lookup(net, set, key, ext);
+ return nft_bitmap_lookup(net, set, key);
if (set->ops == &nft_set_pipapo_type.ops)
- return nft_pipapo_lookup(net, set, key, ext);
+ return nft_pipapo_lookup(net, set, key);
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
if (set->ops == &nft_set_pipapo_avx2_type.ops)
- return nft_pipapo_avx2_lookup(net, set, key, ext);
+ return nft_pipapo_avx2_lookup(net, set, key);
#endif
if (set->ops == &nft_set_rbtree_type.ops)
- return nft_rbtree_lookup(net, set, key, ext);
+ return nft_rbtree_lookup(net, set, key);
WARN_ON_ONCE(1);
- return set->ops->lookup(net, set, key, ext);
+ return set->ops->lookup(net, set, key);
}
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
#endif
@@ -61,12 +62,12 @@ void nft_lookup_eval(const struct nft_expr *expr,
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
- const struct nft_set_ext *ext = NULL;
const struct net *net = nft_net(pkt);
+ const struct nft_set_ext *ext;
bool found;
- found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
- priv->invert;
+ ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
+ found = !!ext ^ priv->invert;
if (!found) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 09da7a3f9f96..8ee66a86c3bc 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -111,10 +111,9 @@ void nft_objref_map_eval(const struct nft_expr *expr,
struct net *net = nft_net(pkt);
const struct nft_set_ext *ext;
struct nft_object *obj;
- bool found;
- found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
- if (!found) {
+ ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
+ if (!ext) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 12390d2e994f..c24c922f895d 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -75,16 +75,21 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
}
INDIRECT_CALLABLE_SCOPE
-bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
const struct nft_bitmap *priv = nft_set_priv(set);
+ static const struct nft_set_ext found;
u8 genmask = nft_genmask_cur(net);
u32 idx, off;
nft_bitmap_location(set, key, &idx, &off);
- return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+ if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return &found;
+
+ return NULL;
}
static struct nft_bitmap_elem *
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index abb0c8ec6371..266d0c637225 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -81,8 +81,9 @@ static const struct rhashtable_params nft_rhash_params = {
};
INDIRECT_CALLABLE_SCOPE
-bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_rhash *priv = nft_set_priv(set);
const struct nft_rhash_elem *he;
@@ -95,9 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
- *ext = &he->ext;
+ return &he->ext;
- return !!he;
+ return NULL;
}
static struct nft_elem_priv *
@@ -120,14 +121,9 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
return ERR_PTR(-ENOENT);
}
-static bool nft_rhash_update(struct nft_set *set, const u32 *key,
- struct nft_elem_priv *
- (*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
+static const struct nft_set_ext *
+nft_rhash_update(struct nft_set *set, const u32 *key,
+ const struct nft_expr *expr, struct nft_regs *regs)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
@@ -143,7 +139,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
if (he != NULL)
goto out;
- elem_priv = new(set, expr, regs);
+ elem_priv = nft_dynset_new(set, expr, regs);
if (!elem_priv)
goto err1;
@@ -161,14 +157,13 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
}
out:
- *ext = &he->ext;
- return true;
+ return &he->ext;
err2:
nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
- return false;
+ return NULL;
}
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
@@ -507,8 +502,9 @@ struct nft_hash_elem {
};
INDIRECT_CALLABLE_SCOPE
-bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -519,12 +515,10 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
- nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
- return true;
- }
+ nft_set_elem_active(&he->ext, genmask))
+ return &he->ext;
}
- return false;
+ return NULL;
}
static struct nft_elem_priv *
@@ -547,9 +541,9 @@ nft_hash_get(const struct net *net, const struct nft_set *set,
}
INDIRECT_CALLABLE_SCOPE
-bool nft_hash_lookup_fast(const struct net *net,
- const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -562,12 +556,10 @@ bool nft_hash_lookup_fast(const struct net *net,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
if (k1 == k2 &&
- nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
- return true;
- }
+ nft_set_elem_active(&he->ext, genmask))
+ return &he->ext;
}
- return false;
+ return NULL;
}
static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index c5855069bdab..1a19649c2851 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -397,34 +397,36 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
}
/**
- * nft_pipapo_lookup() - Lookup function
- * @net: Network namespace
- * @set: nftables API set representation
- * @key: nftables API element representation containing key data
- * @ext: nftables API extension pointer, filled with matching reference
+ * pipapo_get() - Get matching element reference given key data
+ * @m: storage containing the set elements
+ * @data: Key data to be matched against existing elements
+ * @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* For more details, see DOC: Theory of Operation.
*
- * Return: true on match, false otherwise.
+ * This is the main lookup function. It matches key data against either
+ * the working match set or the uncommitted copy, depending on what the
+ * caller passed to us.
+ * nft_pipapo_get (lookup from userspace/control plane) and nft_pipapo_lookup
+ * (datapath lookup) pass the active copy.
+ * The insertion path will pass the uncommitted working copy.
+ *
+ * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
-bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
- struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
- u8 genmask = nft_genmask_cur(net);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
- const u8 *rp = (const u8 *)key;
bool map_index;
int i;
local_bh_disable();
- m = rcu_dereference(priv->match);
-
- if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
+ if (unlikely(!raw_cpu_ptr(m->scratch)))
goto out;
scratch = *raw_cpu_ptr(m->scratch);
@@ -444,12 +446,12 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* packet bytes value, then AND bucket value
*/
if (likely(f->bb == 8))
- pipapo_and_field_buckets_8bit(f, res_map, rp);
+ pipapo_and_field_buckets_8bit(f, res_map, data);
else
- pipapo_and_field_buckets_4bit(f, res_map, rp);
+ pipapo_and_field_buckets_4bit(f, res_map, data);
NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
- rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
+ data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
@@ -465,13 +467,15 @@ next_match:
scratch->map_index = map_index;
local_bh_enable();
- return false;
+ return NULL;
}
if (last) {
- *ext = &f->mt[b].e->ext;
- if (unlikely(nft_set_elem_expired(*ext) ||
- !nft_set_elem_active(*ext, genmask)))
+ struct nft_pipapo_elem *e;
+
+ e = f->mt[b].e;
+ if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
+ !nft_set_elem_active(&e->ext, genmask)))
goto next_match;
/* Last field: we're just returning the key without
@@ -481,8 +485,7 @@ next_match:
*/
scratch->map_index = map_index;
local_bh_enable();
-
- return true;
+ return e;
}
/* Swap bitmap indices: res_map is the initial bitmap for the
@@ -492,112 +495,38 @@ next_match:
map_index = !map_index;
swap(res_map, fill_map);
- rp += NFT_PIPAPO_GROUPS_PADDING(f);
+ data += NFT_PIPAPO_GROUPS_PADDING(f);
}
out:
local_bh_enable();
- return false;
+ return NULL;
}
/**
- * pipapo_get() - Get matching element reference given key data
+ * nft_pipapo_lookup() - Dataplane fronted for main lookup function
* @net: Network namespace
* @set: nftables API set representation
- * @m: storage containing active/existing elements
- * @data: Key data to be matched against existing elements
- * @genmask: If set, check that element is active in given genmask
- * @tstamp: timestamp to check for expired elements
- * @gfp: the type of memory to allocate (see kmalloc).
+ * @key: pointer to nft registers containing key data
*
- * This is essentially the same as the lookup function, except that it matches
- * key data against the uncommitted copy and doesn't use preallocated maps for
- * bitmap results.
+ * This function is called from the data path. It will search for
+ * an element matching the given key in the current active copy.
*
- * Return: pointer to &struct nft_pipapo_elem on match, error pointer otherwise.
+ * Return: ntables API extension pointer or NULL if no match.
*/
-static struct nft_pipapo_elem *pipapo_get(const struct net *net,
- const struct nft_set *set,
- const struct nft_pipapo_match *m,
- const u8 *data, u8 genmask,
- u64 tstamp, gfp_t gfp)
+const struct nft_set_ext *
+nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
- struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_field *f;
- int i;
-
- if (m->bsize_max == 0)
- return ret;
-
- res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp);
- if (!res_map) {
- ret = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp);
- if (!fill_map) {
- ret = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- pipapo_resmap_init(m, res_map);
-
- nft_pipapo_for_each_field(f, i, m) {
- bool last = i == m->field_count - 1;
- int b;
-
- /* For each bit group: select lookup table bucket depending on
- * packet bytes value, then AND bucket value
- */
- if (f->bb == 8)
- pipapo_and_field_buckets_8bit(f, res_map, data);
- else if (f->bb == 4)
- pipapo_and_field_buckets_4bit(f, res_map, data);
- else
- BUG();
-
- data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
-
- /* Now populate the bitmap for the next field, unless this is
- * the last field, in which case return the matched 'ext'
- * pointer if any.
- *
- * Now res_map contains the matching bitmap, and fill_map is the
- * bitmap for the next field.
- */
-next_match:
- b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
- last);
- if (b < 0)
- goto out;
-
- if (last) {
- if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
- goto next_match;
- if ((genmask &&
- !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
- goto next_match;
-
- ret = f->mt[b].e;
- goto out;
- }
-
- data += NFT_PIPAPO_GROUPS_PADDING(f);
+ struct nft_pipapo *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_elem *e;
- /* Swap bitmap indices: fill_map will be the initial bitmap for
- * the next field (i.e. the new res_map), and res_map is
- * guaranteed to be all-zeroes at this point, ready to be filled
- * according to the next mapping table.
- */
- swap(res_map, fill_map);
- }
+ m = rcu_dereference(priv->match);
+ e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
-out:
- kfree(fill_map);
- kfree(res_map);
- return ret;
+ return e ? &e->ext : NULL;
}
/**
@@ -606,6 +535,11 @@ out:
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @flags: Unused
+ *
+ * This function is called from the control plane path under
+ * RCU read lock.
+ *
+ * Return: set element private pointer or ERR_PTR(-ENOENT).
*/
static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
@@ -615,11 +549,10 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net), get_jiffies_64(),
- GFP_ATOMIC);
- if (IS_ERR(e))
- return ERR_CAST(e);
+ e = pipapo_get(m, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64());
+ if (!e)
+ return ERR_PTR(-ENOENT);
return &e->priv;
}
@@ -1219,7 +1152,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c
mem = s;
mem -= s->align_off;
- kfree(mem);
+ kvfree(mem);
}
/**
@@ -1240,10 +1173,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
void *scratch_aligned;
u32 align_off;
#endif
- scratch = kzalloc_node(struct_size(scratch, map,
- bsize_max * 2) +
- NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL_ACCOUNT, cpu_to_node(i));
+ scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
* allocations: this means that some scratch maps have
@@ -1345,8 +1277,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
- if (!IS_ERR(dup)) {
+ dup = pipapo_get(m, start, genmask, tstamp);
+ if (dup) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1365,15 +1297,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
return -ENOTEMPTY;
}
- if (PTR_ERR(dup) == -ENOENT) {
- /* Look for partially overlapping entries */
- dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
- GFP_KERNEL);
- }
-
- if (PTR_ERR(dup) != -ENOENT) {
- if (IS_ERR(dup))
- return PTR_ERR(dup);
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(m, end, nft_genmask_next(net), tstamp);
+ if (dup) {
*elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1914,9 +1840,9 @@ nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
if (!m)
return NULL;
- e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
- nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
- if (IS_ERR(e))
+ e = pipapo_get(m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net));
+ if (!e)
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index be7c16c79f71..db5d367e43c4 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1137,7 +1137,6 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
- * @ext: nftables API extension pointer, filled with matching reference
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
*
@@ -1146,8 +1145,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
*
* Return: true on match, false otherwise.
*/
-bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
@@ -1155,17 +1155,18 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
+ const struct nft_set_ext *ext;
unsigned long *res, *fill;
bool map_index;
- int i, ret = 0;
+ int i;
local_bh_disable();
if (unlikely(!irq_fpu_usable())) {
- bool fallback_res = nft_pipapo_lookup(net, set, key, ext);
+ ext = nft_pipapo_lookup(net, set, key);
local_bh_enable();
- return fallback_res;
+ return ext;
}
m = rcu_dereference(priv->match);
@@ -1182,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
if (unlikely(!scratch)) {
kernel_fpu_end();
local_bh_enable();
- return false;
+ return NULL;
}
map_index = scratch->map_index;
@@ -1197,6 +1198,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
next_match:
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1, first = !i;
+ int ret = 0;
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
@@ -1244,10 +1246,10 @@ next_match:
goto out;
if (last) {
- *ext = &f->mt[ret].e->ext;
- if (unlikely(nft_set_elem_expired(*ext) ||
- !nft_set_elem_active(*ext, genmask))) {
- ret = 0;
+ ext = &f->mt[ret].e->ext;
+ if (unlikely(nft_set_elem_expired(ext) ||
+ !nft_set_elem_active(ext, genmask))) {
+ ext = NULL;
goto next_match;
}
@@ -1264,5 +1266,5 @@ out:
kernel_fpu_end();
local_bh_enable();
- return ret >= 0;
+ return ext;
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 2e8ef16ff191..938a257c069e 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -52,9 +52,9 @@ static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
return nft_set_elem_expired(&rbe->ext);
}
-static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext,
- unsigned int seq)
+static const struct nft_set_ext *
+__nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -65,7 +65,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
if (read_seqcount_retry(&priv->count, seq))
- return false;
+ return NULL;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -87,50 +87,48 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
}
if (nft_rbtree_elem_expired(rbe))
- return false;
+ return NULL;
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
- return false;
+ return NULL;
parent = rcu_dereference_raw(parent->rb_left);
interval = NULL;
continue;
}
- *ext = &rbe->ext;
- return true;
+ return &rbe->ext;
}
}
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_elem_expired(interval) &&
- nft_rbtree_interval_start(interval)) {
- *ext = &interval->ext;
- return true;
- }
+ nft_rbtree_interval_start(interval))
+ return &interval->ext;
- return false;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE
-bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
- bool ret;
+ const struct nft_set_ext *ext;
- ret = __nft_rbtree_lookup(net, set, key, ext, seq);
- if (ret || !read_seqcount_retry(&priv->count, seq))
- return ret;
+ ext = __nft_rbtree_lookup(net, set, key, seq);
+ if (ext || !read_seqcount_retry(&priv->count, seq))
+ return ext;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
- ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ ext = __nft_rbtree_lookup(net, set, key, seq);
read_unlock_bh(&priv->lock);
- return ret;
+ return ext;
}
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 709840612f0d..90b7630421c4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1317,12 +1317,13 @@ void xt_compat_unlock(u_int8_t af)
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
-DEFINE_PER_CPU(seqcount_t, xt_recseq);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
-
struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);
+#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
@@ -1514,6 +1515,7 @@ void *xt_unregister_table(struct xt_table *table)
return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);
+#endif
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1897,6 +1899,7 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
+#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
/**
* xt_percpu_counter_alloc - allocate x_tables rule counter
*
@@ -1951,6 +1954,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+#endif
static int __net_init xt_net_init(struct net *net)
{
@@ -1983,8 +1987,10 @@ static int __init xt_init(void)
unsigned int i;
int rv;
- for_each_possible_cpu(i) {
- seqcount_init(&per_cpu(xt_recseq, i));
+ if (IS_ENABLED(CONFIG_NETFILTER_XTABLES_LEGACY)) {
+ for_each_possible_cpu(i) {
+ seqcount_init(&per_cpu(xt_recseq, i));
+ }
}
xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 7c6bf1c16813..0ca1cdfc4095 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
- pr_info_ratelimited("accounting object `%s' does not exists\n",
- info->name);
+ pr_info_ratelimited("accounting object `%.*s' does not exist\n",
+ NFACCT_NAME_MAX, info->name);
return -ENOENT;
}
info->nfacct = nfacct;