Diffstat (limited to 'net/netfilter')
30 files changed, 380 insertions, 1367 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2560416218d0..6cdc994fdc8a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -195,16 +195,6 @@ config NF_CONNTRACK_LABELS config NF_CONNTRACK_OVS bool -config NF_CT_PROTO_DCCP - bool 'DCCP protocol connection tracking support' - depends on NETFILTER_ADVANCED - default y - help - With this option enabled, the layer 3 independent connection - tracking code will be able to do state tracking on DCCP connections. - - If unsure, say Y. - config NF_CT_PROTO_GRE bool @@ -516,6 +506,12 @@ config NFT_CT This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. +config NFT_EXTHDR_DCCP + bool "Netfilter nf_tables exthdr DCCP support (DEPRECATED)" + default n + help + This option adds support for matching on DCCP extension headers. + config NFT_FLOW_OFFLOAD depends on NF_CONNTRACK && NF_FLOW_TABLE tristate "Netfilter nf_tables hardware flow offload module" @@ -762,6 +758,16 @@ config NETFILTER_XTABLES_COMPAT If unsure, say N. +config NETFILTER_XTABLES_LEGACY + bool "Netfilter legacy tables support" + depends on !PREEMPT_RT + help + Say Y here if you still require support for legacy tables. This is + required by the legacy tools (iptables-legacy) and is not needed if + you use iptables over nftables (iptables-nft). + Legacy support is not limited to IP, it also includes EBTABLES and + ARPTABLES. + comment "Xtables combined modules" config NETFILTER_XT_MARK @@ -1278,9 +1284,9 @@ config NETFILTER_XT_MATCH_CPU To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_MATCH_DCCP - tristate '"dccp" protocol match support' + tristate '"dccp" protocol match support (DEPRECATED)' depends on NETFILTER_ADVANCED - default IP_DCCP + default n help With this option enabled, you will be able to use the iptables `dccp' match in order to match on DCCP source/destination ports diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f0aa4d7ef499..e43e20f529f8 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -12,7 +12,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o -nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o ifeq ($(CONFIG_NF_CONNTRACK),m) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 44b2ad695c15..965f3c8e5089 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -926,7 +926,7 @@ static void ip_vs_conn_expire(struct timer_list *t) void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { /* Using mod_timer_pending will ensure the timer is not - * modified after the final del_timer in ip_vs_conn_expire. + * modified after the final timer_delete in ip_vs_conn_expire. 
*/ if (timer_pending(&cp->timer) && time_after(cp->timer.expires, jiffies)) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 014f07740369..95af252b2939 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -97,7 +97,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest) if (!dest_dst) return NULL; dst = dest_dst->dst_cache; - if (dst->obsolete && + if (READ_ONCE(dst->obsolete) && dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; return dest_dst; diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 06b084844700..3e4fb9ddcd36 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -17,7 +17,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, .skb = skb, }; - return bpf_prog_run(prog, &ctx); + return bpf_prog_run_pin_on_cpu(prog, &ctx); } struct bpf_nf_link { @@ -225,7 +225,8 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!link) return -ENOMEM; - bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog, + attr->link_create.attach_type); link->hook_ops.hook = nf_hook_run_bpf; link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 201d3c4ec623..344f88295976 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -136,8 +136,8 @@ static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) } /* return true if we need to recompute hashes (in case hash table was resized) */ -static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, - unsigned int h2, unsigned int sequence) +static bool nf_conntrack_double_lock(unsigned int h1, unsigned int h2, + unsigned int sequence) { h1 %= CONNTRACK_LOCKS; h2 %= CONNTRACK_LOCKS; @@ -329,9 +329,6 @@ nf_ct_get_tuple(const struct sk_buff *skb, #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: -#endif /* fallthrough */ return nf_ct_get_tuple_ports(skb, dataoff, tuple); default: @@ -616,7 +613,7 @@ static void __nf_ct_delete_from_lists(struct nf_conn *ct) reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); - } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); + } while (nf_conntrack_double_lock(hash, reply_hash, sequence)); clean_from_lists(ct); nf_conntrack_double_unlock(hash, reply_hash); @@ -893,7 +890,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); - } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); + } while (nf_conntrack_double_lock(hash, reply_hash, sequence)); max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN); @@ -1124,6 +1121,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + /* confirmed bit must be set after hlist add, not before: + * loser_ct can still be visible to other cpu due to + * SLAB_TYPESAFE_BY_RCU. 
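In the ip_vs_xmit.c hunk above, __ip_vs_dst_check() now reads dst->obsolete through READ_ONCE() so the lockless reader gets a single, untearable load even while another CPU may update the field. A minimal userspace analogue of that idiom, using C11 relaxed atomics in place of the kernel's READ_ONCE/WRITE_ONCE (the structure and function names here are invented for illustration):

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical cached-route object; "obsolete" may be written by another
 * thread while readers check it locklessly. */
struct cached_dst {
	_Atomic int obsolete;
	int cookie;
};

/* Reader side: one plain-looking but untearable load, like READ_ONCE(). */
static int dst_is_stale(struct cached_dst *dst)
{
	return atomic_load_explicit(&dst->obsolete, memory_order_relaxed) != 0;
}

/* Writer side: the counterpart of WRITE_ONCE(). */
static void dst_mark_stale(struct cached_dst *dst)
{
	atomic_store_explicit(&dst->obsolete, 1, memory_order_relaxed);
}

int main(void)
{
	struct cached_dst dst = { .cookie = 42 };

	printf("stale before: %d\n", dst_is_stale(&dst));
	dst_mark_stale(&dst);
	printf("stale after:  %d\n", dst_is_stale(&dst));
	return 0;
}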
+ */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &loser_ct->status); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; @@ -1231,7 +1234,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY)); - } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); + } while (nf_conntrack_double_lock(hash, reply_hash, sequence)); /* We're not in hash table, and we refuse to set up related * connections for unconfirmed conns. But packet copies and @@ -1260,8 +1263,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - ct->status |= IPS_CONFIRMED; - if (unlikely(nf_ct_is_dying(ct))) { NF_CT_STAT_INC(net, insert_failed); goto dying; @@ -1293,7 +1294,7 @@ chaintoolong: } } - /* Timer relative to confirmation time, not original + /* Timeout is relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; @@ -1301,11 +1302,21 @@ chaintoolong: __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after - * starting the timer and setting the CONFIRMED bit. The RCU barriers - * guarantee that no other CPU can find the conntrack before the above - * stores are visible. + * setting ct->timeout. The RCU barriers guarantee that no other CPU + * can find the conntrack before the above stores are visible. */ __nf_conntrack_hash_insert(ct, hash, reply_hash); + + /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups + * skip entries that lack this bit. This happens when a CPU is looking + * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU + * or when another CPU encounters this entry right after the insertion + * but before the set-confirm-bit below. This bit must not be set until + * after __nf_conntrack_hash_insert(). 
+ */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); @@ -1662,7 +1673,11 @@ __nf_conntrack_alloc(struct net *net, if (!conntrack_gc_work.early_drop) conntrack_gc_work.early_drop = true; atomic_dec(&cnet->count); - net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); + if (net == &init_net) + net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); + else + net_warn_ratelimited("nf_conntrack: table full in netns %u, dropping packet\n", + net->ns.inum); return ERR_PTR(-ENOMEM); } } @@ -1982,11 +1997,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, return nf_conntrack_sctp_packet(ct, skb, dataoff, ctinfo, state); #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: - return nf_conntrack_dccp_packet(ct, skb, dataoff, - ctinfo, state); -#endif #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return nf_conntrack_gre_packet(ct, skb, dataoff, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2cc0fde23344..486d52b45fe5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2036,7 +2036,6 @@ static void ctnetlink_change_mark(struct nf_conn *ct, static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, - [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index f36727ed91e1..bc1d96686b9c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -100,9 +100,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) case IPPROTO_UDP: return &nf_conntrack_l4proto_udp; case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp; case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp; -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp; -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; #endif @@ -681,9 +678,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #if IS_ENABLED(CONFIG_IPV6) nf_conntrack_icmpv6_init_net(net); #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - nf_conntrack_dccp_init_net(net); -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP nf_conntrack_sctp_init_net(net); #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c deleted file mode 100644 index ebc4f733bb2e..000000000000 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ /dev/null @@ -1,826 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * DCCP connection tracking protocol helper - * - * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/dccp.h> -#include <linux/slab.h> - -#include <net/net_namespace.h> -#include <net/netns/generic.h> - -#include <linux/netfilter/nfnetlink_conntrack.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_l4proto.h> -#include <net/netfilter/nf_conntrack_ecache.h> -#include <net/netfilter/nf_conntrack_timeout.h> -#include <net/netfilter/nf_log.h> - -/* Timeouts are based on values from RFC4340: - * - * - REQUEST: - * - * 8.1.2. Client Request - * - * A client MAY give up on its DCCP-Requests after some time - * (3 minutes, for example). 
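The conntrack changes above move the IPS_CONFIRMED bit so it is set only after the entry has been linked into the hash, with smp_mb__before_atomic() ordering the list insertion before the set_bit(); lockless lookups skip entries that lack the bit. A rough userspace sketch of the same publish-then-flag pattern, using C11 release/acquire in place of the kernel primitives (the structure and names are invented for illustration):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct entry {
	int tuple;             /* payload that must be visible first */
	_Atomic int confirmed; /* readers ignore the entry until set */
};

static struct entry slot;      /* stands in for the hash bucket */

static void *insert_side(void *arg)
{
	(void)arg;
	slot.tuple = 1234;     /* "hash insert": publish the payload */
	/* Like smp_mb__before_atomic() + set_bit(IPS_CONFIRMED_BIT):
	 * everything written above is ordered before the flag. */
	atomic_store_explicit(&slot.confirmed, 1, memory_order_release);
	return NULL;
}

static void *lookup_side(void *arg)
{
	(void)arg;
	/* Lookup skips unconfirmed entries; acquire pairs with release,
	 * so a confirmed entry is guaranteed to show a valid tuple. */
	while (!atomic_load_explicit(&slot.confirmed, memory_order_acquire))
		;
	printf("confirmed entry, tuple=%d\n", slot.tuple);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, lookup_side, NULL);
	pthread_create(&b, NULL, insert_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}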
- * - * - RESPOND: - * - * 8.1.3. Server Response - * - * It MAY also leave the RESPOND state for CLOSED after a timeout of - * not less than 4MSL (8 minutes); - * - * - PARTOPEN: - * - * 8.1.5. Handshake Completion - * - * If the client remains in PARTOPEN for more than 4MSL (8 minutes), - * it SHOULD reset the connection with Reset Code 2, "Aborted". - * - * - OPEN: - * - * The DCCP timestamp overflows after 11.9 hours. If the connection - * stays idle this long the sequence number won't be recognized - * as valid anymore. - * - * - CLOSEREQ/CLOSING: - * - * 8.3. Termination - * - * The retransmission timer should initially be set to go off in two - * round-trip times and should back off to not less than once every - * 64 seconds ... - * - * - TIMEWAIT: - * - * 4.3. States - * - * A server or client socket remains in this state for 2MSL (4 minutes) - * after the connection has been town down, ... - */ - -#define DCCP_MSL (2 * 60 * HZ) - -#ifdef CONFIG_NF_CONNTRACK_PROCFS -static const char * const dccp_state_names[] = { - [CT_DCCP_NONE] = "NONE", - [CT_DCCP_REQUEST] = "REQUEST", - [CT_DCCP_RESPOND] = "RESPOND", - [CT_DCCP_PARTOPEN] = "PARTOPEN", - [CT_DCCP_OPEN] = "OPEN", - [CT_DCCP_CLOSEREQ] = "CLOSEREQ", - [CT_DCCP_CLOSING] = "CLOSING", - [CT_DCCP_TIMEWAIT] = "TIMEWAIT", - [CT_DCCP_IGNORE] = "IGNORE", - [CT_DCCP_INVALID] = "INVALID", -}; -#endif - -#define sNO CT_DCCP_NONE -#define sRQ CT_DCCP_REQUEST -#define sRS CT_DCCP_RESPOND -#define sPO CT_DCCP_PARTOPEN -#define sOP CT_DCCP_OPEN -#define sCR CT_DCCP_CLOSEREQ -#define sCG CT_DCCP_CLOSING -#define sTW CT_DCCP_TIMEWAIT -#define sIG CT_DCCP_IGNORE -#define sIV CT_DCCP_INVALID - -/* - * DCCP state transition table - * - * The assumption is the same as for TCP tracking: - * - * We are the man in the middle. All the packets go through us but might - * get lost in transit to the destination. It is assumed that the destination - * can't receive segments we haven't seen. - * - * The following states exist: - * - * NONE: Initial state, expecting Request - * REQUEST: Request seen, waiting for Response from server - * RESPOND: Response from server seen, waiting for Ack from client - * PARTOPEN: Ack after Response seen, waiting for packet other than Response, - * Reset or Sync from server - * OPEN: Packet other than Response, Reset or Sync seen - * CLOSEREQ: CloseReq from server seen, expecting Close from client - * CLOSING: Close seen, expecting Reset - * TIMEWAIT: Reset seen - * IGNORE: Not determinable whether packet is valid - * - * Some states exist only on one side of the connection: REQUEST, RESPOND, - * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to - * the one it was in before. - * - * Packets are marked as ignored (sIG) if we don't know if they're valid - * (for example a reincarnation of a connection we didn't notice is dead - * already) and the server may send back a connection closing Reset or a - * Response. They're also used for Sync/SyncAck packets, which we don't - * care about. 
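The comment above describes the deleted tracker's state machine; the code that follows encodes it as a three-dimensional table, dccp_state_table[role][type][old_state], so each packet is handled with a single array lookup later in nf_conntrack_dccp_packet(). A stripped-down sketch of that table-driven dispatch, with toy enums rather than the real DCCP values:

#include <stdio.h>

enum role  { ROLE_CLIENT, ROLE_SERVER, ROLE_MAX };
enum pkt   { PKT_REQUEST, PKT_RESPONSE, PKT_MAX };
enum state { ST_NONE, ST_REQUEST, ST_RESPOND, ST_INVALID, ST_MAX };

/* next_state[role][packet][current_state], in the spirit of dccp_state_table */
static const unsigned char next_state[ROLE_MAX][PKT_MAX][ST_MAX] = {
	[ROLE_CLIENT] = {
		/* current:        NONE        REQUEST     RESPOND     INVALID */
		[PKT_REQUEST]  = { ST_REQUEST, ST_REQUEST, ST_RESPOND, ST_INVALID },
		[PKT_RESPONSE] = { ST_INVALID, ST_INVALID, ST_INVALID, ST_INVALID },
	},
	[ROLE_SERVER] = {
		[PKT_REQUEST]  = { ST_INVALID, ST_INVALID, ST_INVALID, ST_INVALID },
		[PKT_RESPONSE] = { ST_INVALID, ST_RESPOND, ST_RESPOND, ST_INVALID },
	},
};

int main(void)
{
	enum state st = ST_NONE;

	st = next_state[ROLE_CLIENT][PKT_REQUEST][st];   /* NONE -> REQUEST */
	st = next_state[ROLE_SERVER][PKT_RESPONSE][st];  /* REQUEST -> RESPOND */
	printf("final state: %d\n", st);                 /* prints 2 */
	return 0;
}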
- */ -static const u_int8_t -dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { - [CT_DCCP_ROLE_CLIENT] = { - [DCCP_PKT_REQUEST] = { - /* - * sNO -> sRQ Regular Request - * sRQ -> sRQ Retransmitted Request or reincarnation - * sRS -> sRS Retransmitted Request (apparently Response - * got lost after we saw it) or reincarnation - * sPO -> sIG Ignore, conntrack might be out of sync - * sOP -> sIG Ignore, conntrack might be out of sync - * sCR -> sIG Ignore, conntrack might be out of sync - * sCG -> sIG Ignore, conntrack might be out of sync - * sTW -> sRQ Reincarnation - * - * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ - sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, - }, - [DCCP_PKT_RESPONSE] = { - /* - * sNO -> sIV Invalid - * sRQ -> sIG Ignore, might be response to ignored Request - * sRS -> sIG Ignore, might be response to ignored Request - * sPO -> sIG Ignore, might be response to ignored Request - * sOP -> sIG Ignore, might be response to ignored Request - * sCR -> sIG Ignore, might be response to ignored Request - * sCG -> sIG Ignore, might be response to ignored Request - * sTW -> sIV Invalid, reincarnation in reverse direction - * goes through sRQ - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, - }, - [DCCP_PKT_ACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) - * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN - * sOP -> sOP Regular ACK, remain in OPEN - * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV - }, - [DCCP_PKT_DATA] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) - * sOP -> sOP Regular Data packet - * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, - }, - [DCCP_PKT_DATAACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) - * sPO -> sPO Remain in PARTOPEN state - * sOP -> sOP Regular DataAck packet in OPEN state - * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV - }, - [DCCP_PKT_CLOSEREQ] = { - /* - * CLOSEREQ may only be sent by the server. - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV - }, - [DCCP_PKT_CLOSE] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sCG Client-initiated close - * sOP -> sCG Client-initiated close - * sCR -> sCG Close in response to CloseReq (8.3.) - * sCG -> sCG Retransmit - * sTW -> sIV Late retransmit, already in TIME_WAIT - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV - }, - [DCCP_PKT_RESET] = { - /* - * sNO -> sIV No connection - * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) - * sRS -> sTW Response received without Request - * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) 
- * sOP -> sTW Connection reset - * sCR -> sTW Connection reset - * sCG -> sTW Connection reset - * sTW -> sIG Ignore (don't refresh timer) - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG - }, - [DCCP_PKT_SYNC] = { - /* - * We currently ignore Sync packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - [DCCP_PKT_SYNCACK] = { - /* - * We currently ignore SyncAck packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - }, - [CT_DCCP_ROLE_SERVER] = { - [DCCP_PKT_REQUEST] = { - /* - * sNO -> sIV Invalid - * sRQ -> sIG Ignore, conntrack might be out of sync - * sRS -> sIG Ignore, conntrack might be out of sync - * sPO -> sIG Ignore, conntrack might be out of sync - * sOP -> sIG Ignore, conntrack might be out of sync - * sCR -> sIG Ignore, conntrack might be out of sync - * sCG -> sIG Ignore, conntrack might be out of sync - * sTW -> sRQ Reincarnation, must reverse roles - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ - }, - [DCCP_PKT_RESPONSE] = { - /* - * sNO -> sIV Response without Request - * sRQ -> sRS Response to clients Request - * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) - * sPO -> sIG Response to an ignored Request or late retransmit - * sOP -> sIG Ignore, might be response to ignored Request - * sCR -> sIG Ignore, might be response to ignored Request - * sCG -> sIG Ignore, might be response to ignored Request - * sTW -> sIV Invalid, Request from client in sTW moves to sRQ - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV - }, - [DCCP_PKT_ACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular Ack in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_DATA] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular Data packet in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_DATAACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular DataAck in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_CLOSEREQ] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) 
- * sOP -> sCR CloseReq in OPEN state - * sCR -> sCR Retransmit - * sCG -> sCR Simultaneous close, client sends another Close - * sTW -> sIV Already closed - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV - }, - [DCCP_PKT_CLOSE] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP -> sCG Move direcly to CLOSING - * sOP -> sCG Move to CLOSING - * sCR -> sIV Close after CloseReq is invalid - * sCG -> sCG Retransmit - * sTW -> sIV Already closed - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV - }, - [DCCP_PKT_RESET] = { - /* - * sNO -> sIV No connection - * sRQ -> sTW Reset in response to Request - * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) - * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) - * sOP -> sTW - * sCR -> sTW - * sCG -> sTW - * sTW -> sIG Ignore (don't refresh timer) - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ - sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG - }, - [DCCP_PKT_SYNC] = { - /* - * We currently ignore Sync packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - [DCCP_PKT_SYNCACK] = { - /* - * We currently ignore SyncAck packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - }, -}; - -static noinline bool -dccp_new(struct nf_conn *ct, const struct sk_buff *skb, - const struct dccp_hdr *dh, - const struct nf_hook_state *hook_state) -{ - struct net *net = nf_ct_net(ct); - struct nf_dccp_net *dn; - const char *msg; - u_int8_t state; - - state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; - switch (state) { - default: - dn = nf_dccp_pernet(net); - if (dn->dccp_loose == 0) { - msg = "not picking up existing connection "; - goto out_invalid; - } - break; - case CT_DCCP_REQUEST: - break; - case CT_DCCP_INVALID: - msg = "invalid state transition "; - goto out_invalid; - } - - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.state = CT_DCCP_NONE; - ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; - ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; - ct->proto.dccp.handshake_seq = 0; - return true; - -out_invalid: - nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg); - return false; -} - -static u64 dccp_ack_seq(const struct dccp_hdr *dh) -{ - const struct dccp_hdr_ack_bits *dhack; - - dhack = (void *)dh + __dccp_basic_hdr_len(dh); - return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + - ntohl(dhack->dccph_ack_nr_low); -} - -static bool dccp_error(const struct dccp_hdr *dh, - struct sk_buff *skb, unsigned int dataoff, - const struct nf_hook_state *state) -{ - static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST | - 1 << DCCP_PKT_RESPONSE | - 1 << DCCP_PKT_CLOSEREQ | - 1 << DCCP_PKT_CLOSE | - 1 << DCCP_PKT_RESET | - 1 << DCCP_PKT_SYNC | - 1 << DCCP_PKT_SYNCACK; - unsigned int dccp_len = skb->len - dataoff; - unsigned int cscov; - const char *msg; - u8 type; - - BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); - - if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || - dh->dccph_doff * 4 > dccp_len) { - msg = "nf_ct_dccp: truncated/malformed packet "; - goto out_invalid; - } - - cscov = dccp_len; - if (dh->dccph_cscov) { - cscov = (dh->dccph_cscov - 1) * 4; - if (cscov > dccp_len) { - msg = "nf_ct_dccp: bad checksum coverage "; - goto out_invalid; - } - } - - if (state->hook == NF_INET_PRE_ROUTING && - 
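dccp_ack_seq() in the deleted code above rebuilds the 48-bit acknowledgment number from a 16-bit high half and a 32-bit low half, each converted from network byte order. The same arithmetic in isolation, with sample values chosen here purely for illustration:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* On-wire representation: 16-bit high part, 32-bit low part. */
	uint16_t high_be = htons(0x0001);
	uint32_t low_be  = htonl(0x23456789);

	/* Same expression as dccp_ack_seq():
	 * ((u64)ntohs(high) << 32) + ntohl(low) */
	uint64_t ack = ((uint64_t)ntohs(high_be) << 32) + ntohl(low_be);

	printf("ack_nr = 0x%012llx\n", (unsigned long long)ack); /* 0x000123456789 */
	return 0;
}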
state->net->ct.sysctl_checksum && - nf_checksum_partial(skb, state->hook, dataoff, cscov, - IPPROTO_DCCP, state->pf)) { - msg = "nf_ct_dccp: bad checksum "; - goto out_invalid; - } - - type = dh->dccph_type; - if (type >= DCCP_PKT_INVALID) { - msg = "nf_ct_dccp: reserved packet type "; - goto out_invalid; - } - - if (test_bit(type, &require_seq48) && !dh->dccph_x) { - msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; - goto out_invalid; - } - - return false; -out_invalid: - nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg); - return true; -} - -struct nf_conntrack_dccp_buf { - struct dccp_hdr dh; /* generic header part */ - struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ - union { /* depends on header type */ - struct dccp_hdr_ack_bits ack; - struct dccp_hdr_request req; - struct dccp_hdr_response response; - struct dccp_hdr_reset rst; - } u; -}; - -static struct dccp_hdr * -dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, - struct nf_conntrack_dccp_buf *buf) -{ - unsigned int hdrlen = __dccp_hdr_len(dh); - - if (hdrlen > sizeof(*buf)) - return NULL; - - return skb_header_pointer(skb, offset, hdrlen, buf); -} - -int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conntrack_dccp_buf _dh; - u_int8_t type, old_state, new_state; - enum ct_dccp_roles role; - unsigned int *timeouts; - struct dccp_hdr *dh; - - dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); - if (!dh) - return -NF_ACCEPT; - - if (dccp_error(dh, skb, dataoff, state)) - return -NF_ACCEPT; - - /* pull again, including possible 48 bit sequences and subtype header */ - dh = dccp_header_pointer(skb, dataoff, dh, &_dh); - if (!dh) - return -NF_ACCEPT; - - type = dh->dccph_type; - if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) - return -NF_ACCEPT; - - if (type == DCCP_PKT_RESET && - !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - /* Tear down connection immediately if only reply is a RESET */ - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_ACCEPT; - } - - spin_lock_bh(&ct->lock); - - role = ct->proto.dccp.role[dir]; - old_state = ct->proto.dccp.state; - new_state = dccp_state_table[role][type][old_state]; - - switch (new_state) { - case CT_DCCP_REQUEST: - if (old_state == CT_DCCP_TIMEWAIT && - role == CT_DCCP_ROLE_SERVER) { - /* Reincarnation in the reverse direction: reopen and - * reverse client/server roles. */ - ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; - } - break; - case CT_DCCP_RESPOND: - if (old_state == CT_DCCP_REQUEST) - ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); - break; - case CT_DCCP_PARTOPEN: - if (old_state == CT_DCCP_RESPOND && - type == DCCP_PKT_ACK && - dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) - set_bit(IPS_ASSURED_BIT, &ct->status); - break; - case CT_DCCP_IGNORE: - /* - * Connection tracking might be out of sync, so we ignore - * packets that might establish a new connection and resync - * if the server responds with a valid Response. 
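dccp_error() above builds a compile-time bitmask (require_seq48) of the packet types that must carry 48-bit sequence numbers and then answers the question with a single test_bit(). The same trick in plain C, using made-up packet-type values rather than the real DCCP ones:

#include <stdio.h>

enum { PKT_REQUEST = 0, PKT_DATA = 2, PKT_ACK = 3, PKT_RESET = 7, PKT_MAX = 16 };

/* One bit per packet type that requires extended (48-bit) sequence numbers,
 * mirroring the require_seq48 mask in the removed dccp_error(). */
static const unsigned long require_seq48 =
	1UL << PKT_REQUEST |
	1UL << PKT_RESET;

static int needs_seq48(unsigned int type)
{
	return type < PKT_MAX && (require_seq48 >> type) & 1UL;
}

int main(void)
{
	printf("REQUEST needs 48-bit seq: %d\n", needs_seq48(PKT_REQUEST)); /* 1 */
	printf("DATA    needs 48-bit seq: %d\n", needs_seq48(PKT_DATA));    /* 0 */
	return 0;
}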
- */ - if (ct->proto.dccp.last_dir == !dir && - ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && - type == DCCP_PKT_RESPONSE) { - ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); - new_state = CT_DCCP_RESPOND; - break; - } - ct->proto.dccp.last_dir = dir; - ct->proto.dccp.last_pkt = type; - - spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet"); - return NF_ACCEPT; - case CT_DCCP_INVALID: - spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition"); - return -NF_ACCEPT; - } - - ct->proto.dccp.last_dir = dir; - ct->proto.dccp.last_pkt = type; - ct->proto.dccp.state = new_state; - spin_unlock_bh(&ct->lock); - - if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, ct); - - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) - timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout; - nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - - return NF_ACCEPT; -} - -static bool dccp_can_early_drop(const struct nf_conn *ct) -{ - switch (ct->proto.dccp.state) { - case CT_DCCP_CLOSEREQ: - case CT_DCCP_CLOSING: - case CT_DCCP_TIMEWAIT: - return true; - default: - break; - } - - return false; -} - -#ifdef CONFIG_NF_CONNTRACK_PROCFS -static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) -{ - seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); -} -#endif - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - struct nf_conn *ct, bool destroy) -{ - struct nlattr *nest_parms; - - spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP); - if (!nest_parms) - goto nla_put_failure; - if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state)) - goto nla_put_failure; - - if (destroy) - goto skip_state; - - if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || - nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, - cpu_to_be64(ct->proto.dccp.handshake_seq), - CTA_PROTOINFO_DCCP_PAD)) - goto nla_put_failure; -skip_state: - nla_nest_end(skb, nest_parms); - spin_unlock_bh(&ct->lock); - - return 0; - -nla_put_failure: - spin_unlock_bh(&ct->lock); - return -1; -} - -static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { - [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, - [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, -}; - -#define DCCP_NLATTR_SIZE ( \ - NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ - NLA_ALIGN(NLA_HDRLEN + 0)) - -static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) -{ - struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; - struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; - int err; - - if (!attr) - return 0; - - err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy, NULL); - if (err < 0) - return err; - - if (!tb[CTA_PROTOINFO_DCCP_STATE] || - !tb[CTA_PROTOINFO_DCCP_ROLE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { - return -EINVAL; - } - - spin_lock_bh(&ct->lock); - ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); - if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { - 
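The DCCP_NLATTR_SIZE macro above just adds up the aligned sizes of the four protocol-info attributes (two u8 values, one u64 and the zero-length pad). With a 4-byte attribute header and 4-byte alignment that works out to 8 + 8 + 12 + 4 = 32 bytes; a tiny check of that arithmetic, with the two helpers redefined locally only for this calculation:

#include <stdio.h>
#include <stdint.h>

/* Local copies of the uapi helpers, for this calculation only:
 * attributes are padded to 4 bytes and carry a 4-byte header. */
#define NLA_ALIGNTO    4
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN     ((int)NLA_ALIGN(4))   /* sizeof(struct nlattr) == 4 */

int main(void)
{
	int size = NLA_ALIGN(NLA_HDRLEN + 1) +                /* DCCP_STATE (u8) */
		   NLA_ALIGN(NLA_HDRLEN + 1) +                /* DCCP_ROLE  (u8) */
		   NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)) + /* HANDSHAKE_SEQ   */
		   NLA_ALIGN(NLA_HDRLEN + 0);                 /* DCCP_PAD        */

	printf("DCCP_NLATTR_SIZE = %d\n", size);              /* prints 32 */
	return 0;
}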
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; - } else { - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; - } - if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { - ct->proto.dccp.handshake_seq = - be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); - } - spin_unlock_bh(&ct->lock); - return 0; -} -#endif - -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nfnetlink_cttimeout.h> - -static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], - struct net *net, void *data) -{ - struct nf_dccp_net *dn = nf_dccp_pernet(net); - unsigned int *timeouts = data; - int i; - - if (!timeouts) - timeouts = dn->dccp_timeout; - - /* set default DCCP timeouts. */ - for (i=0; i<CT_DCCP_MAX; i++) - timeouts[i] = dn->dccp_timeout[i]; - - /* there's a 1:1 mapping between attributes and protocol states. */ - for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { - if (tb[i]) { - timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; - } - } - - timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST]; - return 0; -} - -static int -dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) -{ - const unsigned int *timeouts = data; - int i; - - for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { - if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) - goto nla_put_failure; - } - return 0; - -nla_put_failure: - return -ENOSPC; -} - -static const struct nla_policy -dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { - [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, -}; -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -void nf_conntrack_dccp_init_net(struct net *net) -{ - struct nf_dccp_net *dn = nf_dccp_pernet(net); - - /* default values */ - dn->dccp_loose = 1; - dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; - - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. 
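The defaults in the removed nf_conntrack_dccp_init_net() are expressed in multiples of DCCP_MSL (two minutes' worth of jiffies), matching the RFC 4340 figures quoted earlier: four minutes for REQUEST and TIMEWAIT (2 MSL), eight minutes for RESPOND and PARTOPEN (4 MSL), roughly 12 hours for OPEN and 64 seconds for the close states. A quick check of that arithmetic in seconds (HZ cancels out, so it is set to 1 here):

#include <stdio.h>

#define HZ        1                 /* timeouts below come out in seconds */
#define DCCP_MSL  (2 * 60 * HZ)     /* 2 minutes, as in the deleted file */

int main(void)
{
	printf("request/timewait: %d s (2*MSL)\n", 2 * DCCP_MSL);   /* 240 */
	printf("respond/partopen: %d s (4*MSL)\n", 4 * DCCP_MSL);   /* 480 */
	printf("open:             %d s\n", 12 * 3600 * HZ);         /* 43200 */
	printf("closereq/closing: %d s\n", 64 * HZ);                /* 64 */
	return 0;
}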
- */ - dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; -} - -const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { - .l4proto = IPPROTO_DCCP, - .can_early_drop = dccp_can_early_drop, -#ifdef CONFIG_NF_CONNTRACK_PROCFS - .print_conntrack = dccp_print_conntrack, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_size = DCCP_NLATTR_SIZE, - .to_nlattr = dccp_to_nlattr, - .from_nlattr = nlattr_to_dccp, - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - .ctnl_timeout = { - .nlattr_to_obj = dccp_timeout_nlattr_to_obj, - .obj_to_nlattr = dccp_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_DCCP_MAX, - .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, - .nla_policy = dccp_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -}; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6c4cff10357d..9b8b10a85233 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -14,6 +14,7 @@ #include <linux/sysctl.h> #endif +#include <net/netfilter/nf_log.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> @@ -67,11 +68,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->dst.u.udp.port)); break; - case IPPROTO_DCCP: - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.dccp.port), - ntohs(tuple->dst.u.dccp.port)); - break; case IPPROTO_SCTP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.sctp.port), @@ -279,7 +275,6 @@ static const char* l4proto_name(u16 proto) case IPPROTO_ICMP: return "icmp"; case IPPROTO_TCP: return "tcp"; case IPPROTO_UDP: return "udp"; - case IPPROTO_DCCP: return "dccp"; case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; @@ -561,6 +556,29 @@ nf_conntrack_hash_sysctl(const struct ctl_table *table, int write, return ret; } +static int +nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, i; + + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret < 0 || !write) + return ret; + + if (*(u8 *)table->data == 0) + return ret; + + /* Load nf_log_syslog only if no logger is currently registered */ + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + if (nf_log_is_registered(i)) + return ret; + } + request_module("%s", "nf_log_syslog"); + + return ret; +} + static struct ctl_table_header *nf_ct_netfilter_header; enum nf_ct_sysctl_index { @@ -612,16 +630,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT, - NF_SYSCTL_CT_PROTO_DCCP_LOOSE, -#endif #ifdef CONFIG_NF_CT_PROTO_GRE NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, @@ -667,7 +675,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(u8), .mode = 0644, - 
.proc_handler = proc_dou8vec_minmax, + .proc_handler = nf_conntrack_log_invalid_sysctl, }, [NF_SYSCTL_CT_EXPECT_MAX] = { .procname = "nf_conntrack_expect_max", @@ -895,58 +903,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { - .procname = "nf_conntrack_dccp_timeout_request", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = { - .procname = "nf_conntrack_dccp_timeout_respond", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = { - .procname = "nf_conntrack_dccp_timeout_partopen", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = { - .procname = "nf_conntrack_dccp_timeout_open", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = { - .procname = "nf_conntrack_dccp_timeout_closereq", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = { - .procname = "nf_conntrack_dccp_timeout_closing", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = { - .procname = "nf_conntrack_dccp_timeout_timewait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = { - .procname = "nf_conntrack_dccp_loose", - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = proc_dou8vec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_NF_CT_PROTO_GRE [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = { .procname = "nf_conntrack_gre_timeout", @@ -1032,29 +988,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, #endif } -static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net, - struct ctl_table *table) -{ -#ifdef CONFIG_NF_CT_PROTO_DCCP - struct nf_dccp_net *dn = nf_dccp_pernet(net); - -#define XASSIGN(XNAME, dn) \ - table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \ - &(dn)->dccp_timeout[CT_DCCP_ ## XNAME] - - XASSIGN(REQUEST, dn); - XASSIGN(RESPOND, dn); - XASSIGN(PARTOPEN, dn); - XASSIGN(OPEN, dn); - XASSIGN(CLOSEREQ, dn); - XASSIGN(CLOSING, dn); - XASSIGN(TIMEWAIT, dn); -#undef XASSIGN - - table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose; -#endif -} - static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, struct ctl_table *table) { @@ -1100,7 +1033,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_tcp_sysctl(net, table); nf_conntrack_standalone_init_sctp_sysctl(net, table); - nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); /* Don't allow non-init_net ns to alter global sysctls */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 6dd0de33eebd..74cef8bf554c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -125,6 +125,32 @@ void nf_log_unregister(struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); +/** + * nf_log_is_registered - Check if any logger is registered for a given + * protocol family. 
+ * + * @pf: Protocol family + * + * Returns: true if at least one logger is active for @pf, false otherwise. + */ +bool nf_log_is_registered(u_int8_t pf) +{ + int i; + + if (pf >= NFPROTO_NUMPROTO) { + WARN_ON_ONCE(1); + return false; + } + + for (i = 0; i < NF_LOG_TYPE_MAX; i++) { + if (rcu_access_pointer(loggers[pf][i])) + return true; + } + + return false; +} +EXPORT_SYMBOL(nf_log_is_registered); + int nf_log_bind_pf(struct net *net, u_int8_t pf, const struct nf_logger *logger) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index f391cd267922..78a61dac4ade 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -69,7 +69,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_dport = t->dst.u.all; } @@ -81,7 +80,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_sport = t->src.u.all; } @@ -102,7 +100,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_dport = t->dst.u.all; } @@ -114,7 +111,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_sport = t->src.u.all; } @@ -432,7 +428,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: - case IPPROTO_DCCP: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; @@ -632,7 +627,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: - case IPPROTO_DCCP: if (maniptype == NF_NAT_MANIP_SRC) keyptr = &tuple->src.u.all; else diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index dc450cc81222..b14a434b9561 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -180,46 +180,6 @@ tcp_manip_pkt(struct sk_buff *skb, } static bool -dccp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ -#ifdef CONFIG_NF_CT_PROTO_DCCP - struct dccp_hdr *hdr; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (skb_ensure_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 
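The deleted dccp_manip_pkt() rewrites one 16-bit port and then fixes the transport checksum incrementally via inet_proto_csum_replace2() instead of recomputing it over the whole packet. A standalone sketch of that incremental update, using the standard RFC 1624 arithmetic rather than the kernel helper itself:

#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit one's-complement sum back into 16 bits. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Adjust an existing checksum when a single 16-bit word changes from
 * 'old' to 'new' (RFC 1624, eqn. 3): HC' = ~(~HC + ~m + m'). */
static uint16_t csum_replace16(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old;
	sum += new;
	return (uint16_t)~csum_fold(sum);
}

int main(void)
{
	uint16_t check = 0xbeef;                        /* checksum before NAT */
	uint16_t fixed = csum_replace16(check, 0x1f90,  /* old port 8080 */
					       0x0050);  /* new port 80 */

	printf("old csum 0x%04x -> new csum 0x%04x\n", check, fixed);
	return 0;
}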
false); -#endif - return true; -} - -static bool icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, @@ -338,9 +298,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb, case IPPROTO_ICMPV6: return icmpv6_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); - case IPPROTO_DCCP: - return dccp_manip_pkt(skb, iphdroff, hdroff, - tuple, maniptype); case IPPROTO_GRE: return gre_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24c71ecb2179..13d0ed9d1895 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1153,9 +1153,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ -1165,7 +1165,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, NFTA_TABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELTABLE) { + if (event == NFT_MSG_DELTABLE || + event == NFT_MSG_DESTROYTABLE) { nlmsg_end(skb, nlh); return 0; } @@ -2016,9 +2017,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ -2028,7 +2029,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELCHAIN && !hook_list) { + if (!hook_list && + (event == NFT_MSG_DELCHAIN || + event == NFT_MSG_DESTROYCHAIN)) { nlmsg_end(skb, nlh); return 0; } @@ -4039,7 +4042,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) /* can only be used if rule is no longer visible to dumps */ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - lockdep_commit_lock_is_held(ctx->net); + WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -4845,9 +4848,10 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq = ctx->seq; int i; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, - NFNETLINK_V0, nft_base_seq(ctx->net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, ctx->family, NFNETLINK_V0, + nft_base_seq(ctx->net)); if (!nlh) goto nla_put_failure; @@ -4859,7 +4863,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, NFTA_SET_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELSET) { + if (event == NFT_MSG_DELSET || + event == NFT_MSG_DESTROYSET) { nlmsg_end(skb, nlh); return 0; } @@ -5859,7 +5864,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { - lockdep_commit_lock_is_held(ctx->net); + 
WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); switch (phase) { case NFT_TRANS_PREPARE_ERROR: @@ -8338,25 +8343,26 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), NFTA_OBJ_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELOBJ) { + if (event == NFT_MSG_DELOBJ || + event == NFT_MSG_DESTROYOBJ) { nlmsg_end(skb, nlh); return 0; } - if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || - nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + if (nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) goto nla_put_failure; @@ -8889,11 +8895,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, list_for_each_entry(hook, &flowtable_hook->list, list) { list_for_each_entry(ops, &hook->ops_list, list) { - ops->pf = NFPROTO_NETDEV; - ops->hooknum = flowtable_hook->num; - ops->priority = flowtable_hook->priority; - ops->priv = &flowtable->data; - ops->hook = flowtable->data.type->hook; + ops->pf = NFPROTO_NETDEV; + ops->hooknum = flowtable_hook->num; + ops->priority = flowtable_hook->priority; + ops->priv = &flowtable->data; + ops->hook = flowtable->data.type->hook; + ops->hook_ops_type = NF_HOOK_OP_NFT_FT; } } @@ -9382,9 +9389,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, struct nft_hook *hook; struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ -9394,7 +9401,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELFLOWTABLE && !hook_list) { + if (!hook_list && + (event == NFT_MSG_DELFLOWTABLE || + event == NFT_MSG_DESTROYFLOWTABLE)) { nlmsg_end(skb, nlh); return 0; } @@ -9686,64 +9695,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, } EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu); -static void -nf_tables_device_notify(const struct nft_table *table, int attr, - const char *name, const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - struct net *net = dev_net(dev); - struct nlmsghdr *nlh; - struct sk_buff *skb; - u16 flags = 0; - - if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV)) - return; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - goto err; - - event = event == NETDEV_REGISTER ? 
NFT_MSG_NEWDEV : NFT_MSG_DELDEV; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family, - NFNETLINK_V0, nft_base_seq(net)); - if (!nlh) - goto err; - - if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) || - nla_put_string(skb, attr, name) || - nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) || - nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) - goto err; - - nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV, - nlmsg_report(nlh), GFP_KERNEL); - return; -err: - if (skb) - kfree_skb(skb); - nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS); -} - -void -nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN, - chain->name, hook, dev, event); -} - -static void -nf_tables_flowtable_device_notify(const struct nft_flowtable *ft, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE, - ft->name, hook, dev, event); -} - static int nft_flowtable_event(unsigned long event, struct net_device *dev, struct nft_flowtable *flowtable, bool changename) { @@ -9777,12 +9728,13 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, if (!ops) return 1; - ops->pf = NFPROTO_NETDEV; - ops->hooknum = flowtable->hooknum; - ops->priority = flowtable->data.priority; - ops->priv = &flowtable->data; - ops->hook = flowtable->data.type->hook; - ops->dev = dev; + ops->pf = NFPROTO_NETDEV; + ops->hooknum = flowtable->hooknum; + ops->priority = flowtable->data.priority; + ops->priv = &flowtable->data; + ops->hook = flowtable->data.type->hook; + ops->hook_ops_type = NF_HOOK_OP_NFT_FT; + ops->dev = dev; if (nft_register_flowtable_ops(dev_net(dev), flowtable, ops)) { kfree(ops); @@ -9791,7 +9743,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_flowtable_device_notify(flowtable, hook, dev, event); break; } return 0; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index ae3fe87195ab..a88abae5a9de 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb, if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id)) return -1; + /* Kernel implementation detail, withhold this from userspace for now */ + status &= ~IPS_NAT_CLASH; + if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status))) return -1; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ac77fc21632d..e598a2a252b0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, - [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES, }; static struct nfnl_net *nfnl_pernet(struct net *net) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index eab4f476b47f..38d75484e531 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -461,11 +461,6 @@ static int cttimeout_default_get(struct sk_buff *skb, case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; - case IPPROTO_DCCP: -#ifdef 
CONFIG_NF_CT_PROTO_DCCP - timeouts = nf_dccp_pernet(info->net)->dccp_timeout; -#endif - break; case IPPROTO_ICMPV6: timeouts = &nf_icmpv6_pernet(info->net)->timeout; break; diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index ade8ee1988b1..92d869317cba 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -109,13 +109,30 @@ cancel_nest: return -EMSGSIZE; } +static int nfnl_hook_put_nft_info_desc(struct sk_buff *nlskb, const char *tname, + const char *name, u8 family) +{ + struct nlattr *nest; + + nest = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); + if (!nest || + nla_put_string(nlskb, NFNLA_CHAIN_TABLE, tname) || + nla_put_string(nlskb, NFNLA_CHAIN_NAME, name) || + nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, family)) { + nla_nest_cancel(nlskb, nest); + return -EMSGSIZE; + } + nla_nest_end(nlskb, nest); + return 0; +} + static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, const struct nfnl_dump_hook_data *ctx, unsigned int seq, struct nft_chain *chain) { struct net *net = sock_net(nlskb->sk); - struct nlattr *nest, *nest2; + struct nlattr *nest; int ret = 0; if (WARN_ON_ONCE(!chain)) @@ -128,29 +145,47 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, if (!nest) return -EMSGSIZE; - nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); - if (!nest2) - goto cancel_nest; + ret = nfnl_hook_put_nft_info_desc(nlskb, chain->table->name, + chain->name, chain->table->family); + if (ret) { + nla_nest_cancel(nlskb, nest); + return ret; + } - ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name); - if (ret) - goto cancel_nest; + nla_nest_end(nlskb, nest); + return 0; +} - ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name); - if (ret) - goto cancel_nest; +static int nfnl_hook_put_nft_ft_info(struct sk_buff *nlskb, + const struct nfnl_dump_hook_data *ctx, + unsigned int seq, + struct nf_flowtable *nf_ft) +{ + struct nft_flowtable *ft = + container_of(nf_ft, struct nft_flowtable, data); + struct net *net = sock_net(nlskb->sk); + struct nlattr *nest; + int ret = 0; - ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family); - if (ret) - goto cancel_nest; + if (WARN_ON_ONCE(!nf_ft)) + return 0; - nla_nest_end(nlskb, nest2); - nla_nest_end(nlskb, nest); - return ret; + if (!nft_is_active(net, ft)) + return 0; -cancel_nest: - nla_nest_cancel(nlskb, nest); - return -EMSGSIZE; + nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFT_FLOWTABLE); + if (!nest) + return -EMSGSIZE; + + ret = nfnl_hook_put_nft_info_desc(nlskb, ft->table->name, + ft->name, ft->table->family); + if (ret) { + nla_nest_cancel(nlskb, nest); + return ret; + } + + nla_nest_end(nlskb, nest); + return 0; } static int nfnl_hook_dump_one(struct sk_buff *nlskb, @@ -220,6 +255,9 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, case NF_HOOK_OP_BPF: ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv); break; + case NF_HOOK_OP_NFT_FT: + ret = nfnl_hook_put_nft_ft_info(nlskb, ctx, seq, ops->priv); + break; case NF_HOOK_OP_UNDEFINED: break; default: diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 846d48ba8965..b16185e9a6dd 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_chain_device_notify(&basechain->chain, - hook, dev, event); break; } return 0; diff --git a/net/netfilter/nft_dynset.c 
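nfnl_hook_put_nft_ft_info() above recovers the struct nft_flowtable from the embedded struct nf_flowtable it is handed by applying container_of() to the 'data' member. The same pointer arithmetic in standalone form, with generic types instead of the kernel ones:

#include <stddef.h>
#include <stdio.h>

/* Minimal container_of(): map a pointer to an embedded member back to
 * the structure that contains it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct flow_data { int priority; };

struct flowtable {
	const char *name;
	struct flow_data data;   /* embedded, like nft_flowtable.data */
};

static void handle(struct flow_data *d)
{
	/* Given only the embedded member, recover the outer object. */
	struct flowtable *ft = container_of(d, struct flowtable, data);

	printf("flowtable %s, priority %d\n", ft->name, d->priority);
}

int main(void)
{
	struct flowtable ft = { .name = "example", .data = { .priority = 10 } };

	handle(&ft.data);
	return 0;
}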
b/net/netfilter/nft_dynset.c index 88922e0e8e83..7807d8129664 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,9 +44,9 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, return 0; } -static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, - const struct nft_expr *expr, - struct nft_regs *regs) +struct nft_elem_priv *nft_dynset_new(struct nft_set *set, + const struct nft_expr *expr, + struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set_ext *ext; @@ -91,8 +91,8 @@ void nft_dynset_eval(const struct nft_expr *expr, return; } - if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, - expr, regs, &ext)) { + ext = set->ops->update(set, ®s->data[priv->sreg_key], expr, regs); + if (ext) { if (priv->op == NFT_DYNSET_OP_UPDATE && nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index c74012c99125..7eedf4e3ae9c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -407,6 +407,7 @@ err: regs->verdict.code = NFT_BREAK; } +#ifdef CONFIG_NFT_EXTHDR_DCCP static void nft_exthdr_dccp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -482,6 +483,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, err: *dest = 0; } +#endif static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, @@ -634,6 +636,7 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, return 0; } +#ifdef CONFIG_NFT_EXTHDR_DCCP static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -649,6 +652,7 @@ static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, return 0; } +#endif static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { @@ -779,6 +783,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { .reduce = nft_exthdr_reduce, }; +#ifdef CONFIG_NFT_EXTHDR_DCCP static const struct nft_expr_ops nft_exthdr_dccp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), @@ -787,6 +792,7 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = { .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; +#endif static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, @@ -822,10 +828,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_sctp_ops; break; +#ifdef CONFIG_NFT_EXTHDR_DCCP case NFT_EXTHDR_OP_DCCP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; +#endif } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 63ef832b8aa7..40c602ffbcba 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -25,32 +25,33 @@ struct nft_lookup { }; #ifdef CONFIG_MITIGATION_RETPOLINE -bool nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { if (set->ops == &nft_set_hash_fast_type.ops) - return nft_hash_lookup_fast(net, set, key, ext); + return nft_hash_lookup_fast(net, set, key); if (set->ops == &nft_set_hash_type.ops) - return nft_hash_lookup(net, set, key, ext); + return nft_hash_lookup(net, set, key); if (set->ops == 
&nft_set_rhash_type.ops) - return nft_rhash_lookup(net, set, key, ext); + return nft_rhash_lookup(net, set, key); if (set->ops == &nft_set_bitmap_type.ops) - return nft_bitmap_lookup(net, set, key, ext); + return nft_bitmap_lookup(net, set, key); if (set->ops == &nft_set_pipapo_type.ops) - return nft_pipapo_lookup(net, set, key, ext); + return nft_pipapo_lookup(net, set, key); #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) if (set->ops == &nft_set_pipapo_avx2_type.ops) - return nft_pipapo_avx2_lookup(net, set, key, ext); + return nft_pipapo_avx2_lookup(net, set, key); #endif if (set->ops == &nft_set_rbtree_type.ops) - return nft_rbtree_lookup(net, set, key, ext); + return nft_rbtree_lookup(net, set, key); WARN_ON_ONCE(1); - return set->ops->lookup(net, set, key, ext); + return set->ops->lookup(net, set, key); } EXPORT_SYMBOL_GPL(nft_set_do_lookup); #endif @@ -61,12 +62,12 @@ void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; - const struct nft_set_ext *ext = NULL; const struct net *net = nft_net(pkt); + const struct nft_set_ext *ext; bool found; - found = nft_set_do_lookup(net, set, ®s->data[priv->sreg], &ext) ^ - priv->invert; + ext = nft_set_do_lookup(net, set, ®s->data[priv->sreg]); + found = !!ext ^ priv->invert; if (!found) { ext = nft_set_catchall_lookup(net, set); if (!ext) { diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 09da7a3f9f96..8ee66a86c3bc 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -111,10 +111,9 @@ void nft_objref_map_eval(const struct nft_expr *expr, struct net *net = nft_net(pkt); const struct nft_set_ext *ext; struct nft_object *obj; - bool found; - found = nft_set_do_lookup(net, set, ®s->data[priv->sreg], &ext); - if (!found) { + ext = nft_set_do_lookup(net, set, ®s->data[priv->sreg]); + if (!ext) { ext = nft_set_catchall_lookup(net, set); if (!ext) { regs->verdict.code = NFT_BREAK; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 12390d2e994f..c24c922f895d 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -75,16 +75,21 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask) } INDIRECT_CALLABLE_SCOPE -bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_bitmap_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { const struct nft_bitmap *priv = nft_set_priv(set); + static const struct nft_set_ext found; u8 genmask = nft_genmask_cur(net); u32 idx, off; nft_bitmap_location(set, key, &idx, &off); - return nft_bitmap_active(priv->bitmap, idx, off, genmask); + if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + return &found; + + return NULL; } static struct nft_bitmap_elem * diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index abb0c8ec6371..266d0c637225 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -81,8 +81,9 @@ static const struct rhashtable_params nft_rhash_params = { }; INDIRECT_CALLABLE_SCOPE -bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_rhash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { struct nft_rhash *priv = nft_set_priv(set); const struct nft_rhash_elem *he; @@ -95,9 +96,9 @@ bool 
nft_rhash_lookup(const struct net *net, const struct nft_set *set, he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) - *ext = &he->ext; + return &he->ext; - return !!he; + return NULL; } static struct nft_elem_priv * @@ -120,14 +121,9 @@ nft_rhash_get(const struct net *net, const struct nft_set *set, return ERR_PTR(-ENOENT); } -static bool nft_rhash_update(struct nft_set *set, const u32 *key, - struct nft_elem_priv * - (*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *regs), - const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext) +static const struct nft_set_ext * +nft_rhash_update(struct nft_set *set, const u32 *key, + const struct nft_expr *expr, struct nft_regs *regs) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *prev; @@ -143,7 +139,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, if (he != NULL) goto out; - elem_priv = new(set, expr, regs); + elem_priv = nft_dynset_new(set, expr, regs); if (!elem_priv) goto err1; @@ -161,14 +157,13 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, } out: - *ext = &he->ext; - return true; + return &he->ext; err2: nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); err1: - return false; + return NULL; } static int nft_rhash_insert(const struct net *net, const struct nft_set *set, @@ -507,8 +502,9 @@ struct nft_hash_elem { }; INDIRECT_CALLABLE_SCOPE -bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -519,12 +515,10 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set, hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) && - nft_set_elem_active(&he->ext, genmask)) { - *ext = &he->ext; - return true; - } + nft_set_elem_active(&he->ext, genmask)) + return &he->ext; } - return false; + return NULL; } static struct nft_elem_priv * @@ -547,9 +541,9 @@ nft_hash_get(const struct net *net, const struct nft_set *set, } INDIRECT_CALLABLE_SCOPE -bool nft_hash_lookup_fast(const struct net *net, - const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, + const u32 *key) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -562,12 +556,10 @@ bool nft_hash_lookup_fast(const struct net *net, hlist_for_each_entry_rcu(he, &priv->table[hash], node) { k2 = *(u32 *)nft_set_ext_key(&he->ext)->data; if (k1 == k2 && - nft_set_elem_active(&he->ext, genmask)) { - *ext = &he->ext; - return true; - } + nft_set_elem_active(&he->ext, genmask)) + return &he->ext; } - return false; + return NULL; } static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c5855069bdab..1a19649c2851 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -397,34 +397,36 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, } /** - * nft_pipapo_lookup() - Lookup function - * @net: Network namespace - * @set: nftables API set representation - * @key: 
nftables API element representation containing key data - * @ext: nftables API extension pointer, filled with matching reference + * pipapo_get() - Get matching element reference given key data + * @m: storage containing the set elements + * @data: Key data to be matched against existing elements + * @genmask: If set, check that element is active in given genmask + * @tstamp: timestamp to check for expired elements * * For more details, see DOC: Theory of Operation. * - * Return: true on match, false otherwise. + * This is the main lookup function. It matches key data against either + * the working match set or the uncommitted copy, depending on what the + * caller passed to us. + * nft_pipapo_get (lookup from userspace/control plane) and nft_pipapo_lookup + * (datapath lookup) pass the active copy. + * The insertion path will pass the uncommitted working copy. + * + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. */ -bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, + const u8 *data, u8 genmask, + u64 tstamp) { - struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; - u8 genmask = nft_genmask_cur(net); - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; - const u8 *rp = (const u8 *)key; bool map_index; int i; local_bh_disable(); - m = rcu_dereference(priv->match); - - if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) + if (unlikely(!raw_cpu_ptr(m->scratch))) goto out; scratch = *raw_cpu_ptr(m->scratch); @@ -444,12 +446,12 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, * packet bytes value, then AND bucket value */ if (likely(f->bb == 8)) - pipapo_and_field_buckets_8bit(f, res_map, rp); + pipapo_and_field_buckets_8bit(f, res_map, data); else - pipapo_and_field_buckets_4bit(f, res_map, rp); + pipapo_and_field_buckets_4bit(f, res_map, data); NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); + data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' @@ -465,13 +467,15 @@ next_match: scratch->map_index = map_index; local_bh_enable(); - return false; + return NULL; } if (last) { - *ext = &f->mt[b].e->ext; - if (unlikely(nft_set_elem_expired(*ext) || - !nft_set_elem_active(*ext, genmask))) + struct nft_pipapo_elem *e; + + e = f->mt[b].e; + if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) || + !nft_set_elem_active(&e->ext, genmask))) goto next_match; /* Last field: we're just returning the key without @@ -481,8 +485,7 @@ next_match: */ scratch->map_index = map_index; local_bh_enable(); - - return true; + return e; } /* Swap bitmap indices: res_map is the initial bitmap for the @@ -492,112 +495,38 @@ next_match: map_index = !map_index; swap(res_map, fill_map); - rp += NFT_PIPAPO_GROUPS_PADDING(f); + data += NFT_PIPAPO_GROUPS_PADDING(f); } out: local_bh_enable(); - return false; + return NULL; } /** - * pipapo_get() - Get matching element reference given key data + * nft_pipapo_lookup() - Dataplane fronted for main lookup function * @net: Network namespace * @set: nftables API set representation - * @m: storage containing active/existing elements - * @data: Key data to be matched against existing elements - * @genmask: If set, check that element is active in given genmask - * 
@tstamp: timestamp to check for expired elements - * @gfp: the type of memory to allocate (see kmalloc). + * @key: pointer to nft registers containing key data * - * This is essentially the same as the lookup function, except that it matches - * key data against the uncommitted copy and doesn't use preallocated maps for - * bitmap results. + * This function is called from the data path. It will search for + * an element matching the given key in the current active copy. * - * Return: pointer to &struct nft_pipapo_elem on match, error pointer otherwise. + * Return: ntables API extension pointer or NULL if no match. */ -static struct nft_pipapo_elem *pipapo_get(const struct net *net, - const struct nft_set *set, - const struct nft_pipapo_match *m, - const u8 *data, u8 genmask, - u64 tstamp, gfp_t gfp) +const struct nft_set_ext * +nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { - struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); - unsigned long *res_map, *fill_map = NULL; - const struct nft_pipapo_field *f; - int i; - - if (m->bsize_max == 0) - return ret; - - res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp); - if (!res_map) { - ret = ERR_PTR(-ENOMEM); - goto out; - } - - fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp); - if (!fill_map) { - ret = ERR_PTR(-ENOMEM); - goto out; - } - - pipapo_resmap_init(m, res_map); - - nft_pipapo_for_each_field(f, i, m) { - bool last = i == m->field_count - 1; - int b; - - /* For each bit group: select lookup table bucket depending on - * packet bytes value, then AND bucket value - */ - if (f->bb == 8) - pipapo_and_field_buckets_8bit(f, res_map, data); - else if (f->bb == 4) - pipapo_and_field_buckets_4bit(f, res_map, data); - else - BUG(); - - data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); - - /* Now populate the bitmap for the next field, unless this is - * the last field, in which case return the matched 'ext' - * pointer if any. - * - * Now res_map contains the matching bitmap, and fill_map is the - * bitmap for the next field. - */ -next_match: - b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, - last); - if (b < 0) - goto out; - - if (last) { - if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) - goto next_match; - if ((genmask && - !nft_set_elem_active(&f->mt[b].e->ext, genmask))) - goto next_match; - - ret = f->mt[b].e; - goto out; - } - - data += NFT_PIPAPO_GROUPS_PADDING(f); + struct nft_pipapo *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_elem *e; - /* Swap bitmap indices: fill_map will be the initial bitmap for - * the next field (i.e. the new res_map), and res_map is - * guaranteed to be all-zeroes at this point, ready to be filled - * according to the next mapping table. - */ - swap(res_map, fill_map); - } + m = rcu_dereference(priv->match); + e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64()); -out: - kfree(fill_map); - kfree(res_map); - return ret; + return e ? &e->ext : NULL; } /** @@ -606,6 +535,11 @@ out: * @set: nftables API set representation * @elem: nftables API element representation containing key data * @flags: Unused + * + * This function is called from the control plane path under + * RCU read lock. + * + * Return: set element private pointer or ERR_PTR(-ENOENT). 
*/ static struct nft_elem_priv * nft_pipapo_get(const struct net *net, const struct nft_set *set, @@ -615,11 +549,10 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = rcu_dereference(priv->match); struct nft_pipapo_elem *e; - e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, - nft_genmask_cur(net), get_jiffies_64(), - GFP_ATOMIC); - if (IS_ERR(e)) - return ERR_CAST(e); + e = pipapo_get(m, (const u8 *)elem->key.val.data, + nft_genmask_cur(net), get_jiffies_64()); + if (!e) + return ERR_PTR(-ENOENT); return &e->priv; } @@ -1219,7 +1152,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c mem = s; mem -= s->align_off; - kfree(mem); + kvfree(mem); } /** @@ -1240,10 +1173,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, void *scratch_aligned; u32 align_off; #endif - scratch = kzalloc_node(struct_size(scratch, map, - bsize_max * 2) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL_ACCOUNT, cpu_to_node(i)); + scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL_ACCOUNT, cpu_to_node(i)); if (!scratch) { /* On failure, there's no need to undo previous * allocations: this means that some scratch maps have @@ -1345,8 +1277,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL); - if (!IS_ERR(dup)) { + dup = pipapo_get(m, start, genmask, tstamp); + if (dup) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1365,15 +1297,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, return -ENOTEMPTY; } - if (PTR_ERR(dup) == -ENOENT) { - /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp, - GFP_KERNEL); - } - - if (PTR_ERR(dup) != -ENOENT) { - if (IS_ERR(dup)) - return PTR_ERR(dup); + /* Look for partially overlapping entries */ + dup = pipapo_get(m, end, nft_genmask_next(net), tstamp); + if (dup) { *elem_priv = &dup->priv; return -ENOTEMPTY; } @@ -1914,9 +1840,9 @@ nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, if (!m) return NULL; - e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, - nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL); - if (IS_ERR(e)) + e = pipapo_get(m, (const u8 *)elem->key.val.data, + nft_genmask_next(net), nft_net_tstamp(net)); + if (!e) return NULL; nft_set_elem_change_active(net, set, &e->ext); diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index be7c16c79f71..db5d367e43c4 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1137,7 +1137,6 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns * @net: Network namespace * @set: nftables API set representation * @key: nftables API element representation containing key data - * @ext: nftables API extension pointer, filled with matching reference * * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. * @@ -1146,8 +1145,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns * * Return: true on match, false otherwise. 
*/ -bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_scratch *scratch; @@ -1155,17 +1155,18 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; + const struct nft_set_ext *ext; unsigned long *res, *fill; bool map_index; - int i, ret = 0; + int i; local_bh_disable(); if (unlikely(!irq_fpu_usable())) { - bool fallback_res = nft_pipapo_lookup(net, set, key, ext); + ext = nft_pipapo_lookup(net, set, key); local_bh_enable(); - return fallback_res; + return ext; } m = rcu_dereference(priv->match); @@ -1182,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, if (unlikely(!scratch)) { kernel_fpu_end(); local_bh_enable(); - return false; + return NULL; } map_index = scratch->map_index; @@ -1197,6 +1198,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, next_match: nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1, first = !i; + int ret = 0; #define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \ (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \ @@ -1244,10 +1246,10 @@ next_match: goto out; if (last) { - *ext = &f->mt[ret].e->ext; - if (unlikely(nft_set_elem_expired(*ext) || - !nft_set_elem_active(*ext, genmask))) { - ret = 0; + ext = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(ext) || + !nft_set_elem_active(ext, genmask))) { + ext = NULL; goto next_match; } @@ -1264,5 +1266,5 @@ out: kernel_fpu_end(); local_bh_enable(); - return ret >= 0; + return ext; } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2e8ef16ff191..938a257c069e 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -52,9 +52,9 @@ static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) return nft_set_elem_expired(&rbe->ext); } -static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext, - unsigned int seq) +static const struct nft_set_ext * +__nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, unsigned int seq) { struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; @@ -65,7 +65,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set parent = rcu_dereference_raw(priv->root.rb_node); while (parent != NULL) { if (read_seqcount_retry(&priv->count, seq)) - return false; + return NULL; rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -87,50 +87,48 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set } if (nft_rbtree_elem_expired(rbe)) - return false; + return NULL; if (nft_rbtree_interval_end(rbe)) { if (nft_set_is_anonymous(set)) - return false; + return NULL; parent = rcu_dereference_raw(parent->rb_left); interval = NULL; continue; } - *ext = &rbe->ext; - return true; + return &rbe->ext; } } if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_rbtree_elem_expired(interval) && - nft_rbtree_interval_start(interval)) { - *ext = &interval->ext; - return true; - } + nft_rbtree_interval_start(interval)) + 
return &interval->ext; - return false; + return NULL; } INDIRECT_CALLABLE_SCOPE -bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +const struct nft_set_ext * +nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); - bool ret; + const struct nft_set_ext *ext; - ret = __nft_rbtree_lookup(net, set, key, ext, seq); - if (ret || !read_seqcount_retry(&priv->count, seq)) - return ret; + ext = __nft_rbtree_lookup(net, set, key, seq); + if (ext || !read_seqcount_retry(&priv->count, seq)) + return ext; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); - ret = __nft_rbtree_lookup(net, set, key, ext, seq); + ext = __nft_rbtree_lookup(net, set, key, seq); read_unlock_bh(&priv->lock); - return ret; + return ext; } static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 709840612f0d..90b7630421c4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1317,12 +1317,13 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif -DEFINE_PER_CPU(seqcount_t, xt_recseq); -EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); - struct static_key xt_tee_enabled __read_mostly; EXPORT_SYMBOL_GPL(xt_tee_enabled); +#ifdef CONFIG_NETFILTER_XTABLES_LEGACY +DEFINE_PER_CPU(seqcount_t, xt_recseq); +EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); + static int xt_jumpstack_alloc(struct xt_table_info *i) { unsigned int size; @@ -1514,6 +1515,7 @@ void *xt_unregister_table(struct xt_table *table) return private; } EXPORT_SYMBOL_GPL(xt_unregister_table); +#endif #ifdef CONFIG_PROC_FS static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) @@ -1897,6 +1899,7 @@ void xt_proto_fini(struct net *net, u_int8_t af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +#ifdef CONFIG_NETFILTER_XTABLES_LEGACY /** * xt_percpu_counter_alloc - allocate x_tables rule counter * @@ -1951,6 +1954,7 @@ void xt_percpu_counter_free(struct xt_counters *counters) free_percpu((void __percpu *)pcnt); } EXPORT_SYMBOL_GPL(xt_percpu_counter_free); +#endif static int __net_init xt_net_init(struct net *net) { @@ -1983,8 +1987,10 @@ static int __init xt_init(void) unsigned int i; int rv; - for_each_possible_cpu(i) { - seqcount_init(&per_cpu(xt_recseq, i)); + if (IS_ENABLED(CONFIG_NETFILTER_XTABLES_LEGACY)) { + for_each_possible_cpu(i) { + seqcount_init(&per_cpu(xt_recseq, i)); + } } xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 7c6bf1c16813..0ca1cdfc4095 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { - pr_info_ratelimited("accounting object `%s' does not exists\n", - info->name); + pr_info_ratelimited("accounting object `%.*s' does not exist\n", + NFACCT_NAME_MAX, info->name); return -ENOENT; } info->nfacct = nfacct; |
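
Note (illustration, not part of the commit): the recurring change across the nft_lookup.c, nft_set_bitmap.c, nft_set_hash.c, nft_set_pipapo.c, nft_set_pipapo_avx2.c and nft_set_rbtree.c hunks above is a calling-convention switch for set backend lookups. They no longer return a bool and fill an out-parameter; they return the matching extension pointer directly, with NULL meaning "no match". The sketch below shows the before/after shape for a hypothetical backend: struct example_elem and example_find() are invented stand-ins, while nft_set_ext, nft_set_elem_active() and nft_genmask_cur() are the real helpers used in the hunks above.

#include <net/netfilter/nf_tables.h>

/* Hypothetical backend element; only the extension header matters here. */
struct example_elem {
        struct nft_set_ext      ext;
};

/* Hypothetical backend-specific search, e.g. a hash or tree walk. */
static const struct example_elem *example_find(const struct nft_set *set,
                                               const u32 *key);

/* Old-style lookup: bool result, match reported via out-parameter. */
static bool example_lookup_old(const struct net *net,
                               const struct nft_set *set,
                               const u32 *key,
                               const struct nft_set_ext **ext)
{
        const struct example_elem *e = example_find(set, key);

        if (!e || !nft_set_elem_active(&e->ext, nft_genmask_cur(net)))
                return false;

        *ext = &e->ext;
        return true;
}

/* New-style lookup after this series: the extension pointer is the
 * return value and NULL means "no match".
 */
static const struct nft_set_ext *
example_lookup_new(const struct net *net, const struct nft_set *set,
                   const u32 *key)
{
        const struct example_elem *e = example_find(set, key);

        if (!e || !nft_set_elem_active(&e->ext, nft_genmask_cur(net)))
                return NULL;

        return &e->ext;
}

On the caller side this lets nft_lookup_eval() collapse to "ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]); found = !!ext ^ priv->invert;", as shown in the nft_lookup.c hunk, instead of juggling a bool plus a separately initialized out-pointer.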
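
A second hedged sketch, tied to the nft_dynset.c and nft_set_hash.c hunks: the set ->update() operation loses its function-pointer argument, the backend now calls nft_dynset_new() (made non-static in the nft_dynset.c hunk) itself, and the result is again reported through the returned extension pointer. The code below is only an illustration of that new contract; example_find(), example_insert() and example_elem_from_priv() are invented placeholders reusing struct example_elem from the previous sketch, whereas nft_dynset_new() and nft_set_elem_destroy() are the real functions visible in the diff.

static const struct nft_set_ext *
example_update(struct nft_set *set, const u32 *key,
               const struct nft_expr *expr, struct nft_regs *regs)
{
        struct nft_elem_priv *elem_priv;
        struct example_elem *e;

        e = example_find(set, key);                     /* hypothetical lookup */
        if (e)
                return &e->ext;                         /* already present: update path */

        elem_priv = nft_dynset_new(set, expr, regs);    /* real, see nft_dynset.c hunk */
        if (!elem_priv)
                return NULL;

        e = example_elem_from_priv(elem_priv);          /* hypothetical container_of() wrapper */
        if (example_insert(set, e) < 0) {               /* hypothetical insertion step */
                /* real backends also adjust set->nelems here, elided for brevity */
                nft_set_elem_destroy(set, elem_priv, true);
                return NULL;
        }

        return &e->ext;
}

This mirrors what nft_rhash_update() does after the change: success and "element already exists" both hand back &he->ext, and every failure path is a plain NULL return.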