From 0604628bb03ae695f61b872cc59f1933a8379625 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 09:15:02 +0100 Subject: netfilter: nf_tables: add NFTA_RULE_POSITION_ID to nla_policy Fixes: 75dd48e2e420a ("netfilter: nf_tables: Support RULE_ID reference in new rule") Reported-by: Cong Wang Signed-off-by: Florian Westphal Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e92bedd09cde..7495f29d24e8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2239,6 +2239,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_RULE_ID] = { .type = NLA_U32 }, + [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, -- cgit From a46c52d9f2659498f0c0871f7f2333a692c243fe Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 29 Jan 2019 15:51:17 +0800 Subject: netfilter: nft_tunnel: Add NFTA_TUNNEL_MODE options nft "tunnel" expr match both the tun_info of RX and TX. This patch provide the NFTA_TUNNEL_MODE to individually match the tun_info of RX or TX. Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3a15f219e4e7..ea28588c5eed 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -15,6 +15,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; enum nft_registers dreg:8; + enum nft_tunnel_mode mode:8; }; static void nft_tunnel_get_eval(const struct nft_expr *expr, @@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_TUNNEL_PATH: - nft_reg_store8(dest, !!tun_info); + if (!tun_info) { + nft_reg_store8(dest, false); + return; + } + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + nft_reg_store8(dest, true); + else + nft_reg_store8(dest, false); break; case NFT_TUNNEL_ID: if (!tun_info) { regs->verdict.code = NFT_BREAK; return; } - *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + else + regs->verdict.code = NFT_BREAK; break; default: WARN_ON(1); @@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = { .type = NLA_U32 }, [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, + [NFTA_TUNNEL_MODE] = { .type = NLA_U32 }, }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, @@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); + if (tb[NFTA_TUNNEL_MODE]) { + priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); + if (priv->mode > NFT_TUNNEL_MODE_MAX) + return -EOPNOTSUPP; + } else { + priv->mode = NFT_TUNNEL_MODE_NONE; + } + return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb, goto nla_put_failure; if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode))) + goto nla_put_failure; return 0; nla_put_failure: -- cgit From 960587285a56ec3cafb4d1e6b25c19eced4d0bce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:16:59 +0100 Subject: netfilter: nat: remove module dependency on ipv6 core nf_nat_ipv6 calls two ipv6 core functions, so add those to v6ops to avoid the module dependency. This is a prerequisite for merging ipv4 and ipv6 nat implementations. Add wrappers to avoid the indirection if ipv6 is builtin. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 4 ++++ net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 17 ++++++++++++++++- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 21 +++++++++++++++++++-- 3 files changed, 39 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..0a5caf263889 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -112,6 +112,10 @@ static const struct nf_ipv6_ops ipv6ops = { .fragment = ip6_fragment, .route = nf_ip6_route, .reroute = nf_ip6_reroute, +#if IS_MODULE(CONFIG_IPV6) + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, +#endif }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..b52026adb3e7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } +static int nat_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -333,7 +348,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = nat_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 0ad0da5a2600..fd313b726263 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -24,6 +24,23 @@ static atomic_t v6_worker_count; +static int +nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#else + return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#endif +} + unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); -- cgit From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:17:00 +0100 Subject: netfilter: ipv6: avoid indirect calls for IPV6=y case indirect calls are only needed if ipv6 is a module. Add helpers to abstract the v6ops indirections and use them instead. fragment, reroute and route_input are kept as indirect calls. The first two are not not used in hot path and route_input is only used by bridge netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 15 ++++++++------- net/ipv6/netfilter/nft_fib_ipv6.c | 9 ++------- net/netfilter/utils.c | 6 ++---- net/netfilter/xt_addrtype.c | 16 +++++----------- 4 files changed, 17 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0a5caf263889..a8263031f3a6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .route = nf_ip6_route, - .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) + .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, #endif + .route_input = ip6_route_input, + .fragment = ip6_fragment, + .reroute = nf_ip6_reroute, }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 55af9f247993..06dc55590441 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops; + const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { @@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->route(net, dst, fl, strict); + ret = nf_ip6_route(net, dst, fl, strict); break; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 89e281b3bfc2..29987ff03621 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - v6ops = nf_get_ipv6_ops(); - if (v6ops) { - if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - if (v6ops->chk_addr(net, addr, dev, true)) - ret = XT_ADDRTYPE_LOCAL; - } - route_err = v6ops->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); - } else { - route_err = 1; + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + if (nf_ipv6_chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; } + route_err = nf_ip6_route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; -- cgit From 48ab807c792fa9074a46cfdc837023fadb46fb73 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Feb 2019 13:13:11 +0000 Subject: netfilter: conntrack: fix indentation issue A statement in an if block is not indented correctly. Fix this. Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8071bb04a849..349b42a65c8a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2675,7 +2675,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { l4proto = nf_ct_l4proto_find(tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); + ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); -- cgit From 6fde9df6b76eaf2395d158628d5f915857981a44 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:44:56 -0600 Subject: ipvs: Use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 432141f04af3..446beeb5e7b2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2722,8 +2722,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_services *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_service_entry) * get->num_services; + size = struct_size(get, entrytable, get->num_services); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; @@ -2764,8 +2763,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_dests *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_dest_entry) * get->num_dests; + size = struct_size(get, entrytable, get->num_dests); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; -- cgit From 6ca64ef37da930ad0fd0f2c3dac1a4607e0af494 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:56:08 -0600 Subject: netfilter: xt_recent: Use struct_size() in kvzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; size = sizeof(struct foo) + count * sizeof(void *); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); instance = alloc(size, GFP_KERNEL) Notice that, in this case, variable sz is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index f44de4bc2100..1664d2ec8b2f 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -337,7 +337,6 @@ static int recent_mt_check(const struct xt_mtchk_param *par, unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; - size_t sz; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); @@ -387,8 +386,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } - sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - t = kvzalloc(sz, GFP_KERNEL); + t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; -- cgit From 13f5251fd17088170c18844534682d9cab5ff5aa Mon Sep 17 00:00:00 2001 From: Chieh-Min Wang Date: Tue, 12 Feb 2019 00:59:55 +0100 Subject: netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm For bridge(br_flood) or broadcast/multicast packets, they could clone skb with unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is never shared. With nfqueue running on my system, the race can be easily reproduced with following warning calltrace: [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: P W 4.4.60 #7744 [13257.707568] Hardware name: Qualcomm (Flattened Device Tree) [13257.714700] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [13257.720253] [] (show_stack) from [] (dump_stack+0x94/0xa8) [13257.728240] [] (dump_stack) from [] (warn_slowpath_common+0x94/0xb0) [13257.735268] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [13257.743519] [] (warn_slowpath_null) from [] (__nf_conntrack_confirm+0xa8/0x618) [13257.752284] [] (__nf_conntrack_confirm) from [] (ipv4_confirm+0xb8/0xfc) [13257.761049] [] (ipv4_confirm) from [] (nf_iterate+0x48/0xa8) [13257.769725] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.777108] [] (nf_hook_slow) from [] (br_nf_post_routing+0x274/0x31c) [13257.784486] [] (br_nf_post_routing) from [] (nf_iterate+0x48/0xa8) [13257.792556] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.800458] [] (nf_hook_slow) from [] (br_forward_finish+0x94/0xa4) [13257.808010] [] (br_forward_finish) from [] (br_nf_forward_finish+0x150/0x1ac) [13257.815736] [] (br_nf_forward_finish) from [] (nf_reinject+0x108/0x170) [13257.824762] [] (nf_reinject) from [] (nfqnl_recv_verdict+0x3d8/0x420) [13257.832924] [] (nfqnl_recv_verdict) from [] (nfnetlink_rcv_msg+0x158/0x248) [13257.841256] [] (nfnetlink_rcv_msg) from [] (netlink_rcv_skb+0x54/0xb0) [13257.849762] [] (netlink_rcv_skb) from [] (netlink_unicast+0x148/0x23c) [13257.858093] [] (netlink_unicast) from [] (netlink_sendmsg+0x2ec/0x368) [13257.866348] [] (netlink_sendmsg) from [] (sock_sendmsg+0x34/0x44) [13257.874590] [] (sock_sendmsg) from [] (___sys_sendmsg+0x1ec/0x200) [13257.882489] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x3c/0x64) [13257.890300] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x34) The original code just triggered the warning but do nothing. It will caused the shared conntrack moves to the dying list and the packet be droppped (nf_ct_resolve_clash returns NF_DROP for dying conntrack). - Reproduce steps: +----------------------------+ | br0(bridge) | | | +-+---------+---------+------+ | eth0| | eth1| | eth2| | | | | | | +--+--+ +--+--+ +---+-+ | | | | | | +--+-+ +-+--+ +--+-+ | PC1| | PC2| | PC3| +----+ +----+ +----+ iptables -A FORWARD -m mark --mark 0x1000000/0x1000000 -j NFQUEUE --queue-num 100 --queue-bypass ps: Our nfq userspace program will set mark on packets whose connection has already been processed. PC1 sends broadcast packets simulated by hping3: hping3 --rand-source --udp 192.168.1.255 -i u100 - Broadcast racing flow chart is as follow: br_handle_frame BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish) // skb->_nfct (unconfirmed conntrack) is constructed at PRE_ROUTING stage br_handle_frame_finish // check if this packet is broadcast br_flood_forward br_flood list_for_each_entry_rcu(p, &br->port_list, list) // iterate through each port maybe_deliver deliver_clone skb = skb_clone(skb) __br_forward BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...) // queue in our nfq and received by our userspace program // goto __nf_conntrack_confirm with process context on CPU 1 br_pass_frame_up BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...) // goto __nf_conntrack_confirm with softirq context on CPU 0 Because conntrack confirm can happen at both INPUT and POSTROUTING stage. So with NFQUEUE running, skb->_nfct with the same unconfirmed conntrack could race on different core. This patch fixes a repeating kernel splat, now it is only displayed once. Signed-off-by: Chieh-Min Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 815956ac5a76..85de2a7b0ede 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -936,10 +936,18 @@ __nf_conntrack_confirm(struct sk_buff *skb) * REJECT will give spurious warnings here. */ - /* No external references means no one else could have - * confirmed us. + /* Another skb with the same unconfirmed conntrack may + * win the race. This may happen for bridge(br_flood) + * or broadcast/multicast packets do skb_clone with + * unconfirmed conntrack. */ - WARN_ON(nf_ct_is_confirmed(ct)); + if (unlikely(nf_ct_is_confirmed(ct))) { + WARN_ON_ONCE(1); + nf_conntrack_double_unlock(hash, reply_hash); + local_bh_enable(); + return NF_DROP; + } + pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from -- cgit From 7fc38225363dd8f19e667ad7c77b63bc4a5c065d Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Wed, 13 Feb 2019 09:14:53 +0100 Subject: netfilter: reject: skip csum verification for protocols that don't support it Some protocols have other means to verify the payload integrity (AH, ESP, SCTP) while others are incompatible with nf_ip(6)_checksum implementation because checksum is either optional or might be partial (UDPLITE, DCCP, GRE). Because nf_ip(6)_checksum was used to validate the packets, ip(6)tables REJECT rules were not capable to generate ICMP(v6) errors for the protocols mentioned above. This commit also fixes the incorrect pseudo-header protocol used for IPv4 packets that carry other transport protocols than TCP or UDP (pseudo-header used protocol 0 iso the proper value). Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 10 +++++----- net/ipv4/netfilter/nf_reject_ipv4.c | 9 ++------- net/ipv6/netfilter/nf_reject_ipv6.c | 3 +++ 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 419e8edf23ba..1b1856744c80 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -125,13 +125,10 @@ static void nft_reject_br_send_v4_unreach(struct net *net, if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) return; - if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || - ip_hdr(oldskb)->protocol == IPPROTO_UDP) - proto = ip_hdr(oldskb)->protocol; - else - proto = 0; + proto = ip_hdr(oldskb)->protocol; if (!skb_csum_unnecessary(oldskb) && + nf_reject_verify_csum(proto) && nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; @@ -234,6 +231,9 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index aa8304c618b8..7dc3c324b911 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -173,21 +173,16 @@ EXPORT_SYMBOL_GPL(nf_send_reset); void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) { struct iphdr *iph = ip_hdr(skb_in); - u8 proto; + u8 proto = iph->protocol; if (iph->frag_off & htons(IP_OFFSET)) return; - if (skb_csum_unnecessary(skb_in)) { + if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); return; } - if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) - proto = iph->protocol; - else - proto = 0; - if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index b9c8a763c863..02e9228641e0 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } -- cgit From dddaf89e2fbc69b15e82a96ccd174c70f65bf3d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 16 Feb 2019 08:16:06 +0000 Subject: netfilter: ipt_CLUSTERIP: make symbol 'cip_netdev_notifier' static Fixes the following sparse warnings: net/ipv4/netfilter/ipt_CLUSTERIP.c:867:23: warning: symbol 'cip_netdev_notifier' was not declared. Should it be static? Fixes: 5a86d68bcf02 ("netfilter: ipt_CLUSTERIP: fix deadlock in netns exit routine") Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b61977db9b7f..91b369bc44f9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -864,7 +864,7 @@ static struct pernet_operations clusterip_net_ops = { .size = sizeof(struct clusterip_net), }; -struct notifier_block cip_netdev_notifier = { +static struct notifier_block cip_netdev_notifier = { .notifier_call = clusterip_netdev_event }; -- cgit From a3419ce3356cf1fdc69a0524eced84cef730b3bf Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Sat, 16 Feb 2019 10:49:12 +0100 Subject: netfilter: nf_conntrack_sip: add sip_external_media logic When enabled, the sip_external_media logic will leave SDP payload untouched when it detects that interface towards INVITEd party is the same with the one towards media endpoint. The typical scenario for this logic is when a LAN SIP agent has more than one IP address (uses a different address for media streams than the one used on signalling stream) and it also forwards calls to a voice mailbox located on the WAN side. In such case sip_direct_media must be disabled (so normal calls could be handled by the SIP helper), but media streams that are not traversing this router must also be excluded from address translation (e.g. call forwards). Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c8d2b6688a2a..f067c6b50857 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include #include @@ -54,6 +56,11 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); +static int sip_external_media __read_mostly = 0; +module_param(sip_external_media, int, 0600); +MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external " + "endpoints (default 0)"); + const struct nf_nat_sip_hooks *nf_nat_sip_hooks; EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); @@ -861,6 +868,41 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3)) return NF_ACCEPT; saddr = &ct->tuplehash[!dir].tuple.src.u3; + } else if (sip_external_media) { + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(dev); + struct rtable *rt; + struct flowi4 fl4 = {}; +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl6 = {}; +#endif + struct dst_entry *dst = NULL; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + fl4.daddr = daddr->ip; + rt = ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) + dst = &rt->dst; + break; + +#if IS_ENABLED(CONFIG_IPV6) + case NFPROTO_IPV6: + fl6.daddr = daddr->in6; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + dst_release(dst); + dst = NULL; + } + break; +#endif + } + + /* Don't predict any conntracks when media endpoint is reachable + * through the same interface as the signalling peer. + */ + if (dst && dst->dev == dev) + return NF_ACCEPT; } /* We need to check whether the registration exists before attempting -- cgit