diff options
author | David S. Miller <davem@davemloft.net> | 2015-08-04 23:57:45 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-08-04 23:57:45 -0700 |
commit | 9dc20a649609c95ce7c5ac4282656ba627b67d49 (patch) | |
tree | 7e268a62b212e7f4799d3a25ac7f5f62c2c7143a /net/ipv4 | |
parent | d1b22e4d8e57e3914d706b108c5b1718d187557d (diff) | |
parent | a6cd379b4d68867295ea35a719008e86d7a2ee9f (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for net-next, they are:
1) A couple of cleanups for the netfilter core hook from Eric Biederman.
2) Net namespace hook registration, also from Eric. This adds a dependency with
the rtnl_lock. This should be fine for now but we have to keep an eye on this
because if we ever get the per-subsys nfnl_lock before rtnl we may have
problems in the future. But we have room to remove this in the future by
propagating the complexity to the clients, by registering hooks for the init
netns functions.
3) Update nf_tables to use the new net namespace hook infrastructure, also from
Eric.
4) Three patches to refine and to address problems from the new net namespace
hook infrastructure.
5) Switch to alternate jumpstack in xtables iff the packet is reentering. This
only applies to a very special case, the TEE target, but Eric Dumazet
reports that this is slowing down things for everyone else. So let's only
switch to the alternate jumpstack if the tee target is in use, checked through a
static key. This batch also comes with offline precalculation of the
jumpstack based on the callchain depth. From Florian Westphal.
6) Minimal SCTP multihoming support for our conntrack helper, from Michal
Kubecek.
7) Reduce nf_bridge_info per skbuff scratchpad area to 32 bytes, from Florian
Westphal.
8) Fix several checkpatch errors in bridge netfilter, from Bernhard Thaler.
9) Get rid of useless debug message in ip6t_REJECT, from Subash Abhinov.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 32 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 68 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_defrag_ipv4.c | 7 |
3 files changed, 58 insertions, 49 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 92305a1a021a..c416cb355cb0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } -static inline __pure +static inline struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) { return (void *)entry + entry->next_offset; @@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; + /* No TEE support for arptables, so no need to switch to alternate + * stack. All targets that reenter must return absolute verdicts. + */ e = get_entry(table_base, private->hook_entry[hook]); acpar.in = state->in; @@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, } if (table_base + v != arpt_next_entry(e)) { - - if (stackidx >= private->stacksize) { - verdict = NF_DROP; - break; - } jumpstack[stackidx++] = e; } @@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, continue; } - /* Targets which reenter must return - * abs. verdicts - */ acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); @@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. + * + * Keeps track of largest call depth seen and stores it in newinfo->stacksize. 
*/ -static int mark_source_chains(const struct xt_table_info *newinfo, +static int mark_source_chains(struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { + unsigned int calldepth, max_calldepth = 0; unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset @@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, /* Set initial back pointer. */ e->counters.pcnt = pos; + calldepth = 0; for (;;) { const struct xt_standard_target *t @@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, (entry0 + pos + size); e->counters.pcnt = pos; pos += size; + if (calldepth > 0) + --calldepth; } else { int newpos = t->verdict; @@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 0; } + if (entry0 + newpos != arpt_next_entry(e) && + ++calldepth > max_calldepth) + max_calldepth = calldepth; + /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, next: duprintf("Finished chain %u\n", hook); } + newinfo->stacksize = max_calldepth; return 1; } @@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (ret != 0) break; ++i; - if (strcmp(arpt_get_target(iter)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) @@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name, break; } ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6c72fbb7b49e..787f99ed55e2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb, } #endif -static inline __pure +static 
inline struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) { return (void *)entry + entry->next_offset; @@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb, const char *indev, *outdev; const void *table_base; struct ipt_entry *e, **jumpstack; - unsigned int *stackptr, origptr, cpu; + unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ + stackidx = 0; ip = ip_hdr(skb); indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; @@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb, smp_read_barrier_depends(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = per_cpu_ptr(private->stackptr, cpu); - origptr = *stackptr; + + /* Switch to alternate jumpstack if we're being invoked via TEE. + * TEE issues XT_CONTINUE verdict on original skb so we must not + * clobber the jumpstack. + * + * For recursion via REJECT or SYNPROXY the stack will be clobbered + * but it is no problem since absolute verdict is issued by these. 
+ */ + if (static_key_false(&xt_tee_enabled)) + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); - pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", - table->name, hook, origptr, + pr_debug("Entering %s(hook %u), UF %p\n", + table->name, hook, get_entry(table_base, private->underflow[hook])); do { @@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - if (*stackptr <= origptr) { + if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); pr_debug("Underflow (this is normal) " "to %p\n", e); } else { - e = jumpstack[--*stackptr]; + e = jumpstack[--stackidx]; pr_debug("Pulled %p out from pos %u\n", - e, *stackptr); + e, stackidx); e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && !(e->ip.flags & IPT_F_GOTO)) { - if (*stackptr >= private->stacksize) { - verdict = NF_DROP; - break; - } - jumpstack[(*stackptr)++] = e; + jumpstack[stackidx++] = e; pr_debug("Pushed %p into pos %u\n", - e, *stackptr - 1); + e, stackidx - 1); } e = get_entry(table_base, v); @@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - pr_debug("Exiting %s; resetting sp from %u to %u\n", - __func__, *stackptr, origptr); - *stackptr = origptr; + pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); + xt_write_recseq_end(addend); local_bh_enable(); @@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb, } /* Figures out from what hook each rule can be called: returns 0 if - there are loops. Puts hook bitmask in comefrom. */ + * there are loops. Puts hook bitmask in comefrom. + * + * Keeps track of largest call depth seen and stores it in newinfo->stacksize. 
+ */ static int -mark_source_chains(const struct xt_table_info *newinfo, +mark_source_chains(struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { + unsigned int calldepth, max_calldepth = 0; unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset @@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo, /* Set initial back pointer. */ e->counters.pcnt = pos; + calldepth = 0; for (;;) { const struct xt_standard_target *t @@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo, (entry0 + pos + size); e->counters.pcnt = pos; pos += size; + WARN_ON_ONCE(calldepth == 0); + if (calldepth > 0) + --calldepth; } else { int newpos = t->verdict; @@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo, newpos); return 0; } + if (entry0 + newpos != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO) && + ++calldepth > max_calldepth) + max_calldepth = calldepth; + /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u\n", - pos, newpos); + duprintf("Jump rule %u -> %u, calldepth %d\n", + pos, newpos, calldepth); } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo, next: duprintf("Finished chain %u\n", hook); } + newinfo->stacksize = max_calldepth; return 1; } @@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (ret != 0) return ret; ++i; - if (strcmp(ipt_get_target(iter)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; } if (i != repl->num_entries) { @@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index c88b7d434718..b69e82bda215 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif - -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && - skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + if (nf_bridge_in_prerouting(skb)) return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; -#endif + if (hooknum == NF_INET_PRE_ROUTING) return IP_DEFRAG_CONNTRACK_IN + zone; else |