summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-08-04 23:57:45 -0700
committerDavid S. Miller <davem@davemloft.net>2015-08-04 23:57:45 -0700
commit9dc20a649609c95ce7c5ac4282656ba627b67d49 (patch)
tree7e268a62b212e7f4799d3a25ac7f5f62c2c7143a /net/ipv4
parentd1b22e4d8e57e3914d706b108c5b1718d187557d (diff)
parenta6cd379b4d68867295ea35a719008e86d7a2ee9f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for net-next, they are: 1) A couple of cleanups for the netfilter core hook from Eric Biederman. 2) Net namespace hook registration, also from Eric. This adds a dependency with the rtnl_lock. This should be fine by now but we have to keep an eye on this because if we ever get the per-subsys nfnl_lock before rtnl we have may problems in the future. But we have room to remove this in the future by propagating the complexity to the clients, by registering hooks for the init netns functions. 3) Update nf_tables to use the new net namespace hook infrastructure, also from Eric. 4) Three patches to refine and to address problems from the new net namespace hook infrastructure. 5) Switch to alternate jumpstack in xtables iff the packet is reentering. This only applies to a very special case, the TEE target, but Eric Dumazet reports that this is slowing down things for everyone else. So let's only switch to the alternate jumpstack if the tee target is in used through a static key. This batch also comes with offline precalculation of the jumpstack based on the callchain depth. From Florian Westphal. 6) Minimal SCTP multihoming support for our conntrack helper, from Michal Kubecek. 7) Reduce nf_bridge_info per skbuff scratchpad area to 32 bytes, from Florian Westphal. 8) Fix several checkpatch errors in bridge netfilter, from Bernhard Thaler. 9) Get rid of useless debug message in ip6t_REJECT, from Subash Abhinov. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/netfilter/arp_tables.c32
-rw-r--r--net/ipv4/netfilter/ip_tables.c68
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c7
3 files changed, 58 insertions, 49 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 92305a1a021a..c416cb355cb0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
-static inline __pure
+static inline
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
+ /* No TEE support for arptables, so no need to switch to alternate
+ * stack. All targets that reenter must return absolute verdicts.
+ */
e = get_entry(table_base, private->hook_entry[hook]);
acpar.in = state->in;
@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
if (table_base + v
!= arpt_next_entry(e)) {
-
- if (stackidx >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
jumpstack[stackidx++] = e;
}
@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
continue;
}
- /* Targets which reenter must return
- * abs. verdicts
- */
acpar.target = t->u.kernel.target;
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
-static int mark_source_chains(const struct xt_table_info *newinfo,
+static int mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 0;
}
+ if (entry0 + newpos != arpt_next_entry(e) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
break;
++i;
- if (strcmp(arpt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name,
break;
}
++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6c72fbb7b49e..787f99ed55e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure
+static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
const char *indev, *outdev;
const void *table_base;
struct ipt_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
ip = ip_hdr(skb);
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
@@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb,
smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
- table->name, hook, origptr,
+ pr_debug("Entering %s(hook %u), UF %p\n",
+ table->name, hook,
get_entry(table_base, private->underflow[hook]));
do {
@@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr) {
+ if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
pr_debug("Underflow (this is normal) "
"to %p\n", e);
} else {
- e = jumpstack[--*stackptr];
+ e = jumpstack[--stackidx];
pr_debug("Pulled %p out from pos %u\n",
- e, *stackptr);
+ e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
pr_debug("Pushed %p into pos %u\n",
- e, *stackptr - 1);
+ e, stackidx - 1);
}
e = get_entry(table_base, v);
@@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; resetting sp from %u to %u\n",
- __func__, *stackptr, origptr);
- *stackptr = origptr;
+ pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
xt_write_recseq_end(addend);
local_bh_enable();
@@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ WARN_ON_ONCE(calldepth == 0);
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos);
return 0;
}
+ if (entry0 + newpos != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
+ duprintf("Jump rule %u -> %u, calldepth %d\n",
+ pos, newpos, calldepth);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
- if (strcmp(ipt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index c88b7d434718..b69e82bda215 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ if (nf_bridge_in_prerouting(skb))
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN + zone;
else