Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--  net/netfilter/nf_conntrack_core.c  126
1 file changed, 85 insertions(+), 41 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3d621b8d7b8a..f9245dbfe435 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
+ bool early_drop;
long next_gc_run;
};
@@ -180,14 +181,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
seqcount_t nf_conntrack_generation __read_mostly;
-
-/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
- * for the nfctinfo. We cheat by (ab)using the PER CPU cache line
- * alignment to enforce this.
- */
-DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-
static unsigned int nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -706,7 +699,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- !nfct_nat(ct) &&
+ ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
enum ip_conntrack_info oldinfo;
@@ -959,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static bool gc_worker_skip_ct(const struct nf_conn *ct)
+{
+ return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
+}
+
+static bool gc_worker_can_early_drop(const struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ return true;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
+ return true;
+
+ return false;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned int nf_conntrack_max95 = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
unsigned long next_run;
@@ -971,6 +984,8 @@ static void gc_worker(struct work_struct *work)
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;
+ if (gc_work->early_drop)
+ nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
do {
struct nf_conntrack_tuple_hash *h;
@@ -987,6 +1002,8 @@ static void gc_worker(struct work_struct *work)
i = 0;
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ struct net *net;
+
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
@@ -995,6 +1012,27 @@ static void gc_worker(struct work_struct *work)
expired_count++;
continue;
}
+
+ if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+ continue;
+
+ net = nf_ct_net(tmp);
+ if (atomic_read(&net->ct.count) < nf_conntrack_max95)
+ continue;
+
+ /* need to take reference to avoid possible races */
+ if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ if (gc_worker_skip_ct(tmp)) {
+ nf_ct_put(tmp);
+ continue;
+ }
+
+ if (gc_worker_can_early_drop(tmp))
+ nf_ct_kill(tmp);
+
+ nf_ct_put(tmp);
}
/* could check get_nulls_value() here and restart if ct
@@ -1040,6 +1078,7 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
+ gc_work->early_drop = false;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
@@ -1065,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
if (!early_drop(net, hash)) {
+ if (!conntrack_gc_work.early_drop)
+ conntrack_gc_work.early_drop = true;
atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
@@ -1314,9 +1355,10 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
int ret;
tmpl = nf_ct_get(skb, &ctinfo);
- if (tmpl) {
+ if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
- if (!nf_ct_is_template(tmpl)) {
+ if ((tmpl && !nf_ct_is_template(tmpl)) ||
+ ctinfo == IP_CT_UNTRACKED) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
return NF_ACCEPT;
}
@@ -1629,18 +1671,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-static int untrack_refs(void)
-{
- int cnt = 0, cpu;
-
- for_each_possible_cpu(cpu) {
- struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
-
- cnt += atomic_read(&ct->ct_general.use) - 1;
- }
- return cnt;
-}
-
void nf_conntrack_cleanup_start(void)
{
conntrack_gc_work.exiting = true;
@@ -1650,8 +1680,6 @@ void nf_conntrack_cleanup_start(void)
void nf_conntrack_cleanup_end(void)
{
RCU_INIT_POINTER(nf_ct_destroy, NULL);
- while (untrack_refs() > 0)
- schedule();
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
@@ -1825,20 +1853,44 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
-void nf_ct_untracked_status_or(unsigned long bits)
+static unsigned int total_extension_size(void)
{
- int cpu;
+ /* remember to add new extensions below */
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
- for_each_possible_cpu(cpu)
- per_cpu(nf_conntrack_untracked, cpu).status |= bits;
-}
-EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+ return sizeof(struct nf_ct_ext) +
+ sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + sizeof(struct nf_conn_nat)
+#endif
+ + sizeof(struct nf_conn_seqadj)
+ + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ + sizeof(struct nf_conn_synproxy)
+#endif
+ ;
+};
int nf_conntrack_init_start(void)
{
int max_factor = 8;
int ret = -ENOMEM;
- int i, cpu;
+ int i;
+
+ /* struct nf_ct_ext uses u8 to store offsets/size */
+ BUILD_BUG_ON(total_extension_size() > 255u);
seqcount_init(&nf_conntrack_generation);
@@ -1921,15 +1973,6 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_proto;
- /* Set up fake conntrack: to never be deleted, not in any hashes */
- for_each_possible_cpu(cpu) {
- struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
- write_pnet(&ct->ct_net, &init_net);
- atomic_set(&ct->ct_general.use, 1);
- }
- /* - and look it like as a confirmed connection */
- nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
-
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
@@ -1977,6 +2020,7 @@ int nf_conntrack_init_net(struct net *net)
int ret = -ENOMEM;
int cpu;
+ BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
atomic_set(&net->ct.count, 0);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);