From e2ef75445340ca7ec2c4558f84ae6c8c5d650fc8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Sep 2017 16:33:31 -0700 Subject: net_sched: fix reference counting of tc filter chain This patch fixes the following ugliness of tc filter chain refcnt: a) tp proto should hold a refcnt to the chain too. This significantly simplifies the logic. b) Chain 0 is no longer special, it is created with refcnt=1 like any other chains. All the ugliness in tcf_chain_put() can be gone! c) No need to handle the flushing oddly, because block still holds chain 0, it can not be released, this guarantees block is the last user. d) The race condition with RCU callbacks is easier to handle with just a rcu_barrier(). Much easier to understand, nothing to hide. Thanks to the previous patch. Please see also the comments in code. e) Make the code understandable by humans, much less error-prone. Fixes: 744a4cf63e52 ("net: sched: fix use after free when tcf_chain_destroy is called multiple times") Fixes: 5bc1701881e3 ("net: sched: introduce multichain support for filters") Cc: Jiri Pirko Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c743f03cfebd..d29e79d98a69 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, list_add_tail(&chain->list, &block->chain_list); chain->block = block; chain->index = chain_index; - chain->refcnt = 0; + chain->refcnt = 1; return chain; } @@ -194,21 +194,20 @@ static void tcf_chain_flush(struct tcf_chain *chain) RCU_INIT_POINTER(*chain->p_filter_chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); + tcf_chain_put(chain); tcf_proto_destroy(tp); } } static void tcf_chain_destroy(struct tcf_chain *chain) { - /* May be already removed from the list by the previous call. */ - if (!list_empty(&chain->list)) - list_del_init(&chain->list); + list_del(&chain->list); + kfree(chain); +} - /* There might still be a reference held when we got here from - * tcf_block_put. Wait for the user to drop reference before free. - */ - if (!chain->refcnt) - kfree(chain); +static void tcf_chain_hold(struct tcf_chain *chain) +{ + ++chain->refcnt; } struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -217,24 +216,19 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, struct tcf_chain *chain; list_for_each_entry(chain, &block->chain_list, list) { - if (chain->index == chain_index) - goto incref; + if (chain->index == chain_index) { + tcf_chain_hold(chain); + return chain; + } } - chain = create ? tcf_chain_create(block, chain_index) : NULL; -incref: - if (chain) - chain->refcnt++; - return chain; + return create ? tcf_chain_create(block, chain_index) : NULL; } EXPORT_SYMBOL(tcf_chain_get); void tcf_chain_put(struct tcf_chain *chain) { - /* Destroy unused chain, with exception of chain 0, which is the - * default one and has to be always present. - */ - if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + if (--chain->refcnt == 0) tcf_chain_destroy(chain); } EXPORT_SYMBOL(tcf_chain_put); @@ -279,10 +273,19 @@ void tcf_block_put(struct tcf_block *block) if (!block) return; + /* XXX: Standalone actions are not allowed to jump to any chain, and + * bound actions should be all removed after flushing. However, + * filters are destroyed in RCU callbacks, we have to flush and wait + * for them inside the loop, otherwise we race with RCU callbacks on + * this list. + */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { tcf_chain_flush(chain); - tcf_chain_destroy(chain); + rcu_barrier(); } + + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_put(chain); kfree(block); } EXPORT_SYMBOL(tcf_block_put); @@ -360,6 +363,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, rcu_assign_pointer(*chain->p_filter_chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); + tcf_chain_hold(chain); } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -371,6 +375,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, if (chain->p_filter_chain && tp == chain->filter_chain) RCU_INIT_POINTER(*chain->p_filter_chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); + tcf_chain_put(chain); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, -- cgit From 1697c4bb5245649a23f06a144cc38c06715e1b65 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Sep 2017 16:33:32 -0700 Subject: net_sched: carefully handle tcf_block_put() As pointed out by Jiri, there is still a race condition between tcf_block_put() and tcf_chain_destroy() in a RCU callback. There is no way to make it correct without proper locking or synchronization, because both operate on a shared list. Locking is hard, because the only lock we can pick here is a spinlock, however, in tc_dump_tfilter() we iterate this list with a sleeping function called (tcf_chain_dump()), which makes using a lock to protect chain_list almost impossible. Jiri suggested the idea of holding a refcnt before flushing, this works because it guarantees us there would be no parallel tcf_chain_destroy() during the loop, therefore the race condition is gone. But we have to be very careful with proper synchronization with RCU callbacks. Suggested-by: Jiri Pirko Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d29e79d98a69..0b2219adf520 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -275,15 +275,27 @@ void tcf_block_put(struct tcf_block *block) /* XXX: Standalone actions are not allowed to jump to any chain, and * bound actions should be all removed after flushing. However, - * filters are destroyed in RCU callbacks, we have to flush and wait - * for them inside the loop, otherwise we race with RCU callbacks on - * this list. + * filters are destroyed in RCU callbacks, we have to hold the chains + * first, otherwise we would always race with RCU callbacks on this list + * without proper locking. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + + /* Wait for existing RCU callbacks to cool down. */ + rcu_barrier(); + + /* Hold a refcnt for all chains, except 0, in case they are gone. */ + list_for_each_entry(chain, &block->chain_list, list) + if (chain->index) + tcf_chain_hold(chain); + + /* No race on the list, because no chain could be destroyed. */ + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); - rcu_barrier(); - } + /* Wait for RCU callbacks to release the reference count. */ + rcu_barrier(); + + /* At this point, all the chains should have refcnt == 1. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_put(chain); kfree(block); -- cgit