aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCong Wang <xiyou.wangcong@gmail.com>2017-10-26 21:24:28 -0400
committerDavid S. Miller <davem@davemloft.net>2017-10-29 09:49:30 -0400
commit7aa0045dadb6ef37485ea9f2a7d28278ca588b51 (patch)
treee3c535a5d93bef05b780d64e220039f360e19104
parent8c83c88584abb3a04a4026be91060bc309f4c034 (diff)
net_sched: introduce a workqueue for RCU callbacks of tc filter
This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris Mi <chrism@mellanox.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jiri Pirko <jiri@resnulli.us> Cc: John Fastabend <john.fastabend@gmail.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/pkt_cls.h3
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--net/sched/cls_api.c68
3 files changed, 56 insertions, 17 deletions
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e80edd8879ef..3009547f3c66 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -2,6 +2,7 @@
2#define __NET_PKT_CLS_H 2#define __NET_PKT_CLS_H
3 3
4#include <linux/pkt_cls.h> 4#include <linux/pkt_cls.h>
5#include <linux/workqueue.h>
5#include <net/sch_generic.h> 6#include <net/sch_generic.h>
6#include <net/act_api.h> 7#include <net/act_api.h>
7 8
@@ -17,6 +18,8 @@ struct tcf_walker {
17int register_tcf_proto_ops(struct tcf_proto_ops *ops); 18int register_tcf_proto_ops(struct tcf_proto_ops *ops);
18int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); 19int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
19 20
21bool tcf_queue_work(struct work_struct *work);
22
20#ifdef CONFIG_NET_CLS 23#ifdef CONFIG_NET_CLS
21struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, 24struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
22 bool create); 25 bool create);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 135f5a2dd931..0dec8a23be57 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -10,6 +10,7 @@
10#include <linux/dynamic_queue_limits.h> 10#include <linux/dynamic_queue_limits.h>
11#include <linux/list.h> 11#include <linux/list.h>
12#include <linux/refcount.h> 12#include <linux/refcount.h>
13#include <linux/workqueue.h>
13#include <net/gen_stats.h> 14#include <net/gen_stats.h>
14#include <net/rtnetlink.h> 15#include <net/rtnetlink.h>
15 16
@@ -271,6 +272,7 @@ struct tcf_chain {
271 272
272struct tcf_block { 273struct tcf_block {
273 struct list_head chain_list; 274 struct list_head chain_list;
275 struct work_struct work;
274}; 276};
275 277
276static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) 278static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..045d13679ad6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
77} 77}
78EXPORT_SYMBOL(register_tcf_proto_ops); 78EXPORT_SYMBOL(register_tcf_proto_ops);
79 79
80static struct workqueue_struct *tc_filter_wq;
81
80int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 82int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
81{ 83{
82 struct tcf_proto_ops *t; 84 struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
86 * tcf_proto_ops's destroy() handler. 88 * tcf_proto_ops's destroy() handler.
87 */ 89 */
88 rcu_barrier(); 90 rcu_barrier();
91 flush_workqueue(tc_filter_wq);
89 92
90 write_lock(&cls_mod_lock); 93 write_lock(&cls_mod_lock);
91 list_for_each_entry(t, &tcf_proto_base, head) { 94 list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
100} 103}
101EXPORT_SYMBOL(unregister_tcf_proto_ops); 104EXPORT_SYMBOL(unregister_tcf_proto_ops);
102 105
106bool tcf_queue_work(struct work_struct *work)
107{
108 return queue_work(tc_filter_wq, work);
109}
110EXPORT_SYMBOL(tcf_queue_work);
111
103/* Select new prio value from the range, managed by kernel. */ 112/* Select new prio value from the range, managed by kernel. */
104 113
105static inline u32 tcf_auto_prio(struct tcf_proto *tp) 114static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ err_chain_create:
266} 275}
267EXPORT_SYMBOL(tcf_block_get); 276EXPORT_SYMBOL(tcf_block_get);
268 277
269void tcf_block_put(struct tcf_block *block) 278static void tcf_block_put_final(struct work_struct *work)
270{ 279{
280 struct tcf_block *block = container_of(work, struct tcf_block, work);
271 struct tcf_chain *chain, *tmp; 281 struct tcf_chain *chain, *tmp;
272 282
273 if (!block) 283 /* At this point, all the chains should have refcnt == 1. */
274 return; 284 rtnl_lock();
275 285 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
276 /* XXX: Standalone actions are not allowed to jump to any chain, and 286 tcf_chain_put(chain);
277 * bound actions should be all removed after flushing. However, 287 rtnl_unlock();
278 * filters are destroyed in RCU callbacks, we have to hold the chains 288 kfree(block);
279 * first, otherwise we would always race with RCU callbacks on this list 289}
280 * without proper locking.
281 */
282 290
283 /* Wait for existing RCU callbacks to cool down. */ 291/* XXX: Standalone actions are not allowed to jump to any chain, and bound
284 rcu_barrier(); 292 * actions should be all removed after flushing. However, filters are destroyed
293 * in RCU callbacks, we have to hold the chains first, otherwise we would
294 * always race with RCU callbacks on this list without proper locking.
295 */
296static void tcf_block_put_deferred(struct work_struct *work)
297{
298 struct tcf_block *block = container_of(work, struct tcf_block, work);
299 struct tcf_chain *chain;
285 300
301 rtnl_lock();
286 /* Hold a refcnt for all chains, except 0, in case they are gone. */ 302 /* Hold a refcnt for all chains, except 0, in case they are gone. */
287 list_for_each_entry(chain, &block->chain_list, list) 303 list_for_each_entry(chain, &block->chain_list, list)
288 if (chain->index) 304 if (chain->index)
@@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
292 list_for_each_entry(chain, &block->chain_list, list) 308 list_for_each_entry(chain, &block->chain_list, list)
293 tcf_chain_flush(chain); 309 tcf_chain_flush(chain);
294 310
295 /* Wait for RCU callbacks to release the reference count. */ 311 INIT_WORK(&block->work, tcf_block_put_final);
312 /* Wait for RCU callbacks to release the reference count and make
313 * sure their works have been queued before this.
314 */
296 rcu_barrier(); 315 rcu_barrier();
316 tcf_queue_work(&block->work);
317 rtnl_unlock();
318}
297 319
298 /* At this point, all the chains should have refcnt == 1. */ 320void tcf_block_put(struct tcf_block *block)
299 list_for_each_entry_safe(chain, tmp, &block->chain_list, list) 321{
300 tcf_chain_put(chain); 322 if (!block)
301 kfree(block); 323 return;
324
325 INIT_WORK(&block->work, tcf_block_put_deferred);
326 /* Wait for existing RCU callbacks to cool down, make sure their works
327 * have been queued before this. We can not flush pending works here
328 * because we are holding the RTNL lock.
329 */
330 rcu_barrier();
331 tcf_queue_work(&block->work);
302} 332}
303EXPORT_SYMBOL(tcf_block_put); 333EXPORT_SYMBOL(tcf_block_put);
304 334
@@ -1030,6 +1060,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
1030 1060
1031static int __init tc_filter_init(void) 1061static int __init tc_filter_init(void)
1032{ 1062{
1063 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
1064 if (!tc_filter_wq)
1065 return -ENOMEM;
1066
1033 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); 1067 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
1034 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); 1068 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
1035 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, 1069 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,