aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2014-09-20 00:50:34 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-22 15:59:21 -0400
commitde5df63228fcfbd5bb7fd883774c18fec9e61f12 (patch)
tree4eb60e8f52cdedc39237888272e5ed39a914512e
parenta1ddcfee2d9ae172d0095f3f8227f7fa53288c65 (diff)
net: sched: cls_u32 changes to knode must appear atomic to readers
Changes to the cls_u32 classifier must appear atomic to the readers. Before this patch, if a change is requested for both the exts and ifindex, first the ifindex is updated, then the exts with tcf_exts_change(). This opens a small window where a reader can have an exts chain with an incorrect ifindex. This violates the RCU semantics. Here we resolve this by always passing u32_set_parms() a copy of the tc_u_knode to work on and then inserting it into the hash table after the updates have been successfully applied. Tested with the following short script: #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \ u32 divisor 256 #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \ u32 link 1: hashkey mask ffffff00 at 12 \ match ip src 192.168.8.0/2 #tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \ handle 1::10 u32 classid 1:2 ht 1: \ match ip src 192.168.8.0/8 match ip tos 0x0a 1e #tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \ handle 1::10 u32 classid 1:2 ht 1: \ match ip src 1.1.0.0/8 match ip tos 0x0b 1e CC: Eric Dumazet <edumazet@google.com> CC: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sched/cls_u32.c135
1 files changed, 126 insertions, 9 deletions
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 8d90e50a8ce4..e3fb5308d44a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -354,27 +354,53 @@ static int u32_init(struct tcf_proto *tp)
354 return 0; 354 return 0;
355} 355}
356 356
357static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) 357static int u32_destroy_key(struct tcf_proto *tp,
358 struct tc_u_knode *n,
359 bool free_pf)
358{ 360{
359 tcf_unbind_filter(tp, &n->res); 361 tcf_unbind_filter(tp, &n->res);
360 tcf_exts_destroy(tp, &n->exts); 362 tcf_exts_destroy(tp, &n->exts);
361 if (n->ht_down) 363 if (n->ht_down)
362 n->ht_down->refcnt--; 364 n->ht_down->refcnt--;
363#ifdef CONFIG_CLS_U32_PERF 365#ifdef CONFIG_CLS_U32_PERF
364 free_percpu(n->pf); 366 if (free_pf)
367 free_percpu(n->pf);
365#endif 368#endif
366#ifdef CONFIG_CLS_U32_MARK 369#ifdef CONFIG_CLS_U32_MARK
367 free_percpu(n->pcpu_success); 370 if (free_pf)
371 free_percpu(n->pcpu_success);
368#endif 372#endif
369 kfree(n); 373 kfree(n);
370 return 0; 374 return 0;
371} 375}
372 376
377/* u32_delete_key_rcu should be called when free'ing a copied
378 * version of a tc_u_knode obtained from u32_init_knode(). When
379 * copies are obtained from u32_init_knode() the statistics are
380 * shared between the old and new copies to allow readers to
381 * continue to update the statistics during the copy. To support
382 * this the u32_delete_key_rcu variant does not free the percpu
383 * statistics.
384 */
373static void u32_delete_key_rcu(struct rcu_head *rcu) 385static void u32_delete_key_rcu(struct rcu_head *rcu)
374{ 386{
375 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); 387 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
376 388
377 u32_destroy_key(key->tp, key); 389 u32_destroy_key(key->tp, key, false);
390}
391
392/* u32_delete_key_freepf_rcu is the rcu callback variant
393 * that frees the entire structure including the statistics
394 * percpu variables. Only use this if the key is not a copy
395 * returned by u32_init_knode(). See u32_delete_key_rcu()
396 * for the variant that should be used with keys returned from
397 * u32_init_knode()
398 */
399static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
400{
401 struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
402
403 u32_destroy_key(key->tp, key, true);
378} 404}
379 405
380static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) 406static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -390,7 +416,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
390 if (pkp == key) { 416 if (pkp == key) {
391 RCU_INIT_POINTER(*kp, key->next); 417 RCU_INIT_POINTER(*kp, key->next);
392 418
393 call_rcu(&key->rcu, u32_delete_key_rcu); 419 call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
394 return 0; 420 return 0;
395 } 421 }
396 } 422 }
@@ -408,7 +434,7 @@ static void u32_clear_hnode(struct tc_u_hnode *ht)
408 while ((n = rtnl_dereference(ht->ht[h])) != NULL) { 434 while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
409 RCU_INIT_POINTER(ht->ht[h], 435 RCU_INIT_POINTER(ht->ht[h],
410 rtnl_dereference(n->next)); 436 rtnl_dereference(n->next));
411 call_rcu(&n->rcu, u32_delete_key_rcu); 437 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
412 } 438 }
413 } 439 }
414} 440}
@@ -584,6 +610,82 @@ errout:
584 return err; 610 return err;
585} 611}
586 612
613static void u32_replace_knode(struct tcf_proto *tp,
614 struct tc_u_common *tp_c,
615 struct tc_u_knode *n)
616{
617 struct tc_u_knode __rcu **ins;
618 struct tc_u_knode *pins;
619 struct tc_u_hnode *ht;
620
621 if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
622 ht = rtnl_dereference(tp->root);
623 else
624 ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
625
626 ins = &ht->ht[TC_U32_HASH(n->handle)];
627
628 /* The node must always exist for it to be replaced; if this is not the
629 * case then something went very wrong elsewhere.
630 */
631 for (pins = rtnl_dereference(*ins); ;
632 ins = &pins->next, pins = rtnl_dereference(*ins))
633 if (pins->handle == n->handle)
634 break;
635
636 RCU_INIT_POINTER(n->next, pins->next);
637 rcu_assign_pointer(*ins, n);
638}
639
640static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
641 struct tc_u_knode *n)
642{
643 struct tc_u_knode *new;
644 struct tc_u32_sel *s = &n->sel;
645
646 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
647 GFP_KERNEL);
648
649 if (!new)
650 return NULL;
651
652 RCU_INIT_POINTER(new->next, n->next);
653 new->handle = n->handle;
654 RCU_INIT_POINTER(new->ht_up, n->ht_up);
655
656#ifdef CONFIG_NET_CLS_IND
657 new->ifindex = n->ifindex;
658#endif
659 new->fshift = n->fshift;
660 new->res = n->res;
661 RCU_INIT_POINTER(new->ht_down, n->ht_down);
662
663 /* bump reference count as long as we hold pointer to structure */
664 if (new->ht_down)
665 new->ht_down->refcnt++;
666
667#ifdef CONFIG_CLS_U32_PERF
668 /* Statistics may be incremented by readers during update
669 * so we must keep them intact. When the node is later destroyed
670 * a special destroy call must be made to not free the pf memory.
671 */
672 new->pf = n->pf;
673#endif
674
675#ifdef CONFIG_CLS_U32_MARK
676 new->val = n->val;
677 new->mask = n->mask;
678 /* Similarly success statistics must be moved as pointers */
679 new->pcpu_success = n->pcpu_success;
680#endif
681 new->tp = tp;
682 memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
683
684 tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
685
686 return new;
687}
688
587static int u32_change(struct net *net, struct sk_buff *in_skb, 689static int u32_change(struct net *net, struct sk_buff *in_skb,
588 struct tcf_proto *tp, unsigned long base, u32 handle, 690 struct tcf_proto *tp, unsigned long base, u32 handle,
589 struct nlattr **tca, 691 struct nlattr **tca,
@@ -610,12 +712,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
610 712
611 n = (struct tc_u_knode *)*arg; 713 n = (struct tc_u_knode *)*arg;
612 if (n) { 714 if (n) {
715 struct tc_u_knode *new;
716
613 if (TC_U32_KEY(n->handle) == 0) 717 if (TC_U32_KEY(n->handle) == 0)
614 return -EINVAL; 718 return -EINVAL;
615 719
616 return u32_set_parms(net, tp, base, 720 new = u32_init_knode(tp, n);
617 rtnl_dereference(n->ht_up), n, tb, 721 if (!new)
618 tca[TCA_RATE], ovr); 722 return -ENOMEM;
723
724 err = u32_set_parms(net, tp, base,
725 rtnl_dereference(n->ht_up), new, tb,
726 tca[TCA_RATE], ovr);
727
728 if (err) {
729 u32_destroy_key(tp, new, false);
730 return err;
731 }
732
733 u32_replace_knode(tp, tp_c, new);
734 call_rcu(&n->rcu, u32_delete_key_rcu);
735 return 0;
619 } 736 }
620 737
621 if (tb[TCA_U32_DIVISOR]) { 738 if (tb[TCA_U32_DIVISOR]) {