diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-01-19 22:48:19 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-01-20 19:59:32 -0500 |
commit | a2da570d62fcb9e8816f6920e1ec02c706b289fa (patch) | |
tree | 9a24b76262c40ec2f1be11e29e224c7d7b20fa26 | |
parent | fd245a4adb5288eac37250875f237c40a20a1944 (diff) |
net_sched: RCU conversion of stab
This patch converts qdisc stab (size table) management to RCU, so that we can
perform the qdisc_calculate_pkt_len() call before taking the qdisc lock.
This shortens the time the qdisc lock is held in __dev_xmit_skb().
This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding a lot of
cache misses and so reducing latencies.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Jesper Dangaard Brouer <hawk@diku.dk>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Jamal Hadi Salim <hadi@cyberus.ca>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sch_generic.h | 21 | ||||
-rw-r--r-- | net/core/dev.c | 8 | ||||
-rw-r--r-- | net/sched/sch_api.c | 26 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 2 |
4 files changed, 38 insertions, 19 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f6345f55041c..d531baa2506a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h | |||
@@ -36,6 +36,7 @@ enum qdisc___state_t { | |||
36 | }; | 36 | }; |
37 | 37 | ||
38 | struct qdisc_size_table { | 38 | struct qdisc_size_table { |
39 | struct rcu_head rcu; | ||
39 | struct list_head list; | 40 | struct list_head list; |
40 | struct tc_sizespec szopts; | 41 | struct tc_sizespec szopts; |
41 | int refcnt; | 42 | int refcnt; |
@@ -53,7 +54,7 @@ struct Qdisc { | |||
53 | #define TCQ_F_WARN_NONWC (1 << 16) | 54 | #define TCQ_F_WARN_NONWC (1 << 16) |
54 | int padded; | 55 | int padded; |
55 | struct Qdisc_ops *ops; | 56 | struct Qdisc_ops *ops; |
56 | struct qdisc_size_table *stab; | 57 | struct qdisc_size_table __rcu *stab; |
57 | struct list_head list; | 58 | struct list_head list; |
58 | u32 handle; | 59 | u32 handle; |
59 | u32 parent; | 60 | u32 parent; |
@@ -349,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
349 | struct Qdisc_ops *ops); | 350 | struct Qdisc_ops *ops); |
350 | extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, | 351 | extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, |
351 | struct Qdisc_ops *ops, u32 parentid); | 352 | struct Qdisc_ops *ops, u32 parentid); |
352 | extern void qdisc_calculate_pkt_len(struct sk_buff *skb, | 353 | extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, |
353 | struct qdisc_size_table *stab); | 354 | const struct qdisc_size_table *stab); |
354 | extern void tcf_destroy(struct tcf_proto *tp); | 355 | extern void tcf_destroy(struct tcf_proto *tp); |
355 | extern void tcf_destroy_chain(struct tcf_proto **fl); | 356 | extern void tcf_destroy_chain(struct tcf_proto **fl); |
356 | 357 | ||
@@ -429,12 +430,20 @@ enum net_xmit_qdisc_t { | |||
429 | #define net_xmit_drop_count(e) (1) | 430 | #define net_xmit_drop_count(e) (1) |
430 | #endif | 431 | #endif |
431 | 432 | ||
432 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 433 | static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, |
434 | const struct Qdisc *sch) | ||
433 | { | 435 | { |
434 | #ifdef CONFIG_NET_SCHED | 436 | #ifdef CONFIG_NET_SCHED |
435 | if (sch->stab) | 437 | struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); |
436 | qdisc_calculate_pkt_len(skb, sch->stab); | 438 | |
439 | if (stab) | ||
440 | __qdisc_calculate_pkt_len(skb, stab); | ||
437 | #endif | 441 | #endif |
442 | } | ||
443 | |||
444 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
445 | { | ||
446 | qdisc_calculate_pkt_len(skb, sch); | ||
438 | return sch->enqueue(skb, sch); | 447 | return sch->enqueue(skb, sch); |
439 | } | 448 | } |
440 | 449 | ||
diff --git a/net/core/dev.c b/net/core/dev.c index a4ccd47f3196..2730352d2ccc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -2325,15 +2325,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2325 | struct netdev_queue *txq) | 2325 | struct netdev_queue *txq) |
2326 | { | 2326 | { |
2327 | spinlock_t *root_lock = qdisc_lock(q); | 2327 | spinlock_t *root_lock = qdisc_lock(q); |
2328 | bool contended = qdisc_is_running(q); | 2328 | bool contended; |
2329 | int rc; | 2329 | int rc; |
2330 | 2330 | ||
2331 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2332 | qdisc_calculate_pkt_len(skb, q); | ||
2331 | /* | 2333 | /* |
2332 | * Heuristic to force contended enqueues to serialize on a | 2334 | * Heuristic to force contended enqueues to serialize on a |
2333 | * separate lock before trying to get qdisc main lock. | 2335 | * separate lock before trying to get qdisc main lock. |
2334 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2336 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2335 | * and dequeue packets faster. | 2337 | * and dequeue packets faster. |
2336 | */ | 2338 | */ |
2339 | contended = qdisc_is_running(q); | ||
2337 | if (unlikely(contended)) | 2340 | if (unlikely(contended)) |
2338 | spin_lock(&q->busylock); | 2341 | spin_lock(&q->busylock); |
2339 | 2342 | ||
@@ -2351,7 +2354,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2351 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2354 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2352 | skb_dst_force(skb); | 2355 | skb_dst_force(skb); |
2353 | 2356 | ||
2354 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2355 | qdisc_bstats_update(q, skb); | 2357 | qdisc_bstats_update(q, skb); |
2356 | 2358 | ||
2357 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2359 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
@@ -2366,7 +2368,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2366 | rc = NET_XMIT_SUCCESS; | 2368 | rc = NET_XMIT_SUCCESS; |
2367 | } else { | 2369 | } else { |
2368 | skb_dst_force(skb); | 2370 | skb_dst_force(skb); |
2369 | rc = qdisc_enqueue_root(skb, q); | 2371 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2370 | if (qdisc_run_begin(q)) { | 2372 | if (qdisc_run_begin(q)) { |
2371 | if (unlikely(contended)) { | 2373 | if (unlikely(contended)) { |
2372 | spin_unlock(&q->busylock); | 2374 | spin_unlock(&q->busylock); |
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 374fcbef80e8..150741579408 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) | |||
398 | return stab; | 398 | return stab; |
399 | } | 399 | } |
400 | 400 | ||
401 | static void stab_kfree_rcu(struct rcu_head *head) | ||
402 | { | ||
403 | kfree(container_of(head, struct qdisc_size_table, rcu)); | ||
404 | } | ||
405 | |||
401 | void qdisc_put_stab(struct qdisc_size_table *tab) | 406 | void qdisc_put_stab(struct qdisc_size_table *tab) |
402 | { | 407 | { |
403 | if (!tab) | 408 | if (!tab) |
@@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) | |||
407 | 412 | ||
408 | if (--tab->refcnt == 0) { | 413 | if (--tab->refcnt == 0) { |
409 | list_del(&tab->list); | 414 | list_del(&tab->list); |
410 | kfree(tab); | 415 | call_rcu_bh(&tab->rcu, stab_kfree_rcu); |
411 | } | 416 | } |
412 | 417 | ||
413 | spin_unlock(&qdisc_stab_lock); | 418 | spin_unlock(&qdisc_stab_lock); |
@@ -430,7 +435,7 @@ nla_put_failure: | |||
430 | return -1; | 435 | return -1; |
431 | } | 436 | } |
432 | 437 | ||
433 | void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) | 438 | void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) |
434 | { | 439 | { |
435 | int pkt_len, slot; | 440 | int pkt_len, slot; |
436 | 441 | ||
@@ -456,7 +461,7 @@ out: | |||
456 | pkt_len = 1; | 461 | pkt_len = 1; |
457 | qdisc_skb_cb(skb)->pkt_len = pkt_len; | 462 | qdisc_skb_cb(skb)->pkt_len = pkt_len; |
458 | } | 463 | } |
459 | EXPORT_SYMBOL(qdisc_calculate_pkt_len); | 464 | EXPORT_SYMBOL(__qdisc_calculate_pkt_len); |
460 | 465 | ||
461 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) | 466 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) |
462 | { | 467 | { |
@@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
835 | err = PTR_ERR(stab); | 840 | err = PTR_ERR(stab); |
836 | goto err_out4; | 841 | goto err_out4; |
837 | } | 842 | } |
838 | sch->stab = stab; | 843 | rcu_assign_pointer(sch->stab, stab); |
839 | } | 844 | } |
840 | if (tca[TCA_RATE]) { | 845 | if (tca[TCA_RATE]) { |
841 | spinlock_t *root_lock; | 846 | spinlock_t *root_lock; |
@@ -875,7 +880,7 @@ err_out4: | |||
875 | * Any broken qdiscs that would require a ops->reset() here? | 880 | * Any broken qdiscs that would require a ops->reset() here? |
876 | * The qdisc was never in action so it shouldn't be necessary. | 881 | * The qdisc was never in action so it shouldn't be necessary. |
877 | */ | 882 | */ |
878 | qdisc_put_stab(sch->stab); | 883 | qdisc_put_stab(rtnl_dereference(sch->stab)); |
879 | if (ops->destroy) | 884 | if (ops->destroy) |
880 | ops->destroy(sch); | 885 | ops->destroy(sch); |
881 | goto err_out3; | 886 | goto err_out3; |
@@ -883,7 +888,7 @@ err_out4: | |||
883 | 888 | ||
884 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | 889 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) |
885 | { | 890 | { |
886 | struct qdisc_size_table *stab = NULL; | 891 | struct qdisc_size_table *ostab, *stab = NULL; |
887 | int err = 0; | 892 | int err = 0; |
888 | 893 | ||
889 | if (tca[TCA_OPTIONS]) { | 894 | if (tca[TCA_OPTIONS]) { |
@@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | |||
900 | return PTR_ERR(stab); | 905 | return PTR_ERR(stab); |
901 | } | 906 | } |
902 | 907 | ||
903 | qdisc_put_stab(sch->stab); | 908 | ostab = rtnl_dereference(sch->stab); |
904 | sch->stab = stab; | 909 | rcu_assign_pointer(sch->stab, stab); |
910 | qdisc_put_stab(ostab); | ||
905 | 911 | ||
906 | if (tca[TCA_RATE]) { | 912 | if (tca[TCA_RATE]) { |
907 | /* NB: ignores errors from replace_estimator | 913 | /* NB: ignores errors from replace_estimator |
@@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1180 | struct nlmsghdr *nlh; | 1186 | struct nlmsghdr *nlh; |
1181 | unsigned char *b = skb_tail_pointer(skb); | 1187 | unsigned char *b = skb_tail_pointer(skb); |
1182 | struct gnet_dump d; | 1188 | struct gnet_dump d; |
1189 | struct qdisc_size_table *stab; | ||
1183 | 1190 | ||
1184 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); | 1191 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); |
1185 | tcm = NLMSG_DATA(nlh); | 1192 | tcm = NLMSG_DATA(nlh); |
@@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1195 | goto nla_put_failure; | 1202 | goto nla_put_failure; |
1196 | q->qstats.qlen = q->q.qlen; | 1203 | q->qstats.qlen = q->q.qlen; |
1197 | 1204 | ||
1198 | if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) | 1205 | stab = rtnl_dereference(q->stab); |
1206 | if (stab && qdisc_dump_stab(skb, stab) < 0) | ||
1199 | goto nla_put_failure; | 1207 | goto nla_put_failure; |
1200 | 1208 | ||
1201 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, | 1209 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f1cb62130da..cc17e794c41e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -632,7 +632,7 @@ void qdisc_destroy(struct Qdisc *qdisc) | |||
632 | #ifdef CONFIG_NET_SCHED | 632 | #ifdef CONFIG_NET_SCHED |
633 | qdisc_list_del(qdisc); | 633 | qdisc_list_del(qdisc); |
634 | 634 | ||
635 | qdisc_put_stab(qdisc->stab); | 635 | qdisc_put_stab(rtnl_dereference(qdisc->stab)); |
636 | #endif | 636 | #endif |
637 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 637 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
638 | if (ops->reset) | 638 | if (ops->reset) |