aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2011-01-19 22:48:19 -0500
committer: David S. Miller <davem@davemloft.net> 2011-01-20 19:59:32 -0500
commit: a2da570d62fcb9e8816f6920e1ec02c706b289fa (patch)
tree: 9a24b76262c40ec2f1be11e29e224c7d7b20fa26
parent: fd245a4adb5288eac37250875f237c40a20a1944 (diff)
net_sched: RCU conversion of stab
This patch converts stab qdisc management to RCU, so that we can perform the qdisc_calculate_pkt_len() call before getting the qdisc lock.

This shortens the time the lock is held in __dev_xmit_skb().

This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding a lot of cache misses and so reducing latencies.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Jesper Dangaard Brouer <hawk@diku.dk>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Jamal Hadi Salim <hadi@cyberus.ca>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/sch_generic.h  21
-rw-r--r--  net/core/dev.c  8
-rw-r--r--  net/sched/sch_api.c  26
-rw-r--r--  net/sched/sch_generic.c  2
4 files changed, 38 insertions, 19 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f6345f55041c..d531baa2506a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@ enum qdisc___state_t {
36}; 36};
37 37
38struct qdisc_size_table { 38struct qdisc_size_table {
39 struct rcu_head rcu;
39 struct list_head list; 40 struct list_head list;
40 struct tc_sizespec szopts; 41 struct tc_sizespec szopts;
41 int refcnt; 42 int refcnt;
@@ -53,7 +54,7 @@ struct Qdisc {
53#define TCQ_F_WARN_NONWC (1 << 16) 54#define TCQ_F_WARN_NONWC (1 << 16)
54 int padded; 55 int padded;
55 struct Qdisc_ops *ops; 56 struct Qdisc_ops *ops;
56 struct qdisc_size_table *stab; 57 struct qdisc_size_table __rcu *stab;
57 struct list_head list; 58 struct list_head list;
58 u32 handle; 59 u32 handle;
59 u32 parent; 60 u32 parent;
@@ -349,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
349 struct Qdisc_ops *ops); 350 struct Qdisc_ops *ops);
350extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, 351extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
351 struct Qdisc_ops *ops, u32 parentid); 352 struct Qdisc_ops *ops, u32 parentid);
352extern void qdisc_calculate_pkt_len(struct sk_buff *skb, 353extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
353 struct qdisc_size_table *stab); 354 const struct qdisc_size_table *stab);
354extern void tcf_destroy(struct tcf_proto *tp); 355extern void tcf_destroy(struct tcf_proto *tp);
355extern void tcf_destroy_chain(struct tcf_proto **fl); 356extern void tcf_destroy_chain(struct tcf_proto **fl);
356 357
@@ -429,12 +430,20 @@ enum net_xmit_qdisc_t {
429#define net_xmit_drop_count(e) (1) 430#define net_xmit_drop_count(e) (1)
430#endif 431#endif
431 432
432static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 433static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
434 const struct Qdisc *sch)
433{ 435{
434#ifdef CONFIG_NET_SCHED 436#ifdef CONFIG_NET_SCHED
435 if (sch->stab) 437 struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
436 qdisc_calculate_pkt_len(skb, sch->stab); 438
439 if (stab)
440 __qdisc_calculate_pkt_len(skb, stab);
437#endif 441#endif
442}
443
444static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
445{
446 qdisc_calculate_pkt_len(skb, sch);
438 return sch->enqueue(skb, sch); 447 return sch->enqueue(skb, sch);
439} 448}
440 449
diff --git a/net/core/dev.c b/net/core/dev.c
index a4ccd47f3196..2730352d2ccc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2325,15 +2325,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2325 struct netdev_queue *txq) 2325 struct netdev_queue *txq)
2326{ 2326{
2327 spinlock_t *root_lock = qdisc_lock(q); 2327 spinlock_t *root_lock = qdisc_lock(q);
2328 bool contended = qdisc_is_running(q); 2328 bool contended;
2329 int rc; 2329 int rc;
2330 2330
2331 qdisc_skb_cb(skb)->pkt_len = skb->len;
2332 qdisc_calculate_pkt_len(skb, q);
2331 /* 2333 /*
2332 * Heuristic to force contended enqueues to serialize on a 2334 * Heuristic to force contended enqueues to serialize on a
2333 * separate lock before trying to get qdisc main lock. 2335 * separate lock before trying to get qdisc main lock.
2334 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2336 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2335 * and dequeue packets faster. 2337 * and dequeue packets faster.
2336 */ 2338 */
2339 contended = qdisc_is_running(q);
2337 if (unlikely(contended)) 2340 if (unlikely(contended))
2338 spin_lock(&q->busylock); 2341 spin_lock(&q->busylock);
2339 2342
@@ -2351,7 +2354,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2351 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2354 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2352 skb_dst_force(skb); 2355 skb_dst_force(skb);
2353 2356
2354 qdisc_skb_cb(skb)->pkt_len = skb->len;
2355 qdisc_bstats_update(q, skb); 2357 qdisc_bstats_update(q, skb);
2356 2358
2357 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2359 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2366,7 +2368,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2366 rc = NET_XMIT_SUCCESS; 2368 rc = NET_XMIT_SUCCESS;
2367 } else { 2369 } else {
2368 skb_dst_force(skb); 2370 skb_dst_force(skb);
2369 rc = qdisc_enqueue_root(skb, q); 2371 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2370 if (qdisc_run_begin(q)) { 2372 if (qdisc_run_begin(q)) {
2371 if (unlikely(contended)) { 2373 if (unlikely(contended)) {
2372 spin_unlock(&q->busylock); 2374 spin_unlock(&q->busylock);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 374fcbef80e8..150741579408 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
398 return stab; 398 return stab;
399} 399}
400 400
401static void stab_kfree_rcu(struct rcu_head *head)
402{
403 kfree(container_of(head, struct qdisc_size_table, rcu));
404}
405
401void qdisc_put_stab(struct qdisc_size_table *tab) 406void qdisc_put_stab(struct qdisc_size_table *tab)
402{ 407{
403 if (!tab) 408 if (!tab)
@@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
407 412
408 if (--tab->refcnt == 0) { 413 if (--tab->refcnt == 0) {
409 list_del(&tab->list); 414 list_del(&tab->list);
410 kfree(tab); 415 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
411 } 416 }
412 417
413 spin_unlock(&qdisc_stab_lock); 418 spin_unlock(&qdisc_stab_lock);
@@ -430,7 +435,7 @@ nla_put_failure:
430 return -1; 435 return -1;
431} 436}
432 437
433void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) 438void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
434{ 439{
435 int pkt_len, slot; 440 int pkt_len, slot;
436 441
@@ -456,7 +461,7 @@ out:
456 pkt_len = 1; 461 pkt_len = 1;
457 qdisc_skb_cb(skb)->pkt_len = pkt_len; 462 qdisc_skb_cb(skb)->pkt_len = pkt_len;
458} 463}
459EXPORT_SYMBOL(qdisc_calculate_pkt_len); 464EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
460 465
461void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) 466void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
462{ 467{
@@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
835 err = PTR_ERR(stab); 840 err = PTR_ERR(stab);
836 goto err_out4; 841 goto err_out4;
837 } 842 }
838 sch->stab = stab; 843 rcu_assign_pointer(sch->stab, stab);
839 } 844 }
840 if (tca[TCA_RATE]) { 845 if (tca[TCA_RATE]) {
841 spinlock_t *root_lock; 846 spinlock_t *root_lock;
@@ -875,7 +880,7 @@ err_out4:
875 * Any broken qdiscs that would require a ops->reset() here? 880 * Any broken qdiscs that would require a ops->reset() here?
876 * The qdisc was never in action so it shouldn't be necessary. 881 * The qdisc was never in action so it shouldn't be necessary.
877 */ 882 */
878 qdisc_put_stab(sch->stab); 883 qdisc_put_stab(rtnl_dereference(sch->stab));
879 if (ops->destroy) 884 if (ops->destroy)
880 ops->destroy(sch); 885 ops->destroy(sch);
881 goto err_out3; 886 goto err_out3;
@@ -883,7 +888,7 @@ err_out4:
883 888
884static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 889static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
885{ 890{
886 struct qdisc_size_table *stab = NULL; 891 struct qdisc_size_table *ostab, *stab = NULL;
887 int err = 0; 892 int err = 0;
888 893
889 if (tca[TCA_OPTIONS]) { 894 if (tca[TCA_OPTIONS]) {
@@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
900 return PTR_ERR(stab); 905 return PTR_ERR(stab);
901 } 906 }
902 907
903 qdisc_put_stab(sch->stab); 908 ostab = rtnl_dereference(sch->stab);
904 sch->stab = stab; 909 rcu_assign_pointer(sch->stab, stab);
910 qdisc_put_stab(ostab);
905 911
906 if (tca[TCA_RATE]) { 912 if (tca[TCA_RATE]) {
907 /* NB: ignores errors from replace_estimator 913 /* NB: ignores errors from replace_estimator
@@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1180 struct nlmsghdr *nlh; 1186 struct nlmsghdr *nlh;
1181 unsigned char *b = skb_tail_pointer(skb); 1187 unsigned char *b = skb_tail_pointer(skb);
1182 struct gnet_dump d; 1188 struct gnet_dump d;
1189 struct qdisc_size_table *stab;
1183 1190
1184 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1191 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1185 tcm = NLMSG_DATA(nlh); 1192 tcm = NLMSG_DATA(nlh);
@@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1195 goto nla_put_failure; 1202 goto nla_put_failure;
1196 q->qstats.qlen = q->q.qlen; 1203 q->qstats.qlen = q->q.qlen;
1197 1204
1198 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) 1205 stab = rtnl_dereference(q->stab);
1206 if (stab && qdisc_dump_stab(skb, stab) < 0)
1199 goto nla_put_failure; 1207 goto nla_put_failure;
1200 1208
1201 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1209 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2f1cb62130da..cc17e794c41e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -632,7 +632,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
632#ifdef CONFIG_NET_SCHED 632#ifdef CONFIG_NET_SCHED
633 qdisc_list_del(qdisc); 633 qdisc_list_del(qdisc);
634 634
635 qdisc_put_stab(qdisc->stab); 635 qdisc_put_stab(rtnl_dereference(qdisc->stab));
636#endif 636#endif
637 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 637 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
638 if (ops->reset) 638 if (ops->reset)