diff options
author | Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 2008-07-20 03:08:47 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-07-20 03:08:47 -0400 |
commit | 175f9c1bba9b825d22b142d183c9e175488b260c (patch) | |
tree | 6b9cdb2e3802e7fc8422dc0d0cc200f6d4d1d0a5 | |
parent | 0abf77e55a2459aa9905be4b226e4729d5b4f0cb (diff) |
net_sched: Add size table for qdiscs

Add size table functions for qdiscs and calculate packet size in
qdisc_enqueue().

Based on patch by Patrick McHardy
http://marc.info/?l=linux-netdev&m=115201979221729&w=2

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/pkt_sched.h | 20 | ||||
-rw-r--r-- | include/linux/rtnetlink.h | 1 | ||||
-rw-r--r-- | include/net/pkt_sched.h | 1 | ||||
-rw-r--r-- | include/net/sch_generic.h | 25 | ||||
-rw-r--r-- | net/sched/sch_api.c | 151 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 1 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 5 |
7 files changed, 199 insertions, 5 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 87f4e0fa8f27..e5de421ac7b4 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h | |||
@@ -85,6 +85,26 @@ struct tc_ratespec | |||
85 | 85 | ||
86 | #define TC_RTAB_SIZE 1024 | 86 | #define TC_RTAB_SIZE 1024 |
87 | 87 | ||
88 | struct tc_sizespec { | ||
89 | unsigned char cell_log; | ||
90 | unsigned char size_log; | ||
91 | short cell_align; | ||
92 | int overhead; | ||
93 | unsigned int linklayer; | ||
94 | unsigned int mpu; | ||
95 | unsigned int mtu; | ||
96 | unsigned int tsize; | ||
97 | }; | ||
98 | |||
99 | enum { | ||
100 | TCA_STAB_UNSPEC, | ||
101 | TCA_STAB_BASE, | ||
102 | TCA_STAB_DATA, | ||
103 | __TCA_STAB_MAX | ||
104 | }; | ||
105 | |||
106 | #define TCA_STAB_MAX (__TCA_STAB_MAX - 1) | ||
107 | |||
88 | /* FIFO section */ | 108 | /* FIFO section */ |
89 | 109 | ||
90 | struct tc_fifo_qopt | 110 | struct tc_fifo_qopt |
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index b358c704d102..f4d386c191f5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h | |||
@@ -482,6 +482,7 @@ enum | |||
482 | TCA_RATE, | 482 | TCA_RATE, |
483 | TCA_FCNT, | 483 | TCA_FCNT, |
484 | TCA_STATS2, | 484 | TCA_STATS2, |
485 | TCA_STAB, | ||
485 | __TCA_MAX | 486 | __TCA_MAX |
486 | }; | 487 | }; |
487 | 488 | ||
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index e4e30052e4e2..6affcfaa123e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h | |||
@@ -83,6 +83,7 @@ extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); | |||
83 | extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, | 83 | extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, |
84 | struct nlattr *tab); | 84 | struct nlattr *tab); |
85 | extern void qdisc_put_rtab(struct qdisc_rate_table *tab); | 85 | extern void qdisc_put_rtab(struct qdisc_rate_table *tab); |
86 | extern void qdisc_put_stab(struct qdisc_size_table *tab); | ||
86 | 87 | ||
87 | extern void __qdisc_run(struct Qdisc *q); | 88 | extern void __qdisc_run(struct Qdisc *q); |
88 | 89 | ||
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 8229520e088a..db9ad655eb8a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h | |||
@@ -29,6 +29,13 @@ enum qdisc_state_t | |||
29 | __QDISC_STATE_SCHED, | 29 | __QDISC_STATE_SCHED, |
30 | }; | 30 | }; |
31 | 31 | ||
32 | struct qdisc_size_table { | ||
33 | struct list_head list; | ||
34 | struct tc_sizespec szopts; | ||
35 | int refcnt; | ||
36 | u16 data[]; | ||
37 | }; | ||
38 | |||
32 | struct Qdisc | 39 | struct Qdisc |
33 | { | 40 | { |
34 | int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); | 41 | int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); |
@@ -39,6 +46,7 @@ struct Qdisc | |||
39 | #define TCQ_F_INGRESS 4 | 46 | #define TCQ_F_INGRESS 4 |
40 | int padded; | 47 | int padded; |
41 | struct Qdisc_ops *ops; | 48 | struct Qdisc_ops *ops; |
49 | struct qdisc_size_table *stab; | ||
42 | u32 handle; | 50 | u32 handle; |
43 | u32 parent; | 51 | u32 parent; |
44 | atomic_t refcnt; | 52 | atomic_t refcnt; |
@@ -165,6 +173,16 @@ struct tcf_proto | |||
165 | struct tcf_proto_ops *ops; | 173 | struct tcf_proto_ops *ops; |
166 | }; | 174 | }; |
167 | 175 | ||
176 | struct qdisc_skb_cb { | ||
177 | unsigned int pkt_len; | ||
178 | char data[]; | ||
179 | }; | ||
180 | |||
181 | static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb) | ||
182 | { | ||
183 | return (struct qdisc_skb_cb *)skb->cb; | ||
184 | } | ||
185 | |||
168 | static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) | 186 | static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) |
169 | { | 187 | { |
170 | return &qdisc->q.lock; | 188 | return &qdisc->q.lock; |
@@ -257,6 +275,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
257 | extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, | 275 | extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, |
258 | struct netdev_queue *dev_queue, | 276 | struct netdev_queue *dev_queue, |
259 | struct Qdisc_ops *ops, u32 parentid); | 277 | struct Qdisc_ops *ops, u32 parentid); |
278 | extern void qdisc_calculate_pkt_len(struct sk_buff *skb, | ||
279 | struct qdisc_size_table *stab); | ||
260 | extern void tcf_destroy(struct tcf_proto *tp); | 280 | extern void tcf_destroy(struct tcf_proto *tp); |
261 | extern void tcf_destroy_chain(struct tcf_proto **fl); | 281 | extern void tcf_destroy_chain(struct tcf_proto **fl); |
262 | 282 | ||
@@ -308,16 +328,19 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev) | |||
308 | 328 | ||
309 | static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) | 329 | static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) |
310 | { | 330 | { |
311 | return skb->len; | 331 | return qdisc_skb_cb(skb)->pkt_len; |
312 | } | 332 | } |
313 | 333 | ||
314 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 334 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
315 | { | 335 | { |
336 | if (sch->stab) | ||
337 | qdisc_calculate_pkt_len(skb, sch->stab); | ||
316 | return sch->enqueue(skb, sch); | 338 | return sch->enqueue(skb, sch); |
317 | } | 339 | } |
318 | 340 | ||
319 | static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) | 341 | static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) |
320 | { | 342 | { |
343 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
321 | return qdisc_enqueue(skb, sch); | 344 | return qdisc_enqueue(skb, sch); |
322 | } | 345 | } |
323 | 346 | ||
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fb43731c9860..5219d5f9d754 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -286,6 +286,129 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) | |||
286 | } | 286 | } |
287 | EXPORT_SYMBOL(qdisc_put_rtab); | 287 | EXPORT_SYMBOL(qdisc_put_rtab); |
288 | 288 | ||
289 | static LIST_HEAD(qdisc_stab_list); | ||
290 | static DEFINE_SPINLOCK(qdisc_stab_lock); | ||
291 | |||
292 | static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { | ||
293 | [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, | ||
294 | [TCA_STAB_DATA] = { .type = NLA_BINARY }, | ||
295 | }; | ||
296 | |||
297 | static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) | ||
298 | { | ||
299 | struct nlattr *tb[TCA_STAB_MAX + 1]; | ||
300 | struct qdisc_size_table *stab; | ||
301 | struct tc_sizespec *s; | ||
302 | unsigned int tsize = 0; | ||
303 | u16 *tab = NULL; | ||
304 | int err; | ||
305 | |||
306 | err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); | ||
307 | if (err < 0) | ||
308 | return ERR_PTR(err); | ||
309 | if (!tb[TCA_STAB_BASE]) | ||
310 | return ERR_PTR(-EINVAL); | ||
311 | |||
312 | s = nla_data(tb[TCA_STAB_BASE]); | ||
313 | |||
314 | if (s->tsize > 0) { | ||
315 | if (!tb[TCA_STAB_DATA]) | ||
316 | return ERR_PTR(-EINVAL); | ||
317 | tab = nla_data(tb[TCA_STAB_DATA]); | ||
318 | tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); | ||
319 | } | ||
320 | |||
321 | if (!s || tsize != s->tsize || (!tab && tsize > 0)) | ||
322 | return ERR_PTR(-EINVAL); | ||
323 | |||
324 | spin_lock(&qdisc_stab_lock); | ||
325 | |||
326 | list_for_each_entry(stab, &qdisc_stab_list, list) { | ||
327 | if (memcmp(&stab->szopts, s, sizeof(*s))) | ||
328 | continue; | ||
329 | if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) | ||
330 | continue; | ||
331 | stab->refcnt++; | ||
332 | spin_unlock(&qdisc_stab_lock); | ||
333 | return stab; | ||
334 | } | ||
335 | |||
336 | spin_unlock(&qdisc_stab_lock); | ||
337 | |||
338 | stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); | ||
339 | if (!stab) | ||
340 | return ERR_PTR(-ENOMEM); | ||
341 | |||
342 | stab->refcnt = 1; | ||
343 | stab->szopts = *s; | ||
344 | if (tsize > 0) | ||
345 | memcpy(stab->data, tab, tsize * sizeof(u16)); | ||
346 | |||
347 | spin_lock(&qdisc_stab_lock); | ||
348 | list_add_tail(&stab->list, &qdisc_stab_list); | ||
349 | spin_unlock(&qdisc_stab_lock); | ||
350 | |||
351 | return stab; | ||
352 | } | ||
353 | |||
354 | void qdisc_put_stab(struct qdisc_size_table *tab) | ||
355 | { | ||
356 | if (!tab) | ||
357 | return; | ||
358 | |||
359 | spin_lock(&qdisc_stab_lock); | ||
360 | |||
361 | if (--tab->refcnt == 0) { | ||
362 | list_del(&tab->list); | ||
363 | kfree(tab); | ||
364 | } | ||
365 | |||
366 | spin_unlock(&qdisc_stab_lock); | ||
367 | } | ||
368 | EXPORT_SYMBOL(qdisc_put_stab); | ||
369 | |||
370 | static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) | ||
371 | { | ||
372 | struct nlattr *nest; | ||
373 | |||
374 | nest = nla_nest_start(skb, TCA_STAB); | ||
375 | NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); | ||
376 | nla_nest_end(skb, nest); | ||
377 | |||
378 | return skb->len; | ||
379 | |||
380 | nla_put_failure: | ||
381 | return -1; | ||
382 | } | ||
383 | |||
384 | void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) | ||
385 | { | ||
386 | int pkt_len, slot; | ||
387 | |||
388 | pkt_len = skb->len + stab->szopts.overhead; | ||
389 | if (unlikely(!stab->szopts.tsize)) | ||
390 | goto out; | ||
391 | |||
392 | slot = pkt_len + stab->szopts.cell_align; | ||
393 | if (unlikely(slot < 0)) | ||
394 | slot = 0; | ||
395 | |||
396 | slot >>= stab->szopts.cell_log; | ||
397 | if (likely(slot < stab->szopts.tsize)) | ||
398 | pkt_len = stab->data[slot]; | ||
399 | else | ||
400 | pkt_len = stab->data[stab->szopts.tsize - 1] * | ||
401 | (slot / stab->szopts.tsize) + | ||
402 | stab->data[slot % stab->szopts.tsize]; | ||
403 | |||
404 | pkt_len <<= stab->szopts.size_log; | ||
405 | out: | ||
406 | if (unlikely(pkt_len < 1)) | ||
407 | pkt_len = 1; | ||
408 | qdisc_skb_cb(skb)->pkt_len = pkt_len; | ||
409 | } | ||
410 | EXPORT_SYMBOL(qdisc_calculate_pkt_len); | ||
411 | |||
289 | static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) | 412 | static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) |
290 | { | 413 | { |
291 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, | 414 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, |
@@ -613,6 +736,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
613 | struct nlattr *kind = tca[TCA_KIND]; | 736 | struct nlattr *kind = tca[TCA_KIND]; |
614 | struct Qdisc *sch; | 737 | struct Qdisc *sch; |
615 | struct Qdisc_ops *ops; | 738 | struct Qdisc_ops *ops; |
739 | struct qdisc_size_table *stab; | ||
616 | 740 | ||
617 | ops = qdisc_lookup_ops(kind); | 741 | ops = qdisc_lookup_ops(kind); |
618 | #ifdef CONFIG_KMOD | 742 | #ifdef CONFIG_KMOD |
@@ -670,6 +794,14 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
670 | sch->handle = handle; | 794 | sch->handle = handle; |
671 | 795 | ||
672 | if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { | 796 | if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { |
797 | if (tca[TCA_STAB]) { | ||
798 | stab = qdisc_get_stab(tca[TCA_STAB]); | ||
799 | if (IS_ERR(stab)) { | ||
800 | err = PTR_ERR(stab); | ||
801 | goto err_out3; | ||
802 | } | ||
803 | sch->stab = stab; | ||
804 | } | ||
673 | if (tca[TCA_RATE]) { | 805 | if (tca[TCA_RATE]) { |
674 | err = gen_new_estimator(&sch->bstats, &sch->rate_est, | 806 | err = gen_new_estimator(&sch->bstats, &sch->rate_est, |
675 | qdisc_root_lock(sch), | 807 | qdisc_root_lock(sch), |
@@ -691,6 +823,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
691 | return sch; | 823 | return sch; |
692 | } | 824 | } |
693 | err_out3: | 825 | err_out3: |
826 | qdisc_put_stab(sch->stab); | ||
694 | dev_put(dev); | 827 | dev_put(dev); |
695 | kfree((char *) sch - sch->padded); | 828 | kfree((char *) sch - sch->padded); |
696 | err_out2: | 829 | err_out2: |
@@ -702,15 +835,26 @@ err_out: | |||
702 | 835 | ||
703 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | 836 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) |
704 | { | 837 | { |
705 | if (tca[TCA_OPTIONS]) { | 838 | struct qdisc_size_table *stab = NULL; |
706 | int err; | 839 | int err = 0; |
707 | 840 | ||
841 | if (tca[TCA_OPTIONS]) { | ||
708 | if (sch->ops->change == NULL) | 842 | if (sch->ops->change == NULL) |
709 | return -EINVAL; | 843 | return -EINVAL; |
710 | err = sch->ops->change(sch, tca[TCA_OPTIONS]); | 844 | err = sch->ops->change(sch, tca[TCA_OPTIONS]); |
711 | if (err) | 845 | if (err) |
712 | return err; | 846 | return err; |
713 | } | 847 | } |
848 | |||
849 | if (tca[TCA_STAB]) { | ||
850 | stab = qdisc_get_stab(tca[TCA_STAB]); | ||
851 | if (IS_ERR(stab)) | ||
852 | return PTR_ERR(stab); | ||
853 | } | ||
854 | |||
855 | qdisc_put_stab(sch->stab); | ||
856 | sch->stab = stab; | ||
857 | |||
714 | if (tca[TCA_RATE]) | 858 | if (tca[TCA_RATE]) |
715 | gen_replace_estimator(&sch->bstats, &sch->rate_est, | 859 | gen_replace_estimator(&sch->bstats, &sch->rate_est, |
716 | qdisc_root_lock(sch), tca[TCA_RATE]); | 860 | qdisc_root_lock(sch), tca[TCA_RATE]); |
@@ -994,6 +1138,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
994 | goto nla_put_failure; | 1138 | goto nla_put_failure; |
995 | q->qstats.qlen = q->q.qlen; | 1139 | q->qstats.qlen = q->q.qlen; |
996 | 1140 | ||
1141 | if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) | ||
1142 | goto nla_put_failure; | ||
1143 | |||
997 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, | 1144 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, |
998 | TCA_XSTATS, qdisc_root_lock(q), &d) < 0) | 1145 | TCA_XSTATS, qdisc_root_lock(q), &d) < 0) |
999 | goto nla_put_failure; | 1146 | goto nla_put_failure; |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 522a41a9f904..27a51f04db49 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -469,6 +469,7 @@ static void __qdisc_destroy(struct rcu_head *head) | |||
469 | struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); | 469 | struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); |
470 | const struct Qdisc_ops *ops = qdisc->ops; | 470 | const struct Qdisc_ops *ops = qdisc->ops; |
471 | 471 | ||
472 | qdisc_put_stab(qdisc->stab); | ||
472 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 473 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
473 | if (ops->reset) | 474 | if (ops->reset) |
474 | ops->reset(qdisc); | 475 | ops->reset(qdisc); |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ae49be00022f..a59085700678 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -84,8 +84,9 @@ struct netem_skb_cb { | |||
84 | 84 | ||
85 | static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) | 85 | static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) |
86 | { | 86 | { |
87 | BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb)); | 87 | BUILD_BUG_ON(sizeof(skb->cb) < |
88 | return (struct netem_skb_cb *)skb->cb; | 88 | sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); |
89 | return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; | ||
89 | } | 90 | } |
90 | 91 | ||
91 | /* init_crandom - initialize correlated random number generator | 92 | /* init_crandom - initialize correlated random number generator |