diff options
author | Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com> | 2007-06-29 00:04:31 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-11 01:16:22 -0400 |
commit | d62733c8e437fdb58325617c4b3331769ba82d70 (patch) | |
tree | fcbef07b66cda79b047092e7d53dfd005046838a | |
parent | f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 (diff) |
[SCHED]: Qdisc changes and sch_rr added for multiqueue
Add the new sch_rr qdisc for multiqueue network device support. Allow
sch_prio and sch_rr to be compiled with or without multiqueue hardware
support.
sch_rr is implemented inside sch_prio.c and is made loadable under its
own name through MODULE_ALIAS("sch_rr"). This was done because sch_prio
and sch_rr differ only in their dequeue routine.
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/pkt_sched.h | 9 | ||||
-rw-r--r-- | net/sched/Kconfig | 11 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 129 |
3 files changed, 134 insertions, 15 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d10f35338507..268c51599eb8 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h | |||
@@ -101,6 +101,15 @@ struct tc_prio_qopt | |||
101 | __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ | 101 | __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ |
102 | }; | 102 | }; |
103 | 103 | ||
104 | enum | ||
105 | { | ||
106 | TCA_PRIO_UNSPEC, | ||
107 | TCA_PRIO_MQ, | ||
108 | __TCA_PRIO_MAX | ||
109 | }; | ||
110 | |||
111 | #define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1) | ||
112 | |||
104 | /* TBF section */ | 113 | /* TBF section */ |
105 | 114 | ||
106 | struct tc_tbf_qopt | 115 | struct tc_tbf_qopt |
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475df8449be9..f3217942ca87 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -111,6 +111,17 @@ config NET_SCH_PRIO | |||
111 | To compile this code as a module, choose M here: the | 111 | To compile this code as a module, choose M here: the |
112 | module will be called sch_prio. | 112 | module will be called sch_prio. |
113 | 113 | ||
114 | config NET_SCH_RR | ||
115 | tristate "Multi Band Round Robin Queuing (RR)" | ||
116 | select NET_SCH_PRIO | ||
117 | ---help--- | ||
118 | Say Y here if you want to use an n-band round robin packet | ||
119 | scheduler. | ||
120 | |||
121 | The module uses sch_prio for its framework and is aliased as | ||
122 | sch_rr, so it will load sch_prio, although it is referred | ||
123 | to using sch_rr. | ||
124 | |||
114 | config NET_SCH_RED | 125 | config NET_SCH_RED |
115 | tristate "Random Early Detection (RED)" | 126 | tristate "Random Early Detection (RED)" |
116 | ---help--- | 127 | ---help--- |
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 6d7542c26e47..404522046289 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c | |||
@@ -40,9 +40,11 @@ | |||
40 | struct prio_sched_data | 40 | struct prio_sched_data |
41 | { | 41 | { |
42 | int bands; | 42 | int bands; |
43 | int curband; /* for round-robin */ | ||
43 | struct tcf_proto *filter_list; | 44 | struct tcf_proto *filter_list; |
44 | u8 prio2band[TC_PRIO_MAX+1]; | 45 | u8 prio2band[TC_PRIO_MAX+1]; |
45 | struct Qdisc *queues[TCQ_PRIO_BANDS]; | 46 | struct Qdisc *queues[TCQ_PRIO_BANDS]; |
47 | int mq; | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | 50 | ||
@@ -70,14 +72,17 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
70 | #endif | 72 | #endif |
71 | if (TC_H_MAJ(band)) | 73 | if (TC_H_MAJ(band)) |
72 | band = 0; | 74 | band = 0; |
73 | return q->queues[q->prio2band[band&TC_PRIO_MAX]]; | 75 | band = q->prio2band[band&TC_PRIO_MAX]; |
76 | goto out; | ||
74 | } | 77 | } |
75 | band = res.classid; | 78 | band = res.classid; |
76 | } | 79 | } |
77 | band = TC_H_MIN(band) - 1; | 80 | band = TC_H_MIN(band) - 1; |
78 | if (band >= q->bands) | 81 | if (band >= q->bands) |
79 | return q->queues[q->prio2band[0]]; | 82 | band = q->prio2band[0]; |
80 | 83 | out: | |
84 | if (q->mq) | ||
85 | skb_set_queue_mapping(skb, band); | ||
81 | return q->queues[band]; | 86 | return q->queues[band]; |
82 | } | 87 | } |
83 | 88 | ||
@@ -144,17 +149,58 @@ prio_dequeue(struct Qdisc* sch) | |||
144 | struct Qdisc *qdisc; | 149 | struct Qdisc *qdisc; |
145 | 150 | ||
146 | for (prio = 0; prio < q->bands; prio++) { | 151 | for (prio = 0; prio < q->bands; prio++) { |
147 | qdisc = q->queues[prio]; | 152 | /* Check if the target subqueue is available before |
148 | skb = qdisc->dequeue(qdisc); | 153 | * pulling an skb. This way we avoid excessive requeues |
149 | if (skb) { | 154 | * for slower queues. |
150 | sch->q.qlen--; | 155 | */ |
151 | return skb; | 156 | if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { |
157 | qdisc = q->queues[prio]; | ||
158 | skb = qdisc->dequeue(qdisc); | ||
159 | if (skb) { | ||
160 | sch->q.qlen--; | ||
161 | return skb; | ||
162 | } | ||
152 | } | 163 | } |
153 | } | 164 | } |
154 | return NULL; | 165 | return NULL; |
155 | 166 | ||
156 | } | 167 | } |
157 | 168 | ||
169 | static struct sk_buff *rr_dequeue(struct Qdisc* sch) | ||
170 | { | ||
171 | struct sk_buff *skb; | ||
172 | struct prio_sched_data *q = qdisc_priv(sch); | ||
173 | struct Qdisc *qdisc; | ||
174 | int bandcount; | ||
175 | |||
176 | /* Only take one pass through the queues. If nothing is available, | ||
177 | * return nothing. | ||
178 | */ | ||
179 | for (bandcount = 0; bandcount < q->bands; bandcount++) { | ||
180 | /* Check if the target subqueue is available before | ||
181 | * pulling an skb. This way we avoid excessive requeues | ||
182 | * for slower queues. If the queue is stopped, try the | ||
183 | * next queue. | ||
184 | */ | ||
185 | if (!netif_subqueue_stopped(sch->dev, | ||
186 | (q->mq ? q->curband : 0))) { | ||
187 | qdisc = q->queues[q->curband]; | ||
188 | skb = qdisc->dequeue(qdisc); | ||
189 | if (skb) { | ||
190 | sch->q.qlen--; | ||
191 | q->curband++; | ||
192 | if (q->curband >= q->bands) | ||
193 | q->curband = 0; | ||
194 | return skb; | ||
195 | } | ||
196 | } | ||
197 | q->curband++; | ||
198 | if (q->curband >= q->bands) | ||
199 | q->curband = 0; | ||
200 | } | ||
201 | return NULL; | ||
202 | } | ||
203 | |||
158 | static unsigned int prio_drop(struct Qdisc* sch) | 204 | static unsigned int prio_drop(struct Qdisc* sch) |
159 | { | 205 | { |
160 | struct prio_sched_data *q = qdisc_priv(sch); | 206 | struct prio_sched_data *q = qdisc_priv(sch); |
@@ -198,21 +244,41 @@ prio_destroy(struct Qdisc* sch) | |||
198 | static int prio_tune(struct Qdisc *sch, struct rtattr *opt) | 244 | static int prio_tune(struct Qdisc *sch, struct rtattr *opt) |
199 | { | 245 | { |
200 | struct prio_sched_data *q = qdisc_priv(sch); | 246 | struct prio_sched_data *q = qdisc_priv(sch); |
201 | struct tc_prio_qopt *qopt = RTA_DATA(opt); | 247 | struct tc_prio_qopt *qopt; |
248 | struct rtattr *tb[TCA_PRIO_MAX]; | ||
202 | int i; | 249 | int i; |
203 | 250 | ||
204 | if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) | 251 | if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, |
252 | sizeof(*qopt))) | ||
205 | return -EINVAL; | 253 | return -EINVAL; |
206 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) | 254 | q->bands = qopt->bands; |
255 | /* If we're multiqueue, make sure the number of incoming bands | ||
256 | * matches the number of queues on the device we're associating with. | ||
257 | * If the number of bands requested is zero, then set q->bands to | ||
258 | * dev->egress_subqueue_count. | ||
259 | */ | ||
260 | q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); | ||
261 | if (q->mq) { | ||
262 | if (sch->handle != TC_H_ROOT) | ||
263 | return -EINVAL; | ||
264 | if (netif_is_multiqueue(sch->dev)) { | ||
265 | if (q->bands == 0) | ||
266 | q->bands = sch->dev->egress_subqueue_count; | ||
267 | else if (q->bands != sch->dev->egress_subqueue_count) | ||
268 | return -EINVAL; | ||
269 | } else | ||
270 | return -EOPNOTSUPP; | ||
271 | } | ||
272 | |||
273 | if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) | ||
207 | return -EINVAL; | 274 | return -EINVAL; |
208 | 275 | ||
209 | for (i=0; i<=TC_PRIO_MAX; i++) { | 276 | for (i=0; i<=TC_PRIO_MAX; i++) { |
210 | if (qopt->priomap[i] >= qopt->bands) | 277 | if (qopt->priomap[i] >= q->bands) |
211 | return -EINVAL; | 278 | return -EINVAL; |
212 | } | 279 | } |
213 | 280 | ||
214 | sch_tree_lock(sch); | 281 | sch_tree_lock(sch); |
215 | q->bands = qopt->bands; | ||
216 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); | 282 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); |
217 | 283 | ||
218 | for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { | 284 | for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { |
@@ -268,11 +334,17 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
268 | { | 334 | { |
269 | struct prio_sched_data *q = qdisc_priv(sch); | 335 | struct prio_sched_data *q = qdisc_priv(sch); |
270 | unsigned char *b = skb_tail_pointer(skb); | 336 | unsigned char *b = skb_tail_pointer(skb); |
337 | struct rtattr *nest; | ||
271 | struct tc_prio_qopt opt; | 338 | struct tc_prio_qopt opt; |
272 | 339 | ||
273 | opt.bands = q->bands; | 340 | opt.bands = q->bands; |
274 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); | 341 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); |
275 | RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 342 | |
343 | nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
344 | if (q->mq) | ||
345 | RTA_PUT_FLAG(skb, TCA_PRIO_MQ); | ||
346 | RTA_NEST_COMPAT_END(skb, nest); | ||
347 | |||
276 | return skb->len; | 348 | return skb->len; |
277 | 349 | ||
278 | rtattr_failure: | 350 | rtattr_failure: |
@@ -443,17 +515,44 @@ static struct Qdisc_ops prio_qdisc_ops = { | |||
443 | .owner = THIS_MODULE, | 515 | .owner = THIS_MODULE, |
444 | }; | 516 | }; |
445 | 517 | ||
518 | static struct Qdisc_ops rr_qdisc_ops = { | ||
519 | .next = NULL, | ||
520 | .cl_ops = &prio_class_ops, | ||
521 | .id = "rr", | ||
522 | .priv_size = sizeof(struct prio_sched_data), | ||
523 | .enqueue = prio_enqueue, | ||
524 | .dequeue = rr_dequeue, | ||
525 | .requeue = prio_requeue, | ||
526 | .drop = prio_drop, | ||
527 | .init = prio_init, | ||
528 | .reset = prio_reset, | ||
529 | .destroy = prio_destroy, | ||
530 | .change = prio_tune, | ||
531 | .dump = prio_dump, | ||
532 | .owner = THIS_MODULE, | ||
533 | }; | ||
534 | |||
446 | static int __init prio_module_init(void) | 535 | static int __init prio_module_init(void) |
447 | { | 536 | { |
448 | return register_qdisc(&prio_qdisc_ops); | 537 | int err; |
538 | |||
539 | err = register_qdisc(&prio_qdisc_ops); | ||
540 | if (err < 0) | ||
541 | return err; | ||
542 | err = register_qdisc(&rr_qdisc_ops); | ||
543 | if (err < 0) | ||
544 | unregister_qdisc(&prio_qdisc_ops); | ||
545 | return err; | ||
449 | } | 546 | } |
450 | 547 | ||
451 | static void __exit prio_module_exit(void) | 548 | static void __exit prio_module_exit(void) |
452 | { | 549 | { |
453 | unregister_qdisc(&prio_qdisc_ops); | 550 | unregister_qdisc(&prio_qdisc_ops); |
551 | unregister_qdisc(&rr_qdisc_ops); | ||
454 | } | 552 | } |
455 | 553 | ||
456 | module_init(prio_module_init) | 554 | module_init(prio_module_init) |
457 | module_exit(prio_module_exit) | 555 | module_exit(prio_module_exit) |
458 | 556 | ||
459 | MODULE_LICENSE("GPL"); | 557 | MODULE_LICENSE("GPL"); |
558 | MODULE_ALIAS("sch_rr"); | ||