aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@intel.com>2008-09-12 19:29:34 -0400
committerDavid S. Miller <davem@davemloft.net>2008-09-12 19:29:34 -0400
commit92651940ab00dbe64722e908f70d816713d677b7 (patch)
treeb68fdef99784bfa46b67aabaf70c19b0e5e0a144
parent78d15e82754945ee9821fb491b57faf43abfb9d7 (diff)
pkt_sched: Add multiqueue scheduler support
This patch is intended to add a qdisc to support the new tx multiqueue architecture by providing a band for each hardware queue. By doing this it is possible to support a different qdisc per physical hardware queue. This qdisc uses the skb->queue_mapping to select which band to place the traffic onto. It then uses a round robin w/ a check to see if the subqueue is stopped to determine which band to dequeue the packet from. Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/multiqueue.txt47
-rw-r--r--include/linux/pkt_sched.h7
-rw-r--r--net/sched/Kconfig9
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/sch_multiq.c467
5 files changed, 530 insertions, 1 deletions
diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index d391ea631141..5787ee6eca4f 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -24,4 +24,49 @@ netif_{start|stop|wake}_subqueue() functions to manage each queue while the
24device is still operational. netdev->queue_lock is still used when the device 24device is still operational. netdev->queue_lock is still used when the device
25comes online or when it's completely shut down (unregister_netdev(), etc.). 25comes online or when it's completely shut down (unregister_netdev(), etc.).
26 26
27Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com> 27
28Section 2: Qdisc support for multiqueue devices
29
30-----------------------------------------------
31
32Currently two qdiscs support multiqueue devices. The first is the default
33pfifo_fast qdisc. This qdisc supports one qdisc per hardware queue. A new
34round-robin qdisc, sch_multiq also supports multiple hardware queues. The
35qdisc is responsible for classifying the skb's and then directing the skb's to
36bands and queues based on the value in skb->queue_mapping. Use this field in
37the base driver to determine which queue to send the skb to.
38
39sch_multiq has been added for hardware that wishes to avoid unnecessary
40requeuing. It will cycle through the bands and verify that the hardware queue
41associated with the band is not stopped prior to dequeuing a packet.
42
43On qdisc load, the number of bands is based on the number of queues on the
44hardware. Once the association is made, any skb with skb->queue_mapping set
45will be queued to the band associated with the hardware queue.
46
47
48Section 3: Brief howto using MULTIQ for multiqueue devices
49---------------------------------------------------------------
50
51The userspace command 'tc,' part of the iproute2 package, is used to configure
52qdiscs. To add the MULTIQ qdisc to your network device, assuming the device
53is called eth0, run the following command:
54
55# tc qdisc add dev eth0 root handle 1: multiq
56
57The qdisc will allocate the number of bands to equal the number of queues that
58the device reports, and bring the qdisc online. Assuming eth0 has 4 Tx
59queues, the band mapping would look like:
60
61band 0 => queue 0
62band 1 => queue 1
63band 2 => queue 2
64band 3 => queue 3
65
66Traffic will begin flowing through each queue if your base device has either
67the default simple_tx_hash or a custom netdev->select_queue() defined.
68
69The behavior of tc filters remains the same.
70
71Author: Alexander Duyck <alexander.h.duyck@intel.com>
72Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e5de421ac7b4..5d921fa91a5b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -123,6 +123,13 @@ struct tc_prio_qopt
123 __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ 123 __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
124}; 124};
125 125
126/* MULTIQ section */
127
128struct tc_multiq_qopt {
129 __u16 bands; /* Number of bands */
130 __u16 max_bands; /* Maximum number of queues */
131};
132
126/* TBF section */ 133/* TBF section */
127 134
128struct tc_tbf_qopt 135struct tc_tbf_qopt
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9437b27ff84d..efaa7a75e7f3 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,6 +106,15 @@ config NET_SCH_PRIO
106 To compile this code as a module, choose M here: the 106 To compile this code as a module, choose M here: the
107 module will be called sch_prio. 107 module will be called sch_prio.
108 108
109config NET_SCH_MULTIQ
110 tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
111 ---help---
112 Say Y here if you want to use an n-band queue packet scheduler
113 to support devices that have multiple hardware transmit queues.
114
115 To compile this code as a module, choose M here: the
116 module will be called sch_multiq.
117
109config NET_SCH_RED 118config NET_SCH_RED
110 tristate "Random Early Detection (RED)" 119 tristate "Random Early Detection (RED)"
111 ---help--- 120 ---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7df848..3d9b953f7f62 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
26obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o 26obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
27obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o 27obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
28obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o 28obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
29obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
29obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 30obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
30obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 31obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
31obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 32obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 000000000000..49a8b67ed3b8
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,467 @@
1/*
2 * Copyright (c) 2008, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Author: Alexander Duyck <alexander.h.duyck@intel.com>
18 */
19
20#include <linux/module.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/skbuff.h>
26#include <net/netlink.h>
27#include <net/pkt_sched.h>
28
29
30struct multiq_sched_data {
31 u16 bands;
32 u16 max_bands;
33 u16 curband;
34 struct tcf_proto *filter_list;
35 struct Qdisc **queues;
36};
37
38
39static struct Qdisc *
40multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
41{
42 struct multiq_sched_data *q = qdisc_priv(sch);
43 u32 band;
44 struct tcf_result res;
45 int err;
46
47 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
48 err = tc_classify(skb, q->filter_list, &res);
49#ifdef CONFIG_NET_CLS_ACT
50 switch (err) {
51 case TC_ACT_STOLEN:
52 case TC_ACT_QUEUED:
53 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
54 case TC_ACT_SHOT:
55 return NULL;
56 }
57#endif
58 band = skb_get_queue_mapping(skb);
59
60 if (band >= q->bands)
61 return q->queues[0];
62
63 return q->queues[band];
64}
65
66static int
67multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
68{
69 struct Qdisc *qdisc;
70 int ret;
71
72 qdisc = multiq_classify(skb, sch, &ret);
73#ifdef CONFIG_NET_CLS_ACT
74 if (qdisc == NULL) {
75
76 if (ret & __NET_XMIT_BYPASS)
77 sch->qstats.drops++;
78 kfree_skb(skb);
79 return ret;
80 }
81#endif
82
83 ret = qdisc_enqueue(skb, qdisc);
84 if (ret == NET_XMIT_SUCCESS) {
85 sch->bstats.bytes += qdisc_pkt_len(skb);
86 sch->bstats.packets++;
87 sch->q.qlen++;
88 return NET_XMIT_SUCCESS;
89 }
90 if (net_xmit_drop_count(ret))
91 sch->qstats.drops++;
92 return ret;
93}
94
95
96static int
97multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
98{
99 struct Qdisc *qdisc;
100 int ret;
101
102 qdisc = multiq_classify(skb, sch, &ret);
103#ifdef CONFIG_NET_CLS_ACT
104 if (qdisc == NULL) {
105 if (ret & __NET_XMIT_BYPASS)
106 sch->qstats.drops++;
107 kfree_skb(skb);
108 return ret;
109 }
110#endif
111
112 ret = qdisc->ops->requeue(skb, qdisc);
113 if (ret == NET_XMIT_SUCCESS) {
114 sch->q.qlen++;
115 sch->qstats.requeues++;
116 return NET_XMIT_SUCCESS;
117 }
118 if (net_xmit_drop_count(ret))
119 sch->qstats.drops++;
120 return ret;
121}
122
123
124static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
125{
126 struct multiq_sched_data *q = qdisc_priv(sch);
127 struct Qdisc *qdisc;
128 struct sk_buff *skb;
129 int band;
130
131 for (band = 0; band < q->bands; band++) {
132 /* cycle through bands to ensure fairness */
133 q->curband++;
134 if (q->curband >= q->bands)
135 q->curband = 0;
136
137 /* Check that target subqueue is available before
138 * pulling an skb to avoid excessive requeues
139 */
140 if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
141 qdisc = q->queues[q->curband];
142 skb = qdisc->dequeue(qdisc);
143 if (skb) {
144 sch->q.qlen--;
145 return skb;
146 }
147 }
148 }
149 return NULL;
150
151}
152
153static unsigned int multiq_drop(struct Qdisc *sch)
154{
155 struct multiq_sched_data *q = qdisc_priv(sch);
156 int band;
157 unsigned int len;
158 struct Qdisc *qdisc;
159
160 for (band = q->bands-1; band >= 0; band--) {
161 qdisc = q->queues[band];
162 if (qdisc->ops->drop) {
163 len = qdisc->ops->drop(qdisc);
164 if (len != 0) {
165 sch->q.qlen--;
166 return len;
167 }
168 }
169 }
170 return 0;
171}
172
173
174static void
175multiq_reset(struct Qdisc *sch)
176{
177 u16 band;
178 struct multiq_sched_data *q = qdisc_priv(sch);
179
180 for (band = 0; band < q->bands; band++)
181 qdisc_reset(q->queues[band]);
182 sch->q.qlen = 0;
183 q->curband = 0;
184}
185
186static void
187multiq_destroy(struct Qdisc *sch)
188{
189 int band;
190 struct multiq_sched_data *q = qdisc_priv(sch);
191
192 tcf_destroy_chain(&q->filter_list);
193 for (band = 0; band < q->bands; band++)
194 qdisc_destroy(q->queues[band]);
195
196 kfree(q->queues);
197}
198
199static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
200{
201 struct multiq_sched_data *q = qdisc_priv(sch);
202 struct tc_multiq_qopt *qopt;
203 int i;
204
205 if (!netif_is_multiqueue(qdisc_dev(sch)))
206 return -EINVAL;
207 if (nla_len(opt) < sizeof(*qopt))
208 return -EINVAL;
209
210 qopt = nla_data(opt);
211
212 qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
213
214 sch_tree_lock(sch);
215 q->bands = qopt->bands;
216 for (i = q->bands; i < q->max_bands; i++) {
217 struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
218 if (child != &noop_qdisc) {
219 qdisc_tree_decrease_qlen(child, child->q.qlen);
220 qdisc_destroy(child);
221 }
222 }
223
224 sch_tree_unlock(sch);
225
226 for (i = 0; i < q->bands; i++) {
227 if (q->queues[i] == &noop_qdisc) {
228 struct Qdisc *child;
229 child = qdisc_create_dflt(qdisc_dev(sch),
230 sch->dev_queue,
231 &pfifo_qdisc_ops,
232 TC_H_MAKE(sch->handle,
233 i + 1));
234 if (child) {
235 sch_tree_lock(sch);
236 child = xchg(&q->queues[i], child);
237
238 if (child != &noop_qdisc) {
239 qdisc_tree_decrease_qlen(child,
240 child->q.qlen);
241 qdisc_destroy(child);
242 }
243 sch_tree_unlock(sch);
244 }
245 }
246 }
247 return 0;
248}
249
250static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
251{
252 struct multiq_sched_data *q = qdisc_priv(sch);
253 int i;
254
255 q->queues = NULL;
256
257 if (opt == NULL)
258 return -EINVAL;
259
260 q->max_bands = qdisc_dev(sch)->num_tx_queues;
261
262 q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
263 if (!q->queues)
264 return -ENOBUFS;
265 for (i = 0; i < q->max_bands; i++)
266 q->queues[i] = &noop_qdisc;
267
268 return multiq_tune(sch, opt);
269}
270
271static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
272{
273 struct multiq_sched_data *q = qdisc_priv(sch);
274 unsigned char *b = skb_tail_pointer(skb);
275 struct tc_multiq_qopt opt;
276
277 opt.bands = q->bands;
278 opt.max_bands = q->max_bands;
279
280 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
281
282 return skb->len;
283
284nla_put_failure:
285 nlmsg_trim(skb, b);
286 return -1;
287}
288
289static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
290 struct Qdisc **old)
291{
292 struct multiq_sched_data *q = qdisc_priv(sch);
293 unsigned long band = arg - 1;
294
295 if (band >= q->bands)
296 return -EINVAL;
297
298 if (new == NULL)
299 new = &noop_qdisc;
300
301 sch_tree_lock(sch);
302 *old = q->queues[band];
303 q->queues[band] = new;
304 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
305 qdisc_reset(*old);
306 sch_tree_unlock(sch);
307
308 return 0;
309}
310
311static struct Qdisc *
312multiq_leaf(struct Qdisc *sch, unsigned long arg)
313{
314 struct multiq_sched_data *q = qdisc_priv(sch);
315 unsigned long band = arg - 1;
316
317 if (band >= q->bands)
318 return NULL;
319
320 return q->queues[band];
321}
322
323static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
324{
325 struct multiq_sched_data *q = qdisc_priv(sch);
326 unsigned long band = TC_H_MIN(classid);
327
328 if (band - 1 >= q->bands)
329 return 0;
330 return band;
331}
332
333static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
334 u32 classid)
335{
336 return multiq_get(sch, classid);
337}
338
339
/* Release a class handle: deliberately does nothing. */
static void multiq_put(struct Qdisc *q, unsigned long cl)
{
}
344
345static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
346 struct nlattr **tca, unsigned long *arg)
347{
348 unsigned long cl = *arg;
349 struct multiq_sched_data *q = qdisc_priv(sch);
350
351 if (cl - 1 > q->bands)
352 return -ENOENT;
353 return 0;
354}
355
356static int multiq_delete(struct Qdisc *sch, unsigned long cl)
357{
358 struct multiq_sched_data *q = qdisc_priv(sch);
359 if (cl - 1 > q->bands)
360 return -ENOENT;
361 return 0;
362}
363
364
365static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
366 struct sk_buff *skb, struct tcmsg *tcm)
367{
368 struct multiq_sched_data *q = qdisc_priv(sch);
369
370 if (cl - 1 > q->bands)
371 return -ENOENT;
372 tcm->tcm_handle |= TC_H_MIN(cl);
373 if (q->queues[cl-1])
374 tcm->tcm_info = q->queues[cl-1]->handle;
375 return 0;
376}
377
378static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
379 struct gnet_dump *d)
380{
381 struct multiq_sched_data *q = qdisc_priv(sch);
382 struct Qdisc *cl_q;
383
384 cl_q = q->queues[cl - 1];
385 if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
386 gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
387 return -1;
388
389 return 0;
390}
391
392static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
393{
394 struct multiq_sched_data *q = qdisc_priv(sch);
395 int band;
396
397 if (arg->stop)
398 return;
399
400 for (band = 0; band < q->bands; band++) {
401 if (arg->count < arg->skip) {
402 arg->count++;
403 continue;
404 }
405 if (arg->fn(sch, band+1, arg) < 0) {
406 arg->stop = 1;
407 break;
408 }
409 arg->count++;
410 }
411}
412
413static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
414{
415 struct multiq_sched_data *q = qdisc_priv(sch);
416
417 if (cl)
418 return NULL;
419 return &q->filter_list;
420}
421
422static const struct Qdisc_class_ops multiq_class_ops = {
423 .graft = multiq_graft,
424 .leaf = multiq_leaf,
425 .get = multiq_get,
426 .put = multiq_put,
427 .change = multiq_change,
428 .delete = multiq_delete,
429 .walk = multiq_walk,
430 .tcf_chain = multiq_find_tcf,
431 .bind_tcf = multiq_bind,
432 .unbind_tcf = multiq_put,
433 .dump = multiq_dump_class,
434 .dump_stats = multiq_dump_class_stats,
435};
436
437static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
438 .next = NULL,
439 .cl_ops = &multiq_class_ops,
440 .id = "multiq",
441 .priv_size = sizeof(struct multiq_sched_data),
442 .enqueue = multiq_enqueue,
443 .dequeue = multiq_dequeue,
444 .requeue = multiq_requeue,
445 .drop = multiq_drop,
446 .init = multiq_init,
447 .reset = multiq_reset,
448 .destroy = multiq_destroy,
449 .change = multiq_tune,
450 .dump = multiq_dump,
451 .owner = THIS_MODULE,
452};
453
454static int __init multiq_module_init(void)
455{
456 return register_qdisc(&multiq_qdisc_ops);
457}
458
459static void __exit multiq_module_exit(void)
460{
461 unregister_qdisc(&multiq_qdisc_ops);
462}
463
464module_init(multiq_module_init)
465module_exit(multiq_module_exit)
466
467MODULE_LICENSE("GPL");