author     Nishanth Devarajan <ndev2021@gmail.com>    2018-07-23 10:07:41 -0400
committer  David S. Miller <davem@davemloft.net>      2018-07-24 17:44:00 -0400
commit     aea5f654e6b78a0c976f7a25950155932c77a53f (patch)
tree       d67053f9a2ec1a7a8a379e7480626be85a7e4cf2 /net
parent     b8f8c8eb408b36ad55dd41a616b3f51998880fb6 (diff)
net/sched: add skbprio scheduler
Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.
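
For illustration only (not part of this patch): skbprio classifies on
skb->priority, which for locally generated traffic is normally inherited
from the sending socket's sk_priority. Assuming the qdisc has already been
attached as root with its default limit of 64 (the tc syntax is documented
in the iproute2 man page, per v4 below), a sender could mark its packets
roughly like this:

    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            /* Values 0-6 may be set unprivileged; higher ones need CAP_NET_ADMIN. */
            int prio = 6;

            if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
                                     &prio, sizeof(prio)) < 0) {
                    perror("SO_PRIORITY");
                    return 1;
            }
            /* Datagrams sent on fd now carry skb->priority == 6, so under
             * congestion skbprio drops lower-priority packets before them.
             */
            return 0;
    }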
v5
*Do not reference qdisc_dev(sch)->tx_queue_len for setting limit. Instead set
default sch->limit to 64.
v4
*Drop Documentation/networking/sch_skbprio.txt doc file to move it to tc man
page for Skbprio, in iproute2.
v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.
*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).
*Move qdisc's detailed description from in-code to Documentation/networking.
*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.
*Introduce and use overlimit stat to keep track of number of dropped packets.
v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).
*Queueing discipline is made classful to expose Skbprio's internal priority
queues.
Signed-off-by: Nishanth Devarajan <ndev2021@gmail.com>
Reviewed-by: Sachin Paryani <sachin.paryani@gmail.com>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--  net/sched/Kconfig        |  13
-rw-r--r--  net/sched/Makefile       |   1
-rw-r--r--  net/sched/sch_skbprio.c  | 320
3 files changed, 334 insertions, 0 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bba71225adbd..e95741388311 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+	tristate "SKB priority queue scheduler (SKBPRIO)"
+	help
+	  Say Y here if you want to use the SKB priority queue
+	  scheduler. This schedules packets according to skb->priority,
+	  which is useful for request packets in DoS mitigation systems such
+	  as Gatekeeper.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_skbprio.
+
+	  If unsure, say N.
+
 config NET_SCH_CHOKE
 	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 910ec7463a36..f0403f49edcb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)	+= sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000000000000..52c0b6d8f1d7
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,320 @@
+/*
+ * net/sched/sch_skbprio.c SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Nishanth Devarajan, <ndev2021@gmail.com>
+ *          Cody Doucette, <doucette@bu.edu>
+ *          original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+/* SKB Priority Queue
+ * =================================
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was conceived as a solution
+ * for denial-of-service defenses that need to route packets with different
+ * priorities as a means to overcome DoS attacks.
+ */
+
+struct skbprio_sched_data {
+	/* Queue state. */
+	struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+	struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+	u16 highest_prio;
+	u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return 0 (default highest priority setting). */
+	return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+	 * (default lowest priority setting).
+	 */
+	return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			   struct sk_buff **to_free)
+{
+	const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *qdisc;
+	struct sk_buff_head *lp_qdisc;
+	struct sk_buff *to_drop;
+	u16 prio, lp;
+
+	/* Obtain the priority of @skb. */
+	prio = min(skb->priority, max_priority);
+
+	qdisc = &q->qdiscs[prio];
+	if (sch->q.qlen < sch->limit) {
+		__skb_queue_tail(qdisc, skb);
+		qdisc_qstats_backlog_inc(sch, skb);
+		q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+		/* Check to update highest and lowest priorities. */
+		if (prio > q->highest_prio)
+			q->highest_prio = prio;
+
+		if (prio < q->lowest_prio)
+			q->lowest_prio = prio;
+
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* If this packet has the lowest priority, drop it. */
+	lp = q->lowest_prio;
+	if (prio <= lp) {
+		q->qstats[prio].drops++;
+		q->qstats[prio].overlimits++;
+		return qdisc_drop(skb, sch, to_free);
+	}
+
+	__skb_queue_tail(qdisc, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
+	q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+	/* Drop the packet at the tail of the lowest priority qdisc. */
+	lp_qdisc = &q->qdiscs[lp];
+	to_drop = __skb_dequeue_tail(lp_qdisc);
+	BUG_ON(!to_drop);
+	qdisc_qstats_backlog_dec(sch, to_drop);
+	qdisc_drop(to_drop, sch, to_free);
+
+	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+	q->qstats[lp].drops++;
+	q->qstats[lp].overlimits++;
+
+	/* Check to update highest and lowest priorities. */
+	if (skb_queue_empty(lp_qdisc)) {
+		if (q->lowest_prio == q->highest_prio) {
+			/* The incoming packet is the only packet in queue. */
+			BUG_ON(sch->q.qlen != 1);
+			q->lowest_prio = prio;
+			q->highest_prio = prio;
+		} else {
+			q->lowest_prio = calc_new_low_prio(q);
+		}
+	}
+
+	if (prio > q->highest_prio)
+		q->highest_prio = prio;
+
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+	struct sk_buff *skb = __skb_dequeue(hpq);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	sch->q.qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+
+	q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+	/* Update highest priority field. */
+	if (skb_queue_empty(hpq)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->highest_prio = 0;
+			q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+		} else {
+			q->highest_prio = calc_new_high_prio(q);
+		}
+	}
+	return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+			  struct netlink_ext_ack *extack)
+{
+	struct tc_skbprio_qopt *ctl = nla_data(opt);
+
+	sch->limit = ctl->limit;
+	return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	/* Initialise all queues, one for each possible priority. */
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_head_init(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+	sch->limit = 64;
+	if (!opt)
+		return 0;
+
+	return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct tc_skbprio_qopt opt;
+
+	opt.limit = sch->limit;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		return -1;
+
+	return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			      struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				    struct gnet_dump *d)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+				  q->qstats[cl - 1].qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+	.leaf = skbprio_leaf,
+	.find = skbprio_find,
+	.dump = skbprio_dump_class,
+	.dump_stats = skbprio_dump_class_stats,
+	.walk = skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+	.cl_ops = &skbprio_class_ops,
+	.id = "skbprio",
+	.priv_size = sizeof(struct skbprio_sched_data),
+	.enqueue = skbprio_enqueue,
+	.dequeue = skbprio_dequeue,
+	.peek = qdisc_peek_dequeued,
+	.init = skbprio_init,
+	.reset = skbprio_reset,
+	.change = skbprio_change,
+	.dump = skbprio_dump,
+	.destroy = skbprio_destroy,
+	.owner = THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+	return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+	unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");