author		Stephen Hemminger <shemminger@vyatta.com>	2011-02-02 10:21:10 -0500
committer	David S. Miller <davem@davemloft.net>		2011-02-02 23:52:42 -0500
commit		45e144339ac59971eb44be32e1282760aaabe861 (patch)
tree		b9d93acb2c1bb505d3b6facc426d307c59dc63a4 /net
parent		119b3d386985fcd477b3131190c041516a73f83a (diff)
sched: CHOKe flow scheduler
CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
packet scheduler based on the Random Exponential Drop (RED) algorithm.
The core idea is:

  For every packet arrival:
	Calculate Qave
	if (Qave < minth)
		Queue the new packet
	else
		Select randomly a packet from the queue
		if (both packets from same flow)
			then Drop both the packets
		else if (Qave > maxth)
			Drop packet
		else
			Admit packet with probability p (same as RED)
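
For illustration, the same decision as a small, self-contained C
function. The names here (choke_decide, the flow-id arguments, drand48
as the random source) are simplified stand-ins, not the kernel
implementation added by this patch:

	#include <stdlib.h>	/* drand48() */

	enum choke_verdict { CHOKE_ADMIT, CHOKE_DROP_NEW, CHOKE_DROP_BOTH };

	/* One enqueue decision. qavg, minth, maxth and p are RED's average
	 * queue size, thresholds and marking probability; victim_flow is
	 * the flow id of a packet drawn at random from the queue.
	 */
	static enum choke_verdict
	choke_decide(double qavg, double minth, double maxth,
		     unsigned int new_flow, unsigned int victim_flow,
		     double p)
	{
		if (qavg < minth)
			return CHOKE_ADMIT;	/* queue small: always admit */

		if (new_flow == victim_flow)	/* same flow as random victim */
			return CHOKE_DROP_BOTH;	/* penalize the heavy flow */

		if (qavg > maxth)
			return CHOKE_DROP_NEW;	/* hard threshold, as in RED */

		return drand48() < p ? CHOKE_DROP_NEW : CHOKE_ADMIT;
	}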
See also:
Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
queue management scheme for approximating fair bandwidth allocation",
Proceedings of IEEE INFOCOM, March 2000.
Help from:
Eric Dumazet <eric.dumazet@gmail.com>
Patrick McHardy <kaber@trash.net>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
 net/sched/Kconfig     |  11
 net/sched/Makefile    |   2
 net/sched/sch_choke.c | 676
 3 files changed, 689 insertions(+), 0 deletions(-)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e318f458713e..8c19b6e3201e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -217,6 +217,17 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_CHOKE
+	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
+	help
+	  Say Y here if you want to use the CHOKe packet scheduler (CHOose
+	  and Keep for responsive flows, CHOose and Kill for unresponsive
+	  flows). This is a variation of RED which tries to penalize flows
+	  that monopolize the queue.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_choke.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 26ce681a2c60..06c6cdfd1948 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,6 +33,8 @@ obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
+
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 000000000000..a1cec18dd49c
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,676 @@
+/*
+ * net/sched/sch_choke.c	CHOKE scheduler
+ *
+ * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
+ * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/reciprocal_div.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/red.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+/*
+   CHOKe stateless AQM for fair bandwidth allocation
+   =================================================
+
+   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
+   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
+   maintains no flow state. The difference from RED is an additional step
+   during the enqueuing process. If average queue size is over the
+   low threshold (qmin), a packet is chosen at random from the queue.
+   If both the new and chosen packet are from the same flow, both
+   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
+   needs to access packets in queue randomly. It has a minimal class
+   interface to allow overriding the builtin flow classifier with
+   filters.
+
+   Source:
+   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
+   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
+   IEEE INFOCOM, 2000.
+
+   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
+   Characteristics", IEEE/ACM Transactions on Networking, 2004
+
+ */
+
+/* Upper bound on size of sk_buff table (packets) */
+#define CHOKE_MAX_QUEUE	(128*1024 - 1)
+
+struct choke_sched_data {
+/* Parameters */
+	u32		 limit;
+	unsigned char	 flags;
+
+	struct red_parms parms;
+
+/* Variables */
+	struct tcf_proto *filter_list;
+	struct {
+		u32	prob_drop;	/* Early probability drops */
+		u32	prob_mark;	/* Early probability marks */
+		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
+		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
+		u32	pdrop;		/* Drops due to queue limits */
+		u32	other;		/* Drops due to drop() calls */
+		u32	matched;	/* Drops to flow match */
+	} stats;
+
+	unsigned int	 head;
+	unsigned int	 tail;
+
+	unsigned int	 tab_mask; /* size - 1 */
+
+	struct sk_buff **tab;
+};
+
+/* deliver a random number between 0 and N - 1 */
+static u32 random_N(unsigned int N)
+{
+	return reciprocal_divide(random32(), N);
+}
+
+/* number of elements in queue including holes */
+static unsigned int choke_len(const struct choke_sched_data *q)
+{
+	return (q->tail - q->head) & q->tab_mask;
+}
+
+/* Is ECN parameter configured */
+static int use_ecn(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max just be dropped (versus marked) */
+static int use_harddrop(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_HARDDROP;
+}
+
+/* Move head pointer forward to skip over holes */
+static void choke_zap_head_holes(struct choke_sched_data *q)
+{
+	do {
+		q->head = (q->head + 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
+}
+
+/* Move tail pointer backwards to reuse holes */
+static void choke_zap_tail_holes(struct choke_sched_data *q)
+{
+	do {
+		q->tail = (q->tail - 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
+}
+
+/* Drop packet from queue array by creating a "hole" */
+static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = q->tab[idx];
+
+	q->tab[idx] = NULL;
+
+	if (idx == q->head)
+		choke_zap_head_holes(q);
+	if (idx == q->tail)
+		choke_zap_tail_holes(q);
+
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_drop(skb, sch);
+	qdisc_tree_decrease_qlen(sch, 1);
+	--sch->q.qlen;
+}
+
+/*
+ * Compare flow of two packets
+ *  Returns true only if source and destination address and port match.
+ *          false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+			     struct sk_buff *skb2)
+{
+	int off1, off2, poff;
+	const u32 *ports1, *ports2;
+	u8 ip_proto;
+	__u32 hash1;
+
+	if (skb1->protocol != skb2->protocol)
+		return false;
+
+	/* Use hash value as quick check
+	 * Assumes that __skb_get_rxhash makes IP header and ports linear
+	 */
+	hash1 = skb_get_rxhash(skb1);
+	if (!hash1 || hash1 != skb_get_rxhash(skb2))
+		return false;
+
+	/* Probably match, but be sure to avoid hash collisions */
+	off1 = skb_network_offset(skb1);
+	off2 = skb_network_offset(skb2);
+
+	switch (skb1->protocol) {
+	case __constant_htons(ETH_P_IP): {
+		const struct iphdr *ip1, *ip2;
+
+		ip1 = (const struct iphdr *) (skb1->data + off1);
+		ip2 = (const struct iphdr *) (skb2->data + off2);
+
+		ip_proto = ip1->protocol;
+		if (ip_proto != ip2->protocol ||
+		    ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
+			return false;
+
+		if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		off1 += ip1->ihl * 4;
+		off2 += ip2->ihl * 4;
+		break;
+	}
+
+	case __constant_htons(ETH_P_IPV6): {
+		const struct ipv6hdr *ip1, *ip2;
+
+		ip1 = (const struct ipv6hdr *) (skb1->data + off1);
+		ip2 = (const struct ipv6hdr *) (skb2->data + off2);
+
+		ip_proto = ip1->nexthdr;
+		if (ip_proto != ip2->nexthdr ||
+		    ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
+		    ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
+			return false;
+		off1 += 40;
+		off2 += 40;
+	}
+
+	default: /* Maybe compare MAC header here? */
+		return false;
+	}
+
+	poff = proto_ports_offset(ip_proto);
+	if (poff < 0)
+		return true;
+
+	off1 += poff;
+	off2 += poff;
+
+	ports1 = (__force u32 *)(skb1->data + off1);
+	ports2 = (__force u32 *)(skb2->data + off2);
+	return *ports1 == *ports2;
+}
+
+static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
+{
+	*(unsigned int *)(qdisc_skb_cb(skb)->data) = classid;
+}
+
+static u16 choke_get_classid(const struct sk_buff *skb)
+{
+	return *(unsigned int *)(qdisc_skb_cb(skb)->data);
+}
+
+/*
+ * Classify flow using either:
+ *  1. pre-existing classification result in skb
+ *  2. fast internal classification
+ *  3. use TC filter based classification
+ */
+static bool choke_classify(struct sk_buff *skb,
+			   struct Qdisc *sch, int *qerr)
+
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	int result;
+
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return false;
+		}
+#endif
+		choke_set_classid(skb, TC_H_MIN(res.classid));
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Select a packet at random from queue
+ * HACK: since queue can have holes from previous deletion; retry several
+ *   times to find a random skb but then just give up and return the head
+ * Will return NULL if queue is empty (q->head == q->tail)
+ */
+static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
+					 unsigned int *pidx)
+{
+	struct sk_buff *skb;
+	int retrys = 3;
+
+	do {
+		*pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
+		skb = q->tab[*pidx];
+		if (skb)
+			return skb;
+	} while (--retrys > 0);
+
+	return q->tab[*pidx = q->head];
+}
+
+/*
+ * Compare new packet with random packet in queue
+ * returns true if matched and sets *pidx
+ */
+static bool choke_match_random(const struct choke_sched_data *q,
+			       struct sk_buff *nskb,
+			       unsigned int *pidx)
+{
+	struct sk_buff *oskb;
+
+	if (q->head == q->tail)
+		return false;
+
+	oskb = choke_peek_random(q, pidx);
+	if (q->filter_list)
+		return choke_get_classid(nskb) == choke_get_classid(oskb);
+
+	return choke_match_flow(oskb, nskb);
+}
+
+static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct red_parms *p = &q->parms;
+	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+
+	if (q->filter_list) {
+		/* If using external classifiers, get result and record it. */
+		if (!choke_classify(skb, sch, &ret))
+			goto other_drop;	/* Packet was eaten by filter */
+	}
+
+	/* Compute average queue usage (see RED) */
+	p->qavg = red_calc_qavg(p, sch->q.qlen);
+	if (red_is_idling(p))
+		red_end_of_idle_period(p);
+
+	/* Is queue small? */
+	if (p->qavg <= p->qth_min)
+		p->qcount = -1;
+	else {
+		unsigned int idx;
+
+		/* Draw a packet at random from queue and compare flow */
+		if (choke_match_random(q, skb, &idx)) {
+			q->stats.matched++;
+			choke_drop_by_idx(sch, idx);
+			goto congestion_drop;
+		}
+
+		/* Queue is large, always mark/drop */
+		if (p->qavg > p->qth_max) {
+			p->qcount = -1;
+
+			sch->qstats.overlimits++;
+			if (use_harddrop(q) || !use_ecn(q) ||
+			    !INET_ECN_set_ce(skb)) {
+				q->stats.forced_drop++;
+				goto congestion_drop;
+			}
+
+			q->stats.forced_mark++;
+		} else if (++p->qcount) {
+			if (red_mark_probability(p, p->qavg)) {
+				p->qcount = 0;
+				p->qR = red_random(p);
+
+				sch->qstats.overlimits++;
+				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
+					q->stats.prob_drop++;
+					goto congestion_drop;
+				}
+
+				q->stats.prob_mark++;
+			}
+		} else
+			p->qR = red_random(p);
+	}
+
+	/* Admit new packet */
+	if (sch->q.qlen < q->limit) {
+		q->tab[q->tail] = skb;
+		q->tail = (q->tail + 1) & q->tab_mask;
+		++sch->q.qlen;
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	q->stats.pdrop++;
+	sch->qstats.drops++;
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+
+congestion_drop:
+	qdisc_drop(skb, sch);
+	return NET_XMIT_CN;
+
+other_drop:
+	if (ret & __NET_XMIT_BYPASS)
+		sch->qstats.drops++;
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct sk_buff *choke_dequeue(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	if (q->head == q->tail) {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+		return NULL;
+	}
+
+	skb = q->tab[q->head];
+	q->tab[q->head] = NULL;
+	choke_zap_head_holes(q);
+	--sch->q.qlen;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_bstats_update(sch, skb);
+
+	return skb;
+}
+
+static unsigned int choke_drop(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	len = qdisc_queue_drop(sch);
+	if (len > 0)
+		q->stats.other++;
+	else {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+	}
+
+	return len;
+}
+
+static void choke_reset(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	red_restart(&q->parms);
+}
+
+static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
+	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
+	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+};
+
+
+static void choke_free(void *addr)
+{
+	if (addr) {
+		if (is_vmalloc_addr(addr))
+			vfree(addr);
+		else
+			kfree(addr);
+	}
+}
+
+static int choke_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CHOKE_MAX + 1];
+	const struct tc_red_qopt *ctl;
+	int err;
+	struct sk_buff **old = NULL;
+	unsigned int mask;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CHOKE_PARMS] == NULL ||
+	    tb[TCA_CHOKE_STAB] == NULL)
+		return -EINVAL;
+
+	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
+
+	if (ctl->limit > CHOKE_MAX_QUEUE)
+		return -EINVAL;
+
+	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
+	if (mask != q->tab_mask) {
+		struct sk_buff **ntab;
+
+		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
+		if (!ntab)
+			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
+		if (!ntab)
+			return -ENOMEM;
+
+		sch_tree_lock(sch);
+		old = q->tab;
+		if (old) {
+			unsigned int oqlen = sch->q.qlen, tail = 0;
+
+			while (q->head != q->tail) {
+				struct sk_buff *skb = q->tab[q->head];
+
+				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
+			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+			q->head = 0;
+			q->tail = tail;
+		}
+
+		q->tab_mask = mask;
+		q->tab = ntab;
+	} else
+		sch_tree_lock(sch);
+
+	q->flags = ctl->flags;
+	q->limit = ctl->limit;
+
+	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_CHOKE_STAB]));
+
+	if (q->head == q->tail)
+		red_end_of_idle_period(&q->parms);
+
+	sch_tree_unlock(sch);
+	choke_free(old);
+	return 0;
+}
+
+static int choke_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	return choke_change(sch, opt);
+}
+
+static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts = NULL;
+	struct tc_red_qopt opt = {
+		.limit		= q->limit,
+		.flags		= q->flags,
+		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
+		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
+		.Wlog		= q->parms.Wlog,
+		.Plog		= q->parms.Plog,
+		.Scell_log	= q->parms.Scell_log,
+	};
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
+}
+
+static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tc_choke_xstats st = {
+		.early	= q->stats.prob_drop + q->stats.forced_drop,
+		.marked	= q->stats.prob_mark + q->stats.forced_mark,
+		.pdrop	= q->stats.pdrop,
+		.other	= q->stats.other,
+		.matched = q->stats.matched,
+	};
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static void choke_destroy(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	choke_free(q->tab);
+}
+
+static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long choke_get(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static void choke_put(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
+				u32 classid)
+{
+	return 0;
+}
+
+static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
+			    struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	if (!arg->stop) {
+		if (arg->fn(sch, 1, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops choke_class_ops = {
+	.leaf		=	choke_leaf,
+	.get		=	choke_get,
+	.put		=	choke_put,
+	.tcf_chain	=	choke_find_tcf,
+	.bind_tcf	=	choke_bind,
+	.unbind_tcf	=	choke_put,
+	.dump		=	choke_dump_class,
+	.walk		=	choke_walk,
+};
+
+static struct sk_buff *choke_peek_head(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	return (q->head != q->tail) ? q->tab[q->head] : NULL;
+}
+
+static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
+	.id		=	"choke",
+	.priv_size	=	sizeof(struct choke_sched_data),
+
+	.enqueue	=	choke_enqueue,
+	.dequeue	=	choke_dequeue,
+	.peek		=	choke_peek_head,
+	.drop		=	choke_drop,
+	.init		=	choke_init,
+	.destroy	=	choke_destroy,
+	.reset		=	choke_reset,
+	.change		=	choke_change,
+	.dump		=	choke_dump,
+	.dump_stats	=	choke_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init choke_module_init(void)
+{
+	return register_qdisc(&choke_qdisc_ops);
+}
+
+static void __exit choke_module_exit(void)
+{
+	unregister_qdisc(&choke_qdisc_ops);
+}
+
+module_init(choke_module_init)
+module_exit(choke_module_exit)
+
+MODULE_LICENSE("GPL");
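
A rough usage sketch, for context: iproute2 gained a matching tc qdisc
(tc/q_choke.c) whose parameters mirror tc-red's; the exact option set
and the numbers below are illustrative assumptions, not tuned values.
Note the thresholds are packet counts here, since this scheduler
averages sch->q.qlen rather than the byte backlog:

	# Assumes a tc binary with CHOKe support; names follow tc-red.
	tc qdisc add dev eth0 root choke limit 1000 bandwidth 10Mbit \
		min 83 max 250 avpkt 1000 burst 139 probability 0.02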