diff options
| -rw-r--r-- | include/linux/pkt_sched.h | 29 | ||||
| -rw-r--r-- | net/sched/Kconfig | 11 | ||||
| -rw-r--r-- | net/sched/Makefile | 2 | ||||
| -rw-r--r-- | net/sched/sch_choke.c | 676 |
4 files changed, 718 insertions, 0 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 776cd93d5f7b..d4bb6f58c90c 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h | |||
| @@ -247,6 +247,35 @@ struct tc_gred_sopt { | |||
| 247 | __u16 pad1; | 247 | __u16 pad1; |
| 248 | }; | 248 | }; |
| 249 | 249 | ||
| 250 | /* CHOKe section */ | ||
| 251 | |||
| 252 | enum { | ||
| 253 | TCA_CHOKE_UNSPEC, | ||
| 254 | TCA_CHOKE_PARMS, | ||
| 255 | TCA_CHOKE_STAB, | ||
| 256 | __TCA_CHOKE_MAX, | ||
| 257 | }; | ||
| 258 | |||
| 259 | #define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) | ||
| 260 | |||
| 261 | struct tc_choke_qopt { | ||
| 262 | __u32 limit; /* Hard queue length (packets) */ | ||
| 263 | __u32 qth_min; /* Min average threshold (packets) */ | ||
| 264 | __u32 qth_max; /* Max average threshold (packets) */ | ||
| 265 | unsigned char Wlog; /* log(W) */ | ||
| 266 | unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ | ||
| 267 | unsigned char Scell_log; /* cell size for idle damping */ | ||
| 268 | unsigned char flags; /* see RED flags */ | ||
| 269 | }; | ||
| 270 | |||
| 271 | struct tc_choke_xstats { | ||
| 272 | __u32 early; /* Early drops */ | ||
| 273 | __u32 pdrop; /* Drops due to queue limits */ | ||
| 274 | __u32 other; /* Drops due to drop() calls */ | ||
| 275 | __u32 marked; /* Marked packets */ | ||
| 276 | __u32 matched; /* Drops due to flow match */ | ||
| 277 | }; | ||
| 278 | |||
| 250 | /* HTB section */ | 279 | /* HTB section */ |
| 251 | #define TC_HTB_NUMPRIO 8 | 280 | #define TC_HTB_NUMPRIO 8 |
| 252 | #define TC_HTB_MAXDEPTH 8 | 281 | #define TC_HTB_MAXDEPTH 8 |
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e318f458713e..8c19b6e3201e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
| @@ -217,6 +217,17 @@ config NET_SCH_MQPRIO | |||
| 217 | 217 | ||
| 218 | If unsure, say N. | 218 | If unsure, say N. |
| 219 | 219 | ||
| 220 | config NET_SCH_CHOKE | ||
| 221 | tristate "CHOose and Keep responsive flow scheduler (CHOKE)" | ||
| 222 | help | ||
| 223 | Say Y here if you want to use the CHOKe packet scheduler (CHOose | ||
| 224 | and Keep for responsive flows, CHOose and Kill for unresponsive | ||
| 225 | flows). This is a variation of RED which trys to penalize flows | ||
| 226 | that monopolize the queue. | ||
| 227 | |||
| 228 | To compile this code as a module, choose M here: the | ||
| 229 | module will be called sch_choke. | ||
| 230 | |||
| 220 | config NET_SCH_INGRESS | 231 | config NET_SCH_INGRESS |
| 221 | tristate "Ingress Qdisc" | 232 | tristate "Ingress Qdisc" |
| 222 | depends on NET_CLS_ACT | 233 | depends on NET_CLS_ACT |
diff --git a/net/sched/Makefile b/net/sched/Makefile index 26ce681a2c60..06c6cdfd1948 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile | |||
| @@ -33,6 +33,8 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o | |||
| 33 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o | 33 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o |
| 34 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o | 34 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o |
| 35 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o | 35 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o |
| 36 | obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o | ||
| 37 | |||
| 36 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o | 38 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o |
| 37 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o | 39 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o |
| 38 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o | 40 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o |
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c new file mode 100644 index 000000000000..a1cec18dd49c --- /dev/null +++ b/net/sched/sch_choke.c | |||
| @@ -0,0 +1,676 @@ | |||
| 1 | /* | ||
| 2 | * net/sched/sch_choke.c CHOKE scheduler | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com> | ||
| 5 | * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * version 2 as published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/kernel.h> | ||
| 16 | #include <linux/skbuff.h> | ||
| 17 | #include <linux/reciprocal_div.h> | ||
| 18 | #include <net/pkt_sched.h> | ||
| 19 | #include <net/inet_ecn.h> | ||
| 20 | #include <net/red.h> | ||
| 21 | #include <linux/ip.h> | ||
| 22 | #include <net/ip.h> | ||
| 23 | #include <linux/ipv6.h> | ||
| 24 | #include <net/ipv6.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | CHOKe stateless AQM for fair bandwidth allocation | ||
| 28 | ================================================= | ||
| 29 | |||
| 30 | CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for | ||
| 31 | unresponsive flows) is a variant of RED that penalizes misbehaving flows but | ||
| 32 | maintains no flow state. The difference from RED is an additional step | ||
| 33 | during the enqueuing process. If average queue size is over the | ||
| 34 | low threshold (qmin), a packet is chosen at random from the queue. | ||
| 35 | If both the new and chosen packet are from the same flow, both | ||
| 36 | are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it | ||
| 37 | needs to access packets in queue randomly. It has a minimal class | ||
| 38 | interface to allow overriding the builtin flow classifier with | ||
| 39 | filters. | ||
| 40 | |||
| 41 | Source: | ||
| 42 | R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless | ||
| 43 | Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", | ||
| 44 | IEEE INFOCOM, 2000. | ||
| 45 | |||
| 46 | A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial | ||
| 47 | Characteristics", IEEE/ACM Transactions on Networking, 2004 | ||
| 48 | |||
| 49 | */ | ||
| 50 | |||
| 51 | /* Upper bound on size of sk_buff table (packets) */ | ||
| 52 | #define CHOKE_MAX_QUEUE (128*1024 - 1) | ||
| 53 | |||
| 54 | struct choke_sched_data { | ||
| 55 | /* Parameters */ | ||
| 56 | u32 limit; | ||
| 57 | unsigned char flags; | ||
| 58 | |||
| 59 | struct red_parms parms; | ||
| 60 | |||
| 61 | /* Variables */ | ||
| 62 | struct tcf_proto *filter_list; | ||
| 63 | struct { | ||
| 64 | u32 prob_drop; /* Early probability drops */ | ||
| 65 | u32 prob_mark; /* Early probability marks */ | ||
| 66 | u32 forced_drop; /* Forced drops, qavg > max_thresh */ | ||
| 67 | u32 forced_mark; /* Forced marks, qavg > max_thresh */ | ||
| 68 | u32 pdrop; /* Drops due to queue limits */ | ||
| 69 | u32 other; /* Drops due to drop() calls */ | ||
| 70 | u32 matched; /* Drops to flow match */ | ||
| 71 | } stats; | ||
| 72 | |||
| 73 | unsigned int head; | ||
| 74 | unsigned int tail; | ||
| 75 | |||
| 76 | unsigned int tab_mask; /* size - 1 */ | ||
| 77 | |||
| 78 | struct sk_buff **tab; | ||
| 79 | }; | ||
| 80 | |||
| 81 | /* deliver a random number between 0 and N - 1 */ | ||
| 82 | static u32 random_N(unsigned int N) | ||
| 83 | { | ||
| 84 | return reciprocal_divide(random32(), N); | ||
| 85 | } | ||
| 86 | |||
| 87 | /* number of elements in queue including holes */ | ||
| 88 | static unsigned int choke_len(const struct choke_sched_data *q) | ||
| 89 | { | ||
| 90 | return (q->tail - q->head) & q->tab_mask; | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Is ECN parameter configured */ | ||
| 94 | static int use_ecn(const struct choke_sched_data *q) | ||
| 95 | { | ||
| 96 | return q->flags & TC_RED_ECN; | ||
| 97 | } | ||
| 98 | |||
| 99 | /* Should packets over max just be dropped (versus marked) */ | ||
| 100 | static int use_harddrop(const struct choke_sched_data *q) | ||
| 101 | { | ||
| 102 | return q->flags & TC_RED_HARDDROP; | ||
| 103 | } | ||
| 104 | |||
| 105 | /* Move head pointer forward to skip over holes */ | ||
| 106 | static void choke_zap_head_holes(struct choke_sched_data *q) | ||
| 107 | { | ||
| 108 | do { | ||
| 109 | q->head = (q->head + 1) & q->tab_mask; | ||
| 110 | if (q->head == q->tail) | ||
| 111 | break; | ||
| 112 | } while (q->tab[q->head] == NULL); | ||
| 113 | } | ||
| 114 | |||
| 115 | /* Move tail pointer backwards to reuse holes */ | ||
| 116 | static void choke_zap_tail_holes(struct choke_sched_data *q) | ||
| 117 | { | ||
| 118 | do { | ||
| 119 | q->tail = (q->tail - 1) & q->tab_mask; | ||
| 120 | if (q->head == q->tail) | ||
| 121 | break; | ||
| 122 | } while (q->tab[q->tail] == NULL); | ||
| 123 | } | ||
| 124 | |||
| 125 | /* Drop packet from queue array by creating a "hole" */ | ||
| 126 | static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) | ||
| 127 | { | ||
| 128 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 129 | struct sk_buff *skb = q->tab[idx]; | ||
| 130 | |||
| 131 | q->tab[idx] = NULL; | ||
| 132 | |||
| 133 | if (idx == q->head) | ||
| 134 | choke_zap_head_holes(q); | ||
| 135 | if (idx == q->tail) | ||
| 136 | choke_zap_tail_holes(q); | ||
| 137 | |||
| 138 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
| 139 | qdisc_drop(skb, sch); | ||
| 140 | qdisc_tree_decrease_qlen(sch, 1); | ||
| 141 | --sch->q.qlen; | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | ||
| 145 | * Compare flow of two packets | ||
| 146 | * Returns true only if source and destination address and port match. | ||
| 147 | * false for special cases | ||
| 148 | */ | ||
| 149 | static bool choke_match_flow(struct sk_buff *skb1, | ||
| 150 | struct sk_buff *skb2) | ||
| 151 | { | ||
| 152 | int off1, off2, poff; | ||
| 153 | const u32 *ports1, *ports2; | ||
| 154 | u8 ip_proto; | ||
| 155 | __u32 hash1; | ||
| 156 | |||
| 157 | if (skb1->protocol != skb2->protocol) | ||
| 158 | return false; | ||
| 159 | |||
| 160 | /* Use hash value as quick check | ||
| 161 | * Assumes that __skb_get_rxhash makes IP header and ports linear | ||
| 162 | */ | ||
| 163 | hash1 = skb_get_rxhash(skb1); | ||
| 164 | if (!hash1 || hash1 != skb_get_rxhash(skb2)) | ||
| 165 | return false; | ||
| 166 | |||
| 167 | /* Probably match, but be sure to avoid hash collisions */ | ||
| 168 | off1 = skb_network_offset(skb1); | ||
| 169 | off2 = skb_network_offset(skb2); | ||
| 170 | |||
| 171 | switch (skb1->protocol) { | ||
| 172 | case __constant_htons(ETH_P_IP): { | ||
| 173 | const struct iphdr *ip1, *ip2; | ||
| 174 | |||
| 175 | ip1 = (const struct iphdr *) (skb1->data + off1); | ||
| 176 | ip2 = (const struct iphdr *) (skb2->data + off2); | ||
| 177 | |||
| 178 | ip_proto = ip1->protocol; | ||
| 179 | if (ip_proto != ip2->protocol || | ||
| 180 | ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) | ||
| 181 | return false; | ||
| 182 | |||
| 183 | if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) | ||
| 184 | ip_proto = 0; | ||
| 185 | off1 += ip1->ihl * 4; | ||
| 186 | off2 += ip2->ihl * 4; | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | |||
| 190 | case __constant_htons(ETH_P_IPV6): { | ||
| 191 | const struct ipv6hdr *ip1, *ip2; | ||
| 192 | |||
| 193 | ip1 = (const struct ipv6hdr *) (skb1->data + off1); | ||
| 194 | ip2 = (const struct ipv6hdr *) (skb2->data + off2); | ||
| 195 | |||
| 196 | ip_proto = ip1->nexthdr; | ||
| 197 | if (ip_proto != ip2->nexthdr || | ||
| 198 | ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || | ||
| 199 | ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) | ||
| 200 | return false; | ||
| 201 | off1 += 40; | ||
| 202 | off2 += 40; | ||
| 203 | } | ||
| 204 | |||
| 205 | default: /* Maybe compare MAC header here? */ | ||
| 206 | return false; | ||
| 207 | } | ||
| 208 | |||
| 209 | poff = proto_ports_offset(ip_proto); | ||
| 210 | if (poff < 0) | ||
| 211 | return true; | ||
| 212 | |||
| 213 | off1 += poff; | ||
| 214 | off2 += poff; | ||
| 215 | |||
| 216 | ports1 = (__force u32 *)(skb1->data + off1); | ||
| 217 | ports2 = (__force u32 *)(skb2->data + off2); | ||
| 218 | return *ports1 == *ports2; | ||
| 219 | } | ||
| 220 | |||
| 221 | static inline void choke_set_classid(struct sk_buff *skb, u16 classid) | ||
| 222 | { | ||
| 223 | *(unsigned int *)(qdisc_skb_cb(skb)->data) = classid; | ||
| 224 | } | ||
| 225 | |||
| 226 | static u16 choke_get_classid(const struct sk_buff *skb) | ||
| 227 | { | ||
| 228 | return *(unsigned int *)(qdisc_skb_cb(skb)->data); | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Classify flow using either: | ||
| 233 | * 1. pre-existing classification result in skb | ||
| 234 | * 2. fast internal classification | ||
| 235 | * 3. use TC filter based classification | ||
| 236 | */ | ||
| 237 | static bool choke_classify(struct sk_buff *skb, | ||
| 238 | struct Qdisc *sch, int *qerr) | ||
| 239 | |||
| 240 | { | ||
| 241 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 242 | struct tcf_result res; | ||
| 243 | int result; | ||
| 244 | |||
| 245 | result = tc_classify(skb, q->filter_list, &res); | ||
| 246 | if (result >= 0) { | ||
| 247 | #ifdef CONFIG_NET_CLS_ACT | ||
| 248 | switch (result) { | ||
| 249 | case TC_ACT_STOLEN: | ||
| 250 | case TC_ACT_QUEUED: | ||
| 251 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | ||
| 252 | case TC_ACT_SHOT: | ||
| 253 | return false; | ||
| 254 | } | ||
| 255 | #endif | ||
| 256 | choke_set_classid(skb, TC_H_MIN(res.classid)); | ||
| 257 | return true; | ||
| 258 | } | ||
| 259 | |||
| 260 | return false; | ||
| 261 | } | ||
| 262 | |||
| 263 | /* | ||
| 264 | * Select a packet at random from queue | ||
| 265 | * HACK: since queue can have holes from previous deletion; retry several | ||
| 266 | * times to find a random skb but then just give up and return the head | ||
| 267 | * Will return NULL if queue is empty (q->head == q->tail) | ||
| 268 | */ | ||
| 269 | static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, | ||
| 270 | unsigned int *pidx) | ||
| 271 | { | ||
| 272 | struct sk_buff *skb; | ||
| 273 | int retrys = 3; | ||
| 274 | |||
| 275 | do { | ||
| 276 | *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; | ||
| 277 | skb = q->tab[*pidx]; | ||
| 278 | if (skb) | ||
| 279 | return skb; | ||
| 280 | } while (--retrys > 0); | ||
| 281 | |||
| 282 | return q->tab[*pidx = q->head]; | ||
| 283 | } | ||
| 284 | |||
| 285 | /* | ||
| 286 | * Compare new packet with random packet in queue | ||
| 287 | * returns true if matched and sets *pidx | ||
| 288 | */ | ||
| 289 | static bool choke_match_random(const struct choke_sched_data *q, | ||
| 290 | struct sk_buff *nskb, | ||
| 291 | unsigned int *pidx) | ||
| 292 | { | ||
| 293 | struct sk_buff *oskb; | ||
| 294 | |||
| 295 | if (q->head == q->tail) | ||
| 296 | return false; | ||
| 297 | |||
| 298 | oskb = choke_peek_random(q, pidx); | ||
| 299 | if (q->filter_list) | ||
| 300 | return choke_get_classid(nskb) == choke_get_classid(oskb); | ||
| 301 | |||
| 302 | return choke_match_flow(oskb, nskb); | ||
| 303 | } | ||
| 304 | |||
| 305 | static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
| 306 | { | ||
| 307 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 308 | struct red_parms *p = &q->parms; | ||
| 309 | int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | ||
| 310 | |||
| 311 | if (q->filter_list) { | ||
| 312 | /* If using external classifiers, get result and record it. */ | ||
| 313 | if (!choke_classify(skb, sch, &ret)) | ||
| 314 | goto other_drop; /* Packet was eaten by filter */ | ||
| 315 | } | ||
| 316 | |||
| 317 | /* Compute average queue usage (see RED) */ | ||
| 318 | p->qavg = red_calc_qavg(p, sch->q.qlen); | ||
| 319 | if (red_is_idling(p)) | ||
| 320 | red_end_of_idle_period(p); | ||
| 321 | |||
| 322 | /* Is queue small? */ | ||
| 323 | if (p->qavg <= p->qth_min) | ||
| 324 | p->qcount = -1; | ||
| 325 | else { | ||
| 326 | unsigned int idx; | ||
| 327 | |||
| 328 | /* Draw a packet at random from queue and compare flow */ | ||
| 329 | if (choke_match_random(q, skb, &idx)) { | ||
| 330 | q->stats.matched++; | ||
| 331 | choke_drop_by_idx(sch, idx); | ||
| 332 | goto congestion_drop; | ||
| 333 | } | ||
| 334 | |||
| 335 | /* Queue is large, always mark/drop */ | ||
| 336 | if (p->qavg > p->qth_max) { | ||
| 337 | p->qcount = -1; | ||
| 338 | |||
| 339 | sch->qstats.overlimits++; | ||
| 340 | if (use_harddrop(q) || !use_ecn(q) || | ||
| 341 | !INET_ECN_set_ce(skb)) { | ||
| 342 | q->stats.forced_drop++; | ||
| 343 | goto congestion_drop; | ||
| 344 | } | ||
| 345 | |||
| 346 | q->stats.forced_mark++; | ||
| 347 | } else if (++p->qcount) { | ||
| 348 | if (red_mark_probability(p, p->qavg)) { | ||
| 349 | p->qcount = 0; | ||
| 350 | p->qR = red_random(p); | ||
| 351 | |||
| 352 | sch->qstats.overlimits++; | ||
| 353 | if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { | ||
| 354 | q->stats.prob_drop++; | ||
| 355 | goto congestion_drop; | ||
| 356 | } | ||
| 357 | |||
| 358 | q->stats.prob_mark++; | ||
| 359 | } | ||
| 360 | } else | ||
| 361 | p->qR = red_random(p); | ||
| 362 | } | ||
| 363 | |||
| 364 | /* Admit new packet */ | ||
| 365 | if (sch->q.qlen < q->limit) { | ||
| 366 | q->tab[q->tail] = skb; | ||
| 367 | q->tail = (q->tail + 1) & q->tab_mask; | ||
| 368 | ++sch->q.qlen; | ||
| 369 | sch->qstats.backlog += qdisc_pkt_len(skb); | ||
| 370 | return NET_XMIT_SUCCESS; | ||
| 371 | } | ||
| 372 | |||
| 373 | q->stats.pdrop++; | ||
| 374 | sch->qstats.drops++; | ||
| 375 | kfree_skb(skb); | ||
| 376 | return NET_XMIT_DROP; | ||
| 377 | |||
| 378 | congestion_drop: | ||
| 379 | qdisc_drop(skb, sch); | ||
| 380 | return NET_XMIT_CN; | ||
| 381 | |||
| 382 | other_drop: | ||
| 383 | if (ret & __NET_XMIT_BYPASS) | ||
| 384 | sch->qstats.drops++; | ||
| 385 | kfree_skb(skb); | ||
| 386 | return ret; | ||
| 387 | } | ||
| 388 | |||
| 389 | static struct sk_buff *choke_dequeue(struct Qdisc *sch) | ||
| 390 | { | ||
| 391 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 392 | struct sk_buff *skb; | ||
| 393 | |||
| 394 | if (q->head == q->tail) { | ||
| 395 | if (!red_is_idling(&q->parms)) | ||
| 396 | red_start_of_idle_period(&q->parms); | ||
| 397 | return NULL; | ||
| 398 | } | ||
| 399 | |||
| 400 | skb = q->tab[q->head]; | ||
| 401 | q->tab[q->head] = NULL; | ||
| 402 | choke_zap_head_holes(q); | ||
| 403 | --sch->q.qlen; | ||
| 404 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
| 405 | qdisc_bstats_update(sch, skb); | ||
| 406 | |||
| 407 | return skb; | ||
| 408 | } | ||
| 409 | |||
| 410 | static unsigned int choke_drop(struct Qdisc *sch) | ||
| 411 | { | ||
| 412 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 413 | unsigned int len; | ||
| 414 | |||
| 415 | len = qdisc_queue_drop(sch); | ||
| 416 | if (len > 0) | ||
| 417 | q->stats.other++; | ||
| 418 | else { | ||
| 419 | if (!red_is_idling(&q->parms)) | ||
| 420 | red_start_of_idle_period(&q->parms); | ||
| 421 | } | ||
| 422 | |||
| 423 | return len; | ||
| 424 | } | ||
| 425 | |||
| 426 | static void choke_reset(struct Qdisc *sch) | ||
| 427 | { | ||
| 428 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 429 | |||
| 430 | red_restart(&q->parms); | ||
| 431 | } | ||
| 432 | |||
| 433 | static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { | ||
| 434 | [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, | ||
| 435 | [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, | ||
| 436 | }; | ||
| 437 | |||
| 438 | |||
| 439 | static void choke_free(void *addr) | ||
| 440 | { | ||
| 441 | if (addr) { | ||
| 442 | if (is_vmalloc_addr(addr)) | ||
| 443 | vfree(addr); | ||
| 444 | else | ||
| 445 | kfree(addr); | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | static int choke_change(struct Qdisc *sch, struct nlattr *opt) | ||
| 450 | { | ||
| 451 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 452 | struct nlattr *tb[TCA_CHOKE_MAX + 1]; | ||
| 453 | const struct tc_red_qopt *ctl; | ||
| 454 | int err; | ||
| 455 | struct sk_buff **old = NULL; | ||
| 456 | unsigned int mask; | ||
| 457 | |||
| 458 | if (opt == NULL) | ||
| 459 | return -EINVAL; | ||
| 460 | |||
| 461 | err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); | ||
| 462 | if (err < 0) | ||
| 463 | return err; | ||
| 464 | |||
| 465 | if (tb[TCA_CHOKE_PARMS] == NULL || | ||
| 466 | tb[TCA_CHOKE_STAB] == NULL) | ||
| 467 | return -EINVAL; | ||
| 468 | |||
| 469 | ctl = nla_data(tb[TCA_CHOKE_PARMS]); | ||
| 470 | |||
| 471 | if (ctl->limit > CHOKE_MAX_QUEUE) | ||
| 472 | return -EINVAL; | ||
| 473 | |||
| 474 | mask = roundup_pow_of_two(ctl->limit + 1) - 1; | ||
| 475 | if (mask != q->tab_mask) { | ||
| 476 | struct sk_buff **ntab; | ||
| 477 | |||
| 478 | ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); | ||
| 479 | if (!ntab) | ||
| 480 | ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); | ||
| 481 | if (!ntab) | ||
| 482 | return -ENOMEM; | ||
| 483 | |||
| 484 | sch_tree_lock(sch); | ||
| 485 | old = q->tab; | ||
| 486 | if (old) { | ||
| 487 | unsigned int oqlen = sch->q.qlen, tail = 0; | ||
| 488 | |||
| 489 | while (q->head != q->tail) { | ||
| 490 | struct sk_buff *skb = q->tab[q->head]; | ||
| 491 | |||
| 492 | q->head = (q->head + 1) & q->tab_mask; | ||
| 493 | if (!skb) | ||
| 494 | continue; | ||
| 495 | if (tail < mask) { | ||
| 496 | ntab[tail++] = skb; | ||
| 497 | continue; | ||
| 498 | } | ||
| 499 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
| 500 | --sch->q.qlen; | ||
| 501 | qdisc_drop(skb, sch); | ||
| 502 | } | ||
| 503 | qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); | ||
| 504 | q->head = 0; | ||
| 505 | q->tail = tail; | ||
| 506 | } | ||
| 507 | |||
| 508 | q->tab_mask = mask; | ||
| 509 | q->tab = ntab; | ||
| 510 | } else | ||
| 511 | sch_tree_lock(sch); | ||
| 512 | |||
| 513 | q->flags = ctl->flags; | ||
| 514 | q->limit = ctl->limit; | ||
| 515 | |||
| 516 | red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, | ||
| 517 | ctl->Plog, ctl->Scell_log, | ||
| 518 | nla_data(tb[TCA_CHOKE_STAB])); | ||
| 519 | |||
| 520 | if (q->head == q->tail) | ||
| 521 | red_end_of_idle_period(&q->parms); | ||
| 522 | |||
| 523 | sch_tree_unlock(sch); | ||
| 524 | choke_free(old); | ||
| 525 | return 0; | ||
| 526 | } | ||
| 527 | |||
| 528 | static int choke_init(struct Qdisc *sch, struct nlattr *opt) | ||
| 529 | { | ||
| 530 | return choke_change(sch, opt); | ||
| 531 | } | ||
| 532 | |||
| 533 | static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
| 534 | { | ||
| 535 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 536 | struct nlattr *opts = NULL; | ||
| 537 | struct tc_red_qopt opt = { | ||
| 538 | .limit = q->limit, | ||
| 539 | .flags = q->flags, | ||
| 540 | .qth_min = q->parms.qth_min >> q->parms.Wlog, | ||
| 541 | .qth_max = q->parms.qth_max >> q->parms.Wlog, | ||
| 542 | .Wlog = q->parms.Wlog, | ||
| 543 | .Plog = q->parms.Plog, | ||
| 544 | .Scell_log = q->parms.Scell_log, | ||
| 545 | }; | ||
| 546 | |||
| 547 | opts = nla_nest_start(skb, TCA_OPTIONS); | ||
| 548 | if (opts == NULL) | ||
| 549 | goto nla_put_failure; | ||
| 550 | |||
| 551 | NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); | ||
| 552 | return nla_nest_end(skb, opts); | ||
| 553 | |||
| 554 | nla_put_failure: | ||
| 555 | nla_nest_cancel(skb, opts); | ||
| 556 | return -EMSGSIZE; | ||
| 557 | } | ||
| 558 | |||
| 559 | static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | ||
| 560 | { | ||
| 561 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 562 | struct tc_choke_xstats st = { | ||
| 563 | .early = q->stats.prob_drop + q->stats.forced_drop, | ||
| 564 | .marked = q->stats.prob_mark + q->stats.forced_mark, | ||
| 565 | .pdrop = q->stats.pdrop, | ||
| 566 | .other = q->stats.other, | ||
| 567 | .matched = q->stats.matched, | ||
| 568 | }; | ||
| 569 | |||
| 570 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
| 571 | } | ||
| 572 | |||
| 573 | static void choke_destroy(struct Qdisc *sch) | ||
| 574 | { | ||
| 575 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 576 | |||
| 577 | tcf_destroy_chain(&q->filter_list); | ||
| 578 | choke_free(q->tab); | ||
| 579 | } | ||
| 580 | |||
| 581 | static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) | ||
| 582 | { | ||
| 583 | return NULL; | ||
| 584 | } | ||
| 585 | |||
| 586 | static unsigned long choke_get(struct Qdisc *sch, u32 classid) | ||
| 587 | { | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | |||
| 591 | static void choke_put(struct Qdisc *q, unsigned long cl) | ||
| 592 | { | ||
| 593 | } | ||
| 594 | |||
| 595 | static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, | ||
| 596 | u32 classid) | ||
| 597 | { | ||
| 598 | return 0; | ||
| 599 | } | ||
| 600 | |||
| 601 | static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) | ||
| 602 | { | ||
| 603 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 604 | |||
| 605 | if (cl) | ||
| 606 | return NULL; | ||
| 607 | return &q->filter_list; | ||
| 608 | } | ||
| 609 | |||
| 610 | static int choke_dump_class(struct Qdisc *sch, unsigned long cl, | ||
| 611 | struct sk_buff *skb, struct tcmsg *tcm) | ||
| 612 | { | ||
| 613 | tcm->tcm_handle |= TC_H_MIN(cl); | ||
| 614 | return 0; | ||
| 615 | } | ||
| 616 | |||
| 617 | static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
| 618 | { | ||
| 619 | if (!arg->stop) { | ||
| 620 | if (arg->fn(sch, 1, arg) < 0) { | ||
| 621 | arg->stop = 1; | ||
| 622 | return; | ||
| 623 | } | ||
| 624 | arg->count++; | ||
| 625 | } | ||
| 626 | } | ||
| 627 | |||
| 628 | static const struct Qdisc_class_ops choke_class_ops = { | ||
| 629 | .leaf = choke_leaf, | ||
| 630 | .get = choke_get, | ||
| 631 | .put = choke_put, | ||
| 632 | .tcf_chain = choke_find_tcf, | ||
| 633 | .bind_tcf = choke_bind, | ||
| 634 | .unbind_tcf = choke_put, | ||
| 635 | .dump = choke_dump_class, | ||
| 636 | .walk = choke_walk, | ||
| 637 | }; | ||
| 638 | |||
| 639 | static struct sk_buff *choke_peek_head(struct Qdisc *sch) | ||
| 640 | { | ||
| 641 | struct choke_sched_data *q = qdisc_priv(sch); | ||
| 642 | |||
| 643 | return (q->head != q->tail) ? q->tab[q->head] : NULL; | ||
| 644 | } | ||
| 645 | |||
| 646 | static struct Qdisc_ops choke_qdisc_ops __read_mostly = { | ||
| 647 | .id = "choke", | ||
| 648 | .priv_size = sizeof(struct choke_sched_data), | ||
| 649 | |||
| 650 | .enqueue = choke_enqueue, | ||
| 651 | .dequeue = choke_dequeue, | ||
| 652 | .peek = choke_peek_head, | ||
| 653 | .drop = choke_drop, | ||
| 654 | .init = choke_init, | ||
| 655 | .destroy = choke_destroy, | ||
| 656 | .reset = choke_reset, | ||
| 657 | .change = choke_change, | ||
| 658 | .dump = choke_dump, | ||
| 659 | .dump_stats = choke_dump_stats, | ||
| 660 | .owner = THIS_MODULE, | ||
| 661 | }; | ||
| 662 | |||
| 663 | static int __init choke_module_init(void) | ||
| 664 | { | ||
| 665 | return register_qdisc(&choke_qdisc_ops); | ||
| 666 | } | ||
| 667 | |||
| 668 | static void __exit choke_module_exit(void) | ||
| 669 | { | ||
| 670 | unregister_qdisc(&choke_qdisc_ops); | ||
| 671 | } | ||
| 672 | |||
| 673 | module_init(choke_module_init) | ||
| 674 | module_exit(choke_module_exit) | ||
| 675 | |||
| 676 | MODULE_LICENSE("GPL"); | ||
