Diffstat (limited to 'net/sched/sch_netem.c')
-rw-r--r--	net/sched/sch_netem.c	209
1 file changed, 96 insertions(+), 113 deletions(-)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e0c9fbe73b15..bb9bf8d5003c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
 
 struct netem_sched_data {
 	struct Qdisc *qdisc;
-	struct sk_buff_head delayed;
 	struct timer_list timer;
 
 	u32 latency;
@@ -63,11 +62,12 @@ struct netem_sched_data {
 	u32 gap;
 	u32 jitter;
 	u32 duplicate;
+	u32 reorder;
 
 	struct crndstate {
 		unsigned long last;
 		unsigned long rho;
-	} delay_cor, loss_cor, dup_cor;
+	} delay_cor, loss_cor, dup_cor, reorder_cor;
 
 	struct disttable {
 		u32 size;
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma,
 	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
-/* Put skb in the private delayed queue. */
-static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	psched_tdiff_t td;
-	psched_time_t now;
-
-	PSCHED_GET_TIME(now);
-	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-
-	/* Always queue at tail to keep packets in order */
-	if (likely(q->delayed.qlen < q->limit)) {
-		struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
-
-		PSCHED_TADD2(now, td, cb->time_to_send);
-
-		pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
-			 now, cb->time_to_send);
-
-		__skb_queue_tail(&q->delayed, skb);
-		return NET_XMIT_SUCCESS;
-	}
-
-	pr_debug("netem_delay: queue over limit %d\n", q->limit);
-	sch->qstats.overlimits++;
-	kfree_skb(skb);
-	return NET_XMIT_DROP;
-}
-
 /*
- * Move a packet that is ready to send from the delay holding
- * list to the underlying qdisc.
+ * Insert one skb into qdisc.
+ * Note: parent depends on return value to account for queue length.
+ *	NET_XMIT_DROP: queue length didn't change.
+ *	NET_XMIT_SUCCESS: one skb was queued.
  */
-static int netem_run(struct Qdisc *sch)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	struct sk_buff *skb;
-	psched_time_t now;
-
-	PSCHED_GET_TIME(now);
-
-	skb = skb_peek(&q->delayed);
-	if (skb) {
-		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
-		long delay
-			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
-		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-
-		/* if more time remaining? */
-		if (delay > 0) {
-			mod_timer(&q->timer, jiffies + delay);
-			return 1;
-		}
-
-		__skb_unlink(skb, &q->delayed);
-
-		if (q->qdisc->enqueue(skb, q->qdisc)) {
-			sch->q.qlen--;
-			sch->qstats.drops++;
-		}
-	}
-
-	return 0;
-}
-
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	struct sk_buff *skb2;
 	int ret;
+	int count = 1;
 
 	pr_debug("netem_enqueue skb=%p\n", skb);
 
+	/* Random duplication */
+	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+		++count;
+
 	/* Random packet drop 0 => none, ~0 => all */
-	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
-		pr_debug("netem_enqueue: random loss\n");
+	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+		--count;
+
+	if (count == 0) {
 		sch->qstats.drops++;
 		kfree_skb(skb);
-		return 0;	/* lie about loss so TCP doesn't know */
+		return NET_XMIT_DROP;
 	}
 
-	/* Random duplication */
-	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_clone(skb, GFP_ATOMIC);
-		if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
-			struct Qdisc *qp;
-
-			/* Since one packet can generate two packets in the
-			 * queue, the parent's qlen accounting gets confused,
-			 * so fix it.
-			 */
-			qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
-			if (qp)
-				qp->q.qlen++;
-
-			sch->q.qlen++;
-			sch->bstats.bytes += skb2->len;
-			sch->bstats.packets++;
-		} else
-			sch->qstats.drops++;
+	/*
+	 * If we need to duplicate packet, then re-insert at top of the
+	 * qdisc tree, since parent queuer expects that only one
+	 * skb will be queued.
+	 */
+	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+		struct Qdisc *rootq = sch->dev->qdisc;
+		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
+		q->duplicate = 0;
+
+		rootq->enqueue(skb2, rootq);
+		q->duplicate = dupsave;
 	}
 
-	/* If doing simple delay then gap == 0 so all packets
-	 * go into the delayed holding queue
-	 * otherwise if doing out of order only "1 out of gap"
-	 * packets will be delayed.
-	 */
-	if (q->counter < q->gap) {
+	if (q->gap == 0			/* not doing reordering */
+	    || q->counter < q->gap	/* inside last reordering gap */
+	    || q->reorder < get_crandom(&q->reorder_cor)) {
+		psched_time_t now;
+		PSCHED_GET_TIME(now);
+		PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
+					    &q->delay_cor, q->delay_dist),
+			     cb->time_to_send);
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
+		/*
+		 * Do re-ordering by putting one out of N packets at the front
+		 * of the queue.
+		 */
+		PSCHED_GET_TIME(cb->time_to_send);
 		q->counter = 0;
-		ret = netem_delay(sch, skb);
-		netem_run(sch);
+		ret = q->qdisc->ops->requeue(skb, q->qdisc);
 	}
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	int pending;
-
-	pending = netem_run(sch);
 
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
-		pr_debug("netem_dequeue: return skb=%p\n", skb);
-		sch->q.qlen--;
-		sch->flags &= ~TCQ_F_THROTTLED;
-	}
-	else if (pending) {
-		pr_debug("netem_dequeue: throttling\n");
+		const struct netem_skb_cb *cb
+			= (const struct netem_skb_cb *)skb->cb;
+		psched_time_t now;
+		long delay;
+
+		/* if more time remaining? */
+		PSCHED_GET_TIME(now);
+		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
+		if (delay <= 0) {
+			pr_debug("netem_dequeue: return skb=%p\n", skb);
+			sch->q.qlen--;
+			sch->flags &= ~TCQ_F_THROTTLED;
+			return skb;
+		}
+
+		mod_timer(&q->timer, jiffies + delay);
 		sch->flags |= TCQ_F_THROTTLED;
-	}
 
-	return skb;
+		if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
+			sch->qstats.drops++;
+	}
+
+	return NULL;
 }
 
 static void netem_watchdog(unsigned long arg)
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset(q->qdisc);
-	skb_queue_purge(&q->delayed);
-
 	sch->q.qlen = 0;
 	sch->flags &= ~TCQ_F_THROTTLED;
 	del_timer_sync(&q->timer);
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
+static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_reorder *r = RTA_DATA(attr);
+
+	if (RTA_PAYLOAD(attr) != sizeof(*r))
+		return -EINVAL;
+
+	q->reorder = r->probability;
+	init_crandom(&q->reorder_cor, r->correlation);
+	return 0;
+}
+
 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 	q->jitter = qopt->jitter;
 	q->limit = qopt->limit;
 	q->gap = qopt->gap;
+	q->counter = 0;
 	q->loss = qopt->loss;
 	q->duplicate = qopt->duplicate;
 
+	/* for compatiablity with earlier versions.
+	 * if gap is set, need to assume 100% probablity
+	 */
+	q->reorder = ~0;
+
 	/* Handle nested options after initial queue options.
 	 * Should have put all options in nested format but too late now.
 	 */
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 			if (ret)
 				return ret;
 		}
+		if (tb[TCA_NETEM_REORDER-1]) {
+			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
+			if (ret)
+				return ret;
+		}
 	}
 
 
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	skb_queue_head_init(&q->delayed);
 	init_timer(&q->timer);
 	q->timer.function = netem_watchdog;
 	q->timer.data = (unsigned long) sch;
-	q->counter = 0;
 
 	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
 	if (!q->qdisc) {
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct rtattr *rta = (struct rtattr *) b;
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
+	struct tc_netem_reorder reorder;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	cor.loss_corr = q->loss_cor.rho;
 	cor.dup_corr = q->dup_cor.rho;
 	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+
+	reorder.probability = q->reorder;
+	reorder.correlation = q->reorder_cor.rho;
+	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+
 	rta->rta_len = skb->tail - b;
 
 	return skb->len;
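
For readers following the logic rather than the patch mechanics: below is a minimal userspace sketch (not the kernel code) of the enqueue decision this patch introduces. A packet is normally stamped with latency plus jitter and queued in order; once the gap counter is exhausted and the reorder probability fires, the packet is instead sent immediately (the kernel does this via q->qdisc->ops->requeue() at the head of the inner qdisc), which is what produces out-of-order delivery. The helper rand_prob() and the plain modulo jitter are simplified stand-ins for the kernel's get_crandom()/tabledist(); the correlation state is omitted.

/*
 * Minimal sketch of the reordering decision added to netem_enqueue().
 * Not the kernel implementation: rand_prob() replaces get_crandom(),
 * the jitter draw replaces tabledist(), and correlation is ignored.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct netem_state {
	uint32_t gap;       /* re-order one packet out of every "gap" */
	uint32_t reorder;   /* reorder probability scaled to 0..UINT32_MAX */
	uint32_t counter;   /* packets sent since the last re-ordered one */
	uint32_t latency;   /* base delay, arbitrary units */
	uint32_t jitter;    /* random variation added to the delay */
};

/* Rough uniform value in 0..UINT32_MAX; stand-in for get_crandom(). */
static uint32_t rand_prob(void)
{
	return ((uint32_t)rand() << 16) ^ (uint32_t)rand();
}

/* Returns the delay to apply; 0 means "send at the head of the queue now". */
static uint32_t netem_decide(struct netem_state *q)
{
	if (q->gap == 0			/* not doing reordering */
	    || q->counter < q->gap	/* inside last reordering gap */
	    || q->reorder < rand_prob()) {
		/* normal path: delay the packet and keep it in order */
		++q->counter;
		return q->latency + (q->jitter ? rand_prob() % q->jitter : 0);
	}
	/* one out of every "gap" packets goes out immediately */
	q->counter = 0;
	return 0;
}

int main(void)
{
	/* reorder = ~0u mimics the compatibility default: gap set => 100% */
	struct netem_state q = {
		.gap = 5, .reorder = ~0u, .latency = 10, .jitter = 3,
	};

	for (int i = 0; i < 12; i++)
		printf("pkt %2d: delay %u\n", i, netem_decide(&q));
	return 0;
}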