author	Stephen Hemminger <shemminger@osdl.org>	2005-05-26 15:55:01 -0400
committer	David S. Miller <davem@davemloft.net>	2005-05-26 15:55:01 -0400
commit	0f9f32ac65ee4a452a912a8440cebbc4dff73852 (patch)
tree	86e8a90e0baad0fe22e2b354ef64562af61c2c87
parent	0afb51e72855971dba83b3c6b70c547c2d1161fd (diff)
[PKT_SCHED] netem: use only inner qdisc -- no private skbuff queue
Netem works better if packets are just queued in the inner discipline rather than being held in a separate private delayed queue. Change netem to use dequeue/requeue to peek at the head packet, the way TBF does.

By doing this, the potential qlen problems of the old method are avoided. Those problems happened when netem_run, which moved packets from the private delayed queue to the nested discipline, failed because the inner queue was full. This happened in dequeue, so the effective qlen of netem was decreased (because of the drop), but there was no way to keep the outer qdisc (the caller of netem's dequeue) in sync.

The problem window is still there, since this patch doesn't address the issue of requeue failing in netem_dequeue, but that shouldn't happen because the dequeue/requeue sequence should always work. The long-term correct fix is to implement qdisc->peek in all the qdiscs to allow for this (it is needed by several other qdiscs as well).

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	net/sched/sch_netem.c	124
1 file changed, 36 insertions(+), 88 deletions(-)
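The peek-by-requeue trick described in the commit message is the same one TBF uses: pull the head skb out of the inner qdisc, and if its send time has not arrived yet, push it straight back with the requeue op and arm the watchdog timer. Below is a minimal sketch of that control flow, using only the 2005-era qdisc API that appears in the diff (dequeue, ops->requeue, the PSCHED_* time macros) and the types defined in sch_netem.c; the function name netem_peek_sketch is made up for illustration, the real logic lives in netem_dequeue() in the patch.

/*
 * Sketch of the dequeue/requeue "peek" used by this patch (and by TBF).
 * Relies only on what the diff already uses: the inner qdisc's dequeue
 * and ops->requeue, the PSCHED_* time macros, and the netem_sched_data /
 * netem_skb_cb types from sch_netem.c.  The function name is
 * hypothetical; netem_dequeue() below is the real implementation.
 */
static struct sk_buff *netem_peek_sketch(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->qdisc->dequeue(q->qdisc);

	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		psched_time_t now;
		long delay;

		PSCHED_GET_TIME(now);
		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));

		if (delay <= 0) {
			/* head packet is due: hand it up and account for it */
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* not due yet: arm the timer and put the skb back in place */
		mod_timer(&q->timer, jiffies + delay);
		sch->flags |= TCQ_F_THROTTLED;
		if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
			sch->qstats.drops++;
	}
	return NULL;
}

The only way this pattern can lose a packet is if the requeue immediately after a successful dequeue fails, which, as the commit message notes, should not happen in practice.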
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5c0f0c209a4c..48360f7eec5d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
 
 struct netem_sched_data {
 	struct Qdisc	*qdisc;
-	struct sk_buff_head delayed;
 	struct timer_list timer;
 
 	u32 latency;
@@ -137,72 +136,6 @@ static long tabledist(unsigned long mu, long sigma,
 	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
-/* Put skb in the private delayed queue. */
-static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	psched_tdiff_t td;
-	psched_time_t now;
-
-	PSCHED_GET_TIME(now);
-	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-
-	/* Always queue at tail to keep packets in order */
-	if (likely(q->delayed.qlen < q->limit)) {
-		struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
-
-		PSCHED_TADD2(now, td, cb->time_to_send);
-
-		pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
-			 now, cb->time_to_send);
-
-		__skb_queue_tail(&q->delayed, skb);
-		return NET_XMIT_SUCCESS;
-	}
-
-	pr_debug("netem_delay: queue over limit %d\n", q->limit);
-	sch->qstats.overlimits++;
-	kfree_skb(skb);
-	return NET_XMIT_DROP;
-}
-
-/*
- * Move a packet that is ready to send from the delay holding
- * list to the underlying qdisc.
- */
-static int netem_run(struct Qdisc *sch)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	struct sk_buff *skb;
-	psched_time_t now;
-
-	PSCHED_GET_TIME(now);
-
-	skb = skb_peek(&q->delayed);
-	if (skb) {
-		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
-		long delay
-			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
-		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-
-		/* if more time remaining? */
-		if (delay > 0) {
-			mod_timer(&q->timer, jiffies + delay);
-			return 1;
-		}
-
-		__skb_unlink(skb, &q->delayed);
-
-		if (q->qdisc->enqueue(skb, q->qdisc)) {
-			sch->q.qlen--;
-			sch->qstats.drops++;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -212,6 +145,7 @@ static int netem_run(struct Qdisc *sch)
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
 	struct sk_buff *skb2;
 	int ret;
 	int count = 1;
@@ -246,18 +180,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->duplicate = dupsave;
 	}
 
-	/* If doing simple delay then gap == 0 so all packets
-	 * go into the delayed holding queue
-	 * otherwise if doing out of order only "1 out of gap"
-	 * packets will be delayed.
+	/*
+	 * Do re-ordering by putting one out of N packets at the front
+	 * of the queue.
+	 * gap == 0 is special case for no-reordering.
 	 */
-	if (q->counter < q->gap) {
+	if (q->gap == 0 || q->counter != q->gap) {
+		psched_time_t now;
+		PSCHED_GET_TIME(now);
+		PSCHED_TADD2(now,
+			     tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist),
+			     cb->time_to_send);
+
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
 		q->counter = 0;
-		ret = netem_delay(sch, skb);
-		netem_run(sch);
+		PSCHED_GET_TIME(cb->time_to_send);
+		ret = q->qdisc->ops->requeue(skb, q->qdisc);
 	}
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -301,22 +241,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	int pending;
-
-	pending = netem_run(sch);
 
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
-		pr_debug("netem_dequeue: return skb=%p\n", skb);
-		sch->q.qlen--;
-		sch->flags &= ~TCQ_F_THROTTLED;
-	}
-	else if (pending) {
-		pr_debug("netem_dequeue: throttling\n");
+		const struct netem_skb_cb *cb
+			= (const struct netem_skb_cb *)skb->cb;
+		psched_time_t now;
+		long delay;
+
+		/* if more time remaining? */
+		PSCHED_GET_TIME(now);
+		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
+		if (delay <= 0) {
+			pr_debug("netem_dequeue: return skb=%p\n", skb);
+			sch->q.qlen--;
+			sch->flags &= ~TCQ_F_THROTTLED;
+			return skb;
+		}
+
+		mod_timer(&q->timer, jiffies + delay);
 		sch->flags |= TCQ_F_THROTTLED;
-	}
 
-	return skb;
+		if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
+			sch->qstats.drops++;
+	}
+
+	return NULL;
 }
 
 static void netem_watchdog(unsigned long arg)
@@ -333,8 +284,6 @@ static void netem_reset(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset(q->qdisc);
-	skb_queue_purge(&q->delayed);
-
 	sch->q.qlen = 0;
 	sch->flags &= ~TCQ_F_THROTTLED;
 	del_timer_sync(&q->timer);
@@ -460,7 +409,6 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	skb_queue_head_init(&q->delayed);
 	init_timer(&q->timer);
 	q->timer.function = netem_watchdog;
 	q->timer.data = (unsigned long) sch;
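For reference, the long-term fix the commit message alludes to would give every qdisc a non-destructive peek operation, so callers like netem never have to take a packet out just to look at its timestamp. The sketch below is purely hypothetical: q->qdisc->ops->peek does not exist in this tree and the function name is invented; it only illustrates how the dequeue/requeue round trip would disappear once such an op is available.

/*
 * Hypothetical only: how netem_dequeue() could look if the inner qdisc
 * offered a non-destructive ->peek(), as the commit message suggests.
 * The ops->peek member is an assumption and is not part of this tree.
 */
static struct sk_buff *netem_dequeue_with_peek(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->qdisc->ops->peek(q->qdisc);	/* assumed op */

	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		psched_time_t now;
		long delay;

		PSCHED_GET_TIME(now);
		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
		if (delay > 0) {
			/* head packet not due yet; nothing was removed,
			 * so qlen stays consistent with the outer qdisc */
			mod_timer(&q->timer, jiffies + delay);
			sch->flags |= TCQ_F_THROTTLED;
			return NULL;
		}

		/* due: now actually pull it out of the inner qdisc */
		skb = q->qdisc->dequeue(q->qdisc);
		sch->q.qlen--;
		sch->flags &= ~TCQ_F_THROTTLED;
	}
	return skb;
}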