diff options
-rw-r--r-- | include/linux/skbuff.h | 38 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 2 | ||||
-rw-r--r-- | net/core/stream.c | 12 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 32 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 15 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 122 |
9 files changed, 155 insertions, 83 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4286d832166f..fdfb8fe8c38c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -603,23 +603,23 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) | |||
603 | */ | 603 | */ |
604 | 604 | ||
605 | /** | 605 | /** |
606 | * __skb_queue_head - queue a buffer at the list head | 606 | * __skb_queue_after - queue a buffer after another in a list |
607 | * @list: list to use | 607 | * @list: list to use |
608 | * @prev: place after this buffer | ||
608 | * @newsk: buffer to queue | 609 | * @newsk: buffer to queue |
609 | * | 610 | * |
610 | * Queue a buffer at the start of a list. This function takes no locks | 611 | * Queue a buffer in the middle of a list. This function takes no locks |
611 | * and you must therefore hold required locks before calling it. | 612 | * and you must therefore hold required locks before calling it. |
612 | * | 613 | * |
613 | * A buffer cannot be placed on two lists at the same time. | 614 | * A buffer cannot be placed on two lists at the same time. |
614 | */ | 615 | */ |
615 | extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); | 616 | static inline void __skb_queue_after(struct sk_buff_head *list, |
616 | static inline void __skb_queue_head(struct sk_buff_head *list, | 617 | struct sk_buff *prev, |
617 | struct sk_buff *newsk) | 618 | struct sk_buff *newsk) |
618 | { | 619 | { |
619 | struct sk_buff *prev, *next; | 620 | struct sk_buff *next; |
620 | |||
621 | list->qlen++; | 621 | list->qlen++; |
622 | prev = (struct sk_buff *)list; | 622 | |
623 | next = prev->next; | 623 | next = prev->next; |
624 | newsk->next = next; | 624 | newsk->next = next; |
625 | newsk->prev = prev; | 625 | newsk->prev = prev; |
@@ -627,6 +627,23 @@ static inline void __skb_queue_head(struct sk_buff_head *list, | |||
627 | } | 627 | } |
628 | 628 | ||
629 | /** | 629 | /** |
630 | * __skb_queue_head - queue a buffer at the list head | ||
631 | * @list: list to use | ||
632 | * @newsk: buffer to queue | ||
633 | * | ||
634 | * Queue a buffer at the start of a list. This function takes no locks | ||
635 | * and you must therefore hold required locks before calling it. | ||
636 | * | ||
637 | * A buffer cannot be placed on two lists at the same time. | ||
638 | */ | ||
639 | extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); | ||
640 | static inline void __skb_queue_head(struct sk_buff_head *list, | ||
641 | struct sk_buff *newsk) | ||
642 | { | ||
643 | __skb_queue_after(list, (struct sk_buff *)list, newsk); | ||
644 | } | ||
645 | |||
646 | /** | ||
630 | * __skb_queue_tail - queue a buffer at the list tail | 647 | * __skb_queue_tail - queue a buffer at the list tail |
631 | * @list: list to use | 648 | * @list: list to use |
632 | * @newsk: buffer to queue | 649 | * @newsk: buffer to queue |
@@ -1203,6 +1220,11 @@ static inline void kunmap_skb_frag(void *vaddr) | |||
1203 | prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ | 1220 | prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ |
1204 | skb = skb->next) | 1221 | skb = skb->next) |
1205 | 1222 | ||
1223 | #define skb_queue_reverse_walk(queue, skb) \ | ||
1224 | for (skb = (queue)->prev; \ | ||
1225 | prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ | ||
1226 | skb = skb->prev) | ||
1227 | |||
1206 | 1228 | ||
1207 | extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, | 1229 | extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, |
1208 | int noblock, int *err); | 1230 | int noblock, int *err); |
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f50f95968340..07840baa9341 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h | |||
@@ -125,9 +125,7 @@ struct inet_hashinfo { | |||
125 | rwlock_t lhash_lock ____cacheline_aligned; | 125 | rwlock_t lhash_lock ____cacheline_aligned; |
126 | atomic_t lhash_users; | 126 | atomic_t lhash_users; |
127 | wait_queue_head_t lhash_wait; | 127 | wait_queue_head_t lhash_wait; |
128 | spinlock_t portalloc_lock; | ||
129 | kmem_cache_t *bind_bucket_cachep; | 128 | kmem_cache_t *bind_bucket_cachep; |
130 | int port_rover; | ||
131 | }; | 129 | }; |
132 | 130 | ||
133 | static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, | 131 | static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, |
diff --git a/net/core/stream.c b/net/core/stream.c index ac9edfdf8742..15bfd03e8024 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -52,8 +52,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
52 | { | 52 | { |
53 | struct task_struct *tsk = current; | 53 | struct task_struct *tsk = current; |
54 | DEFINE_WAIT(wait); | 54 | DEFINE_WAIT(wait); |
55 | int done; | ||
55 | 56 | ||
56 | while (1) { | 57 | do { |
57 | if (sk->sk_err) | 58 | if (sk->sk_err) |
58 | return sock_error(sk); | 59 | return sock_error(sk); |
59 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) | 60 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) |
@@ -65,13 +66,12 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
65 | 66 | ||
66 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 67 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); |
67 | sk->sk_write_pending++; | 68 | sk->sk_write_pending++; |
68 | if (sk_wait_event(sk, timeo_p, | 69 | done = sk_wait_event(sk, timeo_p, |
69 | !((1 << sk->sk_state) & | 70 | !((1 << sk->sk_state) & |
70 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))) | 71 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); |
71 | break; | ||
72 | finish_wait(sk->sk_sleep, &wait); | 72 | finish_wait(sk->sk_sleep, &wait); |
73 | sk->sk_write_pending--; | 73 | sk->sk_write_pending--; |
74 | } | 74 | } while (!done); |
75 | return 0; | 75 | return 0; |
76 | } | 76 | } |
77 | 77 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6298cf58ff9e..4b9bc81ae1a3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { | |||
31 | .lhash_lock = RW_LOCK_UNLOCKED, | 31 | .lhash_lock = RW_LOCK_UNLOCKED, |
32 | .lhash_users = ATOMIC_INIT(0), | 32 | .lhash_users = ATOMIC_INIT(0), |
33 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | 33 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), |
34 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
35 | .port_rover = 1024 - 1, | ||
36 | }; | 34 | }; |
37 | 35 | ||
38 | EXPORT_SYMBOL_GPL(dccp_hashinfo); | 36 | EXPORT_SYMBOL_GPL(dccp_hashinfo); |
@@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
125 | int ret; | 123 | int ret; |
126 | 124 | ||
127 | if (snum == 0) { | 125 | if (snum == 0) { |
128 | int rover; | ||
129 | int low = sysctl_local_port_range[0]; | 126 | int low = sysctl_local_port_range[0]; |
130 | int high = sysctl_local_port_range[1]; | 127 | int high = sysctl_local_port_range[1]; |
131 | int remaining = (high - low) + 1; | 128 | int remaining = (high - low) + 1; |
129 | int rover = net_random() % (high - low) + low; | ||
132 | struct hlist_node *node; | 130 | struct hlist_node *node; |
133 | struct inet_timewait_sock *tw = NULL; | 131 | struct inet_timewait_sock *tw = NULL; |
134 | 132 | ||
135 | local_bh_disable(); | 133 | local_bh_disable(); |
136 | |||
137 | /* TODO. Actually it is not so bad idea to remove | ||
138 | * dccp_hashinfo.portalloc_lock before next submission to | ||
139 | * Linus. | ||
140 | * As soon as we touch this place at all it is time to think. | ||
141 | * | ||
142 | * Now it protects single _advisory_ variable | ||
143 | * dccp_hashinfo.port_rover, hence it is mostly useless. | ||
144 | * Code will work nicely if we just delete it, but | ||
145 | * I am afraid in contented case it will work not better or | ||
146 | * even worse: another cpu just will hit the same bucket | ||
147 | * and spin there. | ||
148 | * So some cpu salt could remove both contention and | ||
149 | * memory pingpong. Any ideas how to do this in a nice way? | ||
150 | */ | ||
151 | spin_lock(&dccp_hashinfo.portalloc_lock); | ||
152 | rover = dccp_hashinfo.port_rover; | ||
153 | |||
154 | do { | 134 | do { |
155 | rover++; | ||
156 | if ((rover < low) || (rover > high)) | ||
157 | rover = low; | ||
158 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, | 135 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, |
159 | dccp_hashinfo.bhash_size)]; | 136 | dccp_hashinfo.bhash_size)]; |
160 | spin_lock(&head->lock); | 137 | spin_lock(&head->lock); |
@@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
187 | 164 | ||
188 | next_port: | 165 | next_port: |
189 | spin_unlock(&head->lock); | 166 | spin_unlock(&head->lock); |
167 | if (++rover > high) | ||
168 | rover = low; | ||
190 | } while (--remaining > 0); | 169 | } while (--remaining > 0); |
191 | dccp_hashinfo.port_rover = rover; | ||
192 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
193 | 170 | ||
194 | local_bh_enable(); | 171 | local_bh_enable(); |
195 | 172 | ||
@@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
197 | 174 | ||
198 | ok: | 175 | ok: |
199 | /* All locks still held and bhs disabled */ | 176 | /* All locks still held and bhs disabled */ |
200 | dccp_hashinfo.port_rover = rover; | ||
201 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
202 | |||
203 | inet_bind_hash(sk, tb, rover); | 177 | inet_bind_hash(sk, tb, rover); |
204 | if (sk_unhashed(sk)) { | 178 | if (sk_unhashed(sk)) { |
205 | inet_sk(sk)->sport = htons(rover); | 179 | inet_sk(sk)->sport = htons(rover); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 94468a76c5b4..3fe021f1a566 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
78 | int low = sysctl_local_port_range[0]; | 78 | int low = sysctl_local_port_range[0]; |
79 | int high = sysctl_local_port_range[1]; | 79 | int high = sysctl_local_port_range[1]; |
80 | int remaining = (high - low) + 1; | 80 | int remaining = (high - low) + 1; |
81 | int rover; | 81 | int rover = net_random() % (high - low) + low; |
82 | 82 | ||
83 | spin_lock(&hashinfo->portalloc_lock); | ||
84 | if (hashinfo->port_rover < low) | ||
85 | rover = low; | ||
86 | else | ||
87 | rover = hashinfo->port_rover; | ||
88 | do { | 83 | do { |
89 | rover++; | ||
90 | if (rover > high) | ||
91 | rover = low; | ||
92 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | 84 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; |
93 | spin_lock(&head->lock); | 85 | spin_lock(&head->lock); |
94 | inet_bind_bucket_for_each(tb, node, &head->chain) | 86 | inet_bind_bucket_for_each(tb, node, &head->chain) |
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
97 | break; | 89 | break; |
98 | next: | 90 | next: |
99 | spin_unlock(&head->lock); | 91 | spin_unlock(&head->lock); |
92 | if (++rover > high) | ||
93 | rover = low; | ||
100 | } while (--remaining > 0); | 94 | } while (--remaining > 0); |
101 | hashinfo->port_rover = rover; | ||
102 | spin_unlock(&hashinfo->portalloc_lock); | ||
103 | 95 | ||
104 | /* Exhausted local port range during search? It is not | 96 | /* Exhausted local port range during search? It is not |
105 | * possible for us to be holding one of the bind hash | 97 | * possible for us to be holding one of the bind hash |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f3f0013a9580..72b7c22e1ea5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2112,7 +2112,6 @@ void __init tcp_init(void) | |||
2112 | sysctl_tcp_max_orphans >>= (3 - order); | 2112 | sysctl_tcp_max_orphans >>= (3 - order); |
2113 | sysctl_max_syn_backlog = 128; | 2113 | sysctl_max_syn_backlog = 128; |
2114 | } | 2114 | } |
2115 | tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1; | ||
2116 | 2115 | ||
2117 | sysctl_tcp_mem[0] = 768 << order; | 2116 | sysctl_tcp_mem[0] = 768 << order; |
2118 | sysctl_tcp_mem[1] = 1024 << order; | 2117 | sysctl_tcp_mem[1] = 1024 << order; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c85819d8474b..49d67cd75edd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | |||
93 | .lhash_lock = RW_LOCK_UNLOCKED, | 93 | .lhash_lock = RW_LOCK_UNLOCKED, |
94 | .lhash_users = ATOMIC_INIT(0), | 94 | .lhash_users = ATOMIC_INIT(0), |
95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), | 95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), |
96 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
97 | .port_rover = 1024 - 1, | ||
98 | }; | 96 | }; |
99 | 97 | ||
100 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 98 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d693cb988b78..d746d3b27efb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
114 | int low = sysctl_local_port_range[0]; | 114 | int low = sysctl_local_port_range[0]; |
115 | int high = sysctl_local_port_range[1]; | 115 | int high = sysctl_local_port_range[1]; |
116 | int remaining = (high - low) + 1; | 116 | int remaining = (high - low) + 1; |
117 | int rover; | 117 | int rover = net_random() % (high - low) + low; |
118 | 118 | ||
119 | spin_lock(&tcp_hashinfo.portalloc_lock); | 119 | do { |
120 | if (tcp_hashinfo.port_rover < low) | ||
121 | rover = low; | ||
122 | else | ||
123 | rover = tcp_hashinfo.port_rover; | ||
124 | do { rover++; | ||
125 | if (rover > high) | ||
126 | rover = low; | ||
127 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; | 120 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; |
128 | spin_lock(&head->lock); | 121 | spin_lock(&head->lock); |
129 | inet_bind_bucket_for_each(tb, node, &head->chain) | 122 | inet_bind_bucket_for_each(tb, node, &head->chain) |
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
132 | break; | 125 | break; |
133 | next: | 126 | next: |
134 | spin_unlock(&head->lock); | 127 | spin_unlock(&head->lock); |
128 | if (++rover > high) | ||
129 | rover = low; | ||
135 | } while (--remaining > 0); | 130 | } while (--remaining > 0); |
136 | tcp_hashinfo.port_rover = rover; | ||
137 | spin_unlock(&tcp_hashinfo.portalloc_lock); | ||
138 | 131 | ||
139 | /* Exhausted local port range during search? It is not | 132 | /* Exhausted local port range during search? It is not |
140 | * possible for us to be holding one of the bind hash | 133 | * possible for us to be holding one of the bind hash |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bb9bf8d5003c..cdc8d283791c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -25,6 +25,8 @@ | |||
25 | 25 | ||
26 | #include <net/pkt_sched.h> | 26 | #include <net/pkt_sched.h> |
27 | 27 | ||
28 | #define VERSION "1.1" | ||
29 | |||
28 | /* Network Emulation Queuing algorithm. | 30 | /* Network Emulation Queuing algorithm. |
29 | ==================================== | 31 | ==================================== |
30 | 32 | ||
@@ -185,10 +187,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
185 | || q->counter < q->gap /* inside last reordering gap */ | 187 | || q->counter < q->gap /* inside last reordering gap */ |
186 | || q->reorder < get_crandom(&q->reorder_cor)) { | 188 | || q->reorder < get_crandom(&q->reorder_cor)) { |
187 | psched_time_t now; | 189 | psched_time_t now; |
190 | psched_tdiff_t delay; | ||
191 | |||
192 | delay = tabledist(q->latency, q->jitter, | ||
193 | &q->delay_cor, q->delay_dist); | ||
194 | |||
188 | PSCHED_GET_TIME(now); | 195 | PSCHED_GET_TIME(now); |
189 | PSCHED_TADD2(now, tabledist(q->latency, q->jitter, | 196 | PSCHED_TADD2(now, delay, cb->time_to_send); |
190 | &q->delay_cor, q->delay_dist), | ||
191 | cb->time_to_send); | ||
192 | ++q->counter; | 197 | ++q->counter; |
193 | ret = q->qdisc->enqueue(skb, q->qdisc); | 198 | ret = q->qdisc->enqueue(skb, q->qdisc); |
194 | } else { | 199 | } else { |
@@ -248,24 +253,31 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
248 | const struct netem_skb_cb *cb | 253 | const struct netem_skb_cb *cb |
249 | = (const struct netem_skb_cb *)skb->cb; | 254 | = (const struct netem_skb_cb *)skb->cb; |
250 | psched_time_t now; | 255 | psched_time_t now; |
251 | long delay; | ||
252 | 256 | ||
253 | /* if more time remaining? */ | 257 | /* if more time remaining? */ |
254 | PSCHED_GET_TIME(now); | 258 | PSCHED_GET_TIME(now); |
255 | delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); | 259 | |
256 | pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); | 260 | if (PSCHED_TLESS(cb->time_to_send, now)) { |
257 | if (delay <= 0) { | ||
258 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 261 | pr_debug("netem_dequeue: return skb=%p\n", skb); |
259 | sch->q.qlen--; | 262 | sch->q.qlen--; |
260 | sch->flags &= ~TCQ_F_THROTTLED; | 263 | sch->flags &= ~TCQ_F_THROTTLED; |
261 | return skb; | 264 | return skb; |
262 | } | 265 | } else { |
266 | psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); | ||
267 | |||
268 | if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { | ||
269 | sch->qstats.drops++; | ||
263 | 270 | ||
264 | mod_timer(&q->timer, jiffies + delay); | 271 | /* After this qlen is confused */ |
265 | sch->flags |= TCQ_F_THROTTLED; | 272 | printk(KERN_ERR "netem: queue discipline %s could not requeue\n",
273 | q->qdisc->ops->id); | ||
266 | 274 | ||
267 | if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) | 275 | sch->q.qlen--; |
268 | sch->qstats.drops++; | 276 | } |
277 | |||
278 | mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); | ||
279 | sch->flags |= TCQ_F_THROTTLED; | ||
280 | } | ||
269 | } | 281 | } |
270 | 282 | ||
271 | return NULL; | 283 | return NULL; |
@@ -290,11 +302,16 @@ static void netem_reset(struct Qdisc *sch) | |||
290 | del_timer_sync(&q->timer); | 302 | del_timer_sync(&q->timer); |
291 | } | 303 | } |
292 | 304 | ||
305 | /* Pass size change message down to embedded FIFO */ | ||
293 | static int set_fifo_limit(struct Qdisc *q, int limit) | 306 | static int set_fifo_limit(struct Qdisc *q, int limit) |
294 | { | 307 | { |
295 | struct rtattr *rta; | 308 | struct rtattr *rta; |
296 | int ret = -ENOMEM; | 309 | int ret = -ENOMEM; |
297 | 310 | ||
311 | /* Hack to avoid sending change message to non-FIFO */ | ||
312 | if (strncmp(q->ops->id + 1, "fifo", 4) != 0) | ||
313 | return 0; | ||
314 | |||
298 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); | 315 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); |
299 | if (rta) { | 316 | if (rta) { |
300 | rta->rta_type = RTM_NEWQDISC; | 317 | rta->rta_type = RTM_NEWQDISC; |
@@ -426,6 +443,84 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
426 | return 0; | 443 | return 0; |
427 | } | 444 | } |
428 | 445 | ||
446 | /* | ||
447 | * Special case version of FIFO queue for use by netem. | ||
448 | * It queues in order based on timestamps in skb's | ||
449 | */ | ||
450 | struct fifo_sched_data { | ||
451 | u32 limit; | ||
452 | }; | ||
453 | |||
454 | static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) | ||
455 | { | ||
456 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
457 | struct sk_buff_head *list = &sch->q; | ||
458 | const struct netem_skb_cb *ncb | ||
459 | = (const struct netem_skb_cb *)nskb->cb; | ||
460 | struct sk_buff *skb; | ||
461 | |||
462 | if (likely(skb_queue_len(list) < q->limit)) { | ||
463 | skb_queue_reverse_walk(list, skb) { | ||
464 | const struct netem_skb_cb *cb | ||
465 | = (const struct netem_skb_cb *)skb->cb; | ||
466 | |||
467 | if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send)) | ||
468 | break; | ||
469 | } | ||
470 | |||
471 | __skb_queue_after(list, skb, nskb); | ||
472 | |||
473 | sch->qstats.backlog += nskb->len; | ||
474 | sch->bstats.bytes += nskb->len; | ||
475 | sch->bstats.packets++; | ||
476 | |||
477 | return NET_XMIT_SUCCESS; | ||
478 | } | ||
479 | |||
480 | return qdisc_drop(nskb, sch); | ||
481 | } | ||
482 | |||
483 | static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) | ||
484 | { | ||
485 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
486 | |||
487 | if (opt) { | ||
488 | struct tc_fifo_qopt *ctl = RTA_DATA(opt); | ||
489 | if (RTA_PAYLOAD(opt) < sizeof(*ctl)) | ||
490 | return -EINVAL; | ||
491 | |||
492 | q->limit = ctl->limit; | ||
493 | } else | ||
494 | q->limit = max_t(u32, sch->dev->tx_queue_len, 1); | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
500 | { | ||
501 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
502 | struct tc_fifo_qopt opt = { .limit = q->limit }; | ||
503 | |||
504 | RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
505 | return skb->len; | ||
506 | |||
507 | rtattr_failure: | ||
508 | return -1; | ||
509 | } | ||
510 | |||
511 | static struct Qdisc_ops tfifo_qdisc_ops = { | ||
512 | .id = "tfifo", | ||
513 | .priv_size = sizeof(struct fifo_sched_data), | ||
514 | .enqueue = tfifo_enqueue, | ||
515 | .dequeue = qdisc_dequeue_head, | ||
516 | .requeue = qdisc_requeue, | ||
517 | .drop = qdisc_queue_drop, | ||
518 | .init = tfifo_init, | ||
519 | .reset = qdisc_reset_queue, | ||
520 | .change = tfifo_init, | ||
521 | .dump = tfifo_dump, | ||
522 | }; | ||
523 | |||
429 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) | 524 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) |
430 | { | 525 | { |
431 | struct netem_sched_data *q = qdisc_priv(sch); | 526 | struct netem_sched_data *q = qdisc_priv(sch); |
@@ -438,7 +533,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) | |||
438 | q->timer.function = netem_watchdog; | 533 | q->timer.function = netem_watchdog; |
439 | q->timer.data = (unsigned long) sch; | 534 | q->timer.data = (unsigned long) sch; |
440 | 535 | ||
441 | q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | 536 | q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); |
442 | if (!q->qdisc) { | 537 | if (!q->qdisc) { |
443 | pr_debug("netem: qdisc create failed\n"); | 538 | pr_debug("netem: qdisc create failed\n"); |
444 | return -ENOMEM; | 539 | return -ENOMEM; |
@@ -601,6 +696,7 @@ static struct Qdisc_ops netem_qdisc_ops = { | |||
601 | 696 | ||
602 | static int __init netem_module_init(void) | 697 | static int __init netem_module_init(void) |
603 | { | 698 | { |
699 | pr_info("netem: version " VERSION "\n"); | ||
604 | return register_qdisc(&netem_qdisc_ops); | 700 | return register_qdisc(&netem_qdisc_ops); |
605 | } | 701 | } |
606 | static void __exit netem_module_exit(void) | 702 | static void __exit netem_module_exit(void) |