author     Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /drivers/net/virtio_net.c
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master
Diffstat (limited to 'drivers/net/virtio_net.c')

 -rw-r--r--   drivers/net/virtio_net.c   493

1 files changed, 278 insertions, 215 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b9e002fccbca..b0577dd1a42d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -25,6 +25,7 @@
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
+#include <linux/slab.h>
 
 static int napi_weight = 128;
 module_param(napi_weight, int, 0444);
@@ -56,10 +57,6 @@ struct virtnet_info
 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
 	bool mergeable_rx_bufs;
 
-	/* Receive & send queues. */
-	struct sk_buff_head recv;
-	struct sk_buff_head send;
-
 	/* Work struct for refilling if we run low on memory. */
 	struct delayed_work refill;
 
@@ -75,34 +72,44 @@ struct skb_vnet_hdr {
 	unsigned int num_sg;
 };
 
+struct padded_vnet_hdr {
+	struct virtio_net_hdr hdr;
+	/*
+	 * virtio_net_hdr should be in a separated sg buffer because of a
+	 * QEMU bug, and data sg buffer shares same page with this header sg.
+	 * This padding makes next sg 16 byte aligned after virtio_net_hdr.
+	 */
+	char padding[6];
+};
+
 static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
 {
 	return (struct skb_vnet_hdr *)skb->cb;
 }
 
-static void give_a_page(struct virtnet_info *vi, struct page *page)
-{
-	page->private = (unsigned long)vi->pages;
-	vi->pages = page;
-}
-
-static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
+/*
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ */
+static void give_pages(struct virtnet_info *vi, struct page *page)
 {
-	unsigned int i;
+	struct page *end;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		give_a_page(vi, skb_shinfo(skb)->frags[i].page);
-	skb_shinfo(skb)->nr_frags = 0;
-	skb->data_len = 0;
+	/* Find end of list, sew whole thing into vi->pages. */
+	for (end = page; end->private; end = (struct page *)end->private);
+	end->private = (unsigned long)vi->pages;
+	vi->pages = page;
 }
 
 static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 {
 	struct page *p = vi->pages;
 
-	if (p)
+	if (p) {
 		vi->pages = (struct page *)p->private;
-	else
+		/* clear private here, it is used to chain pages */
+		p->private = 0;
+	} else
 		p = alloc_page(gfp_mask);
 	return p;
 }
@@ -118,99 +125,142 @@ static void skb_xmit_done(struct virtqueue *svq)
 	netif_wake_queue(vi->dev);
 }
 
-static void receive_skb(struct net_device *dev, struct sk_buff *skb,
-			unsigned len)
+static void set_skb_frag(struct sk_buff *skb, struct page *page,
+			 unsigned int offset, unsigned int *len)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
-	int err;
-	int i;
-
-	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
-		pr_debug("%s: short packet %i\n", dev->name, len);
-		dev->stats.rx_length_errors++;
-		goto drop;
-	}
+	int i = skb_shinfo(skb)->nr_frags;
+	skb_frag_t *f;
+
+	f = &skb_shinfo(skb)->frags[i];
+	f->size = min((unsigned)PAGE_SIZE - offset, *len);
+	f->page_offset = offset;
+	f->page = page;
+
+	skb->data_len += f->size;
+	skb->len += f->size;
+	skb_shinfo(skb)->nr_frags++;
+	*len -= f->size;
+}
 
-	if (vi->mergeable_rx_bufs) {
-		unsigned int copy;
-		char *p = page_address(skb_shinfo(skb)->frags[0].page);
+static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+				   struct page *page, unsigned int len)
+{
+	struct sk_buff *skb;
+	struct skb_vnet_hdr *hdr;
+	unsigned int copy, hdr_len, offset;
+	char *p;
 
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-		len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	p = page_address(page);
 
-		memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr));
-		p += sizeof(hdr->mhdr);
+	/* copy small packet so we can reuse these pages for small data */
+	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
+	if (unlikely(!skb))
+		return NULL;
 
-		copy = len;
-		if (copy > skb_tailroom(skb))
-			copy = skb_tailroom(skb);
+	hdr = skb_vnet_hdr(skb);
 
-		memcpy(skb_put(skb, copy), p, copy);
+	if (vi->mergeable_rx_bufs) {
+		hdr_len = sizeof hdr->mhdr;
+		offset = hdr_len;
+	} else {
+		hdr_len = sizeof hdr->hdr;
+		offset = sizeof(struct padded_vnet_hdr);
+	}
 
-		len -= copy;
+	memcpy(hdr, p, hdr_len);
 
-		if (!len) {
-			give_a_page(vi, skb_shinfo(skb)->frags[0].page);
-			skb_shinfo(skb)->nr_frags--;
-		} else {
-			skb_shinfo(skb)->frags[0].page_offset +=
-				sizeof(hdr->mhdr) + copy;
-			skb_shinfo(skb)->frags[0].size = len;
-			skb->data_len += len;
-			skb->len += len;
-		}
+	len -= hdr_len;
+	p += offset;
 
-		while (--hdr->mhdr.num_buffers) {
-			struct sk_buff *nskb;
+	copy = len;
+	if (copy > skb_tailroom(skb))
+		copy = skb_tailroom(skb);
+	memcpy(skb_put(skb, copy), p, copy);
 
-			i = skb_shinfo(skb)->nr_frags;
-			if (i >= MAX_SKB_FRAGS) {
-				pr_debug("%s: packet too long %d\n", dev->name,
-					 len);
-				dev->stats.rx_length_errors++;
-				goto drop;
-			}
+	len -= copy;
+	offset += copy;
 
-			nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
-			if (!nskb) {
-				pr_debug("%s: rx error: %d buffers missing\n",
-					 dev->name, hdr->mhdr.num_buffers);
-				dev->stats.rx_length_errors++;
-				goto drop;
-			}
+	while (len) {
+		set_skb_frag(skb, page, offset, &len);
+		page = (struct page *)page->private;
+		offset = 0;
+	}
 
-			__skb_unlink(nskb, &vi->recv);
-			vi->num--;
+	if (page)
+		give_pages(vi, page);
 
-			skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
-			skb_shinfo(nskb)->nr_frags = 0;
-			kfree_skb(nskb);
+	return skb;
+}
 
-			if (len > PAGE_SIZE)
-				len = PAGE_SIZE;
+static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+{
+	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+	struct page *page;
+	int num_buf, i, len;
+
+	num_buf = hdr->mhdr.num_buffers;
+	while (--num_buf) {
+		i = skb_shinfo(skb)->nr_frags;
+		if (i >= MAX_SKB_FRAGS) {
+			pr_debug("%s: packet too long\n", skb->dev->name);
+			skb->dev->stats.rx_length_errors++;
+			return -EINVAL;
+		}
 
-			skb_shinfo(skb)->frags[i].size = len;
-			skb_shinfo(skb)->nr_frags++;
-			skb->data_len += len;
-			skb->len += len;
+		page = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
+		if (!page) {
+			pr_debug("%s: rx error: %d buffers missing\n",
+				 skb->dev->name, hdr->mhdr.num_buffers);
+			skb->dev->stats.rx_length_errors++;
+			return -EINVAL;
 		}
-	} else {
-		len -= sizeof(hdr->hdr);
+		if (len > PAGE_SIZE)
+			len = PAGE_SIZE;
+
+		set_skb_frag(skb, page, 0, &len);
+
+		--vi->num;
+	}
+	return 0;
+}
 
-		if (len <= MAX_PACKET_LEN)
-			trim_pages(vi, skb);
+static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct page *page;
+	struct skb_vnet_hdr *hdr;
 
-		err = pskb_trim(skb, len);
-		if (err) {
-			pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
-				 len, err);
+	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
+		pr_debug("%s: short packet %i\n", dev->name, len);
+		dev->stats.rx_length_errors++;
+		if (vi->mergeable_rx_bufs || vi->big_packets)
+			give_pages(vi, buf);
+		else
+			dev_kfree_skb(buf);
+		return;
+	}
+
+	if (!vi->mergeable_rx_bufs && !vi->big_packets) {
+		skb = buf;
+		len -= sizeof(struct virtio_net_hdr);
+		skb_trim(skb, len);
+	} else {
+		page = buf;
+		skb = page_to_skb(vi, page, len);
+		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
-			goto drop;
+			give_pages(vi, page);
+			return;
 		}
+		if (vi->mergeable_rx_bufs)
+			if (receive_mergeable(vi, skb)) {
+				dev_kfree_skb(skb);
+				return;
+			}
 	}
 
+	hdr = skb_vnet_hdr(skb);
 	skb->truesize += skb->data_len;
 	dev->stats.rx_bytes += skb->len;
 	dev->stats.rx_packets++;
@@ -267,113 +317,121 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 
 frame_err:
 	dev->stats.rx_frame_errors++;
-drop:
 	dev_kfree_skb(skb);
 }
 
-static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 {
 	struct sk_buff *skb;
-	struct scatterlist sg[2+MAX_SKB_FRAGS];
-	int num, err, i;
-	bool oom = false;
-
-	sg_init_table(sg, 2+MAX_SKB_FRAGS);
-	do {
-		struct skb_vnet_hdr *hdr;
+	struct skb_vnet_hdr *hdr;
+	struct scatterlist sg[2];
+	int err;
 
-		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
-		if (unlikely(!skb)) {
-			oom = true;
-			break;
-		}
+	sg_init_table(sg, 2);
+	skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN);
+	if (unlikely(!skb))
+		return -ENOMEM;
 
-		skb_reserve(skb, NET_IP_ALIGN);
-		skb_put(skb, MAX_PACKET_LEN);
+	skb_put(skb, MAX_PACKET_LEN);
 
-		hdr = skb_vnet_hdr(skb);
-		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
+	hdr = skb_vnet_hdr(skb);
+	sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr);
 
-		if (vi->big_packets) {
-			for (i = 0; i < MAX_SKB_FRAGS; i++) {
-				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-				f->page = get_a_page(vi, gfp);
-				if (!f->page)
-					break;
+	skb_to_sgvec(skb, sg + 1, 0, skb->len);
 
-				f->page_offset = 0;
-				f->size = PAGE_SIZE;
+	err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 2, skb);
+	if (err < 0)
+		dev_kfree_skb(skb);
 
-				skb->data_len += PAGE_SIZE;
-				skb->len += PAGE_SIZE;
+	return err;
+}
 
-				skb_shinfo(skb)->nr_frags++;
-			}
+static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+{
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+	struct page *first, *list = NULL;
+	char *p;
+	int i, err, offset;
+
+	sg_init_table(sg, MAX_SKB_FRAGS + 2);
+	/* page in sg[MAX_SKB_FRAGS + 1] is list tail */
+	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
+		first = get_a_page(vi, gfp);
+		if (!first) {
+			if (list)
+				give_pages(vi, list);
+			return -ENOMEM;
 		}
+		sg_set_buf(&sg[i], page_address(first), PAGE_SIZE);
 
-		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
-		skb_queue_head(&vi->recv, skb);
+		/* chain new page in list head to match sg */
+		first->private = (unsigned long)list;
+		list = first;
+	}
 
-		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
-		if (err < 0) {
-			skb_unlink(skb, &vi->recv);
-			trim_pages(vi, skb);
-			kfree_skb(skb);
-			break;
-		}
-		vi->num++;
-	} while (err >= num);
-	if (unlikely(vi->num > vi->max))
-		vi->max = vi->num;
-	vi->rvq->vq_ops->kick(vi->rvq);
-	return !oom;
+	first = get_a_page(vi, gfp);
+	if (!first) {
+		give_pages(vi, list);
+		return -ENOMEM;
+	}
+	p = page_address(first);
+
+	/* sg[0], sg[1] share the same page */
+	/* a separated sg[0] for virtio_net_hdr only during to QEMU bug*/
+	sg_set_buf(&sg[0], p, sizeof(struct virtio_net_hdr));
+
+	/* sg[1] for data packet, from offset */
+	offset = sizeof(struct padded_vnet_hdr);
+	sg_set_buf(&sg[1], p + offset, PAGE_SIZE - offset);
+
+	/* chain first in list head */
+	first->private = (unsigned long)list;
+	err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, MAX_SKB_FRAGS + 2,
+				       first);
+	if (err < 0)
+		give_pages(vi, first);
+
+	return err;
 }
 
-/* Returns false if we couldn't fill entirely (OOM). */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
 {
-	struct sk_buff *skb;
-	struct scatterlist sg[1];
+	struct page *page;
+	struct scatterlist sg;
 	int err;
-	bool oom = false;
 
-	if (!vi->mergeable_rx_bufs)
-		return try_fill_recv_maxbufs(vi, gfp);
-
-	do {
-		skb_frag_t *f;
-
-		skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
-		if (unlikely(!skb)) {
-			oom = true;
-			break;
-		}
+	page = get_a_page(vi, gfp);
+	if (!page)
+		return -ENOMEM;
 
-		skb_reserve(skb, NET_IP_ALIGN);
+	sg_init_one(&sg, page_address(page), PAGE_SIZE);
 
-		f = &skb_shinfo(skb)->frags[0];
-		f->page = get_a_page(vi, gfp);
-		if (!f->page) {
-			oom = true;
-			kfree_skb(skb);
-			break;
-		}
+	err = vi->rvq->vq_ops->add_buf(vi->rvq, &sg, 0, 1, page);
+	if (err < 0)
+		give_pages(vi, page);
 
-		f->page_offset = 0;
-		f->size = PAGE_SIZE;
+	return err;
+}
 
-		skb_shinfo(skb)->nr_frags++;
+/* Returns false if we couldn't fill entirely (OOM). */
+static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+{
+	int err;
+	bool oom = false;
 
-		sg_init_one(sg, page_address(f->page), PAGE_SIZE);
-		skb_queue_head(&vi->recv, skb);
+	do {
+		if (vi->mergeable_rx_bufs)
+			err = add_recvbuf_mergeable(vi, gfp);
+		else if (vi->big_packets)
+			err = add_recvbuf_big(vi, gfp);
+		else
+			err = add_recvbuf_small(vi, gfp);
 
-		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
 		if (err < 0) {
-			skb_unlink(skb, &vi->recv);
-			kfree_skb(skb);
+			oom = true;
 			break;
 		}
-		vi->num++;
+		++vi->num;
 	} while (err > 0);
 	if (unlikely(vi->num > vi->max))
 		vi->max = vi->num;
@@ -398,8 +456,7 @@ static void refill_work(struct work_struct *work)
 
 	vi = container_of(work, struct virtnet_info, refill.work);
 	napi_disable(&vi->napi);
-	try_fill_recv(vi, GFP_KERNEL);
-	still_empty = (vi->num == 0);
+	still_empty = !try_fill_recv(vi, GFP_KERNEL);
 	napi_enable(&vi->napi);
 
 	/* In theory, this can happen: if we don't get any buffers in
@@ -411,15 +468,14 @@
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
 	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
-	struct sk_buff *skb = NULL;
+	void *buf;
 	unsigned int len, received = 0;
 
 again:
 	while (received < budget &&
-	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
-		__skb_unlink(skb, &vi->recv);
-		receive_skb(vi->dev, skb, len);
-		vi->num--;
+	       (buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
+		receive_buf(vi->dev, buf, len);
+		--vi->num;
 		received++;
 	}
 
@@ -431,8 +487,8 @@ again:
 	/* Out of packets? */
 	if (received < budget) {
 		napi_complete(napi);
-		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
-		    && napi_schedule_prep(napi)) {
+		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq)) &&
+		    napi_schedule_prep(napi)) {
 			vi->rvq->vq_ops->disable_cb(vi->rvq);
 			__napi_schedule(napi);
 			goto again;
@@ -449,7 +505,6 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 
 	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
-		__skb_unlink(skb, &vi->send);
 		vi->dev->stats.tx_bytes += skb->len;
 		vi->dev->stats.tx_packets++;
 		tot_sgs += skb_vnet_hdr(skb)->num_sg;
@@ -499,9 +554,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr));
+		sg_set_buf(sg, &hdr->mhdr, sizeof hdr->mhdr);
 	else
-		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
+		sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr);
 
 	hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 	return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb);
@@ -532,15 +587,6 @@ again:
 	}
 	vi->svq->vq_ops->kick(vi->svq);
 
-	/*
-	 * Put new one in send queue. You'd expect we'd need this before
-	 * xmit_skb calls add_buf(), since the callback can be triggered
-	 * immediately after that. But since the callback just triggers
-	 * another call back here, normal network xmit locking prevents the
-	 * race.
-	 */
-	__skb_queue_head(&vi->send, skb);
-
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
 	nf_reset(skb);
@@ -678,6 +724,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	struct virtio_net_ctrl_mac *mac_data;
 	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
+	int uc_count;
+	int mc_count;
 	void *buf;
 	int i;
 
@@ -704,9 +752,12 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 allmulti ? "en" : "dis");
 
+	uc_count = netdev_uc_count(dev);
+	mc_count = netdev_mc_count(dev);
 	/* MAC filter - use one buffer for both lists */
-	mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
-				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
+	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
+		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
+	mac_data = buf;
 	if (!buf) {
 		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
 		return;
@@ -715,24 +766,24 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_table(sg, 2);
 
 	/* Store the unicast list and count in the front of the buffer */
-	mac_data->entries = dev->uc.count;
+	mac_data->entries = uc_count;
 	i = 0;
-	list_for_each_entry(ha, &dev->uc.list, list)
+	netdev_for_each_uc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[0], mac_data,
-		   sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
+		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
 
 	/* multicast list and count fill the end */
-	mac_data = (void *)&mac_data->macs[dev->uc.count][0];
+	mac_data = (void *)&mac_data->macs[uc_count][0];
 
-	mac_data->entries = dev->mc_count;
-	addr = dev->mc_list;
-	for (i = 0; i < dev->mc_count; i++, addr = addr->next)
-		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);
+	mac_data->entries = mc_count;
+	i = 0;
+	netdev_for_each_mc_addr(addr, dev)
+		memcpy(&mac_data->macs[i++][0], addr->da_addr, ETH_ALEN);
 
 	sg_set_buf(&sg[1], mac_data,
-		   sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN));
+		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
@@ -893,9 +944,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	INIT_DELAYED_WORK(&vi->refill, refill_work);
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
-	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
-	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
-	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
 		vi->big_packets = true;
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
@@ -919,10 +970,6 @@ static int virtnet_probe(struct virtio_device *vdev)
 		dev->features |= NETIF_F_HW_VLAN_FILTER;
 	}
 
-	/* Initialize our empty receive and send queues. */
-	skb_queue_head_init(&vi->recv);
-	skb_queue_head_init(&vi->send);
-
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
@@ -955,26 +1002,42 @@ free:
 	return err;
 }
 
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+	void *buf;
+	while (1) {
+		buf = vi->svq->vq_ops->detach_unused_buf(vi->svq);
+		if (!buf)
+			break;
+		dev_kfree_skb(buf);
+	}
+	while (1) {
+		buf = vi->rvq->vq_ops->detach_unused_buf(vi->rvq);
+		if (!buf)
+			break;
+		if (vi->mergeable_rx_bufs || vi->big_packets)
+			give_pages(vi, buf);
+		else
+			dev_kfree_skb(buf);
+		--vi->num;
+	}
+	BUG_ON(vi->num != 0);
+}
+
 static void __devexit virtnet_remove(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	struct sk_buff *skb;
 
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-	/* Free our skbs in send and recv queues, if any. */
-	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
-		kfree_skb(skb);
-		vi->num--;
-	}
-	__skb_queue_purge(&vi->send);
-
-	BUG_ON(vi->num != 0);
 
 	unregister_netdev(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
+	/* Free unused buffers in both send and recv, if any. */
+	free_unused_bufs(vi);
+
 	vdev->config->del_vqs(vi->vdev);
 
 	while (vi->pages)