Diffstat (limited to 'drivers/net/virtio_net.c')

 -rw-r--r--  drivers/net/virtio_net.c | 368
 1 file changed, 220 insertions(+), 148 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9fbdfcd1e1a0..d208f8604981 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -36,7 +36,10 @@ module_param(csum, bool, 0444);
 module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
-#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
+#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
+#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
+                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
+                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN	128
 
 #define VIRTNET_DRIVER_VERSION "1.0.0"
@@ -124,12 +127,14 @@ struct virtnet_info {
 	/* Lock for config space updates */
 	struct mutex config_lock;
 
+	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
+	 * low on memory.
+	 */
+	struct page_frag alloc_frag;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
-	/* Per-cpu variable to show the mapping from CPU to virtqueue */
-	int __percpu *vq_index;
-
 	/* CPU hot plug notifier */
 	struct notifier_block nb;
 };
@@ -217,33 +222,18 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static void set_skb_frag(struct sk_buff *skb, struct page *page,
-			 unsigned int offset, unsigned int *len)
-{
-	int size = min((unsigned)PAGE_SIZE - offset, *len);
-	int i = skb_shinfo(skb)->nr_frags;
-
-	__skb_fill_page_desc(skb, i, page, offset, size);
-
-	skb->data_len += size;
-	skb->len += size;
-	skb->truesize += PAGE_SIZE;
-	skb_shinfo(skb)->nr_frags++;
-	skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-	*len -= size;
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
-				   struct page *page, unsigned int len)
+				   struct page *page, unsigned int offset,
+				   unsigned int len, unsigned int truesize)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
-	unsigned int copy, hdr_len, offset;
+	unsigned int copy, hdr_len, hdr_padded_len;
 	char *p;
 
-	p = page_address(page);
+	p = page_address(page) + offset;
 
 	/* copy small packet so we can reuse these pages for small data */
 	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
@@ -254,16 +244,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 
 	if (vi->mergeable_rx_bufs) {
 		hdr_len = sizeof hdr->mhdr;
-		offset = hdr_len;
+		hdr_padded_len = sizeof hdr->mhdr;
 	} else {
 		hdr_len = sizeof hdr->hdr;
-		offset = sizeof(struct padded_vnet_hdr);
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 	}
 
 	memcpy(hdr, p, hdr_len);
 
 	len -= hdr_len;
-	p += offset;
+	offset += hdr_padded_len;
+	p += hdr_padded_len;
 
 	copy = len;
 	if (copy > skb_tailroom(skb))
@@ -273,6 +264,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 	len -= copy;
 	offset += copy;
 
+	if (vi->mergeable_rx_bufs) {
+		if (len)
+			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
+		else
+			put_page(page);
+		return skb;
+	}
+
 	/*
 	 * Verify that we can indeed put this data into a skb.
 	 * This is here to handle cases when the device erroneously
@@ -284,9 +283,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 		dev_kfree_skb(skb);
 		return NULL;
 	}
-
+	BUG_ON(offset >= PAGE_SIZE);
 	while (len) {
-		set_skb_frag(skb, page, offset, &len);
+		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
+				frag_size, truesize);
+		len -= frag_size;
 		page = (struct page *)page->private;
 		offset = 0;
 	}
@@ -297,36 +299,120 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 	return skb;
 }
 
-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
+static struct sk_buff *receive_small(void *buf, unsigned int len)
 {
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
-	struct page *page;
-	int num_buf, i, len;
+	struct sk_buff * skb = buf;
+
+	len -= sizeof(struct virtio_net_hdr);
+	skb_trim(skb, len);
+
+	return skb;
+}
+
+static struct sk_buff *receive_big(struct net_device *dev,
+				   struct receive_queue *rq,
+				   void *buf,
+				   unsigned int len)
+{
+	struct page *page = buf;
+	struct sk_buff *skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
+
+	if (unlikely(!skb))
+		goto err;
+
+	return skb;
+
+err:
+	dev->stats.rx_dropped++;
+	give_pages(rq, page);
+	return NULL;
+}
+
+static struct sk_buff *receive_mergeable(struct net_device *dev,
+					 struct receive_queue *rq,
+					 void *buf,
+					 unsigned int len)
+{
+	struct skb_vnet_hdr *hdr = buf;
+	int num_buf = hdr->mhdr.num_buffers;
+	struct page *page = virt_to_head_page(buf);
+	int offset = buf - page_address(page);
+	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
+					       MERGE_BUFFER_LEN);
+	struct sk_buff *curr_skb = head_skb;
+
+	if (unlikely(!curr_skb))
+		goto err_skb;
 
-	num_buf = hdr->mhdr.num_buffers;
 	while (--num_buf) {
-		i = skb_shinfo(skb)->nr_frags;
-		if (i >= MAX_SKB_FRAGS) {
-			pr_debug("%s: packet too long\n", skb->dev->name);
-			skb->dev->stats.rx_length_errors++;
-			return -EINVAL;
+		int num_skb_frags;
+
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!buf)) {
+			pr_debug("%s: rx error: %d buffers out of %d missing\n",
+				 dev->name, num_buf, hdr->mhdr.num_buffers);
+			dev->stats.rx_length_errors++;
+			goto err_buf;
 		}
-		page = virtqueue_get_buf(rq->vq, &len);
-		if (!page) {
-			pr_debug("%s: rx error: %d buffers missing\n",
-				 skb->dev->name, hdr->mhdr.num_buffers);
-			skb->dev->stats.rx_length_errors++;
-			return -EINVAL;
+		if (unlikely(len > MERGE_BUFFER_LEN)) {
+			pr_debug("%s: rx error: merge buffer too long\n",
+				 dev->name);
+			len = MERGE_BUFFER_LEN;
 		}
 
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
+		page = virt_to_head_page(buf);
+		--rq->num;
+
+		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
+		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
+			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
+
+			if (unlikely(!nskb))
+				goto err_skb;
+			if (curr_skb == head_skb)
+				skb_shinfo(curr_skb)->frag_list = nskb;
+			else
+				curr_skb->next = nskb;
+			curr_skb = nskb;
+			head_skb->truesize += nskb->truesize;
+			num_skb_frags = 0;
+		}
+		if (curr_skb != head_skb) {
+			head_skb->data_len += len;
+			head_skb->len += len;
+			head_skb->truesize += MERGE_BUFFER_LEN;
+		}
+		offset = buf - page_address(page);
+		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
+			put_page(page);
+			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
+					     len, MERGE_BUFFER_LEN);
+		} else {
+			skb_add_rx_frag(curr_skb, num_skb_frags, page,
+					offset, len, MERGE_BUFFER_LEN);
+		}
+	}
 
-		set_skb_frag(skb, page, 0, &len);
+	return head_skb;
 
+err_skb:
+	put_page(page);
+	while (--num_buf) {
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!buf)) {
+			pr_debug("%s: rx error: %d buffers missing\n",
+				 dev->name, num_buf);
+			dev->stats.rx_length_errors++;
+			break;
+		}
+		page = virt_to_head_page(buf);
+		put_page(page);
 		--rq->num;
 	}
-	return 0;
+err_buf:
+	dev->stats.rx_dropped++;
+	dev_kfree_skb(head_skb);
+	return NULL;
 }
 
 static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
@@ -335,37 +421,29 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	struct net_device *dev = vi->dev;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	struct sk_buff *skb;
-	struct page *page;
 	struct skb_vnet_hdr *hdr;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs || vi->big_packets)
+		if (vi->mergeable_rx_bufs)
+			put_page(virt_to_head_page(buf));
+		else if (vi->big_packets)
 			give_pages(rq, buf);
 		else
 			dev_kfree_skb(buf);
 		return;
 	}
 
-	if (!vi->mergeable_rx_bufs && !vi->big_packets) {
-		skb = buf;
-		len -= sizeof(struct virtio_net_hdr);
-		skb_trim(skb, len);
-	} else {
-		page = buf;
-		skb = page_to_skb(rq, page, len);
-		if (unlikely(!skb)) {
-			dev->stats.rx_dropped++;
-			give_pages(rq, page);
-			return;
-		}
-		if (vi->mergeable_rx_bufs)
-			if (receive_mergeable(rq, skb)) {
-				dev_kfree_skb(skb);
-				return;
-			}
-	}
+	if (vi->mergeable_rx_bufs)
+		skb = receive_mergeable(dev, rq, buf, len);
+	else if (vi->big_packets)
+		skb = receive_big(dev, rq, buf, len);
+	else
+		skb = receive_small(buf, len);
+
+	if (unlikely(!skb))
+		return;
 
 	hdr = skb_vnet_hdr(skb);
 
@@ -435,11 +513,11 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 	struct skb_vnet_hdr *hdr;
 	int err;
 
-	skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
+	skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp);
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	skb_put(skb, MAX_PACKET_LEN);
+	skb_put(skb, GOOD_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
 	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
@@ -501,18 +579,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-	struct page *page;
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	char *buf = NULL;
 	int err;
 
-	page = get_a_page(rq, gfp);
-	if (!page)
+	if (gfp & __GFP_WAIT) {
+		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
+					 gfp)) {
+			buf = (char *)page_address(vi->alloc_frag.page) +
+			      vi->alloc_frag.offset;
+			get_page(vi->alloc_frag.page);
+			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
+		}
+	} else {
+		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
+	}
+	if (!buf)
 		return -ENOMEM;
 
-	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
-
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
+	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
 	if (err < 0)
-		give_pages(rq, page);
+		put_page(virt_to_head_page(buf));
 
 	return err;
 }
@@ -545,7 +633,8 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 	} while (rq->vq->num_free);
 	if (unlikely(rq->num > rq->max))
 		rq->max = rq->num;
-	virtqueue_kick(rq->vq);
+	if (unlikely(!virtqueue_kick(rq->vq)))
+		return false;
 	return !oom;
 }
 
@@ -751,7 +840,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	err = xmit_skb(sq, skb);
 
 	/* This should not happen! */
-	if (unlikely(err)) {
+	if (unlikely(err) || unlikely(!virtqueue_kick(sq->vq))) {
 		dev->stats.tx_fifo_errors++;
 		if (net_ratelimit())
 			dev_warn(&dev->dev,
@@ -760,7 +849,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
-	virtqueue_kick(sq->vq);
 
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
@@ -819,12 +907,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
 	       < 0);
 
-	virtqueue_kick(vi->cvq);
+	if (unlikely(!virtqueue_kick(vi->cvq)))
+		return status == VIRTIO_NET_OK;
 
 	/* Spin for a response, the kick causes an ioport write, trapping
 	 * into the hypervisor, so the request should be handled immediately.
 	 */
-	while (!virtqueue_get_buf(vi->cvq, &tmp))
+	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
+	       !virtqueue_is_broken(vi->cvq))
 		cpu_relax();
 
 	return status == VIRTIO_NET_OK;
@@ -852,8 +942,13 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 			return -EINVAL;
 		}
 	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
-		vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
-				  addr->sa_data, dev->addr_len);
+		unsigned int i;
+
+		/* Naturally, this has an atomicity problem. */
+		for (i = 0; i < dev->addr_len; i++)
+			virtio_cwrite8(vdev,
+				       offsetof(struct virtio_net_config, mac) +
+				       i, addr->sa_data[i]);
 	}
 
 	eth_commit_mac_addr_change(dev, p);
@@ -1029,7 +1124,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
 				  sg, NULL))
-		dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");
+		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
 	kfree(buf);
 }
@@ -1065,7 +1160,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 {
 	int i;
-	int cpu;
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1075,16 +1169,6 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 		vi->affinity_hint_set = false;
 	}
-
-	i = 0;
-	for_each_online_cpu(cpu) {
-		if (cpu == hcpu) {
-			*per_cpu_ptr(vi->vq_index, cpu) = -1;
-		} else {
-			*per_cpu_ptr(vi->vq_index, cpu) =
-				++i % vi->curr_queue_pairs;
-		}
-	}
 }
 
 static void virtnet_set_affinity(struct virtnet_info *vi)
@@ -1106,7 +1190,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 	for_each_online_cpu(cpu) {
 		virtqueue_set_affinity(vi->rq[i].vq, cpu);
 		virtqueue_set_affinity(vi->sq[i].vq, cpu);
-		*per_cpu_ptr(vi->vq_index, cpu) = i;
+		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
 		i++;
 	}
 
@@ -1118,11 +1202,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 {
 	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
 
-	mutex_lock(&vi->config_lock);
-
-	if (!vi->config_enable)
-		goto done;
-
 	switch(action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
@@ -1136,8 +1215,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 		break;
 	}
 
-done:
-	mutex_unlock(&vi->config_lock);
 	return NOTIFY_OK;
 }
 
@@ -1227,28 +1304,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/* To avoid contending a lock hold by a vcpu who would exit to host, select the
- * txq based on the processor id.
- */
-static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-	int txq;
-	struct virtnet_info *vi = netdev_priv(dev);
-
-	if (skb_rx_queue_recorded(skb)) {
-		txq = skb_get_rx_queue(skb);
-	} else {
-		txq = *__this_cpu_ptr(vi->vq_index);
-		if (txq == -1)
-			txq = 0;
-	}
-
-	while (unlikely(txq >= dev->real_num_tx_queues))
-		txq -= dev->real_num_tx_queues;
-
-	return txq;
-}
-
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
@@ -1260,7 +1315,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_get_stats64     = virtnet_stats,
 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
-	.ndo_select_queue    = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1276,9 +1330,8 @@ static void virtnet_config_changed_work(struct work_struct *work)
 	if (!vi->config_enable)
 		goto done;
 
-	if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
-			      offsetof(struct virtio_net_config, status),
-			      &v) < 0)
+	if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
+				 struct virtio_net_config, status, &v) < 0)
 		goto done;
 
 	if (v & VIRTIO_NET_S_ANNOUNCE) {
@@ -1314,6 +1367,11 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 
 static void virtnet_free_queues(struct virtnet_info *vi)
 {
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		netif_napi_del(&vi->rq[i].napi);
+
 	kfree(vi->rq);
 	kfree(vi->sq);
 }
@@ -1343,7 +1401,9 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs || vi->big_packets)
+			if (vi->mergeable_rx_bufs)
+				put_page(virt_to_head_page(buf));
+			else if (vi->big_packets)
 				give_pages(&vi->rq[i], buf);
 			else
 				dev_kfree_skb(buf);
@@ -1500,9 +1560,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	u16 max_queue_pairs;
 
 	/* Find if host supports multiqueue virtio_net device */
-	err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
-				offsetof(struct virtio_net_config,
-				max_virtqueue_pairs), &max_queue_pairs);
+	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
+				   struct virtio_net_config,
+				   max_virtqueue_pairs, &max_queue_pairs);
 
 	/* We need at least 2 queue's */
 	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
@@ -1554,9 +1614,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 	dev->vlan_features = dev->features;
 
 	/* Configuration may specify what MAC to use. Otherwise random. */
-	if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
-				  offsetof(struct virtio_net_config, mac),
-				  dev->dev_addr, dev->addr_len) < 0)
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
+		virtio_cread_bytes(vdev,
+				   offsetof(struct virtio_net_config, mac),
+				   dev->dev_addr, dev->addr_len);
+	else
 		eth_hw_addr_random(dev);
 
 	/* Set up our device-specific information */
@@ -1569,9 +1631,12 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->stats == NULL)
 		goto free;
 
-	vi->vq_index = alloc_percpu(int);
-	if (vi->vq_index == NULL)
-		goto free_stats;
+	for_each_possible_cpu(i) {
+		struct virtnet_stats *virtnet_stats;
+		virtnet_stats = per_cpu_ptr(vi->stats, i);
+		u64_stats_init(&virtnet_stats->tx_syncp);
+		u64_stats_init(&virtnet_stats->rx_syncp);
+	}
 
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
@@ -1599,10 +1664,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
 	err = init_vqs(vi);
 	if (err)
-		goto free_index;
+		goto free_stats;
 
-	netif_set_real_num_tx_queues(dev, 1);
-	netif_set_real_num_rx_queues(dev, 1);
+	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
+	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
 	err = register_netdev(dev);
 	if (err) {
@@ -1650,8 +1715,8 @@ free_recv_bufs:
 free_vqs:
 	cancel_delayed_work_sync(&vi->refill);
 	virtnet_del_vqs(vi);
-free_index:
-	free_percpu(vi->vq_index);
+	if (vi->alloc_frag.page)
+		put_page(vi->alloc_frag.page);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1685,20 +1750,23 @@ static void virtnet_remove(struct virtio_device *vdev)
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
+	if (vi->alloc_frag.page)
+		put_page(vi->alloc_frag.page);
 
 	flush_work(&vi->config_work);
 
-	free_percpu(vi->vq_index);
 	free_percpu(vi->stats);
 	free_netdev(vi->dev);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int virtnet_freeze(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 	int i;
 
+	unregister_hotcpu_notifier(&vi->nb);
+
 	/* Prevent config work handler from accessing the device */
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = false;
@@ -1747,6 +1815,10 @@ static int virtnet_restore(struct virtio_device *vdev)
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
 	rtnl_unlock();
 
+	err = register_hotcpu_notifier(&vi->nb);
+	if (err)
+		return err;
+
 	return 0;
 }
 #endif
@@ -1778,7 +1850,7 @@ static struct virtio_driver virtio_net_driver = {
 	.probe =	virtnet_probe,
 	.remove =	virtnet_remove,
 	.config_changed = virtnet_config_changed,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.freeze =	virtnet_freeze,
 	.restore =	virtnet_restore,
 #endif
