 drivers/net/virtio_net.c | 172
 1 file changed, 102 insertions(+), 70 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 08327e005ccc..5deeda61d6d3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -333,9 +333,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 static void virtnet_xdp_xmit(struct virtnet_info *vi,
                              struct receive_queue *rq,
                              struct send_queue *sq,
-                             struct xdp_buff *xdp)
+                             struct xdp_buff *xdp,
+                             void *data)
 {
-        struct page *page = virt_to_head_page(xdp->data);
         struct virtio_net_hdr_mrg_rxbuf *hdr;
         unsigned int num_sg, len;
         void *xdp_sent;
@@ -343,32 +343,46 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 
         /* Free up any pending old buffers before queueing new ones. */
         while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-                struct page *sent_page = virt_to_head_page(xdp_sent);
+                if (vi->mergeable_rx_bufs) {
+                        struct page *sent_page = virt_to_head_page(xdp_sent);
 
-                if (vi->mergeable_rx_bufs)
                         put_page(sent_page);
-                else
-                        give_pages(rq, sent_page);
+                } else { /* small buffer */
+                        struct sk_buff *skb = xdp_sent;
+
+                        kfree_skb(skb);
+                }
         }
 
-        /* Zero header and leave csum up to XDP layers */
-        hdr = xdp->data;
-        memset(hdr, 0, vi->hdr_len);
+        if (vi->mergeable_rx_bufs) {
+                /* Zero header and leave csum up to XDP layers */
+                hdr = xdp->data;
+                memset(hdr, 0, vi->hdr_len);
+
+                num_sg = 1;
+                sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+        } else { /* small buffer */
+                struct sk_buff *skb = data;
+
+                /* Zero header and leave csum up to XDP layers */
+                hdr = skb_vnet_hdr(skb);
+                memset(hdr, 0, vi->hdr_len);
 
-        num_sg = 1;
-        sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+                num_sg = 2;
+                sg_init_table(sq->sg, 2);
+                sg_set_buf(sq->sg, hdr, vi->hdr_len);
+                skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+        }
         err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
-                                   xdp->data, GFP_ATOMIC);
+                                   data, GFP_ATOMIC);
         if (unlikely(err)) {
-                if (vi->mergeable_rx_bufs)
+                if (vi->mergeable_rx_bufs) {
+                        struct page *page = virt_to_head_page(xdp->data);
+
                         put_page(page);
-                else
-                        give_pages(rq, page);
+                } else /* small buffer */
+                        kfree_skb(data);
                 return; // On error abort to avoid unnecessary kick
-        } else if (!vi->mergeable_rx_bufs) {
-                /* If not mergeable bufs must be big packets so cleanup pages */
-                give_pages(rq, (struct page *)page->private);
-                page->private = 0;
         }
 
         virtqueue_kick(sq->vq);
@@ -377,23 +391,26 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
 static u32 do_xdp_prog(struct virtnet_info *vi,
                        struct receive_queue *rq,
                        struct bpf_prog *xdp_prog,
-                       struct page *page, int offset, int len)
+                       void *data, int len)
 {
         int hdr_padded_len;
         struct xdp_buff xdp;
+        void *buf;
         unsigned int qp;
         u32 act;
-        u8 *buf;
-
-        buf = page_address(page) + offset;
 
-        if (vi->mergeable_rx_bufs)
+        if (vi->mergeable_rx_bufs) {
                 hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-        else
-                hdr_padded_len = sizeof(struct padded_vnet_hdr);
+                xdp.data = data + hdr_padded_len;
+                xdp.data_end = xdp.data + (len - vi->hdr_len);
+                buf = data;
+        } else { /* small buffers */
+                struct sk_buff *skb = data;
 
-        xdp.data = buf + hdr_padded_len;
-        xdp.data_end = xdp.data + (len - vi->hdr_len);
+                xdp.data = skb->data;
+                xdp.data_end = xdp.data + len;
+                buf = skb->data;
+        }
 
         act = bpf_prog_run_xdp(xdp_prog, &xdp);
         switch (act) {
@@ -403,8 +420,8 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
                 qp = vi->curr_queue_pairs -
                         vi->xdp_queue_pairs +
                         smp_processor_id();
-                xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
-                virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+                xdp.data = buf;
+                virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data);
                 return XDP_TX;
         default:
                 bpf_warn_invalid_xdp_action(act);
@@ -414,26 +431,17 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
         }
 }
 
-static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
+static struct sk_buff *receive_small(struct net_device *dev,
+                                     struct virtnet_info *vi,
+                                     struct receive_queue *rq,
+                                     void *buf, unsigned int len)
 {
         struct sk_buff * skb = buf;
+        struct bpf_prog *xdp_prog;
 
         len -= vi->hdr_len;
         skb_trim(skb, len);
 
-        return skb;
-}
-
-static struct sk_buff *receive_big(struct net_device *dev,
-                                   struct virtnet_info *vi,
-                                   struct receive_queue *rq,
-                                   void *buf,
-                                   unsigned int len)
-{
-        struct bpf_prog *xdp_prog;
-        struct page *page = buf;
-        struct sk_buff *skb;
-
         rcu_read_lock();
         xdp_prog = rcu_dereference(rq->xdp_prog);
         if (xdp_prog) {
@@ -442,7 +450,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 
                 if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                         goto err_xdp;
-                act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+                act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
                 switch (act) {
                 case XDP_PASS:
                         break;
@@ -456,18 +464,33 @@ static struct sk_buff *receive_big(struct net_device *dev,
         }
         rcu_read_unlock();
 
-        skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+        return skb;
+
+err_xdp:
+        rcu_read_unlock();
+        dev->stats.rx_dropped++;
+        kfree_skb(skb);
+xdp_xmit:
+        return NULL;
+}
+
+static struct sk_buff *receive_big(struct net_device *dev,
+                                   struct virtnet_info *vi,
+                                   struct receive_queue *rq,
+                                   void *buf,
+                                   unsigned int len)
+{
+        struct page *page = buf;
+        struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+
         if (unlikely(!skb))
                 goto err;
 
         return skb;
 
-err_xdp:
-        rcu_read_unlock();
 err:
         dev->stats.rx_dropped++;
         give_pages(rq, page);
-xdp_xmit:
         return NULL;
 }
 
@@ -483,7 +506,7 @@ xdp_xmit:
  * anymore.
  */
 static struct page *xdp_linearize_page(struct receive_queue *rq,
-                                       u16 num_buf,
+                                       u16 *num_buf,
                                        struct page *p,
                                        int offset,
                                        unsigned int *len)
@@ -497,7 +520,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
         memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
         page_off += *len;
 
-        while (--num_buf) {
+        while (--*num_buf) {
                 unsigned int buflen;
                 unsigned long ctx;
                 void *buf;
@@ -507,19 +530,22 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                 if (unlikely(!ctx))
                         goto err_buf;
 
+                buf = mergeable_ctx_to_buf_address(ctx);
+                p = virt_to_head_page(buf);
+                off = buf - page_address(p);
+
                 /* guard against a misconfigured or uncooperative backend that
                  * is sending packet larger than the MTU.
                  */
-                if ((page_off + buflen) > PAGE_SIZE)
+                if ((page_off + buflen) > PAGE_SIZE) {
+                        put_page(p);
                         goto err_buf;
-
-                buf = mergeable_ctx_to_buf_address(ctx);
-                p = virt_to_head_page(buf);
-                off = buf - page_address(p);
+                }
 
                 memcpy(page_address(page) + page_off,
                        page_address(p) + off, buflen);
                 page_off += buflen;
+                put_page(p);
         }
 
         *len = page_off;
@@ -552,16 +578,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 struct page *xdp_page;
                 u32 act;
 
-                /* No known backend devices should send packets with
-                 * more than a single buffer when XDP conditions are
-                 * met. However it is not strictly illegal so the case
-                 * is handled as an exception and a warning is thrown.
-                 */
+                /* This happens when rx buffer size is underestimated */
                 if (unlikely(num_buf > 1)) {
-                        bpf_warn_invalid_xdp_buffer();
-
                         /* linearize data for XDP */
-                        xdp_page = xdp_linearize_page(rq, num_buf,
+                        xdp_page = xdp_linearize_page(rq, &num_buf,
                                                       page, offset, &len);
                         if (!xdp_page)
                                 goto err_xdp;
@@ -575,16 +595,25 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                  * the receive path after XDP is loaded. In practice I
                  * was not able to create this condition.
                  */
-                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+                if (unlikely(hdr->hdr.gso_type))
                         goto err_xdp;
 
-                act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+                act = do_xdp_prog(vi, rq, xdp_prog,
+                                  page_address(xdp_page) + offset, len);
                 switch (act) {
                 case XDP_PASS:
-                        if (unlikely(xdp_page != page))
-                                __free_pages(xdp_page, 0);
+                        /* We can only create skb based on xdp_page. */
+                        if (unlikely(xdp_page != page)) {
+                                rcu_read_unlock();
+                                put_page(page);
+                                head_skb = page_to_skb(vi, rq, xdp_page,
+                                                       0, len, PAGE_SIZE);
+                                ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+                                return head_skb;
+                        }
                         break;
                 case XDP_TX:
+                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                         if (unlikely(xdp_page != page))
                                 goto err_xdp;
                         rcu_read_unlock();
@@ -593,6 +622,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 default:
                         if (unlikely(xdp_page != page))
                                 __free_pages(xdp_page, 0);
+                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                         goto err_xdp;
                 }
         }
@@ -704,7 +734,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
         else if (vi->big_packets)
                 skb = receive_big(dev, vi, rq, buf, len);
         else
-                skb = receive_small(vi, buf, len);
+                skb = receive_small(dev, vi, rq, buf, len);
 
         if (unlikely(!skb))
                 return;
@@ -1678,7 +1708,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
         int i, err;
 
         if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
-            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
                 netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
                 return -EOPNOTSUPP;
         }
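
For context, the XDP_TX action that the reworked virtnet_xdp_xmit() path above services is produced by an XDP program loaded in the guest. The following minimal sketch (not part of this patch) simply bounces every frame back out the receiving virtio-net interface; the file name, interface name and build/load commands are placeholders and assume clang with the BPF target plus libbpf's bpf_helpers.h.

/*
 * xdp_tx_all.c - hypothetical example: return XDP_TX for every frame,
 * exercising the small-buffer transmit path added by this patch.
 *
 * Build and attach (names are placeholders):
 *   clang -O2 -target bpf -c xdp_tx_all.c -o xdp_tx_all.o
 *   ip link set dev eth0 xdp obj xdp_tx_all.o sec xdp
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_tx_all(struct xdp_md *ctx)
{
        /* Retransmit the frame on the interface it arrived on. */
        return XDP_TX;
}

char _license[] SEC("license") = "GPL";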