author	Venkatesh Srinivas <venkateshs@google.com>	2015-11-10 19:21:07 -0500
committer	Michael S. Tsirkin <mst@redhat.com>	2015-12-07 10:28:11 -0500
commit	f277ec42f38f406ed073569b24b04f1c291fec0f (patch)
tree	60d080a5b9ffec2ad61a8d692158935e2e1f948c
parent	82107539bbb9db303fb6676c78c836add5680bb0 (diff)
virtio_ring: shadow available ring flags & index
Improves cacheline transfer flow of the available ring header.

Virtqueues are implemented as a pair of rings, one producer->consumer
avail ring and one consumer->producer used ring; preceding the avail
ring in memory are two contiguous u16 fields -- avail->flags and
avail->idx. A producer posts work by writing to avail->idx and a
consumer reads avail->idx.

The flags and idx fields only need to be written by a producer CPU
and only read by a consumer CPU; when the producer and consumer are
running on different CPUs and the virtio_ring code is structured to
only have source writes/sink reads, the avail header cacheline can
transfer from core to core while remaining in the 'M' (modified)
state. This flow optimizes core -> core bandwidth on certain CPUs.

(see: "Software Optimization Guide for AMD Family 15h Processors",
Section 11.6; similar language appears in the 10h guide and should
apply to CPUs w/ exclusive caches, using LLC as a transfer cache)

Unfortunately, the existing virtio_ring code issued reads of
avail->idx and read-modify-writes of avail->flags on the producer.

This change shadows the flags and index fields in producer memory;
the vring code now reads from the shadows and only ever writes to
avail->flags and avail->idx, allowing the cacheline to transfer
core -> core optimally.

In a concurrent version of vring_bench, the time required for
10,000,000 buffer checkout/returns was reduced by ~2% (average
across many runs) on an AMD Piledriver (15h) CPU:

(w/o shadowing):
 Performance counter stats for './vring_bench':
   5,451,082,016      L1-dcache-loads
   ...
     2.221477739 seconds time elapsed

(w/ shadowing):
 Performance counter stats for './vring_bench':
   5,405,701,361      L1-dcache-loads
   ...
     2.168405376 seconds time elapsed

The further away (in a NUMA sense) virtio producers and consumers are
from each other, the more we expect to benefit. Physical implementations
of virtio devices and implementations of virtio where the consumer polls
vring avail indexes (vhost) should also benefit.

Signed-off-by: Venkatesh Srinivas <venkateshs@google.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
-rw-r--r--  drivers/virtio/virtio_ring.c  46
1 file changed, 34 insertions, 12 deletions
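The access-pattern change at the heart of the patch can be sketched as
standalone C before reading the diff itself. This is a minimal,
single-threaded model, not the kernel code: the avail_hdr layout, the
256-entry ring, the publish_* helpers, and idx_shadow are invented for
illustration, and byte-order conversion (cpu_to_virtio16) and memory
barriers are deliberately omitted.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the avail-ring header shared between producer and
 * consumer cores; illustrative only, not struct vring_avail. */
struct avail_hdr {
	uint16_t flags;
	uint16_t idx;
	uint16_t ring[256];
};

/* Old pattern: the producer reads and read-modify-writes the shared
 * cacheline, so its ownership ping-pongs with the consumer's reads. */
static void publish_rmw(struct avail_hdr *a, uint16_t head)
{
	a->ring[a->idx & 255] = head;	/* read of a->idx touches shared line */
	a->idx++;			/* read-modify-write of shared line */
}

/* New pattern: the producer keeps a private shadow of idx and only
 * ever writes the shared line, letting it stay in 'M' state as it
 * moves core -> core. */
static uint16_t idx_shadow;		/* lives on a producer-private cacheline */

static void publish_shadowed(struct avail_hdr *a, uint16_t head)
{
	a->ring[idx_shadow & 255] = head;	/* write-only access */
	idx_shadow++;				/* private update */
	a->idx = idx_shadow;			/* write-only access */
}

int main(void)
{
	struct avail_hdr a = { 0 };

	publish_rmw(&a, 1);		/* old pattern: a.idx becomes 1 */
	idx_shadow = a.idx;		/* sync the shadow before switching */
	publish_shadowed(&a, 2);	/* new pattern: a.idx becomes 2 */
	printf("avail->idx = %d\n", a.idx);
	return 0;
}

In the driver itself the shadows live in struct vring_virtqueue next to
last_used_idx, and every store to the shared header still goes through
cpu_to_virtio16() with the existing barriers, as the diff below shows.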
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index abdb341887f5..ee663c458b20 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -80,6 +80,12 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
+	/* Last written value to avail->flags */
+	u16 avail_flags_shadow;
+
+	/* Last written value to avail->idx in guest byte order */
+	u16 avail_idx_shadow;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -235,13 +241,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1);
+	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
 	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb(vq->weak_barriers);
-	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1);
+	vq->avail_idx_shadow++;
+	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 	vq->num_added++;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -354,8 +361,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
 	 * event. */
 	virtio_mb(vq->weak_barriers);
 
-	old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added;
-	new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx);
+	old = vq->avail_idx_shadow - vq->num_added;
+	new = vq->avail_idx_shadow;
 	vq->num_added = 0;
 
 #ifdef DEBUG
@@ -510,7 +517,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
 	 * the read in the next get_buf call. */
-	if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) {
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
 		virtio_mb(vq->weak_barriers);
 	}
@@ -537,7 +544,11 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -565,7 +576,10 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
 	END_USE(vq);
 	return last_used_idx;
@@ -633,9 +647,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	/* TODO: tune this threshold */
-	bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4;
+	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs);
 	virtio_mb(vq->weak_barriers);
 	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
@@ -670,7 +687,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->data[i];
 		detach_buf(vq, i);
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
+		vq->avail_idx_shadow--;
+		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 		END_USE(vq);
 		return buf;
 	}
@@ -735,6 +753,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
+	vq->avail_flags_shadow = 0;
+	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
@@ -746,8 +766,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback? Tell other side not to bother us. */
-	if (!callback)
-		vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!callback) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+	}
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;