diff options
author | Ming Lei <ming.lei@canonical.com> | 2014-07-06 10:39:26 -0400 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2014-07-25 17:17:00 -0400 |
commit | 938ece711c5b1ba4fa8e3b9fc8cc03843ae82a5b (patch) | |
tree | b0e463b8a15699a1eb68f92baf3ed52200f255aa /drivers/scsi | |
parent | 0758f4f732b08b6ef07f2e5f735655cf69fea477 (diff) |
virtio-scsi: replace target spinlock with seqcount
The spinlock tgt_lock is only for serializing reads and writes of
req_vq; one lockless seqcount is enough for that purpose.
On one 16-core VM with a vhost-scsi backend, this patch can improve
IOPS by 3% on a random read test.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
[Add initialization in virtscsi_target_alloc. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/scsi')
-rw-r--r-- | drivers/scsi/virtio_scsi.c | 42 |
1 file changed, 29 insertions, 13 deletions
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 308256b5e4cb..cdce502c3c46 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <scsi/scsi_host.h> | 27 | #include <scsi/scsi_host.h> |
28 | #include <scsi/scsi_device.h> | 28 | #include <scsi/scsi_device.h> |
29 | #include <scsi/scsi_cmnd.h> | 29 | #include <scsi/scsi_cmnd.h> |
30 | #include <linux/seqlock.h> | ||
30 | 31 | ||
31 | #define VIRTIO_SCSI_MEMPOOL_SZ 64 | 32 | #define VIRTIO_SCSI_MEMPOOL_SZ 64 |
32 | #define VIRTIO_SCSI_EVENT_LEN 8 | 33 | #define VIRTIO_SCSI_EVENT_LEN 8 |
@@ -75,18 +76,16 @@ struct virtio_scsi_vq { | |||
75 | * queue, and also lets the driver optimize the IRQ affinity for the virtqueues | 76 | * queue, and also lets the driver optimize the IRQ affinity for the virtqueues |
76 | * (each virtqueue's affinity is set to the CPU that "owns" the queue). | 77 | * (each virtqueue's affinity is set to the CPU that "owns" the queue). |
77 | * | 78 | * |
78 | * tgt_lock is held to serialize reading and writing req_vq. Reading req_vq | 79 | * tgt_seq is held to serialize reading and writing req_vq. |
79 | * could be done locklessly, but we do not do it yet. | ||
80 | * | 80 | * |
81 | * Decrements of reqs are never concurrent with writes of req_vq: before the | 81 | * Decrements of reqs are never concurrent with writes of req_vq: before the |
82 | * decrement reqs will be != 0; after the decrement the virtqueue completion | 82 | * decrement reqs will be != 0; after the decrement the virtqueue completion |
83 | * routine will not use the req_vq so it can be changed by a new request. | 83 | * routine will not use the req_vq so it can be changed by a new request. |
84 | * Thus they can happen outside the tgt_lock, provided of course we make reqs | 84 | * Thus they can happen outside the tgt_seq, provided of course we make reqs |
85 | * an atomic_t. | 85 | * an atomic_t. |
86 | */ | 86 | */ |
87 | struct virtio_scsi_target_state { | 87 | struct virtio_scsi_target_state { |
88 | /* This spinlock never held at the same time as vq_lock. */ | 88 | seqcount_t tgt_seq; |
89 | spinlock_t tgt_lock; | ||
90 | 89 | ||
91 | /* Count of outstanding requests. */ | 90 | /* Count of outstanding requests. */ |
92 | atomic_t reqs; | 91 | atomic_t reqs; |
@@ -559,19 +558,33 @@ static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi, | |||
559 | unsigned long flags; | 558 | unsigned long flags; |
560 | u32 queue_num; | 559 | u32 queue_num; |
561 | 560 | ||
562 | spin_lock_irqsave(&tgt->tgt_lock, flags); | 561 | local_irq_save(flags); |
562 | if (atomic_inc_return(&tgt->reqs) > 1) { | ||
563 | unsigned long seq; | ||
564 | |||
565 | do { | ||
566 | seq = read_seqcount_begin(&tgt->tgt_seq); | ||
567 | vq = tgt->req_vq; | ||
568 | } while (read_seqcount_retry(&tgt->tgt_seq, seq)); | ||
569 | } else { | ||
570 | /* no writes can be concurrent because of atomic_t */ | ||
571 | write_seqcount_begin(&tgt->tgt_seq); | ||
572 | |||
573 | /* keep previous req_vq if a reader just arrived */ | ||
574 | if (unlikely(atomic_read(&tgt->reqs) > 1)) { | ||
575 | vq = tgt->req_vq; | ||
576 | goto unlock; | ||
577 | } | ||
563 | 578 | ||
564 | if (atomic_inc_return(&tgt->reqs) > 1) | ||
565 | vq = tgt->req_vq; | ||
566 | else { | ||
567 | queue_num = smp_processor_id(); | 579 | queue_num = smp_processor_id(); |
568 | while (unlikely(queue_num >= vscsi->num_queues)) | 580 | while (unlikely(queue_num >= vscsi->num_queues)) |
569 | queue_num -= vscsi->num_queues; | 581 | queue_num -= vscsi->num_queues; |
570 | |||
571 | tgt->req_vq = vq = &vscsi->req_vqs[queue_num]; | 582 | tgt->req_vq = vq = &vscsi->req_vqs[queue_num]; |
583 | unlock: | ||
584 | write_seqcount_end(&tgt->tgt_seq); | ||
572 | } | 585 | } |
586 | local_irq_restore(flags); | ||
573 | 587 | ||
574 | spin_unlock_irqrestore(&tgt->tgt_lock, flags); | ||
575 | return vq; | 588 | return vq; |
576 | } | 589 | } |
577 | 590 | ||
@@ -667,14 +680,17 @@ static int virtscsi_abort(struct scsi_cmnd *sc) | |||
667 | 680 | ||
668 | static int virtscsi_target_alloc(struct scsi_target *starget) | 681 | static int virtscsi_target_alloc(struct scsi_target *starget) |
669 | { | 682 | { |
683 | struct Scsi_Host *sh = dev_to_shost(starget->dev.parent); | ||
684 | struct virtio_scsi *vscsi = shost_priv(sh); | ||
685 | |||
670 | struct virtio_scsi_target_state *tgt = | 686 | struct virtio_scsi_target_state *tgt = |
671 | kmalloc(sizeof(*tgt), GFP_KERNEL); | 687 | kmalloc(sizeof(*tgt), GFP_KERNEL); |
672 | if (!tgt) | 688 | if (!tgt) |
673 | return -ENOMEM; | 689 | return -ENOMEM; |
674 | 690 | ||
675 | spin_lock_init(&tgt->tgt_lock); | 691 | seqcount_init(&tgt->tgt_seq); |
676 | atomic_set(&tgt->reqs, 0); | 692 | atomic_set(&tgt->reqs, 0); |
677 | tgt->req_vq = NULL; | 693 | tgt->req_vq = &vscsi->req_vqs[0]; |
678 | 694 | ||
679 | starget->hostdata = tgt; | 695 | starget->hostdata = tgt; |
680 | return 0; | 696 | return 0; |