From c13f7e1a94007c4381814e7daf033e3e8f0663f3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 29 Oct 2010 23:32:01 +0200 Subject: drbd: don't recvmsg with zero length This should fix a performance degradation we observed recently. If we don't expect any subheader, we should not call into the tcp stack, as that may add considerable latency if there is no data available at this point. For a synthetic synchronous write load with single outstanding writes, this additional latency when processing the "unplug remote" packet added up to a performance degradation factor >= 10. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 89d8a7cc4054..24487d4fb202 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3627,17 +3627,19 @@ static void drbdd(struct drbd_conf *mdev) } shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); - rv = drbd_recv(mdev, &header->h80.payload, shs); - if (unlikely(rv != shs)) { - dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); - goto err_out; - } - if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); goto err_out; } + if (shs) { + rv = drbd_recv(mdev, &header->h80.payload, shs); + if (unlikely(rv != shs)) { + dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); + goto err_out; + } + } + rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); if (unlikely(!rv)) { -- cgit v1.2.2 From a115413de13ae6beb0cbfc198afe385a261ab284 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sat, 13 Nov 2010 20:42:29 +0100 Subject: drbd: fix for spin_lock_irqsave in endio callback In commit 9b7f76dc37919ea36caa9680a3f765e5b19b25fb, Author: Lars Ellenberg Date: Wed Aug 11 23:40:24 2010 +0200 drbd: new configuration parameter c-min-rate a bad chunk slipped through, which is now reverted as well, restoring the correct irqsave for the endio callback. This patch also add comments at both req_mod() and in the endio callback so it should not happen again. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.h | 3 ++- drivers/block/drbd/drbd_worker.c | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 181ea0364822..ab2bd09d54b4 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -339,7 +339,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) } /* completion of master bio is outside of spinlock. - * If you need it irqsave, do it your self! */ + * If you need it irqsave, do it your self! + * Which means: don't use from bio endio callback. */ static inline int req_mod(struct drbd_request *req, enum drbd_req_event what) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 47d223c2409c..34f224b018b3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -193,8 +193,10 @@ void drbd_endio_sec(struct bio *bio, int error) */ void drbd_endio_pri(struct bio *bio, int error) { + unsigned long flags; struct drbd_request *req = bio->bi_private; struct drbd_conf *mdev = req->mdev; + struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); @@ -220,7 +222,13 @@ void drbd_endio_pri(struct bio *bio, int error) bio_put(req->private_bio); req->private_bio = ERR_PTR(error); - req_mod(req, what); + /* not req_mod(), we need irqsave here! */ + spin_lock_irqsave(&mdev->req_lock, flags); + __req_mod(req, what, &m); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (m.bio) + complete_master_bio(mdev, &m); } int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -- cgit v1.2.2 From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Dec 2010 19:41:49 +0100 Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead When stacking devices, a request_queue is not always available. This forced us to have a no_cluster flag in the queue_limits that could be used as a carrier until the request_queue had been set up for a metadevice. There were several problems with that approach. First of all it was up to the stacking device to remember to set queue flag after stacking had completed. Also, the queue flag and the queue limits had to be kept in sync at all times. We got that wrong, which could lead to us issuing commands that went beyond the max scatterlist limit set by the driver. The proper fix is to avoid having two flags for tracking the same thing. We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the block layer merging functions. The queue_limit 'no_cluster' is turned into 'cluster' to avoid double negatives and to ease stacking. Clustering defaults to being enabled as before. The queue flag logic is removed from the stacking function, and explicitly setting the cluster flag is no longer necessary in DM and MD. Reported-by: Ed Lin Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 5 ----- drivers/md/md.c | 3 --- drivers/scsi/scsi_lib.c | 3 +-- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64ee..e2da1912a2cb 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ q->limits = *limits; - if (limits->no_cluster) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); - if (!dm_table_supports_discards(t)) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); else diff --git a/drivers/md/md.c b/drivers/md/md.c index 84c46a161927..52694d29663d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name) goto abort; mddev->queue->queuedata = mddev; - /* Can be unlocked because the queue is new: no concurrency */ - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); - blk_queue_make_request(mddev->queue, md_make_request); disk = alloc_disk(1 << shift); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eafeeda6e194..9d7ba07dc5ef 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); - /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); + q->limits.cluster = 0; /* * set a reasonable default alignment on word boundaries: the -- cgit v1.2.2 From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 17 Dec 2010 08:34:20 +0100 Subject: block: max hardware sectors limit wrapper Implement blk_limits_max_hw_sectors() and make blk_queue_max_hw_sectors() a wrapper around it. DM needs this to avoid setting queue_limits' max_hw_sectors and max_sectors directly. dm_set_device_limits() now leverages blk_limits_max_hw_sectors() logic to establish the appropriate max_hw_sectors minimum (PAGE_SIZE). Fixes issue where DM was incorrectly setting max_sectors rather than max_hw_sectors (which caused dm_merge_bvec()'s max_hw_sectors check to be ineffective). Signed-off-by: Mike Snitzer Cc: stable@kernel.org Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e2da1912a2cb..4d705cea0f8c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -517,9 +517,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, */ if (q->merge_bvec_fn && !ti->type->merge) - limits->max_sectors = - min_not_zero(limits->max_sectors, - (unsigned int) (PAGE_SIZE >> 9)); + blk_limits_max_hw_sectors(limits, + (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); -- cgit v1.2.2 From 0fc13c8995cd96f4123de400c71c223d80400ed9 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Fri, 17 Dec 2010 09:01:37 +0100 Subject: cciss: fix cciss_revalidate panic If you delete a logical drive, and then run BLKRRPART (e.g. via fdisk) on a logical drive which is "after" the deleted logical drive in the h->drv[] array, then cciss_revalidate panics because it will access the null pointer h->drv[x] when x hits the deleted drive. Signed-off-by: Stephen M. Cameron Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index f291587d753e..233e06c29ff4 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2834,6 +2834,8 @@ static int cciss_revalidate(struct gendisk *disk) InquiryData_struct *inq_buff = NULL; for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) { + if (!h->drv[logvol]) + continue if (memcmp(h->drv[logvol]->LunID, drv->LunID, sizeof(drv->LunID)) == 0) { FOUND = 1; -- cgit v1.2.2