From c13f7e1a94007c4381814e7daf033e3e8f0663f3 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Fri, 29 Oct 2010 23:32:01 +0200
Subject: drbd: don't recvmsg with zero length

This should fix a performance degradation we observed recently.

If we don't expect any subheader, we should not call into the tcp stack,
as that may add considerable latency if there is no data available at
this point.

For a synthetic synchronous write load with single outstanding writes,
this additional latency when processing the "unplug remote" packet
added up to a performance degradation factor >= 10.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 89d8a7cc4054..24487d4fb202 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3627,17 +3627,19 @@ static void drbdd(struct drbd_conf *mdev)
 		}
 
 		shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
-		rv = drbd_recv(mdev, &header->h80.payload, shs);
-		if (unlikely(rv != shs)) {
-			dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
-			goto err_out;
-		}
-
 		if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
 			dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
 			goto err_out;
 		}
 
+		if (shs) {
+			rv = drbd_recv(mdev, &header->h80.payload, shs);
+			if (unlikely(rv != shs)) {
+				dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+				goto err_out;
+			}
+		}
+
 		rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
 
 		if (unlikely(!rv)) {
-- 
cgit v1.2.2


From a115413de13ae6beb0cbfc198afe385a261ab284 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Sat, 13 Nov 2010 20:42:29 +0100
Subject: drbd: fix for spin_lock_irqsave in endio callback

In commit 9b7f76dc37919ea36caa9680a3f765e5b19b25fb,
 Author: Lars Ellenberg <lars.ellenberg@linbit.com>
 Date:   Wed Aug 11 23:40:24 2010 +0200

    drbd: new configuration parameter c-min-rate

a bad chunk slipped through, which is now reverted as well,
restoring the correct irqsave for the endio callback.

This patch also add comments at both req_mod()
and in the endio callback so it should not happen again.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_req.h    |  3 ++-
 drivers/block/drbd/drbd_worker.c | 10 +++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 181ea0364822..ab2bd09d54b4 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -339,7 +339,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
 }
 
 /* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self! */
+ * If you need it irqsave, do it your self!
+ * Which means: don't use from bio endio callback. */
 static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 47d223c2409c..34f224b018b3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -193,8 +193,10 @@ void drbd_endio_sec(struct bio *bio, int error)
  */
 void drbd_endio_pri(struct bio *bio, int error)
 {
+	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_conf *mdev = req->mdev;
+	struct bio_and_error m;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
@@ -220,7 +222,13 @@ void drbd_endio_pri(struct bio *bio, int error)
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 
-	req_mod(req, what);
+	/* not req_mod(), we need irqsave here! */
+	spin_lock_irqsave(&mdev->req_lock, flags);
+	__req_mod(req, what, &m);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
 }
 
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-- 
cgit v1.2.2


From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 1 Dec 2010 19:41:49 +0100
Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead

When stacking devices, a request_queue is not always available. This
forced us to have a no_cluster flag in the queue_limits that could be
used as a carrier until the request_queue had been set up for a
metadevice.

There were several problems with that approach. First of all it was up
to the stacking device to remember to set queue flag after stacking had
completed. Also, the queue flag and the queue limits had to be kept in
sync at all times. We got that wrong, which could lead to us issuing
commands that went beyond the max scatterlist limit set by the driver.

The proper fix is to avoid having two flags for tracking the same thing.
We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the
block layer merging functions. The queue_limit 'no_cluster' is turned
into 'cluster' to avoid double negatives and to ease stacking.
Clustering defaults to being enabled as before. The queue flag logic is
removed from the stacking function, and explicitly setting the cluster
flag is no longer necessary in DM and MD.

Reported-by: Ed Lin <ed.lin@promise.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 drivers/md/dm-table.c   | 5 -----
 drivers/md/md.c         | 3 ---
 drivers/scsi/scsi_lib.c | 3 +--
 3 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 90267f8d64ee..e2da1912a2cb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	 */
 	q->limits = *limits;
 
-	if (limits->no_cluster)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
-
 	if (!dm_table_supports_discards(t))
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 	else
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84c46a161927..52694d29663d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name)
 		goto abort;
 	mddev->queue->queuedata = mddev;
 
-	/* Can be unlocked because the queue is new: no concurrency */
-	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
 	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eafeeda6e194..9d7ba07dc5ef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
-	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+		q->limits.cluster = 0;
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
-- 
cgit v1.2.2


From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 17 Dec 2010 08:34:20 +0100
Subject: block: max hardware sectors limit wrapper

Implement blk_limits_max_hw_sectors() and make
blk_queue_max_hw_sectors() a wrapper around it.

DM needs this to avoid setting queue_limits' max_hw_sectors and
max_sectors directly.  dm_set_device_limits() now leverages
blk_limits_max_hw_sectors() logic to establish the appropriate
max_hw_sectors minimum (PAGE_SIZE).  Fixes issue where DM was
incorrectly setting max_sectors rather than max_hw_sectors (which
caused dm_merge_bvec()'s max_hw_sectors check to be ineffective).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 drivers/md/dm-table.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e2da1912a2cb..4d705cea0f8c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -517,9 +517,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		limits->max_sectors =
-			min_not_zero(limits->max_sectors,
-				     (unsigned int) (PAGE_SIZE >> 9));
+		blk_limits_max_hw_sectors(limits,
+					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
-- 
cgit v1.2.2


From 0fc13c8995cd96f4123de400c71c223d80400ed9 Mon Sep 17 00:00:00 2001
From: "Stephen M. Cameron" <scameron@beardog.cce.hp.com>
Date: Fri, 17 Dec 2010 09:01:37 +0100
Subject: cciss: fix cciss_revalidate panic

If you delete a logical drive, and then run BLKRRPART (e.g. via fdisk)
on a logical drive which is "after" the deleted logical drive in the h->drv[]
array, then cciss_revalidate panics because it will access the null pointer
h->drv[x] when x hits the deleted drive.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 drivers/block/cciss.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index f291587d753e..233e06c29ff4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2834,6 +2834,8 @@ static int cciss_revalidate(struct gendisk *disk)
 	InquiryData_struct *inq_buff = NULL;
 
 	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
+		if (!h->drv[logvol])
+			continue
 		if (memcmp(h->drv[logvol]->LunID, drv->LunID,
 			sizeof(drv->LunID)) == 0) {
 			FOUND = 1;
-- 
cgit v1.2.2