author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-08 14:51:05 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-08 14:51:05 -0400
commit     ebb37277796269da36a8bc5d72ed1e8e1fb7d34b
tree       0ded627a62a5cec70b18d12825dd858855c135d3   /drivers/block/drbd/drbd_req.c
parent     4de13d7aa8f4d02f4dc99d4609575659f92b3c5a
parent     f50efd2fdbd9b35b11f5778ed85beb764184bda9
Merge branch 'for-3.10/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"It might look big in volume, but when categorized, not a lot of
drivers are touched. The pull request contains:
- mtip32xx fixes from Micron.
- A slew of drbd updates, this time in a nicer series.
- bcache, a flash/ssd caching framework from Kent.
- Fixes for cciss"
* 'for-3.10/drivers' of git://git.kernel.dk/linux-block: (66 commits)
bcache: Use bd_link_disk_holder()
bcache: Allocator cleanup/fixes
cciss: bug fix to prevent cciss from loading in kdump crash kernel
cciss: add cciss_allow_hpsa module parameter
drivers/block/mg_disk.c: add CONFIG_PM_SLEEP to suspend/resume functions
mtip32xx: Workaround for unaligned writes
bcache: Make sure blocksize isn't smaller than device blocksize
bcache: Fix merge_bvec_fn usage for when it modifies the bvm
bcache: Correctly check against BIO_MAX_PAGES
bcache: Hack around stuff that clones up to bi_max_vecs
bcache: Set ra_pages based on backing device's ra_pages
bcache: Take data offset from the bdev superblock.
mtip32xx: mtip32xx: Disable TRIM support
mtip32xx: fix a smatch warning
bcache: Disable broken btree fuzz tester
bcache: Fix a format string overflow
bcache: Fix a minor memory leak on device teardown
bcache: Documentation updates
bcache: Use WARN_ONCE() instead of __WARN()
bcache: Add missing #include <linux/prefetch.h>
...
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r--  drivers/block/drbd/drbd_req.c  192
1 file changed, 161 insertions(+), 31 deletions(-)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 2b8303ad63c9..c24379ffd4e3 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -34,14 +34,14 @@
 static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size);
 
 /* Update disk stats at start of I/O request */
-static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	const int rw = bio_data_dir(bio);
+	const int rw = bio_data_dir(req->master_bio);
 	int cpu;
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &mdev->vdisk->part0);
 	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
-	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9);
 	(void) cpu; /* The macro invocations above want the cpu argument, I do not like
 		     the compiler warning about cpu only assigned but never used... */
 	part_inc_in_flight(&mdev->vdisk->part0, rw);
@@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 		else
 			root = &mdev->read_requests;
 		drbd_remove_request_interval(root, req);
-	} else if (!(s & RQ_POSTPONED))
-		D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+	}
 
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current transfer log epoch.
@@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
 		break;
+
+	case QUEUE_AS_DRBD_BARRIER:
+		start_new_tl_epoch(mdev->tconn);
+		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
+		break;
 	};
 
 	return rv;
@@ -861,8 +865,10 @@ static void maybe_pull_ahead(struct drbd_conf *mdev)
 	bool congested = false;
 	enum drbd_on_congestion on_congestion;
 
+	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
 	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
+	rcu_read_unlock();
 	if (on_congestion == OC_BLOCK ||
 	    tconn->agreed_pro_version < 96)
 		return;
@@ -956,14 +962,8 @@ static int drbd_process_write_request(struct drbd_request *req)
 	struct drbd_conf *mdev = req->w.mdev;
 	int remote, send_oos;
 
-	rcu_read_lock();
 	remote = drbd_should_do_remote(mdev->state);
-	if (remote) {
-		maybe_pull_ahead(mdev);
-		remote = drbd_should_do_remote(mdev->state);
-	}
 	send_oos = drbd_should_send_out_of_sync(mdev->state);
-	rcu_read_unlock();
 
 	/* Need to replicate writes. Unless it is an empty flush,
 	 * which is better mapped to a DRBD P_BARRIER packet,
@@ -975,8 +975,8 @@ static int drbd_process_write_request(struct drbd_request *req)
 		/* The only size==0 bios we expect are empty flushes. */
 		D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
 		if (remote)
-			start_new_tl_epoch(mdev->tconn);
-		return 0;
+			_req_mod(req, QUEUE_AS_DRBD_BARRIER);
+		return remote;
 	}
 
 	if (!remote && !send_oos)
@@ -1020,12 +1020,24 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 		bio_endio(bio, -EIO);
 }
 
-void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req)
 {
-	const int rw = bio_rw(bio);
-	struct bio_and_error m = { NULL, };
+	spin_lock(&mdev->submit.lock);
+	list_add_tail(&req->tl_requests, &mdev->submit.writes);
+	spin_unlock(&mdev->submit.lock);
+	queue_work(mdev->submit.wq, &mdev->submit.worker);
+}
+
+/* returns the new drbd_request pointer, if the caller is expected to
+ * drbd_send_and_submit() it (to save latency), or NULL if we queued the
+ * request on the submitter thread.
+ * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
+ */
+struct drbd_request *
+drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+	const int rw = bio_data_dir(bio);
 	struct drbd_request *req;
-	bool no_remote = false;
 
 	/* allocate outside of all locks; */
 	req = drbd_req_new(mdev, bio);
@@ -1035,7 +1047,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		 * if user cannot handle io errors, that's not our business. */
 		dev_err(DEV, "could not kmalloc() req\n");
 		bio_endio(bio, -ENOMEM);
-		return;
+		return ERR_PTR(-ENOMEM);
 	}
 	req->start_time = start_time;
 
@@ -1044,28 +1056,40 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		req->private_bio = NULL;
 	}
 
-	/* For WRITES going to the local disk, grab a reference on the target
-	 * extent. This waits for any resync activity in the corresponding
-	 * resync extent to finish, and, if necessary, pulls in the target
-	 * extent into the activity log, which involves further disk io because
-	 * of transactional on-disk meta data updates.
-	 * Empty flushes don't need to go into the activity log, they can only
-	 * flush data for pending writes which are already in there. */
+	/* Update disk stats */
+	_drbd_start_io_acct(mdev, req);
+
 	if (rw == WRITE && req->private_bio && req->i.size
 	    && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+		if (!drbd_al_begin_io_fastpath(mdev, &req->i)) {
+			drbd_queue_write(mdev, req);
+			return NULL;
+		}
 		req->rq_state |= RQ_IN_ACT_LOG;
-		drbd_al_begin_io(mdev, &req->i);
 	}
 
+	return req;
+}
+
+static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	const int rw = bio_rw(req->master_bio);
+	struct bio_and_error m = { NULL, };
+	bool no_remote = false;
+
 	spin_lock_irq(&mdev->tconn->req_lock);
 	if (rw == WRITE) {
 		/* This may temporarily give up the req_lock,
 		 * but will re-aquire it before it returns here.
 		 * Needs to be before the check on drbd_suspended() */
 		complete_conflicting_writes(req);
+		/* no more giving up req_lock from now on! */
+
+		/* check for congestion, and potentially stop sending
+		 * full data updates, but start sending "dirty bits" only. */
+		maybe_pull_ahead(mdev);
 	}
 
-	/* no more giving up req_lock from now on! */
 
 	if (drbd_suspended(mdev)) {
 		/* push back and retry: */
@@ -1078,9 +1102,6 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 		goto out;
 	}
 
-	/* Update disk stats */
-	_drbd_start_io_acct(mdev, req, bio);
-
 	/* We fail READ/READA early, if we can not serve it.
 	 * We must do this before req is registered on any lists.
 	 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
@@ -1137,7 +1158,116 @@ out:
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
-	return;
+}
+
+void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+	struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time);
+	if (IS_ERR_OR_NULL(req))
+		return;
+	drbd_send_and_submit(mdev, req);
+}
+
+static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming)
+{
+	struct drbd_request *req, *tmp;
+	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+		const int rw = bio_data_dir(req->master_bio);
+
+		if (rw == WRITE /* rw != WRITE should not even end up here! */
+		&& req->private_bio && req->i.size
+		&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
+			if (!drbd_al_begin_io_fastpath(mdev, &req->i))
+				continue;
+
+			req->rq_state |= RQ_IN_ACT_LOG;
+		}
+
+		list_del_init(&req->tl_requests);
+		drbd_send_and_submit(mdev, req);
+	}
+}
+
+static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev,
+					    struct list_head *incoming,
+					    struct list_head *pending)
+{
+	struct drbd_request *req, *tmp;
+	int wake = 0;
+	int err;
+
+	spin_lock_irq(&mdev->al_lock);
+	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+		err = drbd_al_begin_io_nonblock(mdev, &req->i);
+		if (err == -EBUSY)
+			wake = 1;
+		if (err)
+			continue;
+		req->rq_state |= RQ_IN_ACT_LOG;
+		list_move_tail(&req->tl_requests, pending);
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	if (wake)
+		wake_up(&mdev->al_wait);
+
+	return !list_empty(pending);
+}
+
+void do_submit(struct work_struct *ws)
+{
+	struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker);
+	LIST_HEAD(incoming);
+	LIST_HEAD(pending);
+	struct drbd_request *req, *tmp;
+
+	for (;;) {
+		spin_lock(&mdev->submit.lock);
+		list_splice_tail_init(&mdev->submit.writes, &incoming);
+		spin_unlock(&mdev->submit.lock);
+
+		submit_fast_path(mdev, &incoming);
+		if (list_empty(&incoming))
+			break;
+
+		wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending));
+		/* Maybe more was queued, while we prepared the transaction?
+		 * Try to stuff them into this transaction as well.
+		 * Be strictly non-blocking here, no wait_event, we already
+		 * have something to commit.
+		 * Stop if we don't make any more progres.
+		 */
+		for (;;) {
+			LIST_HEAD(more_pending);
+			LIST_HEAD(more_incoming);
+			bool made_progress;
+
+			/* It is ok to look outside the lock,
+			 * it's only an optimization anyways */
+			if (list_empty(&mdev->submit.writes))
+				break;
+
+			spin_lock(&mdev->submit.lock);
+			list_splice_tail_init(&mdev->submit.writes, &more_incoming);
+			spin_unlock(&mdev->submit.lock);
+
+			if (list_empty(&more_incoming))
+				break;
+
+			made_progress = prepare_al_transaction_nonblock(mdev, &more_incoming, &more_pending);
+
+			list_splice_tail_init(&more_pending, &pending);
+			list_splice_tail_init(&more_incoming, &incoming);
+
+			if (!made_progress)
+				break;
+		}
+		drbd_al_begin_io_commit(mdev, false);
+
+		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
+			list_del_init(&req->tl_requests);
+			drbd_send_and_submit(mdev, req);
+		}
+	}
 }
 
 void drbd_make_request(struct request_queue *q, struct bio *bio)
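
The do_submit() worker added in the last hunk drains mdev->submit.writes under submit.lock, activates as many queued writes as it can without blocking, and commits them as one activity-log transaction before submitting them. The userspace sketch below illustrates only that queue-and-batch pattern; it is not DRBD code, and every name in it (queue_write, submit_worker, commit_batch) is hypothetical, with a pthread mutex/condvar standing in for submit.lock and the workqueue.

/* Illustrative userspace sketch (not DRBD code): a single worker thread
 * splices a mutex-protected queue out in one go and commits each batch
 * as a unit, mirroring drbd_queue_write()/do_submit() above. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct request {
	int id;
	struct request *next;
};

static pthread_mutex_t submit_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  submit_cond = PTHREAD_COND_INITIALIZER;
static struct request *incoming;	/* writes queued by submitters */
static int done;

/* Producer side: queue a write and kick the worker (cf. drbd_queue_write). */
static void queue_write(struct request *req)
{
	pthread_mutex_lock(&submit_lock);
	req->next = incoming;
	incoming = req;
	pthread_cond_signal(&submit_cond);
	pthread_mutex_unlock(&submit_lock);
}

/* Commit one whole batch at once (stand-in for drbd_al_begin_io_commit). */
static void commit_batch(struct request *batch)
{
	int n = 0;
	for (struct request *r = batch; r; r = r->next)
		n++;
	printf("committing batch of %d request(s)\n", n);
	while (batch) {
		struct request *r = batch;
		batch = batch->next;
		free(r);
	}
}

/* Worker side: splice out everything queued so far, commit it as one
 * batch; anything queued meanwhile is picked up on the next pass
 * (cf. do_submit). */
static void *submit_worker(void *arg)
{
	(void)arg;
	for (;;) {
		struct request *batch;

		pthread_mutex_lock(&submit_lock);
		while (!incoming && !done)
			pthread_cond_wait(&submit_cond, &submit_lock);
		batch = incoming;
		incoming = NULL;
		pthread_mutex_unlock(&submit_lock);

		if (batch)
			commit_batch(batch);
		else if (done)
			break;
	}
	return NULL;
}

int main(void)
{
	pthread_t worker;
	pthread_create(&worker, NULL, submit_worker, NULL);

	for (int i = 0; i < 8; i++) {
		struct request *req = calloc(1, sizeof(*req));
		req->id = i;
		queue_write(req);
	}

	pthread_mutex_lock(&submit_lock);
	done = 1;
	pthread_cond_signal(&submit_cond);
	pthread_mutex_unlock(&submit_lock);
	pthread_join(worker, NULL);
	return 0;
}

The point of batching in both the sketch and the DRBD change is that one commit (here a single printf-marked step, there one on-disk activity-log transaction) covers every write that arrived while the previous batch was being prepared, instead of paying that cost per request.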