From 56392d2f40aac4b520fc50bc356f40e07f7e1c7d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:48 +0100 Subject: drbd: Clarify when activity log I/O is delegated to the worker thread Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2b8303ad63c9..7d1ff1aaeb71 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1054,7 +1054,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; - drbd_al_begin_io(mdev, &req->i); + drbd_al_begin_io(mdev, &req->i, true); } spin_lock_irq(&mdev->tconn->req_lock); -- cgit v1.2.2 From 6d9febe237146156947f0da8407c620b5c33c1df Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:50 +0100 Subject: drbd: split __drbd_make_request in before and after drbd_al_begin_io This is in preparation to be able to defer requests that need to wait for an activity log transaction to a submitter workqueue. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7d1ff1aaeb71..96d5968fc1e4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -34,14 +34,14 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size); /* Update disk stats at start of I/O request */ -static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req) { - const int rw = bio_data_dir(bio); + const int rw = bio_data_dir(req->master_bio); int cpu; cpu = part_stat_lock(); part_round_stats(cpu, &mdev->vdisk->part0); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); - part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9); (void) cpu; /* The macro invocations above want the cpu argument, I do not like the compiler warning about cpu only assigned but never used... */ part_inc_in_flight(&mdev->vdisk->part0, rw); @@ -1020,12 +1020,16 @@ drbd_submit_req_private_bio(struct drbd_request *req) bio_endio(bio, -EIO); } -void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +/* returns the new drbd_request pointer, if the caller is expected to + * drbd_send_and_submit() it (to save latency), or NULL if we queued the + * request on the submitter thread. + * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. + */ +struct drbd_request * +drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { - const int rw = bio_rw(bio); - struct bio_and_error m = { NULL, }; + const int rw = bio_data_dir(bio); struct drbd_request *req; - bool no_remote = false; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -1035,7 +1039,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long * if user cannot handle io errors, that's not our business. */ dev_err(DEV, "could not kmalloc() req\n"); bio_endio(bio, -ENOMEM); - return; + return ERR_PTR(-ENOMEM); } req->start_time = start_time; @@ -1057,6 +1061,15 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long drbd_al_begin_io(mdev, &req->i, true); } + return req; +} + +static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *req) +{ + const int rw = bio_rw(req->master_bio); + struct bio_and_error m = { NULL, }; + bool no_remote = false; + spin_lock_irq(&mdev->tconn->req_lock); if (rw == WRITE) { /* This may temporarily give up the req_lock, @@ -1079,7 +1092,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long } /* Update disk stats */ - _drbd_start_io_acct(mdev, req, bio); + _drbd_start_io_acct(mdev, req); /* We fail READ/READA early, if we can not serve it. * We must do this before req is registered on any lists. @@ -1137,7 +1150,14 @@ out: if (m.bio) complete_master_bio(mdev, &m); - return; +} + +void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +{ + struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time); + if (IS_ERR_OR_NULL(req)) + return; + drbd_send_and_submit(mdev, req); } void drbd_make_request(struct request_queue *q, struct bio *bio) -- cgit v1.2.2 From 113fef9e20e0d614b3f5940b67c96e719c559eea Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 22 Mar 2013 18:14:40 -0600 Subject: drbd: prepare to queue write requests on a submit worker Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 96d5968fc1e4..4af709e0aae5 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1160,6 +1160,35 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long drbd_send_and_submit(mdev, req); } +void __drbd_make_request_from_worker(struct drbd_conf *mdev, struct drbd_request *req) +{ + const int rw = bio_rw(req->master_bio); + + if (rw == WRITE && req->private_bio && req->i.size + && !test_bit(AL_SUSPENDED, &mdev->flags)) { + drbd_al_begin_io(mdev, &req->i, false); + req->rq_state |= RQ_IN_ACT_LOG; + } + drbd_send_and_submit(mdev, req); +} + + +void do_submit(struct work_struct *ws) +{ + struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker); + LIST_HEAD(writes); + struct drbd_request *req, *tmp; + + spin_lock(&mdev->submit.lock); + list_splice_init(&mdev->submit.writes, &writes); + spin_unlock(&mdev->submit.lock); + + list_for_each_entry_safe(req, tmp, &writes, tl_requests) { + list_del_init(&req->tl_requests); + __drbd_make_request_from_worker(mdev, req); + } +} + void drbd_make_request(struct request_queue *q, struct bio *bio) { struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; -- cgit v1.2.2 From 779b3fe4c0e9dea19ae3ddef0b5fd1a663b63ee6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:54 +0100 Subject: drbd: queue writes on submitter thread, unless they pass the activity log fastpath Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4af709e0aae5..43bc1d064bc7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1020,6 +1020,14 @@ drbd_submit_req_private_bio(struct drbd_request *req) bio_endio(bio, -EIO); } +static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req) +{ + spin_lock(&mdev->submit.lock); + list_add_tail(&req->tl_requests, &mdev->submit.writes); + spin_unlock(&mdev->submit.lock); + queue_work(mdev->submit.wq, &mdev->submit.worker); +} + /* returns the new drbd_request pointer, if the caller is expected to * drbd_send_and_submit() it (to save latency), or NULL if we queued the * request on the submitter thread. @@ -1048,17 +1056,13 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long star req->private_bio = NULL; } - /* For WRITES going to the local disk, grab a reference on the target - * extent. This waits for any resync activity in the corresponding - * resync extent to finish, and, if necessary, pulls in the target - * extent into the activity log, which involves further disk io because - * of transactional on-disk meta data updates. - * Empty flushes don't need to go into the activity log, they can only - * flush data for pending writes which are already in there. */ if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { + if (!drbd_al_begin_io_fastpath(mdev, &req->i)) { + drbd_queue_write(mdev, req); + return NULL; + } req->rq_state |= RQ_IN_ACT_LOG; - drbd_al_begin_io(mdev, &req->i, true); } return req; -- cgit v1.2.2 From 08a1ddab6df7d3c7b6341774cb1cf4b21b96a214 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:56 +0100 Subject: drbd: consolidate as many updates as possible into one AL transaction Depending on current IO depth, try to consolidate as many updates as possible into one activity log transaction. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 70 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 14 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 43bc1d064bc7..b923d41678e1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1164,32 +1164,74 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long drbd_send_and_submit(mdev, req); } -void __drbd_make_request_from_worker(struct drbd_conf *mdev, struct drbd_request *req) +static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming) { - const int rw = bio_rw(req->master_bio); + struct drbd_request *req, *tmp; + list_for_each_entry_safe(req, tmp, incoming, tl_requests) { + const int rw = bio_data_dir(req->master_bio); - if (rw == WRITE && req->private_bio && req->i.size - && !test_bit(AL_SUSPENDED, &mdev->flags)) { - drbd_al_begin_io(mdev, &req->i, false); - req->rq_state |= RQ_IN_ACT_LOG; + if (rw == WRITE /* rw != WRITE should not even end up here! */ + && req->private_bio && req->i.size + && !test_bit(AL_SUSPENDED, &mdev->flags)) { + if (!drbd_al_begin_io_fastpath(mdev, &req->i)) + continue; + + req->rq_state |= RQ_IN_ACT_LOG; + } + + list_del_init(&req->tl_requests); + drbd_send_and_submit(mdev, req); } - drbd_send_and_submit(mdev, req); } +static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev, + struct list_head *incoming, + struct list_head *pending) +{ + struct drbd_request *req, *tmp; + int wake = 0; + int err; + + spin_lock_irq(&mdev->al_lock); + list_for_each_entry_safe(req, tmp, incoming, tl_requests) { + err = drbd_al_begin_io_nonblock(mdev, &req->i); + if (err == -EBUSY) + wake = 1; + if (err) + continue; + req->rq_state |= RQ_IN_ACT_LOG; + list_move_tail(&req->tl_requests, pending); + } + spin_unlock_irq(&mdev->al_lock); + if (wake) + wake_up(&mdev->al_wait); + + return !list_empty(pending); +} void do_submit(struct work_struct *ws) { struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker); - LIST_HEAD(writes); + LIST_HEAD(incoming); + LIST_HEAD(pending); struct drbd_request *req, *tmp; - spin_lock(&mdev->submit.lock); - list_splice_init(&mdev->submit.writes, &writes); - spin_unlock(&mdev->submit.lock); + for (;;) { + spin_lock(&mdev->submit.lock); + list_splice_tail_init(&mdev->submit.writes, &incoming); + spin_unlock(&mdev->submit.lock); - list_for_each_entry_safe(req, tmp, &writes, tl_requests) { - list_del_init(&req->tl_requests); - __drbd_make_request_from_worker(mdev, req); + submit_fast_path(mdev, &incoming); + if (list_empty(&incoming)) + break; + + wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending)); + drbd_al_begin_io_commit(mdev, false); + + list_for_each_entry_safe(req, tmp, &pending, tl_requests) { + list_del_init(&req->tl_requests); + drbd_send_and_submit(mdev, req); + } } } -- cgit v1.2.2 From 7e8c288f6cde950a6ca001ec06a32c8c2cf4180e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:57 +0100 Subject: drbd: move start io accounting before activity log transaction The IO accounting of the drbd "queue depth" was misleading. We only started IO accounting once we already wrote the activity log. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b923d41678e1..d72f2fef1cba 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1056,6 +1056,9 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long star req->private_bio = NULL; } + /* Update disk stats */ + _drbd_start_io_acct(mdev, req); + if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { if (!drbd_al_begin_io_fastpath(mdev, &req->i)) { @@ -1095,9 +1098,6 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re goto out; } - /* Update disk stats */ - _drbd_start_io_acct(mdev, req); - /* We fail READ/READA early, if we can not serve it. * We must do this before req is registered on any lists. * Otherwise, drbd_req_complete() will queue failed READ for retry. */ -- cgit v1.2.2 From 45ad07b3ac1e3062188fb760fe71cafb4a100215 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:58 +0100 Subject: drbd: try hard to max out the updates per AL transaction There may have been more incoming requests while we where preparing the current transaction. Try to consolidate more updates into this transaction until we make no more progres. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d72f2fef1cba..9f7ff1cb46ff 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1226,6 +1226,37 @@ void do_submit(struct work_struct *ws) break; wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending)); + /* Maybe more was queued, while we prepared the transaction? + * Try to stuff them into this transaction as well. + * Be strictly non-blocking here, no wait_event, we already + * have something to commit. + * Stop if we don't make any more progres. + */ + for (;;) { + LIST_HEAD(more_pending); + LIST_HEAD(more_incoming); + bool made_progress; + + /* It is ok to look outside the lock, + * it's only an optimization anyways */ + if (list_empty(&mdev->submit.writes)) + break; + + spin_lock(&mdev->submit.lock); + list_splice_tail_init(&mdev->submit.writes, &more_incoming); + spin_unlock(&mdev->submit.lock); + + if (list_empty(&more_incoming)) + break; + + made_progress = prepare_al_transaction_nonblock(mdev, &more_incoming, &more_pending); + + list_splice_tail_init(&more_pending, &pending); + list_splice_tail_init(&more_incoming, &incoming); + + if (!made_progress) + break; + } drbd_al_begin_io_commit(mdev, false); list_for_each_entry_safe(req, tmp, &pending, tl_requests) { -- cgit v1.2.2 From 7074e4a745799d521b17775f6d076d84dc7f8c50 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:41 +0100 Subject: drbd: only fail empty flushes if no good data is reachable We completed empty flushes (blkdev_issue_flush()) with IO error if we lost the local disk, even if we still have an established replication link to a healthy remote disk. Fix this to only report errors to upper layers, if neither local nor remote data is reachable. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9f7ff1cb46ff..beefe65764ff 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) else root = &mdev->read_requests; drbd_remove_request_interval(root, req); - } else if (!(s & RQ_POSTPONED)) - D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); + } /* Before we can signal completion to the upper layers, * we may need to close the current transfer log epoch. @@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE); break; + + case QUEUE_AS_DRBD_BARRIER: + start_new_tl_epoch(mdev->tconn); + mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); + break; }; return rv; @@ -975,8 +979,8 @@ static int drbd_process_write_request(struct drbd_request *req) /* The only size==0 bios we expect are empty flushes. */ D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH); if (remote) - start_new_tl_epoch(mdev->tconn); - return 0; + _req_mod(req, QUEUE_AS_DRBD_BARRIER); + return remote; } if (!remote && !send_oos) -- cgit v1.2.2 From 607f25e56ee0a31e451f6bd8a7109fa1f5dcbe29 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:45 +0100 Subject: drbd: fix drbd epoch write count for ahead/behind mode The sanity check when receiving P_BARRIER_ACK does expect all write requests with a given req->epoch to have been either all replicated, or all not replicated. Because req->epoch was assigned before calling maybe_pull_ahead(), this expectation was not met, leading to an off-by-one in the sanity check, and further to a "Protocol Error". Fix: move the call to maybe_pull_ahead() a few lines up, and assign req->epoch only after that. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index beefe65764ff..c24379ffd4e3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -865,8 +865,10 @@ static void maybe_pull_ahead(struct drbd_conf *mdev) bool congested = false; enum drbd_on_congestion on_congestion; + rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); on_congestion = nc ? nc->on_congestion : OC_BLOCK; + rcu_read_unlock(); if (on_congestion == OC_BLOCK || tconn->agreed_pro_version < 96) return; @@ -960,14 +962,8 @@ static int drbd_process_write_request(struct drbd_request *req) struct drbd_conf *mdev = req->w.mdev; int remote, send_oos; - rcu_read_lock(); remote = drbd_should_do_remote(mdev->state); - if (remote) { - maybe_pull_ahead(mdev); - remote = drbd_should_do_remote(mdev->state); - } send_oos = drbd_should_send_out_of_sync(mdev->state); - rcu_read_unlock(); /* Need to replicate writes. Unless it is an empty flush, * which is better mapped to a DRBD P_BARRIER packet, @@ -1087,9 +1083,13 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re * but will re-aquire it before it returns here. * Needs to be before the check on drbd_suspended() */ complete_conflicting_writes(req); + /* no more giving up req_lock from now on! */ + + /* check for congestion, and potentially stop sending + * full data updates, but start sending "dirty bits" only. */ + maybe_pull_ahead(mdev); } - /* no more giving up req_lock from now on! */ if (drbd_suspended(mdev)) { /* push back and retry: */ -- cgit v1.2.2