aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-08-25 14:36:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-08-25 14:36:43 -0400
commita7e546f175f07630453c44b5afe14dd667dcfec9 (patch)
tree352c2577161f0cbe8c3b49bb6f053cfd49ed32b4 /drivers
parentda31ce727e8cc6920de5840e35b4e770c08e86e3 (diff)
parent676ce6d5ca3098339c028d44fe0427d1566a4d2d (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block-related fixes from Jens Axboe: - Improvements to the buffered and direct write IO plugging from Fengguang. - Abstract out the mapping of a bio in a request, and use that to provide a blk_bio_map_sg() helper. Useful for mapping just a bio instead of a full request. - Regression fix from Hugh, fixing up a patch that went into the previous release cycle (and marked stable, too) attempting to prevent a loop in __getblk_slow(). - Updates to discard requests, fixing up the sizing and how we align them. Also a change to disallow merging of discard requests, since that doesn't really work properly yet. - A few drbd fixes. - Documentation updates. * 'for-linus' of git://git.kernel.dk/linux-block: block: replace __getblk_slow misfix by grow_dev_page fix drbd: Write all pages of the bitmap after an online resize drbd: Finish requests that completed while IO was frozen drbd: fix drbd wire compatibility for empty flushes Documentation: update tunable options in block/cfq-iosched.txt Documentation: update tunable options in block/cfq-iosched.txt Documentation: update missing index files in block/00-INDEX block: move down direct IO plugging block: remove plugging at buffered write time block: disable discard request merge temporarily bio: Fix potential memory leak in bio_find_or_create_slab() block: Don't use static to define "void *p" in show_partition_start() block: Add blk_bio_map_sg() helper block: Introduce __blk_segment_map_sg() helper fs/block-dev.c:fix performance regression in O_DIRECT writes to md block devices block: split discard into aligned requests block: reorganize rounding of max_discard_sectors
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c15
-rw-r--r--drivers/block/drbd/drbd_int.h1
-rw-r--r--drivers/block/drbd/drbd_main.c28
-rw-r--r--drivers/block/drbd/drbd_nl.c4
-rw-r--r--drivers/block/drbd/drbd_req.c36
5 files changed, 61 insertions, 23 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index ba91b408abad..d84566496746 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -889,6 +889,7 @@ struct bm_aio_ctx {
889 unsigned int done; 889 unsigned int done;
890 unsigned flags; 890 unsigned flags;
891#define BM_AIO_COPY_PAGES 1 891#define BM_AIO_COPY_PAGES 1
892#define BM_WRITE_ALL_PAGES 2
892 int error; 893 int error;
893 struct kref kref; 894 struct kref kref;
894}; 895};
@@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
1059 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) 1060 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
1060 break; 1061 break;
1061 if (rw & WRITE) { 1062 if (rw & WRITE) {
1062 if (bm_test_page_unchanged(b->bm_pages[i])) { 1063 if (!(flags & BM_WRITE_ALL_PAGES) &&
1064 bm_test_page_unchanged(b->bm_pages[i])) {
1063 dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); 1065 dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
1064 continue; 1066 continue;
1065 } 1067 }
@@ -1141,6 +1143,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
1141} 1143}
1142 1144
1143/** 1145/**
1146 * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
1147 * @mdev: DRBD device.
1148 *
1149 * Will write all pages.
1150 */
1151int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
1152{
1153 return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
1154}
1155
1156/**
1144 * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. 1157 * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
1145 * @mdev: DRBD device. 1158 * @mdev: DRBD device.
1146 * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages 1159 * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b2ca143d0053..b953cc7c9c00 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1469,6 +1469,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
1469extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); 1469extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
1470extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); 1470extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
1471extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); 1471extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
1472extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
1472extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); 1473extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
1473extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, 1474extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
1474 unsigned long al_enr); 1475 unsigned long al_enr);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index dbe6135a2abe..f93a0320e952 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
79static void md_sync_timer_fn(unsigned long data); 79static void md_sync_timer_fn(unsigned long data);
80static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); 80static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
81static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); 81static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
82static void _tl_clear(struct drbd_conf *mdev);
82 83
83MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 84MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
84 "Lars Ellenberg <lars@linbit.com>"); 85 "Lars Ellenberg <lars@linbit.com>");
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
432 433
433 /* Actions operating on the disk state, also want to work on 434 /* Actions operating on the disk state, also want to work on
434 requests that got barrier acked. */ 435 requests that got barrier acked. */
435 switch (what) {
436 case fail_frozen_disk_io:
437 case restart_frozen_disk_io:
438 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
439 req = list_entry(le, struct drbd_request, tl_requests);
440 _req_mod(req, what);
441 }
442 436
443 case connection_lost_while_pending: 437 list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
444 case resend: 438 req = list_entry(le, struct drbd_request, tl_requests);
445 break; 439 _req_mod(req, what);
446 default:
447 dev_err(DEV, "what = %d in _tl_restart()\n", what);
448 } 440 }
449} 441}
450 442
@@ -459,11 +451,16 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
459 */ 451 */
460void tl_clear(struct drbd_conf *mdev) 452void tl_clear(struct drbd_conf *mdev)
461{ 453{
454 spin_lock_irq(&mdev->req_lock);
455 _tl_clear(mdev);
456 spin_unlock_irq(&mdev->req_lock);
457}
458
459static void _tl_clear(struct drbd_conf *mdev)
460{
462 struct list_head *le, *tle; 461 struct list_head *le, *tle;
463 struct drbd_request *r; 462 struct drbd_request *r;
464 463
465 spin_lock_irq(&mdev->req_lock);
466
467 _tl_restart(mdev, connection_lost_while_pending); 464 _tl_restart(mdev, connection_lost_while_pending);
468 465
469 /* we expect this list to be empty. */ 466 /* we expect this list to be empty. */
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev)
482 479
483 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); 480 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
484 481
485 spin_unlock_irq(&mdev->req_lock);
486} 482}
487 483
488void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) 484void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1476 if (ns.susp_fen) { 1472 if (ns.susp_fen) {
1477 /* case1: The outdate peer handler is successful: */ 1473 /* case1: The outdate peer handler is successful: */
1478 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { 1474 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
1479 tl_clear(mdev);
1480 if (test_bit(NEW_CUR_UUID, &mdev->flags)) { 1475 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1481 drbd_uuid_new_current(mdev); 1476 drbd_uuid_new_current(mdev);
1482 clear_bit(NEW_CUR_UUID, &mdev->flags); 1477 clear_bit(NEW_CUR_UUID, &mdev->flags);
1483 } 1478 }
1484 spin_lock_irq(&mdev->req_lock); 1479 spin_lock_irq(&mdev->req_lock);
1480 _tl_clear(mdev);
1485 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); 1481 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
1486 spin_unlock_irq(&mdev->req_lock); 1482 spin_unlock_irq(&mdev->req_lock);
1487 } 1483 }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index fb9dce8daa24..edb490aad8b4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -674,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
674 la_size_changed && md_moved ? "size changed and md moved" : 674 la_size_changed && md_moved ? "size changed and md moved" :
675 la_size_changed ? "size changed" : "md moved"); 675 la_size_changed ? "size changed" : "md moved");
676 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ 676 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
677 err = drbd_bitmap_io(mdev, &drbd_bm_write, 677 err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
678 "size changed", BM_LOCKED_MASK); 678 "size changed", BM_LOCKED_MASK);
679 if (err) { 679 if (err) {
680 rv = dev_size_error; 680 rv = dev_size_error;
681 goto out; 681 goto out;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 910335c30927..01b2ac641c7b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
695 break; 695 break;
696 696
697 case resend: 697 case resend:
698 /* Simply complete (local only) READs. */
699 if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
700 _req_may_be_done(req, m);
701 break;
702 }
703
698 /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK 704 /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
699 before the connection loss (B&C only); only P_BARRIER_ACK was missing. 705 before the connection loss (B&C only); only P_BARRIER_ACK was missing.
700 Trowing them out of the TL here by pretending we got a BARRIER_ACK 706 Trowing them out of the TL here by pretending we got a BARRIER_ACK
@@ -834,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
834 req->private_bio = NULL; 840 req->private_bio = NULL;
835 } 841 }
836 if (rw == WRITE) { 842 if (rw == WRITE) {
837 remote = 1; 843 /* Need to replicate writes. Unless it is an empty flush,
844 * which is better mapped to a DRBD P_BARRIER packet,
845 * also for drbd wire protocol compatibility reasons. */
846 if (unlikely(size == 0)) {
847 /* The only size==0 bios we expect are empty flushes. */
848 D_ASSERT(bio->bi_rw & REQ_FLUSH);
849 remote = 0;
850 } else
851 remote = 1;
838 } else { 852 } else {
839 /* READ || READA */ 853 /* READ || READA */
840 if (local) { 854 if (local) {
@@ -870,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
870 * extent. This waits for any resync activity in the corresponding 884 * extent. This waits for any resync activity in the corresponding
871 * resync extent to finish, and, if necessary, pulls in the target 885 * resync extent to finish, and, if necessary, pulls in the target
872 * extent into the activity log, which involves further disk io because 886 * extent into the activity log, which involves further disk io because
873 * of transactional on-disk meta data updates. */ 887 * of transactional on-disk meta data updates.
874 if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { 888 * Empty flushes don't need to go into the activity log, they can only
889 * flush data for pending writes which are already in there. */
890 if (rw == WRITE && local && size
891 && !test_bit(AL_SUSPENDED, &mdev->flags)) {
875 req->rq_state |= RQ_IN_ACT_LOG; 892 req->rq_state |= RQ_IN_ACT_LOG;
876 drbd_al_begin_io(mdev, sector); 893 drbd_al_begin_io(mdev, sector);
877 } 894 }
@@ -994,7 +1011,10 @@ allocate_barrier:
994 if (rw == WRITE && _req_conflicts(req)) 1011 if (rw == WRITE && _req_conflicts(req))
995 goto fail_conflicting; 1012 goto fail_conflicting;
996 1013
997 list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); 1014 /* no point in adding empty flushes to the transfer log,
1015 * they are mapped to drbd barriers already. */
1016 if (likely(size!=0))
1017 list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
998 1018
999 /* NOTE remote first: to get the concurrent write detection right, 1019 /* NOTE remote first: to get the concurrent write detection right,
1000 * we must register the request before start of local IO. */ 1020 * we must register the request before start of local IO. */
@@ -1014,6 +1034,14 @@ allocate_barrier:
1014 mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) 1034 mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
1015 maybe_pull_ahead(mdev); 1035 maybe_pull_ahead(mdev);
1016 1036
1037 /* If this was a flush, queue a drbd barrier/start a new epoch.
1038 * Unless the current epoch was empty anyways, or we are not currently
1039 * replicating, in which case there is no point. */
1040 if (unlikely(bio->bi_rw & REQ_FLUSH)
1041 && mdev->newest_tle->n_writes
1042 && drbd_should_do_remote(mdev->state))
1043 queue_barrier(mdev);
1044
1017 spin_unlock_irq(&mdev->req_lock); 1045 spin_unlock_irq(&mdev->req_lock);
1018 kfree(b); /* if someone else has beaten us to it... */ 1046 kfree(b); /* if someone else has beaten us to it... */
1019 1047