diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-25 14:36:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-25 14:36:43 -0400 |
commit | a7e546f175f07630453c44b5afe14dd667dcfec9 (patch) | |
tree | 352c2577161f0cbe8c3b49bb6f053cfd49ed32b4 /drivers/block/drbd | |
parent | da31ce727e8cc6920de5840e35b4e770c08e86e3 (diff) | |
parent | 676ce6d5ca3098339c028d44fe0427d1566a4d2d (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block-related fixes from Jens Axboe:
- Improvements to the buffered and direct write IO plugging from
Fengguang.
- Abstract out the mapping of a bio in a request, and use that to
provide a blk_bio_map_sg() helper. Useful for mapping just a bio
instead of a full request.
- Regression fix from Hugh, fixing up a patch that went into the
previous release cycle (and marked stable, too) attempting to prevent
a loop in __getblk_slow().
- Updates to discard requests, fixing up the sizing and how we align
them. Also a change to disallow merging of discard requests, since
that doesn't really work properly yet.
- A few drbd fixes.
- Documentation updates.
* 'for-linus' of git://git.kernel.dk/linux-block:
block: replace __getblk_slow misfix by grow_dev_page fix
drbd: Write all pages of the bitmap after an online resize
drbd: Finish requests that completed while IO was frozen
drbd: fix drbd wire compatibility for empty flushes
Documentation: update tunable options in block/cfq-iosched.txt
Documentation: update tunable options in block/cfq-iosched.txt
Documentation: update missing index files in block/00-INDEX
block: move down direct IO plugging
block: remove plugging at buffered write time
block: disable discard request merge temporarily
bio: Fix potential memory leak in bio_find_or_create_slab()
block: Don't use static to define "void *p" in show_partition_start()
block: Add blk_bio_map_sg() helper
block: Introduce __blk_segment_map_sg() helper
fs/block-dev.c:fix performance regression in O_DIRECT writes to md block devices
block: split discard into aligned requests
block: reorganize rounding of max_discard_sectors
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 15 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 28 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 36 |
5 files changed, 61 insertions, 23 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ba91b408abad..d84566496746 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
@@ -889,6 +889,7 @@ struct bm_aio_ctx { | |||
889 | unsigned int done; | 889 | unsigned int done; |
890 | unsigned flags; | 890 | unsigned flags; |
891 | #define BM_AIO_COPY_PAGES 1 | 891 | #define BM_AIO_COPY_PAGES 1 |
892 | #define BM_WRITE_ALL_PAGES 2 | ||
892 | int error; | 893 | int error; |
893 | struct kref kref; | 894 | struct kref kref; |
894 | }; | 895 | }; |
@@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w | |||
1059 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) | 1060 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) |
1060 | break; | 1061 | break; |
1061 | if (rw & WRITE) { | 1062 | if (rw & WRITE) { |
1062 | if (bm_test_page_unchanged(b->bm_pages[i])) { | 1063 | if (!(flags & BM_WRITE_ALL_PAGES) && |
1064 | bm_test_page_unchanged(b->bm_pages[i])) { | ||
1063 | dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); | 1065 | dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); |
1064 | continue; | 1066 | continue; |
1065 | } | 1067 | } |
@@ -1141,6 +1143,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | |||
1141 | } | 1143 | } |
1142 | 1144 | ||
1143 | /** | 1145 | /** |
1146 | * drbd_bm_write_all() - Write the whole bitmap to its on disk location. | ||
1147 | * @mdev: DRBD device. | ||
1148 | * | ||
1149 | * Will write all pages. | ||
1150 | */ | ||
1151 | int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local) | ||
1152 | { | ||
1153 | return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0); | ||
1154 | } | ||
1155 | |||
1156 | /** | ||
1144 | * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. | 1157 | * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. |
1145 | * @mdev: DRBD device. | 1158 | * @mdev: DRBD device. |
1146 | * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages | 1159 | * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b2ca143d0053..b953cc7c9c00 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1469,6 +1469,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | |||
1469 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); | 1469 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); |
1470 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | 1470 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); |
1471 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | 1471 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); |
1472 | extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); | ||
1472 | extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); | 1473 | extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); |
1473 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | 1474 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, |
1474 | unsigned long al_enr); | 1475 | unsigned long al_enr); |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dbe6135a2abe..f93a0320e952 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); | |||
79 | static void md_sync_timer_fn(unsigned long data); | 79 | static void md_sync_timer_fn(unsigned long data); |
80 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 80 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
81 | static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 81 | static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
82 | static void _tl_clear(struct drbd_conf *mdev); | ||
82 | 83 | ||
83 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | 84 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " |
84 | "Lars Ellenberg <lars@linbit.com>"); | 85 | "Lars Ellenberg <lars@linbit.com>"); |
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
432 | 433 | ||
433 | /* Actions operating on the disk state, also want to work on | 434 | /* Actions operating on the disk state, also want to work on |
434 | requests that got barrier acked. */ | 435 | requests that got barrier acked. */ |
435 | switch (what) { | ||
436 | case fail_frozen_disk_io: | ||
437 | case restart_frozen_disk_io: | ||
438 | list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { | ||
439 | req = list_entry(le, struct drbd_request, tl_requests); | ||
440 | _req_mod(req, what); | ||
441 | } | ||
442 | 436 | ||
443 | case connection_lost_while_pending: | 437 | list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { |
444 | case resend: | 438 | req = list_entry(le, struct drbd_request, tl_requests); |
445 | break; | 439 | _req_mod(req, what); |
446 | default: | ||
447 | dev_err(DEV, "what = %d in _tl_restart()\n", what); | ||
448 | } | 440 | } |
449 | } | 441 | } |
450 | 442 | ||
@@ -459,11 +451,16 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
459 | */ | 451 | */ |
460 | void tl_clear(struct drbd_conf *mdev) | 452 | void tl_clear(struct drbd_conf *mdev) |
461 | { | 453 | { |
454 | spin_lock_irq(&mdev->req_lock); | ||
455 | _tl_clear(mdev); | ||
456 | spin_unlock_irq(&mdev->req_lock); | ||
457 | } | ||
458 | |||
459 | static void _tl_clear(struct drbd_conf *mdev) | ||
460 | { | ||
462 | struct list_head *le, *tle; | 461 | struct list_head *le, *tle; |
463 | struct drbd_request *r; | 462 | struct drbd_request *r; |
464 | 463 | ||
465 | spin_lock_irq(&mdev->req_lock); | ||
466 | |||
467 | _tl_restart(mdev, connection_lost_while_pending); | 464 | _tl_restart(mdev, connection_lost_while_pending); |
468 | 465 | ||
469 | /* we expect this list to be empty. */ | 466 | /* we expect this list to be empty. */ |
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev) | |||
482 | 479 | ||
483 | memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); | 480 | memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); |
484 | 481 | ||
485 | spin_unlock_irq(&mdev->req_lock); | ||
486 | } | 482 | } |
487 | 483 | ||
488 | void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | 484 | void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) |
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1476 | if (ns.susp_fen) { | 1472 | if (ns.susp_fen) { |
1477 | /* case1: The outdate peer handler is successful: */ | 1473 | /* case1: The outdate peer handler is successful: */ |
1478 | if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { | 1474 | if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { |
1479 | tl_clear(mdev); | ||
1480 | if (test_bit(NEW_CUR_UUID, &mdev->flags)) { | 1475 | if (test_bit(NEW_CUR_UUID, &mdev->flags)) { |
1481 | drbd_uuid_new_current(mdev); | 1476 | drbd_uuid_new_current(mdev); |
1482 | clear_bit(NEW_CUR_UUID, &mdev->flags); | 1477 | clear_bit(NEW_CUR_UUID, &mdev->flags); |
1483 | } | 1478 | } |
1484 | spin_lock_irq(&mdev->req_lock); | 1479 | spin_lock_irq(&mdev->req_lock); |
1480 | _tl_clear(mdev); | ||
1485 | _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); | 1481 | _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); |
1486 | spin_unlock_irq(&mdev->req_lock); | 1482 | spin_unlock_irq(&mdev->req_lock); |
1487 | } | 1483 | } |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fb9dce8daa24..edb490aad8b4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -674,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds | |||
674 | la_size_changed && md_moved ? "size changed and md moved" : | 674 | la_size_changed && md_moved ? "size changed and md moved" : |
675 | la_size_changed ? "size changed" : "md moved"); | 675 | la_size_changed ? "size changed" : "md moved"); |
676 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ | 676 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ |
677 | err = drbd_bitmap_io(mdev, &drbd_bm_write, | 677 | err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, |
678 | "size changed", BM_LOCKED_MASK); | 678 | "size changed", BM_LOCKED_MASK); |
679 | if (err) { | 679 | if (err) { |
680 | rv = dev_size_error; | 680 | rv = dev_size_error; |
681 | goto out; | 681 | goto out; |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 910335c30927..01b2ac641c7b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
695 | break; | 695 | break; |
696 | 696 | ||
697 | case resend: | 697 | case resend: |
698 | /* Simply complete (local only) READs. */ | ||
699 | if (!(req->rq_state & RQ_WRITE) && !req->w.cb) { | ||
700 | _req_may_be_done(req, m); | ||
701 | break; | ||
702 | } | ||
703 | |||
698 | /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK | 704 | /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK |
699 | before the connection loss (B&C only); only P_BARRIER_ACK was missing. | 705 | before the connection loss (B&C only); only P_BARRIER_ACK was missing. |
700 | Trowing them out of the TL here by pretending we got a BARRIER_ACK | 706 | Trowing them out of the TL here by pretending we got a BARRIER_ACK |
@@ -834,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns | |||
834 | req->private_bio = NULL; | 840 | req->private_bio = NULL; |
835 | } | 841 | } |
836 | if (rw == WRITE) { | 842 | if (rw == WRITE) { |
837 | remote = 1; | 843 | /* Need to replicate writes. Unless it is an empty flush, |
844 | * which is better mapped to a DRBD P_BARRIER packet, | ||
845 | * also for drbd wire protocol compatibility reasons. */ | ||
846 | if (unlikely(size == 0)) { | ||
847 | /* The only size==0 bios we expect are empty flushes. */ | ||
848 | D_ASSERT(bio->bi_rw & REQ_FLUSH); | ||
849 | remote = 0; | ||
850 | } else | ||
851 | remote = 1; | ||
838 | } else { | 852 | } else { |
839 | /* READ || READA */ | 853 | /* READ || READA */ |
840 | if (local) { | 854 | if (local) { |
@@ -870,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns | |||
870 | * extent. This waits for any resync activity in the corresponding | 884 | * extent. This waits for any resync activity in the corresponding |
871 | * resync extent to finish, and, if necessary, pulls in the target | 885 | * resync extent to finish, and, if necessary, pulls in the target |
872 | * extent into the activity log, which involves further disk io because | 886 | * extent into the activity log, which involves further disk io because |
873 | * of transactional on-disk meta data updates. */ | 887 | * of transactional on-disk meta data updates. |
874 | if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { | 888 | * Empty flushes don't need to go into the activity log, they can only |
889 | * flush data for pending writes which are already in there. */ | ||
890 | if (rw == WRITE && local && size | ||
891 | && !test_bit(AL_SUSPENDED, &mdev->flags)) { | ||
875 | req->rq_state |= RQ_IN_ACT_LOG; | 892 | req->rq_state |= RQ_IN_ACT_LOG; |
876 | drbd_al_begin_io(mdev, sector); | 893 | drbd_al_begin_io(mdev, sector); |
877 | } | 894 | } |
@@ -994,7 +1011,10 @@ allocate_barrier: | |||
994 | if (rw == WRITE && _req_conflicts(req)) | 1011 | if (rw == WRITE && _req_conflicts(req)) |
995 | goto fail_conflicting; | 1012 | goto fail_conflicting; |
996 | 1013 | ||
997 | list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); | 1014 | /* no point in adding empty flushes to the transfer log, |
1015 | * they are mapped to drbd barriers already. */ | ||
1016 | if (likely(size!=0)) | ||
1017 | list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); | ||
998 | 1018 | ||
999 | /* NOTE remote first: to get the concurrent write detection right, | 1019 | /* NOTE remote first: to get the concurrent write detection right, |
1000 | * we must register the request before start of local IO. */ | 1020 | * we must register the request before start of local IO. */ |
@@ -1014,6 +1034,14 @@ allocate_barrier: | |||
1014 | mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) | 1034 | mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) |
1015 | maybe_pull_ahead(mdev); | 1035 | maybe_pull_ahead(mdev); |
1016 | 1036 | ||
1037 | /* If this was a flush, queue a drbd barrier/start a new epoch. | ||
1038 | * Unless the current epoch was empty anyways, or we are not currently | ||
1039 | * replicating, in which case there is no point. */ | ||
1040 | if (unlikely(bio->bi_rw & REQ_FLUSH) | ||
1041 | && mdev->newest_tle->n_writes | ||
1042 | && drbd_should_do_remote(mdev->state)) | ||
1043 | queue_barrier(mdev); | ||
1044 | |||
1017 | spin_unlock_irq(&mdev->req_lock); | 1045 | spin_unlock_irq(&mdev->req_lock); |
1018 | kfree(b); /* if someone else has beaten us to it... */ | 1046 | kfree(b); /* if someone else has beaten us to it... */ |
1019 | 1047 | ||