diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2010-12-15 02:59:11 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-03-10 05:43:19 -0500 |
commit | 19f843aa08e2d8f87a09b4c2edc43b00638423a8 (patch) | |
tree | 49919bd17ba7e03eb7cb76175910714d55704997 /drivers/block/drbd/drbd_actlog.c | |
parent | 95a0f10cddbf93ce89c175ac1c53dad2d20ad309 (diff) |
drbd: bitmap keep track of changes vs on-disk bitmap
When we set or clear bits in a bitmap page,
also set a flag in the page->private pointer.
This allows us to skip writes of unchanged pages.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_actlog.c')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 132 |
1 files changed, 29 insertions, 103 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e3f0f4d31d7..090fc2ce0df 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -262,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) | |||
262 | spin_unlock_irqrestore(&mdev->al_lock, flags); | 262 | spin_unlock_irqrestore(&mdev->al_lock, flags); |
263 | } | 263 | } |
264 | 264 | ||
265 | #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) | ||
266 | /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT | ||
267 | * are still coupled, or assume too much about their relation. | ||
268 | * Code below will not work if this is violated. | ||
269 | * Will be cleaned up with some followup patch. | ||
270 | */ | ||
271 | # error FIXME | ||
272 | #endif | ||
273 | |||
274 | static unsigned int al_extent_to_bm_page(unsigned int al_enr) | ||
275 | { | ||
276 | return al_enr >> | ||
277 | /* bit to page */ | ||
278 | ((PAGE_SHIFT + 3) - | ||
279 | /* al extent number to bit */ | ||
280 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); | ||
281 | } | ||
282 | |||
283 | static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) | ||
284 | { | ||
285 | return rs_enr >> | ||
286 | /* bit to page */ | ||
287 | ((PAGE_SHIFT + 3) - | ||
288 | /* resync extent number to bit */ | ||
289 | (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); | ||
290 | } | ||
291 | |||
265 | int | 292 | int |
266 | w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | 293 | w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) |
267 | { | 294 | { |
@@ -289,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
289 | * For now, we must not write the transaction, | 316 | * For now, we must not write the transaction, |
290 | * if we cannot write out the bitmap of the evicted extent. */ | 317 | * if we cannot write out the bitmap of the evicted extent. */ |
291 | if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) | 318 | if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) |
292 | drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); | 319 | drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); |
293 | 320 | ||
294 | /* The bitmap write may have failed, causing a state change. */ | 321 | /* The bitmap write may have failed, causing a state change. */ |
295 | if (mdev->state.disk < D_INCONSISTENT) { | 322 | if (mdev->state.disk < D_INCONSISTENT) { |
@@ -636,105 +663,6 @@ out_bio_put: | |||
636 | } | 663 | } |
637 | 664 | ||
638 | /** | 665 | /** |
639 | * drbd_al_to_on_disk_bm() - Writes bitmap parts covered by active AL extents | ||
640 | * @mdev: DRBD device. | ||
641 | * | ||
642 | * Called when we detach (unconfigure) local storage, | ||
643 | * or when we go from R_PRIMARY to R_SECONDARY role. | ||
644 | */ | ||
645 | void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) | ||
646 | { | ||
647 | int i, nr_elements; | ||
648 | unsigned int enr; | ||
649 | struct bio **bios; | ||
650 | struct drbd_atodb_wait wc; | ||
651 | |||
652 | ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) | ||
653 | return; /* sorry, I don't have any act_log etc... */ | ||
654 | |||
655 | wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); | ||
656 | |||
657 | nr_elements = mdev->act_log->nr_elements; | ||
658 | |||
659 | /* GFP_KERNEL, we are not in anyone's write-out path */ | ||
660 | bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); | ||
661 | if (!bios) | ||
662 | goto submit_one_by_one; | ||
663 | |||
664 | atomic_set(&wc.count, 0); | ||
665 | init_completion(&wc.io_done); | ||
666 | wc.mdev = mdev; | ||
667 | wc.error = 0; | ||
668 | |||
669 | for (i = 0; i < nr_elements; i++) { | ||
670 | enr = lc_element_by_index(mdev->act_log, i)->lc_number; | ||
671 | if (enr == LC_FREE) | ||
672 | continue; | ||
673 | /* next statement also does atomic_inc wc.count and local_cnt */ | ||
674 | if (atodb_prepare_unless_covered(mdev, bios, | ||
675 | enr/AL_EXT_PER_BM_SECT, | ||
676 | &wc)) | ||
677 | goto free_bios_submit_one_by_one; | ||
678 | } | ||
679 | |||
680 | /* unnecessary optimization? */ | ||
681 | lc_unlock(mdev->act_log); | ||
682 | wake_up(&mdev->al_wait); | ||
683 | |||
684 | /* all prepared, submit them */ | ||
685 | for (i = 0; i < nr_elements; i++) { | ||
686 | if (bios[i] == NULL) | ||
687 | break; | ||
688 | if (drbd_insert_fault(mdev, DRBD_FAULT_MD_WR)) { | ||
689 | bios[i]->bi_rw = WRITE; | ||
690 | bio_endio(bios[i], -EIO); | ||
691 | } else { | ||
692 | submit_bio(WRITE, bios[i]); | ||
693 | } | ||
694 | } | ||
695 | |||
696 | /* always (try to) flush bitmap to stable storage */ | ||
697 | drbd_md_flush(mdev); | ||
698 | |||
699 | /* In case we did not submit a single IO do not wait for | ||
700 | * them to complete. ( Because we would wait forever here. ) | ||
701 | * | ||
702 | * In case we had IOs and they are already complete, there | ||
703 | * is no point in waiting anyway. | ||
704 | * Therefore this if () ... */ | ||
705 | if (atomic_read(&wc.count)) | ||
706 | wait_for_completion(&wc.io_done); | ||
707 | |||
708 | put_ldev(mdev); | ||
709 | |||
710 | kfree(bios); | ||
711 | return; | ||
712 | |||
713 | free_bios_submit_one_by_one: | ||
714 | /* free everything by calling the endio callback directly. */ | ||
715 | for (i = 0; i < nr_elements && bios[i]; i++) | ||
716 | bio_endio(bios[i], 0); | ||
717 | |||
718 | kfree(bios); | ||
719 | |||
720 | submit_one_by_one: | ||
721 | dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); | ||
722 | |||
723 | for (i = 0; i < mdev->act_log->nr_elements; i++) { | ||
724 | enr = lc_element_by_index(mdev->act_log, i)->lc_number; | ||
725 | if (enr == LC_FREE) | ||
726 | continue; | ||
727 | /* Really slow: if we have al-extents 16..19 active, | ||
728 | * sector 4 will be written four times! Synchronous! */ | ||
729 | drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); | ||
730 | } | ||
731 | |||
732 | lc_unlock(mdev->act_log); | ||
733 | wake_up(&mdev->al_wait); | ||
734 | put_ldev(mdev); | ||
735 | } | ||
736 | |||
737 | /** | ||
738 | * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents | 666 | * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents |
739 | * @mdev: DRBD device. | 667 | * @mdev: DRBD device. |
740 | */ | 668 | */ |
@@ -813,7 +741,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused | |||
813 | return 1; | 741 | return 1; |
814 | } | 742 | } |
815 | 743 | ||
816 | drbd_bm_write_sect(mdev, udw->enr); | 744 | drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); |
817 | put_ldev(mdev); | 745 | put_ldev(mdev); |
818 | 746 | ||
819 | kfree(udw); | 747 | kfree(udw); |
@@ -893,7 +821,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
893 | dev_warn(DEV, "Kicking resync_lru element enr=%u " | 821 | dev_warn(DEV, "Kicking resync_lru element enr=%u " |
894 | "out with rs_failed=%d\n", | 822 | "out with rs_failed=%d\n", |
895 | ext->lce.lc_number, ext->rs_failed); | 823 | ext->lce.lc_number, ext->rs_failed); |
896 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
897 | } | 824 | } |
898 | ext->rs_left = rs_left; | 825 | ext->rs_left = rs_left; |
899 | ext->rs_failed = success ? 0 : count; | 826 | ext->rs_failed = success ? 0 : count; |
@@ -912,7 +839,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
912 | drbd_queue_work_front(&mdev->data.work, &udw->w); | 839 | drbd_queue_work_front(&mdev->data.work, &udw->w); |
913 | } else { | 840 | } else { |
914 | dev_warn(DEV, "Could not kmalloc an udw\n"); | 841 | dev_warn(DEV, "Could not kmalloc an udw\n"); |
915 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
916 | } | 842 | } |
917 | } | 843 | } |
918 | } else { | 844 | } else { |