diff options
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 315 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 42 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 42 |
4 files changed, 197 insertions, 205 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9c42edf4871b..278c31f24639 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -667,36 +667,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer) | |||
667 | return 0; | 667 | return 0; |
668 | } | 668 | } |
669 | 669 | ||
670 | static const char *drbd_change_sync_fname[] = { | ||
671 | [RECORD_RS_FAILED] = "drbd_rs_failed_io", | ||
672 | [SET_IN_SYNC] = "drbd_set_in_sync", | ||
673 | [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync" | ||
674 | }; | ||
675 | |||
670 | /* ATTENTION. The AL's extents are 4MB each, while the extents in the | 676 | /* ATTENTION. The AL's extents are 4MB each, while the extents in the |
671 | * resync LRU-cache are 16MB each. | 677 | * resync LRU-cache are 16MB each. |
672 | * The caller of this function has to hold a get_ldev() reference. | 678 | * The caller of this function has to hold a get_ldev() reference. |
673 | * | 679 | * |
680 | * Adjusts the caching members ->rs_left (SET_IN_SYNC, SET_OUT_OF_SYNC) or ->rs_failed (RECORD_RS_FAILED), | ||
681 | * potentially pulling in (and recounting the corresponding bits) | ||
682 | * this resync extent into the resync extent lru cache. | ||
683 | * | ||
684 | * Returns whether all bits have been cleared for this resync extent, | ||
685 | * precisely: (rs_left <= rs_failed) | ||
686 | * | ||
674 | * TODO will be obsoleted once we have a caching lru of the on disk bitmap | 687 | * TODO will be obsoleted once we have a caching lru of the on disk bitmap |
675 | */ | 688 | */ |
676 | static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, | 689 | static bool update_rs_extent(struct drbd_device *device, |
677 | int count, int success) | 690 | unsigned int enr, int count, |
691 | enum update_sync_bits_mode mode) | ||
678 | { | 692 | { |
679 | struct lc_element *e; | 693 | struct lc_element *e; |
680 | unsigned int enr; | ||
681 | 694 | ||
682 | D_ASSERT(device, atomic_read(&device->local_cnt)); | 695 | D_ASSERT(device, atomic_read(&device->local_cnt)); |
683 | 696 | ||
684 | /* I simply assume that a sector/size pair never crosses | 697 | /* When setting out-of-sync bits, |
685 | * a 16 MB extent border. (Currently this is true...) */ | 698 | * we don't need it cached (lc_find). |
686 | enr = BM_SECT_TO_EXT(sector); | 699 | * But if it is present in the cache, |
687 | 700 | * we should update the cached bit count. | |
688 | e = lc_get(device->resync, enr); | 701 | * Otherwise, that extent should be in the resync extent lru cache |
702 | * already -- or we want to pull it in if necessary -- (lc_get), | ||
703 | * then update and check rs_left and rs_failed. */ | ||
704 | if (mode == SET_OUT_OF_SYNC) | ||
705 | e = lc_find(device->resync, enr); | ||
706 | else | ||
707 | e = lc_get(device->resync, enr); | ||
689 | if (e) { | 708 | if (e) { |
690 | struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); | 709 | struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); |
691 | if (ext->lce.lc_number == enr) { | 710 | if (ext->lce.lc_number == enr) { |
692 | if (success) | 711 | if (mode == SET_IN_SYNC) |
693 | ext->rs_left -= count; | 712 | ext->rs_left -= count; |
713 | else if (mode == SET_OUT_OF_SYNC) | ||
714 | ext->rs_left += count; | ||
694 | else | 715 | else |
695 | ext->rs_failed += count; | 716 | ext->rs_failed += count; |
696 | if (ext->rs_left < ext->rs_failed) { | 717 | if (ext->rs_left < ext->rs_failed) { |
697 | drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " | 718 | drbd_warn(device, "BAD! enr=%u rs_left=%d " |
698 | "rs_failed=%d count=%d cstate=%s\n", | 719 | "rs_failed=%d count=%d cstate=%s\n", |
699 | (unsigned long long)sector, | ||
700 | ext->lce.lc_number, ext->rs_left, | 720 | ext->lce.lc_number, ext->rs_left, |
701 | ext->rs_failed, count, | 721 | ext->rs_failed, count, |
702 | drbd_conn_str(device->state.conn)); | 722 | drbd_conn_str(device->state.conn)); |
@@ -730,24 +750,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto | |||
730 | ext->lce.lc_number, ext->rs_failed); | 750 | ext->lce.lc_number, ext->rs_failed); |
731 | } | 751 | } |
732 | ext->rs_left = rs_left; | 752 | ext->rs_left = rs_left; |
733 | ext->rs_failed = success ? 0 : count; | 753 | ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0; |
734 | /* we don't keep a persistent log of the resync lru, | 754 | /* we don't keep a persistent log of the resync lru, |
735 | * we can commit any change right away. */ | 755 | * we can commit any change right away. */ |
736 | lc_committed(device->resync); | 756 | lc_committed(device->resync); |
737 | } | 757 | } |
738 | lc_put(device->resync, &ext->lce); | 758 | if (mode != SET_OUT_OF_SYNC) |
759 | lc_put(device->resync, &ext->lce); | ||
739 | /* no race, we are within the al_lock! */ | 760 | /* no race, we are within the al_lock! */ |
740 | 761 | ||
741 | if (ext->rs_left == ext->rs_failed) { | 762 | if (ext->rs_left <= ext->rs_failed) { |
742 | ext->rs_failed = 0; | 763 | ext->rs_failed = 0; |
743 | wake_up(&first_peer_device(device)->connection->sender_work.q_wait); | 764 | return true; |
744 | } | 765 | } |
745 | } else { | 766 | } else if (mode != SET_OUT_OF_SYNC) { |
767 | /* be quiet if lc_find() did not find it. */ | ||
746 | drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", | 768 | drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", |
747 | device->resync_locked, | 769 | device->resync_locked, |
748 | device->resync->nr_elements, | 770 | device->resync->nr_elements, |
749 | device->resync->flags); | 771 | device->resync->flags); |
750 | } | 772 | } |
773 | return false; | ||
751 | } | 774 | } |
752 | 775 | ||
753 | void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) | 776 | void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) |
@@ -766,105 +789,112 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go | |||
766 | } | 789 | } |
767 | } | 790 | } |
768 | 791 | ||
769 | /* clear the bit corresponding to the piece of storage in question: | 792 | /* It is called lazy update, so don't do write-out too often. */ |
770 | * size byte of data starting from sector. Only clear a bits of the affected | 793 | static bool lazy_bitmap_update_due(struct drbd_device *device) |
771 | * one ore more _aligned_ BM_BLOCK_SIZE blocks. | ||
772 | * | ||
773 | * called by worker on C_SYNC_TARGET and receiver on SyncSource. | ||
774 | * | ||
775 | */ | ||
776 | void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, | ||
777 | const char *file, const unsigned int line) | ||
778 | { | 794 | { |
779 | /* Is called from worker and receiver context _only_ */ | 795 | return time_after(jiffies, device->rs_last_bcast + 2*HZ); |
780 | unsigned long sbnr, ebnr, lbnr; | 796 | } |
781 | unsigned long count = 0; | ||
782 | sector_t esector, nr_sectors; | ||
783 | int wake_up = 0; | ||
784 | unsigned long flags; | ||
785 | 797 | ||
786 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | 798 | static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) |
787 | drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", | 799 | { |
788 | (unsigned long long)sector, size); | 800 | struct drbd_connection *connection; |
801 | if (rs_done) | ||
802 | set_bit(RS_DONE, &device->flags); | ||
803 | /* and also set RS_PROGRESS below */ | ||
804 | else if (!lazy_bitmap_update_due(device)) | ||
789 | return; | 805 | return; |
790 | } | ||
791 | 806 | ||
792 | if (!get_ldev(device)) | 807 | /* compare with test_and_clear_bit() calls in and above |
793 | return; /* no disk, no metadata, no bitmap to clear bits in */ | 808 | * try_update_all_on_disk_bitmaps() from the drbd_worker(). */ |
794 | 809 | if (test_and_set_bit(RS_PROGRESS, &device->flags)) | |
795 | nr_sectors = drbd_get_capacity(device->this_bdev); | 810 | return; |
796 | esector = sector + (size >> 9) - 1; | 811 | connection = first_peer_device(device)->connection; |
797 | 812 | if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags)) | |
798 | if (!expect(sector < nr_sectors)) | 813 | wake_up(&connection->sender_work.q_wait); |
799 | goto out; | 814 | } |
800 | if (!expect(esector < nr_sectors)) | ||
801 | esector = nr_sectors - 1; | ||
802 | |||
803 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); | ||
804 | |||
805 | /* we clear it (in sync). | ||
806 | * round up start sector, round down end sector. we make sure we only | ||
807 | * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ | ||
808 | if (unlikely(esector < BM_SECT_PER_BIT-1)) | ||
809 | goto out; | ||
810 | if (unlikely(esector == (nr_sectors-1))) | ||
811 | ebnr = lbnr; | ||
812 | else | ||
813 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
814 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
815 | |||
816 | if (sbnr > ebnr) | ||
817 | goto out; | ||
818 | 815 | ||
816 | static int update_sync_bits(struct drbd_device *device, | ||
817 | unsigned long sbnr, unsigned long ebnr, | ||
818 | enum update_sync_bits_mode mode) | ||
819 | { | ||
819 | /* | 820 | /* |
820 | * ok, (capacity & 7) != 0 sometimes, but who cares... | 821 | * We keep a count of set bits per resync-extent in the ->rs_left |
821 | * we count rs_{total,left} in bits, not sectors. | 822 | * caching member, so we need to loop and work within the resync extent |
823 | * alignment. Typically this loop will execute exactly once. | ||
822 | */ | 824 | */ |
823 | count = drbd_bm_clear_bits(device, sbnr, ebnr); | 825 | unsigned long flags; |
824 | if (count) { | 826 | unsigned long count = 0; |
825 | drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); | 827 | unsigned int cleared = 0; |
826 | spin_lock_irqsave(&device->al_lock, flags); | 828 | while (sbnr <= ebnr) { |
827 | drbd_try_clear_on_disk_bm(device, sector, count, true); | 829 | /* set temporary boundary bit number to last bit number within |
828 | spin_unlock_irqrestore(&device->al_lock, flags); | 830 | * the resync extent of the current start bit number, |
829 | 831 | * but cap at provided end bit number */ | |
830 | /* just wake_up unconditional now, various lc_chaged(), | 832 | unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK); |
831 | * lc_put() in drbd_try_clear_on_disk_bm(). */ | 833 | unsigned long c; |
832 | wake_up = 1; | 834 | |
835 | if (mode == RECORD_RS_FAILED) | ||
836 | /* Only called from drbd_rs_failed_io(), bits | ||
837 | * supposedly still set. Recount, maybe some | ||
838 | * of the bits have been successfully cleared | ||
839 | * by application IO meanwhile. | ||
840 | */ | ||
841 | c = drbd_bm_count_bits(device, sbnr, tbnr); | ||
842 | else if (mode == SET_IN_SYNC) | ||
843 | c = drbd_bm_clear_bits(device, sbnr, tbnr); | ||
844 | else /* if (mode == SET_OUT_OF_SYNC) */ | ||
845 | c = drbd_bm_set_bits(device, sbnr, tbnr); | ||
846 | |||
847 | if (c) { | ||
848 | spin_lock_irqsave(&device->al_lock, flags); | ||
849 | cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode); | ||
850 | spin_unlock_irqrestore(&device->al_lock, flags); | ||
851 | count += c; | ||
852 | } | ||
853 | sbnr = tbnr + 1; | ||
833 | } | 854 | } |
834 | out: | 855 | if (count) { |
835 | put_ldev(device); | 856 | if (mode == SET_IN_SYNC) { |
836 | if (wake_up) | 857 | unsigned long still_to_go = drbd_bm_total_weight(device); |
858 | bool rs_is_done = (still_to_go <= device->rs_failed); | ||
859 | drbd_advance_rs_marks(device, still_to_go); | ||
860 | if (cleared || rs_is_done) | ||
861 | maybe_schedule_on_disk_bitmap_update(device, rs_is_done); | ||
862 | } else if (mode == RECORD_RS_FAILED) | ||
863 | device->rs_failed += count; | ||
837 | wake_up(&device->al_wait); | 864 | wake_up(&device->al_wait); |
865 | } | ||
866 | return count; | ||
838 | } | 867 | } |
839 | 868 | ||
840 | /* | 869 | /* clear the bit corresponding to the piece of storage in question: |
841 | * this is intended to set one request worth of data out of sync. | 870 | * size bytes of data starting from sector. Only clear the bits of the affected |
842 | * affects at least 1 bit, | 871 | * one or more _aligned_ BM_BLOCK_SIZE blocks. |
843 | * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. | 872 | * |
873 | * called by worker on C_SYNC_TARGET and receiver on SyncSource. | ||
844 | * | 874 | * |
845 | * called by tl_clear and drbd_send_dblock (==drbd_make_request). | ||
846 | * so this can be _any_ process. | ||
847 | */ | 875 | */ |
848 | int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, | 876 | int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, |
849 | const char *file, const unsigned int line) | 877 | enum update_sync_bits_mode mode, |
878 | const char *file, const unsigned int line) | ||
850 | { | 879 | { |
851 | unsigned long sbnr, ebnr, flags; | 880 | /* Is called from worker and receiver context _only_ */ |
881 | unsigned long sbnr, ebnr, lbnr; | ||
882 | unsigned long count = 0; | ||
852 | sector_t esector, nr_sectors; | 883 | sector_t esector, nr_sectors; |
853 | unsigned int enr, count = 0; | ||
854 | struct lc_element *e; | ||
855 | 884 | ||
856 | /* this should be an empty REQ_FLUSH */ | 885 | /* This would be an empty REQ_FLUSH, be silent. */ |
857 | if (size == 0) | 886 | if ((mode == SET_OUT_OF_SYNC) && size == 0) |
858 | return 0; | 887 | return 0; |
859 | 888 | ||
860 | if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | 889 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { |
861 | drbd_err(device, "sector: %llus, size: %d\n", | 890 | drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", |
862 | (unsigned long long)sector, size); | 891 | drbd_change_sync_fname[mode], |
892 | (unsigned long long)sector, size); | ||
863 | return 0; | 893 | return 0; |
864 | } | 894 | } |
865 | 895 | ||
866 | if (!get_ldev(device)) | 896 | if (!get_ldev(device)) |
867 | return 0; /* no disk, no metadata, no bitmap to set bits in */ | 897 | return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ |
868 | 898 | ||
869 | nr_sectors = drbd_get_capacity(device->this_bdev); | 899 | nr_sectors = drbd_get_capacity(device->this_bdev); |
870 | esector = sector + (size >> 9) - 1; | 900 | esector = sector + (size >> 9) - 1; |
@@ -874,25 +904,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size | |||
874 | if (!expect(esector < nr_sectors)) | 904 | if (!expect(esector < nr_sectors)) |
875 | esector = nr_sectors - 1; | 905 | esector = nr_sectors - 1; |
876 | 906 | ||
877 | /* we set it out of sync, | 907 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); |
878 | * we do not need to round anything here */ | ||
879 | sbnr = BM_SECT_TO_BIT(sector); | ||
880 | ebnr = BM_SECT_TO_BIT(esector); | ||
881 | |||
882 | /* ok, (capacity & 7) != 0 sometimes, but who cares... | ||
883 | * we count rs_{total,left} in bits, not sectors. */ | ||
884 | spin_lock_irqsave(&device->al_lock, flags); | ||
885 | count = drbd_bm_set_bits(device, sbnr, ebnr); | ||
886 | 908 | ||
887 | enr = BM_SECT_TO_EXT(sector); | 909 | if (mode == SET_IN_SYNC) { |
888 | e = lc_find(device->resync, enr); | 910 | /* Round up start sector, round down end sector. We make sure |
889 | if (e) | 911 | * we only clear full, aligned, BM_BLOCK_SIZE blocks. */ |
890 | lc_entry(e, struct bm_extent, lce)->rs_left += count; | 912 | if (unlikely(esector < BM_SECT_PER_BIT-1)) |
891 | spin_unlock_irqrestore(&device->al_lock, flags); | 913 | goto out; |
914 | if (unlikely(esector == (nr_sectors-1))) | ||
915 | ebnr = lbnr; | ||
916 | else | ||
917 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
918 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
919 | } else { | ||
920 | /* We set it out of sync, or record resync failure. | ||
921 | * Should not round anything here. */ | ||
922 | sbnr = BM_SECT_TO_BIT(sector); | ||
923 | ebnr = BM_SECT_TO_BIT(esector); | ||
924 | } | ||
892 | 925 | ||
926 | count = update_sync_bits(device, sbnr, ebnr, mode); | ||
893 | out: | 927 | out: |
894 | put_ldev(device); | 928 | put_ldev(device); |
895 | |||
896 | return count; | 929 | return count; |
897 | } | 930 | } |
898 | 931 | ||
@@ -1209,69 +1242,3 @@ int drbd_rs_del_all(struct drbd_device *device) | |||
1209 | 1242 | ||
1210 | return 0; | 1243 | return 0; |
1211 | } | 1244 | } |
1212 | |||
1213 | /** | ||
1214 | * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks | ||
1215 | * @device: DRBD device. | ||
1216 | * @sector: The sector number. | ||
1217 | * @size: Size of failed IO operation, in byte. | ||
1218 | */ | ||
1219 | void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) | ||
1220 | { | ||
1221 | /* Is called from worker and receiver context _only_ */ | ||
1222 | unsigned long sbnr, ebnr, lbnr; | ||
1223 | unsigned long count; | ||
1224 | sector_t esector, nr_sectors; | ||
1225 | int wake_up = 0; | ||
1226 | |||
1227 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | ||
1228 | drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", | ||
1229 | (unsigned long long)sector, size); | ||
1230 | return; | ||
1231 | } | ||
1232 | nr_sectors = drbd_get_capacity(device->this_bdev); | ||
1233 | esector = sector + (size >> 9) - 1; | ||
1234 | |||
1235 | if (!expect(sector < nr_sectors)) | ||
1236 | return; | ||
1237 | if (!expect(esector < nr_sectors)) | ||
1238 | esector = nr_sectors - 1; | ||
1239 | |||
1240 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); | ||
1241 | |||
1242 | /* | ||
1243 | * round up start sector, round down end sector. we make sure we only | ||
1244 | * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */ | ||
1245 | if (unlikely(esector < BM_SECT_PER_BIT-1)) | ||
1246 | return; | ||
1247 | if (unlikely(esector == (nr_sectors-1))) | ||
1248 | ebnr = lbnr; | ||
1249 | else | ||
1250 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
1251 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
1252 | |||
1253 | if (sbnr > ebnr) | ||
1254 | return; | ||
1255 | |||
1256 | /* | ||
1257 | * ok, (capacity & 7) != 0 sometimes, but who cares... | ||
1258 | * we count rs_{total,left} in bits, not sectors. | ||
1259 | */ | ||
1260 | spin_lock_irq(&device->al_lock); | ||
1261 | count = drbd_bm_count_bits(device, sbnr, ebnr); | ||
1262 | if (count) { | ||
1263 | device->rs_failed += count; | ||
1264 | |||
1265 | if (get_ldev(device)) { | ||
1266 | drbd_try_clear_on_disk_bm(device, sector, count, false); | ||
1267 | put_ldev(device); | ||
1268 | } | ||
1269 | |||
1270 | /* just wake_up unconditional now, various lc_chaged(), | ||
1271 | * lc_put() in drbd_try_clear_on_disk_bm(). */ | ||
1272 | wake_up = 1; | ||
1273 | } | ||
1274 | spin_unlock_irq(&device->al_lock); | ||
1275 | if (wake_up) | ||
1276 | wake_up(&device->al_wait); | ||
1277 | } | ||
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eb002a7656af..a16f9ae3c98a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -432,7 +432,11 @@ enum { | |||
432 | * goes into C_CONNECTED state. */ | 432 | * goes into C_CONNECTED state. */ |
433 | CONSIDER_RESYNC, | 433 | CONSIDER_RESYNC, |
434 | 434 | ||
435 | RS_PROGRESS, /* tell worker that resync made significant progress */ | ||
436 | RS_DONE, /* tell worker that resync is done */ | ||
437 | |||
435 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ | 438 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ |
439 | |||
436 | SUSPEND_IO, /* suspend application io */ | 440 | SUSPEND_IO, /* suspend application io */ |
437 | BITMAP_IO, /* suspend application io; | 441 | BITMAP_IO, /* suspend application io; |
438 | once no more io in flight, start bitmap io */ | 442 | once no more io in flight, start bitmap io */ |
@@ -577,6 +581,7 @@ enum { | |||
577 | * and potentially deadlock on, this drbd worker. | 581 | * and potentially deadlock on, this drbd worker. |
578 | */ | 582 | */ |
579 | DISCONNECT_SENT, | 583 | DISCONNECT_SENT, |
584 | CONN_RS_PROGRESS, /* tell worker that resync made significant progress */ | ||
580 | }; | 585 | }; |
581 | 586 | ||
582 | struct drbd_resource { | 587 | struct drbd_resource { |
@@ -1106,17 +1111,21 @@ struct bm_extent { | |||
1106 | /* in which _bitmap_ extent (resp. sector) the bit for a certain | 1111 | /* in which _bitmap_ extent (resp. sector) the bit for a certain |
1107 | * _storage_ sector is located in */ | 1112 | * _storage_ sector is located in */ |
1108 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) | 1113 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) |
1114 | #define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) | ||
1109 | 1115 | ||
1110 | /* how much _storage_ sectors we have per bitmap sector */ | 1116 | /* first storage sector a bitmap extent corresponds to */ |
1111 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) | 1117 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) |
1118 | /* how many _storage_ sectors we have per bitmap extent */ | ||
1112 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) | 1119 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) |
1120 | /* how many bits are covered by one bitmap extent (resync extent) */ | ||
1121 | #define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) | ||
1122 | |||
1123 | #define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1) | ||
1124 | |||
1113 | 1125 | ||
1114 | /* in one sector of the bitmap, we have this many activity_log extents. */ | 1126 | /* in one sector of the bitmap, we have this many activity_log extents. */ |
1115 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) | 1127 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) |
1116 | 1128 | ||
1117 | #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) | ||
1118 | #define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) | ||
1119 | |||
1120 | /* the extent in "PER_EXTENT" below is an activity log extent | 1129 | /* the extent in "PER_EXTENT" below is an activity log extent |
1121 | * we need that many (long words/bytes) to store the bitmap | 1130 | * we need that many (long words/bytes) to store the bitmap |
1122 | * of one AL_EXTENT_SIZE chunk of storage. | 1131 | * of one AL_EXTENT_SIZE chunk of storage. |
@@ -1214,7 +1223,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon | |||
1214 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); | 1223 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); |
1215 | extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); | 1224 | extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); |
1216 | extern unsigned long drbd_bm_total_weight(struct drbd_device *device); | 1225 | extern unsigned long drbd_bm_total_weight(struct drbd_device *device); |
1217 | extern int drbd_bm_rs_done(struct drbd_device *device); | ||
1218 | /* for receive_bitmap */ | 1226 | /* for receive_bitmap */ |
1219 | extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, | 1227 | extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, |
1220 | size_t number, unsigned long *buffer); | 1228 | size_t number, unsigned long *buffer); |
@@ -1503,14 +1511,17 @@ extern int drbd_rs_del_all(struct drbd_device *device); | |||
1503 | extern void drbd_rs_failed_io(struct drbd_device *device, | 1511 | extern void drbd_rs_failed_io(struct drbd_device *device, |
1504 | sector_t sector, int size); | 1512 | sector_t sector, int size); |
1505 | extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); | 1513 | extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); |
1506 | extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, | 1514 | |
1507 | int size, const char *file, const unsigned int line); | 1515 | enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; |
1516 | extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, | ||
1517 | enum update_sync_bits_mode mode, | ||
1518 | const char *file, const unsigned int line); | ||
1508 | #define drbd_set_in_sync(device, sector, size) \ | 1519 | #define drbd_set_in_sync(device, sector, size) \ |
1509 | __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) | 1520 | __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__) |
1510 | extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, | ||
1511 | int size, const char *file, const unsigned int line); | ||
1512 | #define drbd_set_out_of_sync(device, sector, size) \ | 1521 | #define drbd_set_out_of_sync(device, sector, size) \ |
1513 | __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) | 1522 | __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__) |
1523 | #define drbd_rs_failed_io(device, sector, size) \ | ||
1524 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__) | ||
1514 | extern void drbd_al_shrink(struct drbd_device *device); | 1525 | extern void drbd_al_shrink(struct drbd_device *device); |
1515 | extern int drbd_initialize_al(struct drbd_device *, void *); | 1526 | extern int drbd_initialize_al(struct drbd_device *, void *); |
1516 | 1527 | ||
@@ -1915,6 +1926,15 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f | |||
1915 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); | 1926 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); |
1916 | } | 1927 | } |
1917 | 1928 | ||
1929 | static inline bool is_sync_state(enum drbd_conns connection_state) | ||
1930 | { | ||
1931 | return | ||
1932 | (connection_state == C_SYNC_SOURCE | ||
1933 | || connection_state == C_SYNC_TARGET | ||
1934 | || connection_state == C_PAUSED_SYNC_S | ||
1935 | || connection_state == C_PAUSED_SYNC_T); | ||
1936 | } | ||
1937 | |||
1918 | /** | 1938 | /** |
1919 | * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev | 1939 | * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev |
1920 | * @M: DRBD device. | 1940 | * @M: DRBD device. |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 19da7c7590cd..1bddd6cf8ac7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -1011,6 +1011,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1011 | atomic_inc(&device->local_cnt); | 1011 | atomic_inc(&device->local_cnt); |
1012 | 1012 | ||
1013 | did_remote = drbd_should_do_remote(device->state); | 1013 | did_remote = drbd_should_do_remote(device->state); |
1014 | if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) | ||
1015 | clear_bit(RS_DONE, &device->flags); | ||
1016 | |||
1014 | device->state.i = ns.i; | 1017 | device->state.i = ns.i; |
1015 | should_do_remote = drbd_should_do_remote(device->state); | 1018 | should_do_remote = drbd_should_do_remote(device->state); |
1016 | device->resource->susp = ns.susp; | 1019 | device->resource->susp = ns.susp; |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 47bc84017b5b..bafb62eb22c9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -1740,11 +1740,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1740 | device->rs_mark_time[i] = now; | 1740 | device->rs_mark_time[i] = now; |
1741 | } | 1741 | } |
1742 | _drbd_pause_after(device); | 1742 | _drbd_pause_after(device); |
1743 | /* Forget potentially stale cached per resync extent bit-counts. | ||
1744 | * Open coded drbd_rs_cancel_all(device), we already have IRQs | ||
1745 | * disabled, and know the disk state is ok. */ | ||
1746 | spin_lock(&device->al_lock); | ||
1747 | lc_reset(device->resync); | ||
1748 | device->resync_locked = 0; | ||
1749 | device->resync_wenr = LC_FREE; | ||
1750 | spin_unlock(&device->al_lock); | ||
1743 | } | 1751 | } |
1744 | write_unlock(&global_state_lock); | 1752 | write_unlock(&global_state_lock); |
1745 | spin_unlock_irq(&device->resource->req_lock); | 1753 | spin_unlock_irq(&device->resource->req_lock); |
1746 | 1754 | ||
1747 | if (r == SS_SUCCESS) { | 1755 | if (r == SS_SUCCESS) { |
1756 | wake_up(&device->al_wait); /* for lc_reset() above */ | ||
1748 | /* reset rs_last_bcast when a resync or verify is started, | 1757 | /* reset rs_last_bcast when a resync or verify is started, |
1749 | * to deal with potential jiffies wrap. */ | 1758 | * to deal with potential jiffies wrap. */ |
1750 | device->rs_last_bcast = jiffies - HZ; | 1759 | device->rs_last_bcast = jiffies - HZ; |
@@ -1807,36 +1816,22 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1807 | static void update_on_disk_bitmap(struct drbd_device *device) | 1816 | static void update_on_disk_bitmap(struct drbd_device *device) |
1808 | { | 1817 | { |
1809 | struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; | 1818 | struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; |
1819 | bool resync_done = test_and_clear_bit(RS_DONE, &device->flags); | ||
1810 | device->rs_last_bcast = jiffies; | 1820 | device->rs_last_bcast = jiffies; |
1811 | 1821 | ||
1812 | if (!get_ldev(device)) | 1822 | if (!get_ldev(device)) |
1813 | return; | 1823 | return; |
1814 | 1824 | ||
1815 | drbd_bm_write_lazy(device, 0); | 1825 | drbd_bm_write_lazy(device, 0); |
1816 | if (drbd_bm_total_weight(device) <= device->rs_failed) | 1826 | if (resync_done && is_sync_state(device->state.conn)) |
1817 | drbd_resync_finished(device); | 1827 | drbd_resync_finished(device); |
1828 | |||
1818 | drbd_bcast_event(device, &sib); | 1829 | drbd_bcast_event(device, &sib); |
1819 | /* update timestamp, in case it took a while to write out stuff */ | 1830 | /* update timestamp, in case it took a while to write out stuff */ |
1820 | device->rs_last_bcast = jiffies; | 1831 | device->rs_last_bcast = jiffies; |
1821 | put_ldev(device); | 1832 | put_ldev(device); |
1822 | } | 1833 | } |
1823 | 1834 | ||
1824 | bool wants_lazy_bitmap_update(struct drbd_device *device) | ||
1825 | { | ||
1826 | enum drbd_conns connection_state = device->state.conn; | ||
1827 | return | ||
1828 | /* only do a lazy writeout, if device is in some resync state */ | ||
1829 | (connection_state == C_SYNC_SOURCE | ||
1830 | || connection_state == C_SYNC_TARGET | ||
1831 | || connection_state == C_PAUSED_SYNC_S | ||
1832 | || connection_state == C_PAUSED_SYNC_T) && | ||
1833 | /* AND | ||
1834 | * either we just finished, or the last lazy update | ||
1835 | * was some time ago already. */ | ||
1836 | (drbd_bm_total_weight(device) <= device->rs_failed | ||
1837 | || time_after(jiffies, device->rs_last_bcast + 2*HZ)); | ||
1838 | } | ||
1839 | |||
1840 | static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) | 1835 | static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) |
1841 | { | 1836 | { |
1842 | struct drbd_peer_device *peer_device; | 1837 | struct drbd_peer_device *peer_device; |
@@ -1845,8 +1840,9 @@ static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) | |||
1845 | rcu_read_lock(); | 1840 | rcu_read_lock(); |
1846 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1841 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1847 | struct drbd_device *device = peer_device->device; | 1842 | struct drbd_device *device = peer_device->device; |
1848 | if (!wants_lazy_bitmap_update(device)) | 1843 | if (!test_and_clear_bit(RS_PROGRESS, &device->flags)) |
1849 | continue; | 1844 | continue; |
1845 | |||
1850 | kref_get(&device->kref); | 1846 | kref_get(&device->kref); |
1851 | rcu_read_unlock(); | 1847 | rcu_read_unlock(); |
1852 | update_on_disk_bitmap(device); | 1848 | update_on_disk_bitmap(device); |
@@ -1930,15 +1926,18 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * | |||
1930 | if (send_barrier) | 1926 | if (send_barrier) |
1931 | maybe_send_barrier(connection, | 1927 | maybe_send_barrier(connection, |
1932 | connection->send.current_epoch_nr + 1); | 1928 | connection->send.current_epoch_nr + 1); |
1929 | |||
1930 | if (test_bit(CONN_RS_PROGRESS, &connection->flags)) | ||
1931 | break; | ||
1932 | |||
1933 | /* drbd_send() may have called flush_signals() */ | 1933 | /* drbd_send() may have called flush_signals() */ |
1934 | if (get_t_state(&connection->worker) != RUNNING) | 1934 | if (get_t_state(&connection->worker) != RUNNING) |
1935 | break; | 1935 | break; |
1936 | |||
1936 | schedule(); | 1937 | schedule(); |
1937 | /* may be woken up for other things but new work, too, | 1938 | /* may be woken up for other things but new work, too, |
1938 | * e.g. if the current epoch got closed. | 1939 | * e.g. if the current epoch got closed. |
1939 | * In which case we send the barrier above. */ | 1940 | * In which case we send the barrier above. */ |
1940 | |||
1941 | try_update_all_on_disk_bitmaps(connection); | ||
1942 | } | 1941 | } |
1943 | finish_wait(&connection->sender_work.q_wait, &wait); | 1942 | finish_wait(&connection->sender_work.q_wait, &wait); |
1944 | 1943 | ||
@@ -1973,6 +1972,9 @@ int drbd_worker(struct drbd_thread *thi) | |||
1973 | if (list_empty(&work_list)) | 1972 | if (list_empty(&work_list)) |
1974 | wait_for_work(connection, &work_list); | 1973 | wait_for_work(connection, &work_list); |
1975 | 1974 | ||
1975 | if (test_and_clear_bit(CONN_RS_PROGRESS, &connection->flags)) | ||
1976 | try_update_all_on_disk_bitmaps(connection); | ||
1977 | |||
1976 | if (signal_pending(current)) { | 1978 | if (signal_pending(current)) { |
1977 | flush_signals(current); | 1979 | flush_signals(current); |
1978 | if (get_t_state(thi) == RUNNING) { | 1980 | if (get_t_state(thi) == RUNNING) { |