about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 315
-rw-r--r-- drivers/block/drbd/drbd_int.h 42
-rw-r--r-- drivers/block/drbd/drbd_state.c 3
-rw-r--r-- drivers/block/drbd/drbd_worker.c 42
4 files changed, 197 insertions, 205 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9c42edf4871b..278c31f24639 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -667,36 +667,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
667 return 0; 667 return 0;
668} 668}
669 669
670static const char *drbd_change_sync_fname[] = {
671 [RECORD_RS_FAILED] = "drbd_rs_failed_io",
672 [SET_IN_SYNC] = "drbd_set_in_sync",
673 [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
674};
675
670/* ATTENTION. The AL's extents are 4MB each, while the extents in the 676/* ATTENTION. The AL's extents are 4MB each, while the extents in the
671 * resync LRU-cache are 16MB each. 677 * resync LRU-cache are 16MB each.
672 * The caller of this function has to hold an get_ldev() reference. 678 * The caller of this function has to hold an get_ldev() reference.
673 * 679 *
680 * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
681 * potentially pulling in (and recounting the corresponding bits)
682 * this resync extent into the resync extent lru cache.
683 *
684 * Returns whether all bits have been cleared for this resync extent,
685 * precisely: (rs_left <= rs_failed)
686 *
674 * TODO will be obsoleted once we have a caching lru of the on disk bitmap 687 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
675 */ 688 */
676static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, 689static bool update_rs_extent(struct drbd_device *device,
677 int count, int success) 690 unsigned int enr, int count,
691 enum update_sync_bits_mode mode)
678{ 692{
679 struct lc_element *e; 693 struct lc_element *e;
680 unsigned int enr;
681 694
682 D_ASSERT(device, atomic_read(&device->local_cnt)); 695 D_ASSERT(device, atomic_read(&device->local_cnt));
683 696
684 /* I simply assume that a sector/size pair never crosses 697 /* When setting out-of-sync bits,
685 * a 16 MB extent border. (Currently this is true...) */ 698 * we don't need it cached (lc_find).
686 enr = BM_SECT_TO_EXT(sector); 699 * But if it is present in the cache,
687 700 * we should update the cached bit count.
688 e = lc_get(device->resync, enr); 701 * Otherwise, that extent should be in the resync extent lru cache
702 * already -- or we want to pull it in if necessary -- (lc_get),
703 * then update and check rs_left and rs_failed. */
704 if (mode == SET_OUT_OF_SYNC)
705 e = lc_find(device->resync, enr);
706 else
707 e = lc_get(device->resync, enr);
689 if (e) { 708 if (e) {
690 struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); 709 struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
691 if (ext->lce.lc_number == enr) { 710 if (ext->lce.lc_number == enr) {
692 if (success) 711 if (mode == SET_IN_SYNC)
693 ext->rs_left -= count; 712 ext->rs_left -= count;
713 else if (mode == SET_OUT_OF_SYNC)
714 ext->rs_left += count;
694 else 715 else
695 ext->rs_failed += count; 716 ext->rs_failed += count;
696 if (ext->rs_left < ext->rs_failed) { 717 if (ext->rs_left < ext->rs_failed) {
697 drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " 718 drbd_warn(device, "BAD! enr=%u rs_left=%d "
698 "rs_failed=%d count=%d cstate=%s\n", 719 "rs_failed=%d count=%d cstate=%s\n",
699 (unsigned long long)sector,
700 ext->lce.lc_number, ext->rs_left, 720 ext->lce.lc_number, ext->rs_left,
701 ext->rs_failed, count, 721 ext->rs_failed, count,
702 drbd_conn_str(device->state.conn)); 722 drbd_conn_str(device->state.conn));
@@ -730,24 +750,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
730 ext->lce.lc_number, ext->rs_failed); 750 ext->lce.lc_number, ext->rs_failed);
731 } 751 }
732 ext->rs_left = rs_left; 752 ext->rs_left = rs_left;
733 ext->rs_failed = success ? 0 : count; 753 ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
734 /* we don't keep a persistent log of the resync lru, 754 /* we don't keep a persistent log of the resync lru,
735 * we can commit any change right away. */ 755 * we can commit any change right away. */
736 lc_committed(device->resync); 756 lc_committed(device->resync);
737 } 757 }
738 lc_put(device->resync, &ext->lce); 758 if (mode != SET_OUT_OF_SYNC)
759 lc_put(device->resync, &ext->lce);
739 /* no race, we are within the al_lock! */ 760 /* no race, we are within the al_lock! */
740 761
741 if (ext->rs_left == ext->rs_failed) { 762 if (ext->rs_left <= ext->rs_failed) {
742 ext->rs_failed = 0; 763 ext->rs_failed = 0;
743 wake_up(&first_peer_device(device)->connection->sender_work.q_wait); 764 return true;
744 } 765 }
745 } else { 766 } else if (mode != SET_OUT_OF_SYNC) {
767 /* be quiet if lc_find() did not find it. */
746 drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", 768 drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
747 device->resync_locked, 769 device->resync_locked,
748 device->resync->nr_elements, 770 device->resync->nr_elements,
749 device->resync->flags); 771 device->resync->flags);
750 } 772 }
773 return false;
751} 774}
752 775
753void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) 776void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -766,105 +789,112 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
766 } 789 }
767} 790}
768 791
769/* clear the bit corresponding to the piece of storage in question: 792/* It is called lazy update, so don't do write-out too often. */
770 * size byte of data starting from sector. Only clear a bits of the affected 793static bool lazy_bitmap_update_due(struct drbd_device *device)
771 * one ore more _aligned_ BM_BLOCK_SIZE blocks.
772 *
773 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
774 *
775 */
776void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
777 const char *file, const unsigned int line)
778{ 794{
779 /* Is called from worker and receiver context _only_ */ 795 return time_after(jiffies, device->rs_last_bcast + 2*HZ);
780 unsigned long sbnr, ebnr, lbnr; 796}
781 unsigned long count = 0;
782 sector_t esector, nr_sectors;
783 int wake_up = 0;
784 unsigned long flags;
785 797
786 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { 798static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
787 drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", 799{
788 (unsigned long long)sector, size); 800 struct drbd_connection *connection;
801 if (rs_done)
802 set_bit(RS_DONE, &device->flags);
803 /* and also set RS_PROGRESS below */
804 else if (!lazy_bitmap_update_due(device))
789 return; 805 return;
790 }
791 806
792 if (!get_ldev(device)) 807 /* compare with test_and_clear_bit() calls in and above
793 return; /* no disk, no metadata, no bitmap to clear bits in */ 808 * try_update_all_on_disk_bitmaps() from the drbd_worker(). */
794 809 if (test_and_set_bit(RS_PROGRESS, &device->flags))
795 nr_sectors = drbd_get_capacity(device->this_bdev); 810 return;
796 esector = sector + (size >> 9) - 1; 811 connection = first_peer_device(device)->connection;
797 812 if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags))
798 if (!expect(sector < nr_sectors)) 813 wake_up(&connection->sender_work.q_wait);
799 goto out; 814}
800 if (!expect(esector < nr_sectors))
801 esector = nr_sectors - 1;
802
803 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
804
805 /* we clear it (in sync).
806 * round up start sector, round down end sector. we make sure we only
807 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
808 if (unlikely(esector < BM_SECT_PER_BIT-1))
809 goto out;
810 if (unlikely(esector == (nr_sectors-1)))
811 ebnr = lbnr;
812 else
813 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
814 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
815
816 if (sbnr > ebnr)
817 goto out;
818 815
816static int update_sync_bits(struct drbd_device *device,
817 unsigned long sbnr, unsigned long ebnr,
818 enum update_sync_bits_mode mode)
819{
819 /* 820 /*
820 * ok, (capacity & 7) != 0 sometimes, but who cares... 821 * We keep a count of set bits per resync-extent in the ->rs_left
821 * we count rs_{total,left} in bits, not sectors. 822 * caching member, so we need to loop and work within the resync extent
823 * alignment. Typically this loop will execute exactly once.
822 */ 824 */
823 count = drbd_bm_clear_bits(device, sbnr, ebnr); 825 unsigned long flags;
824 if (count) { 826 unsigned long count = 0;
825 drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); 827 unsigned int cleared = 0;
826 spin_lock_irqsave(&device->al_lock, flags); 828 while (sbnr <= ebnr) {
827 drbd_try_clear_on_disk_bm(device, sector, count, true); 829 /* set temporary boundary bit number to last bit number within
828 spin_unlock_irqrestore(&device->al_lock, flags); 830 * the resync extent of the current start bit number,
829 831 * but cap at provided end bit number */
830 /* just wake_up unconditional now, various lc_chaged(), 832 unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
831 * lc_put() in drbd_try_clear_on_disk_bm(). */ 833 unsigned long c;
832 wake_up = 1; 834
835 if (mode == RECORD_RS_FAILED)
836 /* Only called from drbd_rs_failed_io(), bits
837 * supposedly still set. Recount, maybe some
838 * of the bits have been successfully cleared
839 * by application IO meanwhile.
840 */
841 c = drbd_bm_count_bits(device, sbnr, tbnr);
842 else if (mode == SET_IN_SYNC)
843 c = drbd_bm_clear_bits(device, sbnr, tbnr);
844 else /* if (mode == SET_OUT_OF_SYNC) */
845 c = drbd_bm_set_bits(device, sbnr, tbnr);
846
847 if (c) {
848 spin_lock_irqsave(&device->al_lock, flags);
849 cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
850 spin_unlock_irqrestore(&device->al_lock, flags);
851 count += c;
852 }
853 sbnr = tbnr + 1;
833 } 854 }
834out: 855 if (count) {
835 put_ldev(device); 856 if (mode == SET_IN_SYNC) {
836 if (wake_up) 857 unsigned long still_to_go = drbd_bm_total_weight(device);
858 bool rs_is_done = (still_to_go <= device->rs_failed);
859 drbd_advance_rs_marks(device, still_to_go);
860 if (cleared || rs_is_done)
861 maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
862 } else if (mode == RECORD_RS_FAILED)
863 device->rs_failed += count;
837 wake_up(&device->al_wait); 864 wake_up(&device->al_wait);
865 }
866 return count;
838} 867}
839 868
840/* 869/* clear the bit corresponding to the piece of storage in question:
841 * this is intended to set one request worth of data out of sync. 870 * size byte of data starting from sector. Only clear a bits of the affected
842 * affects at least 1 bit, 871 * one ore more _aligned_ BM_BLOCK_SIZE blocks.
843 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. 872 *
873 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
844 * 874 *
845 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
846 * so this can be _any_ process.
847 */ 875 */
848int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, 876int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
849 const char *file, const unsigned int line) 877 enum update_sync_bits_mode mode,
878 const char *file, const unsigned int line)
850{ 879{
851 unsigned long sbnr, ebnr, flags; 880 /* Is called from worker and receiver context _only_ */
881 unsigned long sbnr, ebnr, lbnr;
882 unsigned long count = 0;
852 sector_t esector, nr_sectors; 883 sector_t esector, nr_sectors;
853 unsigned int enr, count = 0;
854 struct lc_element *e;
855 884
856 /* this should be an empty REQ_FLUSH */ 885 /* This would be an empty REQ_FLUSH, be silent. */
857 if (size == 0) 886 if ((mode == SET_OUT_OF_SYNC) && size == 0)
858 return 0; 887 return 0;
859 888
860 if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { 889 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
861 drbd_err(device, "sector: %llus, size: %d\n", 890 drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
862 (unsigned long long)sector, size); 891 drbd_change_sync_fname[mode],
892 (unsigned long long)sector, size);
863 return 0; 893 return 0;
864 } 894 }
865 895
866 if (!get_ldev(device)) 896 if (!get_ldev(device))
867 return 0; /* no disk, no metadata, no bitmap to set bits in */ 897 return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
868 898
869 nr_sectors = drbd_get_capacity(device->this_bdev); 899 nr_sectors = drbd_get_capacity(device->this_bdev);
870 esector = sector + (size >> 9) - 1; 900 esector = sector + (size >> 9) - 1;
@@ -874,25 +904,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
874 if (!expect(esector < nr_sectors)) 904 if (!expect(esector < nr_sectors))
875 esector = nr_sectors - 1; 905 esector = nr_sectors - 1;
876 906
877 /* we set it out of sync, 907 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
878 * we do not need to round anything here */
879 sbnr = BM_SECT_TO_BIT(sector);
880 ebnr = BM_SECT_TO_BIT(esector);
881
882 /* ok, (capacity & 7) != 0 sometimes, but who cares...
883 * we count rs_{total,left} in bits, not sectors. */
884 spin_lock_irqsave(&device->al_lock, flags);
885 count = drbd_bm_set_bits(device, sbnr, ebnr);
886 908
887 enr = BM_SECT_TO_EXT(sector); 909 if (mode == SET_IN_SYNC) {
888 e = lc_find(device->resync, enr); 910 /* Round up start sector, round down end sector. We make sure
889 if (e) 911 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
890 lc_entry(e, struct bm_extent, lce)->rs_left += count; 912 if (unlikely(esector < BM_SECT_PER_BIT-1))
891 spin_unlock_irqrestore(&device->al_lock, flags); 913 goto out;
914 if (unlikely(esector == (nr_sectors-1)))
915 ebnr = lbnr;
916 else
917 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
918 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
919 } else {
920 /* We set it out of sync, or record resync failure.
921 * Should not round anything here. */
922 sbnr = BM_SECT_TO_BIT(sector);
923 ebnr = BM_SECT_TO_BIT(esector);
924 }
892 925
926 count = update_sync_bits(device, sbnr, ebnr, mode);
893out: 927out:
894 put_ldev(device); 928 put_ldev(device);
895
896 return count; 929 return count;
897} 930}
898 931
@@ -1209,69 +1242,3 @@ int drbd_rs_del_all(struct drbd_device *device)
1209 1242
1210 return 0; 1243 return 0;
1211} 1244}
1212
1213/**
1214 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
1215 * @device: DRBD device.
1216 * @sector: The sector number.
1217 * @size: Size of failed IO operation, in byte.
1218 */
1219void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
1220{
1221 /* Is called from worker and receiver context _only_ */
1222 unsigned long sbnr, ebnr, lbnr;
1223 unsigned long count;
1224 sector_t esector, nr_sectors;
1225 int wake_up = 0;
1226
1227 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
1228 drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
1229 (unsigned long long)sector, size);
1230 return;
1231 }
1232 nr_sectors = drbd_get_capacity(device->this_bdev);
1233 esector = sector + (size >> 9) - 1;
1234
1235 if (!expect(sector < nr_sectors))
1236 return;
1237 if (!expect(esector < nr_sectors))
1238 esector = nr_sectors - 1;
1239
1240 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
1241
1242 /*
1243 * round up start sector, round down end sector. we make sure we only
1244 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
1245 if (unlikely(esector < BM_SECT_PER_BIT-1))
1246 return;
1247 if (unlikely(esector == (nr_sectors-1)))
1248 ebnr = lbnr;
1249 else
1250 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
1251 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
1252
1253 if (sbnr > ebnr)
1254 return;
1255
1256 /*
1257 * ok, (capacity & 7) != 0 sometimes, but who cares...
1258 * we count rs_{total,left} in bits, not sectors.
1259 */
1260 spin_lock_irq(&device->al_lock);
1261 count = drbd_bm_count_bits(device, sbnr, ebnr);
1262 if (count) {
1263 device->rs_failed += count;
1264
1265 if (get_ldev(device)) {
1266 drbd_try_clear_on_disk_bm(device, sector, count, false);
1267 put_ldev(device);
1268 }
1269
1270 /* just wake_up unconditional now, various lc_chaged(),
1271 * lc_put() in drbd_try_clear_on_disk_bm(). */
1272 wake_up = 1;
1273 }
1274 spin_unlock_irq(&device->al_lock);
1275 if (wake_up)
1276 wake_up(&device->al_wait);
1277}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index eb002a7656af..a16f9ae3c98a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -432,7 +432,11 @@ enum {
432 * goes into C_CONNECTED state. */ 432 * goes into C_CONNECTED state. */
433 CONSIDER_RESYNC, 433 CONSIDER_RESYNC,
434 434
435 RS_PROGRESS, /* tell worker that resync made significant progress */
436 RS_DONE, /* tell worker that resync is done */
437
435 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ 438 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
439
436 SUSPEND_IO, /* suspend application io */ 440 SUSPEND_IO, /* suspend application io */
437 BITMAP_IO, /* suspend application io; 441 BITMAP_IO, /* suspend application io;
438 once no more io in flight, start bitmap io */ 442 once no more io in flight, start bitmap io */
@@ -577,6 +581,7 @@ enum {
577 * and potentially deadlock on, this drbd worker. 581 * and potentially deadlock on, this drbd worker.
578 */ 582 */
579 DISCONNECT_SENT, 583 DISCONNECT_SENT,
584 CONN_RS_PROGRESS, /* tell worker that resync made significant progress */
580}; 585};
581 586
582struct drbd_resource { 587struct drbd_resource {
@@ -1106,17 +1111,21 @@ struct bm_extent {
1106/* in which _bitmap_ extent (resp. sector) the bit for a certain 1111/* in which _bitmap_ extent (resp. sector) the bit for a certain
1107 * _storage_ sector is located in */ 1112 * _storage_ sector is located in */
1108#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) 1113#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
1114#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
1109 1115
1110/* how much _storage_ sectors we have per bitmap sector */ 1116/* first storage sector a bitmap extent corresponds to */
1111#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) 1117#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
1118/* how much _storage_ sectors we have per bitmap extent */
1112#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) 1119#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
1120/* how many bits are covered by one bitmap extent (resync extent) */
1121#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
1122
1123#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1)
1124
1113 1125
1114/* in one sector of the bitmap, we have this many activity_log extents. */ 1126/* in one sector of the bitmap, we have this many activity_log extents. */
1115#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) 1127#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
1116 1128
1117#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
1118#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
1119
1120/* the extent in "PER_EXTENT" below is an activity log extent 1129/* the extent in "PER_EXTENT" below is an activity log extent
1121 * we need that many (long words/bytes) to store the bitmap 1130 * we need that many (long words/bytes) to store the bitmap
1122 * of one AL_EXTENT_SIZE chunk of storage. 1131 * of one AL_EXTENT_SIZE chunk of storage.
@@ -1214,7 +1223,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon
1214extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); 1223extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
1215extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); 1224extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
1216extern unsigned long drbd_bm_total_weight(struct drbd_device *device); 1225extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
1217extern int drbd_bm_rs_done(struct drbd_device *device);
1218/* for receive_bitmap */ 1226/* for receive_bitmap */
1219extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, 1227extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
1220 size_t number, unsigned long *buffer); 1228 size_t number, unsigned long *buffer);
@@ -1503,14 +1511,17 @@ extern int drbd_rs_del_all(struct drbd_device *device);
1503extern void drbd_rs_failed_io(struct drbd_device *device, 1511extern void drbd_rs_failed_io(struct drbd_device *device,
1504 sector_t sector, int size); 1512 sector_t sector, int size);
1505extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); 1513extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
1506extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, 1514
1507 int size, const char *file, const unsigned int line); 1515enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
1516extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
1517 enum update_sync_bits_mode mode,
1518 const char *file, const unsigned int line);
1508#define drbd_set_in_sync(device, sector, size) \ 1519#define drbd_set_in_sync(device, sector, size) \
1509 __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) 1520 __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__)
1510extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
1511 int size, const char *file, const unsigned int line);
1512#define drbd_set_out_of_sync(device, sector, size) \ 1521#define drbd_set_out_of_sync(device, sector, size) \
1513 __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) 1522 __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__)
1523#define drbd_rs_failed_io(device, sector, size) \
1524 __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__)
1514extern void drbd_al_shrink(struct drbd_device *device); 1525extern void drbd_al_shrink(struct drbd_device *device);
1515extern int drbd_initialize_al(struct drbd_device *, void *); 1526extern int drbd_initialize_al(struct drbd_device *, void *);
1516 1527
@@ -1915,6 +1926,15 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
1915 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); 1926 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
1916} 1927}
1917 1928
1929static inline bool is_sync_state(enum drbd_conns connection_state)
1930{
1931 return
1932 (connection_state == C_SYNC_SOURCE
1933 || connection_state == C_SYNC_TARGET
1934 || connection_state == C_PAUSED_SYNC_S
1935 || connection_state == C_PAUSED_SYNC_T);
1936}
1937
1918/** 1938/**
1919 * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev 1939 * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
1920 * @M: DRBD device. 1940 * @M: DRBD device.
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 19da7c7590cd..1bddd6cf8ac7 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1011,6 +1011,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
1011 atomic_inc(&device->local_cnt); 1011 atomic_inc(&device->local_cnt);
1012 1012
1013 did_remote = drbd_should_do_remote(device->state); 1013 did_remote = drbd_should_do_remote(device->state);
1014 if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
1015 clear_bit(RS_DONE, &device->flags);
1016
1014 device->state.i = ns.i; 1017 device->state.i = ns.i;
1015 should_do_remote = drbd_should_do_remote(device->state); 1018 should_do_remote = drbd_should_do_remote(device->state);
1016 device->resource->susp = ns.susp; 1019 device->resource->susp = ns.susp;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 47bc84017b5b..bafb62eb22c9 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1740,11 +1740,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1740 device->rs_mark_time[i] = now; 1740 device->rs_mark_time[i] = now;
1741 } 1741 }
1742 _drbd_pause_after(device); 1742 _drbd_pause_after(device);
1743 /* Forget potentially stale cached per resync extent bit-counts.
1744 * Open coded drbd_rs_cancel_all(device), we already have IRQs
1745 * disabled, and know the disk state is ok. */
1746 spin_lock(&device->al_lock);
1747 lc_reset(device->resync);
1748 device->resync_locked = 0;
1749 device->resync_wenr = LC_FREE;
1750 spin_unlock(&device->al_lock);
1743 } 1751 }
1744 write_unlock(&global_state_lock); 1752 write_unlock(&global_state_lock);
1745 spin_unlock_irq(&device->resource->req_lock); 1753 spin_unlock_irq(&device->resource->req_lock);
1746 1754
1747 if (r == SS_SUCCESS) { 1755 if (r == SS_SUCCESS) {
1756 wake_up(&device->al_wait); /* for lc_reset() above */
1748 /* reset rs_last_bcast when a resync or verify is started, 1757 /* reset rs_last_bcast when a resync or verify is started,
1749 * to deal with potential jiffies wrap. */ 1758 * to deal with potential jiffies wrap. */
1750 device->rs_last_bcast = jiffies - HZ; 1759 device->rs_last_bcast = jiffies - HZ;
@@ -1807,36 +1816,22 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1807static void update_on_disk_bitmap(struct drbd_device *device) 1816static void update_on_disk_bitmap(struct drbd_device *device)
1808{ 1817{
1809 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; 1818 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1819 bool resync_done = test_and_clear_bit(RS_DONE, &device->flags);
1810 device->rs_last_bcast = jiffies; 1820 device->rs_last_bcast = jiffies;
1811 1821
1812 if (!get_ldev(device)) 1822 if (!get_ldev(device))
1813 return; 1823 return;
1814 1824
1815 drbd_bm_write_lazy(device, 0); 1825 drbd_bm_write_lazy(device, 0);
1816 if (drbd_bm_total_weight(device) <= device->rs_failed) 1826 if (resync_done && is_sync_state(device->state.conn))
1817 drbd_resync_finished(device); 1827 drbd_resync_finished(device);
1828
1818 drbd_bcast_event(device, &sib); 1829 drbd_bcast_event(device, &sib);
1819 /* update timestamp, in case it took a while to write out stuff */ 1830 /* update timestamp, in case it took a while to write out stuff */
1820 device->rs_last_bcast = jiffies; 1831 device->rs_last_bcast = jiffies;
1821 put_ldev(device); 1832 put_ldev(device);
1822} 1833}
1823 1834
1824bool wants_lazy_bitmap_update(struct drbd_device *device)
1825{
1826 enum drbd_conns connection_state = device->state.conn;
1827 return
1828 /* only do a lazy writeout, if device is in some resync state */
1829 (connection_state == C_SYNC_SOURCE
1830 || connection_state == C_SYNC_TARGET
1831 || connection_state == C_PAUSED_SYNC_S
1832 || connection_state == C_PAUSED_SYNC_T) &&
1833 /* AND
1834 * either we just finished, or the last lazy update
1835 * was some time ago already. */
1836 (drbd_bm_total_weight(device) <= device->rs_failed
1837 || time_after(jiffies, device->rs_last_bcast + 2*HZ));
1838}
1839
1840static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) 1835static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection)
1841{ 1836{
1842 struct drbd_peer_device *peer_device; 1837 struct drbd_peer_device *peer_device;
@@ -1845,8 +1840,9 @@ static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection)
1845 rcu_read_lock(); 1840 rcu_read_lock();
1846 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1841 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1847 struct drbd_device *device = peer_device->device; 1842 struct drbd_device *device = peer_device->device;
1848 if (!wants_lazy_bitmap_update(device)) 1843 if (!test_and_clear_bit(RS_PROGRESS, &device->flags))
1849 continue; 1844 continue;
1845
1850 kref_get(&device->kref); 1846 kref_get(&device->kref);
1851 rcu_read_unlock(); 1847 rcu_read_unlock();
1852 update_on_disk_bitmap(device); 1848 update_on_disk_bitmap(device);
@@ -1930,15 +1926,18 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
1930 if (send_barrier) 1926 if (send_barrier)
1931 maybe_send_barrier(connection, 1927 maybe_send_barrier(connection,
1932 connection->send.current_epoch_nr + 1); 1928 connection->send.current_epoch_nr + 1);
1929
1930 if (test_bit(CONN_RS_PROGRESS, &connection->flags))
1931 break;
1932
1933 /* drbd_send() may have called flush_signals() */ 1933 /* drbd_send() may have called flush_signals() */
1934 if (get_t_state(&connection->worker) != RUNNING) 1934 if (get_t_state(&connection->worker) != RUNNING)
1935 break; 1935 break;
1936
1936 schedule(); 1937 schedule();
1937 /* may be woken up for other things but new work, too, 1938 /* may be woken up for other things but new work, too,
1938 * e.g. if the current epoch got closed. 1939 * e.g. if the current epoch got closed.
1939 * In which case we send the barrier above. */ 1940 * In which case we send the barrier above. */
1940
1941 try_update_all_on_disk_bitmaps(connection);
1942 } 1941 }
1943 finish_wait(&connection->sender_work.q_wait, &wait); 1942 finish_wait(&connection->sender_work.q_wait, &wait);
1944 1943
@@ -1973,6 +1972,9 @@ int drbd_worker(struct drbd_thread *thi)
1973 if (list_empty(&work_list)) 1972 if (list_empty(&work_list))
1974 wait_for_work(connection, &work_list); 1973 wait_for_work(connection, &work_list);
1975 1974
1975 if (test_and_clear_bit(CONN_RS_PROGRESS, &connection->flags))
1976 try_update_all_on_disk_bitmaps(connection);
1977
1976 if (signal_pending(current)) { 1978 if (signal_pending(current)) {
1977 flush_signals(current); 1979 flush_signals(current);
1978 if (get_t_state(thi) == RUNNING) { 1980 if (get_t_state(thi) == RUNNING) {