aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2012-07-26 08:09:49 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-09 08:05:32 -0500
commit58ffa580a748dd16b1e5ab260bea39cdbd1e94ef (patch)
tree4155bcde6d49f50545bcb1312b6470c64e823d72 /drivers/block/drbd
parent970fbde1f1ebae0c85bbaed3de83684a58d60fad (diff)
drbd: introduce stop-sector to online verify
We can now schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. Had to bump the protocol version differently, we are now 101. Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; } Also, the return value convention for worker callbacks has changed: we returned "true/false" for "keep the connection up" in 8.3, we return 0 for success and < 0 for failure in 8.4. Affected: receive_state() Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_int.h7
-rw-r--r--drivers/block/drbd/drbd_nl.c14
-rw-r--r--drivers/block/drbd/drbd_proc.c12
-rw-r--r--drivers/block/drbd/drbd_receiver.c10
-rw-r--r--drivers/block/drbd/drbd_state.c17
-rw-r--r--drivers/block/drbd/drbd_worker.c33
6 files changed, 74 insertions, 19 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 22adfc7189de..eddc4388a1b1 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -971,6 +971,7 @@ struct drbd_conf {
971 971
972 /* where does the admin want us to start? (sector) */ 972 /* where does the admin want us to start? (sector) */
973 sector_t ov_start_sector; 973 sector_t ov_start_sector;
974 sector_t ov_stop_sector;
974 /* where are we now? (sector) */ 975 /* where are we now? (sector) */
975 sector_t ov_position; 976 sector_t ov_position;
976 /* Start sector of out of sync range (to merge printk reporting). */ 977 /* Start sector of out of sync range (to merge printk reporting). */
@@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
2264 wake_up(&mdev->misc_wait); 2265 wake_up(&mdev->misc_wait);
2265} 2266}
2266 2267
2268static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev)
2269{
2270 return mdev->tconn->agreed_pro_version >= 97 &&
2271 mdev->tconn->agreed_pro_version != 100;
2272}
2273
2267static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) 2274static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
2268{ 2275{
2269 int changed = mdev->ed_uuid != val; 2276 int changed = mdev->ed_uuid != val;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 4afd626ca3dc..eefb56308aea 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
2939{ 2939{
2940 struct drbd_conf *mdev; 2940 struct drbd_conf *mdev;
2941 enum drbd_ret_code retcode; 2941 enum drbd_ret_code retcode;
2942 struct start_ov_parms parms;
2942 2943
2943 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2944 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2944 if (!adm_ctx.reply_skb) 2945 if (!adm_ctx.reply_skb)
@@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
2947 goto out; 2948 goto out;
2948 2949
2949 mdev = adm_ctx.mdev; 2950 mdev = adm_ctx.mdev;
2951
2952 /* resume from last known position, if possible */
2953 parms.ov_start_sector = mdev->ov_start_sector;
2954 parms.ov_stop_sector = ULLONG_MAX;
2950 if (info->attrs[DRBD_NLA_START_OV_PARMS]) { 2955 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
2951 /* resume from last known position, if possible */
2952 struct start_ov_parms parms =
2953 { .ov_start_sector = mdev->ov_start_sector };
2954 int err = start_ov_parms_from_attrs(&parms, info); 2956 int err = start_ov_parms_from_attrs(&parms, info);
2955 if (err) { 2957 if (err) {
2956 retcode = ERR_MANDATORY_TAG; 2958 retcode = ERR_MANDATORY_TAG;
2957 drbd_msg_put_info(from_attrs_err_to_txt(err)); 2959 drbd_msg_put_info(from_attrs_err_to_txt(err));
2958 goto out; 2960 goto out;
2959 } 2961 }
2960 /* w_make_ov_request expects position to be aligned */
2961 mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
2962 } 2962 }
2963 /* w_make_ov_request expects position to be aligned */
2964 mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
2965 mdev->ov_stop_sector = parms.ov_stop_sector;
2966
2963 /* If there is still bitmap IO pending, e.g. previous resync or verify 2967 /* If there is still bitmap IO pending, e.g. previous resync or verify
2964 * just being finished, wait for it before requesting a new resync. */ 2968 * just being finished, wait for it before requesting a new resync. */
2965 drbd_suspend_io(mdev); 2969 drbd_suspend_io(mdev);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index e0f0d2a6d538..56672a61eb94 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
167 * we convert to sectors in the display below. */ 167 * we convert to sectors in the display below. */
168 unsigned long bm_bits = drbd_bm_bits(mdev); 168 unsigned long bm_bits = drbd_bm_bits(mdev);
169 unsigned long bit_pos; 169 unsigned long bit_pos;
170 unsigned long long stop_sector = 0;
170 if (mdev->state.conn == C_VERIFY_S || 171 if (mdev->state.conn == C_VERIFY_S ||
171 mdev->state.conn == C_VERIFY_T) 172 mdev->state.conn == C_VERIFY_T) {
172 bit_pos = bm_bits - mdev->ov_left; 173 bit_pos = bm_bits - mdev->ov_left;
173 else 174 if (verify_can_do_stop_sector(mdev))
175 stop_sector = mdev->ov_stop_sector;
176 } else
174 bit_pos = mdev->bm_resync_fo; 177 bit_pos = mdev->bm_resync_fo;
175 /* Total sectors may be slightly off for oddly 178 /* Total sectors may be slightly off for oddly
176 * sized devices. So what. */ 179 * sized devices. So what. */
177 seq_printf(seq, 180 seq_printf(seq,
178 "\t%3d%% sector pos: %llu/%llu\n", 181 "\t%3d%% sector pos: %llu/%llu",
179 (int)(bit_pos / (bm_bits/100+1)), 182 (int)(bit_pos / (bm_bits/100+1)),
180 (unsigned long long)bit_pos * BM_SECT_PER_BIT, 183 (unsigned long long)bit_pos * BM_SECT_PER_BIT,
181 (unsigned long long)bm_bits * BM_SECT_PER_BIT); 184 (unsigned long long)bm_bits * BM_SECT_PER_BIT);
185 if (stop_sector != 0 && stop_sector != ULLONG_MAX)
186 seq_printf(seq, " stop sector: %llu", stop_sector);
187 seq_printf(seq, "\n");
182 } 188 }
183} 189}
184 190
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7fe6b01618d4..8fddec96dfbe 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
3843 * already decided to close the connection again, 3843 * already decided to close the connection again,
3844 * we must not "re-establish" it here. */ 3844 * we must not "re-establish" it here. */
3845 if (os.conn <= C_TEAR_DOWN) 3845 if (os.conn <= C_TEAR_DOWN)
3846 return false; 3846 return -ECONNRESET;
3847 3847
3848 /* If this is the "end of sync" confirmation, usually the peer disk 3848 /* If this is the "end of sync" confirmation, usually the peer disk
3849 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits 3849 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
@@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
3875 } 3875 }
3876 } 3876 }
3877 3877
3878 /* explicit verify finished notification, stop sector reached. */
3879 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3880 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
3881 ov_out_of_sync_print(mdev);
3882 drbd_resync_finished(mdev);
3883 return 0;
3884 }
3885
3878 /* peer says his disk is inconsistent, while we think it is uptodate, 3886 /* peer says his disk is inconsistent, while we think it is uptodate,
3879 * and this happens while the peer still thinks we have a sync going on, 3887 * and this happens while the peer still thinks we have a sync going on,
3880 * but we think we are already done with the sync. 3888 * but we think we are already done with the sync.
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 444581828d70..12f2b4fbe559 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
975 wake_up(&mdev->state_wait); 975 wake_up(&mdev->state_wait);
976 wake_up(&mdev->tconn->ping_wait); 976 wake_up(&mdev->tconn->ping_wait);
977 977
978 /* aborted verify run. log the last position */ 978 /* Aborted verify run, or we reached the stop sector.
979 * Log the last position, unless end-of-device. */
979 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && 980 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
980 ns.conn < C_CONNECTED) { 981 ns.conn <= C_CONNECTED) {
981 mdev->ov_start_sector = 982 mdev->ov_start_sector =
982 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); 983 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
983 dev_info(DEV, "Online Verify reached sector %llu\n", 984 if (mdev->ov_left)
984 (unsigned long long)mdev->ov_start_sector); 985 dev_info(DEV, "Online Verify reached sector %llu\n",
986 (unsigned long long)mdev->ov_start_sector);
985 } 987 }
986 988
987 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && 989 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
@@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1422 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) 1424 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1423 drbd_send_state(mdev, ns); 1425 drbd_send_state(mdev, ns);
1424 1426
1427 /* Verify finished, or reached stop sector. Peer did not know about
1428 * the stop sector, and we may even have changed the stop sector during
1429 * verify to interrupt/stop early. Send the new state. */
1430 if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
1431 && verify_can_do_stop_sector(mdev))
1432 drbd_send_state(mdev, ns);
1433
1425 /* Wake up role changes, that were delayed because of connection establishing */ 1434 /* Wake up role changes, that were delayed because of connection establishing */
1426 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { 1435 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
1427 if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) 1436 if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags))
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 9d7e1fb0f431..1c9c6fd332c3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
692 int number, i, size; 692 int number, i, size;
693 sector_t sector; 693 sector_t sector;
694 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 694 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
695 bool stop_sector_reached = false;
695 696
696 if (unlikely(cancel)) 697 if (unlikely(cancel))
697 return 1; 698 return 1;
@@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
700 701
701 sector = mdev->ov_position; 702 sector = mdev->ov_position;
702 for (i = 0; i < number; i++) { 703 for (i = 0; i < number; i++) {
703 if (sector >= capacity) { 704 if (sector >= capacity)
704 return 1; 705 return 1;
705 } 706
707 /* We check for "finished" only in the reply path:
708 * w_e_end_ov_reply().
709 * We need to send at least one request out. */
710 stop_sector_reached = i > 0
711 && verify_can_do_stop_sector(mdev)
712 && sector >= mdev->ov_stop_sector;
713 if (stop_sector_reached)
714 break;
706 715
707 size = BM_BLOCK_SIZE; 716 size = BM_BLOCK_SIZE;
708 717
@@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
726 735
727 requeue: 736 requeue:
728 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); 737 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
729 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 738 if (i == 0 || !stop_sector_reached)
739 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
730 return 1; 740 return 1;
731} 741}
732 742
@@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev)
792 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; 802 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
793 if (dt <= 0) 803 if (dt <= 0)
794 dt = 1; 804 dt = 1;
805
795 db = mdev->rs_total; 806 db = mdev->rs_total;
807 /* adjust for verify start and stop sectors, respective reached position */
808 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
809 db -= mdev->ov_left;
810
796 dbdt = Bit2KB(db/dt); 811 dbdt = Bit2KB(db/dt);
797 mdev->rs_paused /= HZ; 812 mdev->rs_paused /= HZ;
798 813
@@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
815 ns.conn = C_CONNECTED; 830 ns.conn = C_CONNECTED;
816 831
817 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", 832 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
818 verify_done ? "Online verify " : "Resync", 833 verify_done ? "Online verify" : "Resync",
819 dt + mdev->rs_paused, mdev->rs_paused, dbdt); 834 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
820 835
821 n_oos = drbd_bm_total_weight(mdev); 836 n_oos = drbd_bm_total_weight(mdev);
@@ -896,7 +911,9 @@ out:
896 mdev->rs_total = 0; 911 mdev->rs_total = 0;
897 mdev->rs_failed = 0; 912 mdev->rs_failed = 0;
898 mdev->rs_paused = 0; 913 mdev->rs_paused = 0;
899 if (verify_done) 914
915 /* reset start sector, if we reached end of device */
916 if (verify_done && mdev->ov_left == 0)
900 mdev->ov_start_sector = 0; 917 mdev->ov_start_sector = 0;
901 918
902 drbd_md_sync(mdev); 919 drbd_md_sync(mdev);
@@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1144 unsigned int size = peer_req->i.size; 1161 unsigned int size = peer_req->i.size;
1145 int digest_size; 1162 int digest_size;
1146 int err, eq = 0; 1163 int err, eq = 0;
1164 bool stop_sector_reached = false;
1147 1165
1148 if (unlikely(cancel)) { 1166 if (unlikely(cancel)) {
1149 drbd_free_peer_req(mdev, peer_req); 1167 drbd_free_peer_req(mdev, peer_req);
@@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1194 if ((mdev->ov_left & 0x200) == 0x200) 1212 if ((mdev->ov_left & 0x200) == 0x200)
1195 drbd_advance_rs_marks(mdev, mdev->ov_left); 1213 drbd_advance_rs_marks(mdev, mdev->ov_left);
1196 1214
1197 if (mdev->ov_left == 0) { 1215 stop_sector_reached = verify_can_do_stop_sector(mdev) &&
1216 (sector + (size>>9)) >= mdev->ov_stop_sector;
1217
1218 if (mdev->ov_left == 0 || stop_sector_reached) {
1198 ov_out_of_sync_print(mdev); 1219 ov_out_of_sync_print(mdev);
1199 drbd_resync_finished(mdev); 1220 drbd_resync_finished(mdev);
1200 } 1221 }