diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2012-07-26 08:09:49 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-09 08:05:32 -0500 |
commit | 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef (patch) | |
tree | 4155bcde6d49f50545bcb1312b6470c64e823d72 /drivers/block/drbd | |
parent | 970fbde1f1ebae0c85bbaed3de83684a58d60fad (diff) |
drbd: introduce stop-sector to online verify
We now can schedule only a specific range of sectors for online verify,
or interrupt a running verify without interrupting the connection.
Had to bump the protocol version differently, we are now 101.
Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; }
Also, the return value convention for worker callbacks has changed,
we returned "true/false" for "keep the connection up" in 8.3,
we return 0 for success and < 0 (a negative error code) for failure in 8.4.
Affected: receive_state()
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 7 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 14 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_proc.c | 12 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 10 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 17 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 33 |
6 files changed, 74 insertions, 19 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 22adfc7189de..eddc4388a1b1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -971,6 +971,7 @@ struct drbd_conf { | |||
971 | 971 | ||
972 | /* where does the admin want us to start? (sector) */ | 972 | /* where does the admin want us to start? (sector) */ |
973 | sector_t ov_start_sector; | 973 | sector_t ov_start_sector; |
974 | sector_t ov_stop_sector; | ||
974 | /* where are we now? (sector) */ | 975 | /* where are we now? (sector) */ |
975 | sector_t ov_position; | 976 | sector_t ov_position; |
976 | /* Start sector of out of sync range (to merge printk reporting). */ | 977 | /* Start sector of out of sync range (to merge printk reporting). */ |
@@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) | |||
2264 | wake_up(&mdev->misc_wait); | 2265 | wake_up(&mdev->misc_wait); |
2265 | } | 2266 | } |
2266 | 2267 | ||
2268 | static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev) | ||
2269 | { | ||
2270 | return mdev->tconn->agreed_pro_version >= 97 && | ||
2271 | mdev->tconn->agreed_pro_version != 100; | ||
2272 | } | ||
2273 | |||
2267 | static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) | 2274 | static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) |
2268 | { | 2275 | { |
2269 | int changed = mdev->ed_uuid != val; | 2276 | int changed = mdev->ed_uuid != val; |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4afd626ca3dc..eefb56308aea 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) | |||
2939 | { | 2939 | { |
2940 | struct drbd_conf *mdev; | 2940 | struct drbd_conf *mdev; |
2941 | enum drbd_ret_code retcode; | 2941 | enum drbd_ret_code retcode; |
2942 | struct start_ov_parms parms; | ||
2942 | 2943 | ||
2943 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2944 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); |
2944 | if (!adm_ctx.reply_skb) | 2945 | if (!adm_ctx.reply_skb) |
@@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) | |||
2947 | goto out; | 2948 | goto out; |
2948 | 2949 | ||
2949 | mdev = adm_ctx.mdev; | 2950 | mdev = adm_ctx.mdev; |
2951 | |||
2952 | /* resume from last known position, if possible */ | ||
2953 | parms.ov_start_sector = mdev->ov_start_sector; | ||
2954 | parms.ov_stop_sector = ULLONG_MAX; | ||
2950 | if (info->attrs[DRBD_NLA_START_OV_PARMS]) { | 2955 | if (info->attrs[DRBD_NLA_START_OV_PARMS]) { |
2951 | /* resume from last known position, if possible */ | ||
2952 | struct start_ov_parms parms = | ||
2953 | { .ov_start_sector = mdev->ov_start_sector }; | ||
2954 | int err = start_ov_parms_from_attrs(&parms, info); | 2956 | int err = start_ov_parms_from_attrs(&parms, info); |
2955 | if (err) { | 2957 | if (err) { |
2956 | retcode = ERR_MANDATORY_TAG; | 2958 | retcode = ERR_MANDATORY_TAG; |
2957 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2959 | drbd_msg_put_info(from_attrs_err_to_txt(err)); |
2958 | goto out; | 2960 | goto out; |
2959 | } | 2961 | } |
2960 | /* w_make_ov_request expects position to be aligned */ | ||
2961 | mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; | ||
2962 | } | 2962 | } |
2963 | /* w_make_ov_request expects position to be aligned */ | ||
2964 | mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); | ||
2965 | mdev->ov_stop_sector = parms.ov_stop_sector; | ||
2966 | |||
2963 | /* If there is still bitmap IO pending, e.g. previous resync or verify | 2967 | /* If there is still bitmap IO pending, e.g. previous resync or verify |
2964 | * just being finished, wait for it before requesting a new resync. */ | 2968 | * just being finished, wait for it before requesting a new resync. */ |
2965 | drbd_suspend_io(mdev); | 2969 | drbd_suspend_io(mdev); |
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index e0f0d2a6d538..56672a61eb94 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
167 | * we convert to sectors in the display below. */ | 167 | * we convert to sectors in the display below. */ |
168 | unsigned long bm_bits = drbd_bm_bits(mdev); | 168 | unsigned long bm_bits = drbd_bm_bits(mdev); |
169 | unsigned long bit_pos; | 169 | unsigned long bit_pos; |
170 | unsigned long long stop_sector = 0; | ||
170 | if (mdev->state.conn == C_VERIFY_S || | 171 | if (mdev->state.conn == C_VERIFY_S || |
171 | mdev->state.conn == C_VERIFY_T) | 172 | mdev->state.conn == C_VERIFY_T) { |
172 | bit_pos = bm_bits - mdev->ov_left; | 173 | bit_pos = bm_bits - mdev->ov_left; |
173 | else | 174 | if (verify_can_do_stop_sector(mdev)) |
175 | stop_sector = mdev->ov_stop_sector; | ||
176 | } else | ||
174 | bit_pos = mdev->bm_resync_fo; | 177 | bit_pos = mdev->bm_resync_fo; |
175 | /* Total sectors may be slightly off for oddly | 178 | /* Total sectors may be slightly off for oddly |
176 | * sized devices. So what. */ | 179 | * sized devices. So what. */ |
177 | seq_printf(seq, | 180 | seq_printf(seq, |
178 | "\t%3d%% sector pos: %llu/%llu\n", | 181 | "\t%3d%% sector pos: %llu/%llu", |
179 | (int)(bit_pos / (bm_bits/100+1)), | 182 | (int)(bit_pos / (bm_bits/100+1)), |
180 | (unsigned long long)bit_pos * BM_SECT_PER_BIT, | 183 | (unsigned long long)bit_pos * BM_SECT_PER_BIT, |
181 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); | 184 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); |
185 | if (stop_sector != 0 && stop_sector != ULLONG_MAX) | ||
186 | seq_printf(seq, " stop sector: %llu", stop_sector); | ||
187 | seq_printf(seq, "\n"); | ||
182 | } | 188 | } |
183 | } | 189 | } |
184 | 190 | ||
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7fe6b01618d4..8fddec96dfbe 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) | |||
3843 | * already decided to close the connection again, | 3843 | * already decided to close the connection again, |
3844 | * we must not "re-establish" it here. */ | 3844 | * we must not "re-establish" it here. */ |
3845 | if (os.conn <= C_TEAR_DOWN) | 3845 | if (os.conn <= C_TEAR_DOWN) |
3846 | return false; | 3846 | return -ECONNRESET; |
3847 | 3847 | ||
3848 | /* If this is the "end of sync" confirmation, usually the peer disk | 3848 | /* If this is the "end of sync" confirmation, usually the peer disk |
3849 | * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits | 3849 | * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits |
@@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) | |||
3875 | } | 3875 | } |
3876 | } | 3876 | } |
3877 | 3877 | ||
3878 | /* explicit verify finished notification, stop sector reached. */ | ||
3879 | if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && | ||
3880 | peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { | ||
3881 | ov_out_of_sync_print(mdev); | ||
3882 | drbd_resync_finished(mdev); | ||
3883 | return 0; | ||
3884 | } | ||
3885 | |||
3878 | /* peer says his disk is inconsistent, while we think it is uptodate, | 3886 | /* peer says his disk is inconsistent, while we think it is uptodate, |
3879 | * and this happens while the peer still thinks we have a sync going on, | 3887 | * and this happens while the peer still thinks we have a sync going on, |
3880 | * but we think we are already done with the sync. | 3888 | * but we think we are already done with the sync. |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 444581828d70..12f2b4fbe559 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
975 | wake_up(&mdev->state_wait); | 975 | wake_up(&mdev->state_wait); |
976 | wake_up(&mdev->tconn->ping_wait); | 976 | wake_up(&mdev->tconn->ping_wait); |
977 | 977 | ||
978 | /* aborted verify run. log the last position */ | 978 | /* Aborted verify run, or we reached the stop sector. |
979 | * Log the last position, unless end-of-device. */ | ||
979 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && | 980 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && |
980 | ns.conn < C_CONNECTED) { | 981 | ns.conn <= C_CONNECTED) { |
981 | mdev->ov_start_sector = | 982 | mdev->ov_start_sector = |
982 | BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); | 983 | BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); |
983 | dev_info(DEV, "Online Verify reached sector %llu\n", | 984 | if (mdev->ov_left) |
984 | (unsigned long long)mdev->ov_start_sector); | 985 | dev_info(DEV, "Online Verify reached sector %llu\n", |
986 | (unsigned long long)mdev->ov_start_sector); | ||
985 | } | 987 | } |
986 | 988 | ||
987 | if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && | 989 | if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && |
@@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1422 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1424 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
1423 | drbd_send_state(mdev, ns); | 1425 | drbd_send_state(mdev, ns); |
1424 | 1426 | ||
1427 | /* Verify finished, or reached stop sector. Peer did not know about | ||
1428 | * the stop sector, and we may even have changed the stop sector during | ||
1429 | * verify to interrupt/stop early. Send the new state. */ | ||
1430 | if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED | ||
1431 | && verify_can_do_stop_sector(mdev)) | ||
1432 | drbd_send_state(mdev, ns); | ||
1433 | |||
1425 | /* Wake up role changes, that were delayed because of connection establishing */ | 1434 | /* Wake up role changes, that were delayed because of connection establishing */ |
1426 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { | 1435 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { |
1427 | if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) | 1436 | if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9d7e1fb0f431..1c9c6fd332c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) | |||
692 | int number, i, size; | 692 | int number, i, size; |
693 | sector_t sector; | 693 | sector_t sector; |
694 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); | 694 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); |
695 | bool stop_sector_reached = false; | ||
695 | 696 | ||
696 | if (unlikely(cancel)) | 697 | if (unlikely(cancel)) |
697 | return 1; | 698 | return 1; |
@@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) | |||
700 | 701 | ||
701 | sector = mdev->ov_position; | 702 | sector = mdev->ov_position; |
702 | for (i = 0; i < number; i++) { | 703 | for (i = 0; i < number; i++) { |
703 | if (sector >= capacity) { | 704 | if (sector >= capacity) |
704 | return 1; | 705 | return 1; |
705 | } | 706 | |
707 | /* We check for "finished" only in the reply path: | ||
708 | * w_e_end_ov_reply(). | ||
709 | * We need to send at least one request out. */ | ||
710 | stop_sector_reached = i > 0 | ||
711 | && verify_can_do_stop_sector(mdev) | ||
712 | && sector >= mdev->ov_stop_sector; | ||
713 | if (stop_sector_reached) | ||
714 | break; | ||
706 | 715 | ||
707 | size = BM_BLOCK_SIZE; | 716 | size = BM_BLOCK_SIZE; |
708 | 717 | ||
@@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) | |||
726 | 735 | ||
727 | requeue: | 736 | requeue: |
728 | mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); | 737 | mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); |
729 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); | 738 | if (i == 0 || !stop_sector_reached) |
739 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); | ||
730 | return 1; | 740 | return 1; |
731 | } | 741 | } |
732 | 742 | ||
@@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
792 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; | 802 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; |
793 | if (dt <= 0) | 803 | if (dt <= 0) |
794 | dt = 1; | 804 | dt = 1; |
805 | |||
795 | db = mdev->rs_total; | 806 | db = mdev->rs_total; |
807 | /* adjust for verify start and stop sectors, respective reached position */ | ||
808 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) | ||
809 | db -= mdev->ov_left; | ||
810 | |||
796 | dbdt = Bit2KB(db/dt); | 811 | dbdt = Bit2KB(db/dt); |
797 | mdev->rs_paused /= HZ; | 812 | mdev->rs_paused /= HZ; |
798 | 813 | ||
@@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
815 | ns.conn = C_CONNECTED; | 830 | ns.conn = C_CONNECTED; |
816 | 831 | ||
817 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", | 832 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", |
818 | verify_done ? "Online verify " : "Resync", | 833 | verify_done ? "Online verify" : "Resync", |
819 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); | 834 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); |
820 | 835 | ||
821 | n_oos = drbd_bm_total_weight(mdev); | 836 | n_oos = drbd_bm_total_weight(mdev); |
@@ -896,7 +911,9 @@ out: | |||
896 | mdev->rs_total = 0; | 911 | mdev->rs_total = 0; |
897 | mdev->rs_failed = 0; | 912 | mdev->rs_failed = 0; |
898 | mdev->rs_paused = 0; | 913 | mdev->rs_paused = 0; |
899 | if (verify_done) | 914 | |
915 | /* reset start sector, if we reached end of device */ | ||
916 | if (verify_done && mdev->ov_left == 0) | ||
900 | mdev->ov_start_sector = 0; | 917 | mdev->ov_start_sector = 0; |
901 | 918 | ||
902 | drbd_md_sync(mdev); | 919 | drbd_md_sync(mdev); |
@@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) | |||
1144 | unsigned int size = peer_req->i.size; | 1161 | unsigned int size = peer_req->i.size; |
1145 | int digest_size; | 1162 | int digest_size; |
1146 | int err, eq = 0; | 1163 | int err, eq = 0; |
1164 | bool stop_sector_reached = false; | ||
1147 | 1165 | ||
1148 | if (unlikely(cancel)) { | 1166 | if (unlikely(cancel)) { |
1149 | drbd_free_peer_req(mdev, peer_req); | 1167 | drbd_free_peer_req(mdev, peer_req); |
@@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) | |||
1194 | if ((mdev->ov_left & 0x200) == 0x200) | 1212 | if ((mdev->ov_left & 0x200) == 0x200) |
1195 | drbd_advance_rs_marks(mdev, mdev->ov_left); | 1213 | drbd_advance_rs_marks(mdev, mdev->ov_left); |
1196 | 1214 | ||
1197 | if (mdev->ov_left == 0) { | 1215 | stop_sector_reached = verify_can_do_stop_sector(mdev) && |
1216 | (sector + (size>>9)) >= mdev->ov_stop_sector; | ||
1217 | |||
1218 | if (mdev->ov_left == 0 || stop_sector_reached) { | ||
1198 | ov_out_of_sync_print(mdev); | 1219 | ov_out_of_sync_print(mdev); |
1199 | drbd_resync_finished(mdev); | 1220 | drbd_resync_finished(mdev); |
1200 | } | 1221 | } |