diff options
author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-05-01 14:49:52 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-06-26 17:43:03 -0400 |
commit | e7e69eb38946ebef86e27442d01514fcf9c854ee (patch) | |
tree | a17cfb1edba8e3c897778808eec641a97c51005c | |
parent | e4eb03681a8313168d99c2f93175331a898a2c16 (diff) |
ocfs2: teach dlm_restart_lock_mastery() to wait on recovery
Change the behavior of dlm_restart_lock_mastery() when a node goes down:
discard all responses that have been collected so far, wait for recovery,
and then start the master request over.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 100 |
1 files changed, 44 insertions, 56 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index e5d7271d5038..915283fb48c3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -867,6 +867,7 @@ lookup: | |||
867 | spin_unlock(&dlm->master_lock); | 867 | spin_unlock(&dlm->master_lock); |
868 | spin_unlock(&dlm->spinlock); | 868 | spin_unlock(&dlm->spinlock); |
869 | 869 | ||
870 | redo_request: | ||
870 | while (wait_on_recovery) { | 871 | while (wait_on_recovery) { |
871 | /* any cluster changes that occurred after dropping the | 872 | /* any cluster changes that occurred after dropping the |
872 | * dlm spinlock would be detectable be a change on the mle, | 873 | * dlm spinlock would be detectable be a change on the mle, |
@@ -904,7 +905,6 @@ lookup: | |||
904 | if (blocked) | 905 | if (blocked) |
905 | goto wait; | 906 | goto wait; |
906 | 907 | ||
907 | redo_request: | ||
908 | ret = -EINVAL; | 908 | ret = -EINVAL; |
909 | dlm_node_iter_init(mle->vote_map, &iter); | 909 | dlm_node_iter_init(mle->vote_map, &iter); |
910 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { | 910 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { |
@@ -929,6 +929,7 @@ wait: | |||
929 | /* keep going until the response map includes all nodes */ | 929 | /* keep going until the response map includes all nodes */ |
930 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); | 930 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); |
931 | if (ret < 0) { | 931 | if (ret < 0) { |
932 | wait_on_recovery = 1; | ||
932 | mlog(0, "%s:%.*s: node map changed, redo the " | 933 | mlog(0, "%s:%.*s: node map changed, redo the " |
933 | "master request now, blocked=%d\n", | 934 | "master request now, blocked=%d\n", |
934 | dlm->name, res->lockname.len, | 935 | dlm->name, res->lockname.len, |
@@ -1210,18 +1211,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1210 | set_bit(node, mle->vote_map); | 1211 | set_bit(node, mle->vote_map); |
1211 | } else { | 1212 | } else { |
1212 | mlog(ML_ERROR, "node down! %d\n", node); | 1213 | mlog(ML_ERROR, "node down! %d\n", node); |
1213 | |||
1214 | /* if the node wasn't involved in mastery skip it, | ||
1215 | * but clear it out from the maps so that it will | ||
1216 | * not affect mastery of this lockres */ | ||
1217 | clear_bit(node, mle->response_map); | ||
1218 | clear_bit(node, mle->vote_map); | ||
1219 | if (!test_bit(node, mle->maybe_map)) | ||
1220 | goto next; | ||
1221 | |||
1222 | /* if we're already blocked on lock mastery, and the | ||
1223 | * dead node wasn't the expected master, or there is | ||
1224 | * another node in the maybe_map, keep waiting */ | ||
1225 | if (blocked) { | 1214 | if (blocked) { |
1226 | int lowest = find_next_bit(mle->maybe_map, | 1215 | int lowest = find_next_bit(mle->maybe_map, |
1227 | O2NM_MAX_NODES, 0); | 1216 | O2NM_MAX_NODES, 0); |
@@ -1229,54 +1218,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1229 | /* act like it was never there */ | 1218 | /* act like it was never there */ |
1230 | clear_bit(node, mle->maybe_map); | 1219 | clear_bit(node, mle->maybe_map); |
1231 | 1220 | ||
1232 | if (node != lowest) | 1221 | if (node == lowest) { |
1233 | goto next; | 1222 | mlog(0, "expected master %u died" |
1234 | 1223 | " while this node was blocked " | |
1235 | mlog(ML_ERROR, "expected master %u died while " | 1224 | "waiting on it!\n", node); |
1236 | "this node was blocked waiting on it!\n", | 1225 | lowest = find_next_bit(mle->maybe_map, |
1237 | node); | 1226 | O2NM_MAX_NODES, |
1238 | lowest = find_next_bit(mle->maybe_map, | 1227 | lowest+1); |
1239 | O2NM_MAX_NODES, | 1228 | if (lowest < O2NM_MAX_NODES) { |
1240 | lowest+1); | 1229 | mlog(0, "%s:%.*s:still " |
1241 | if (lowest < O2NM_MAX_NODES) { | 1230 | "blocked. waiting on %u " |
1242 | mlog(0, "still blocked. waiting " | 1231 | "now\n", dlm->name, |
1243 | "on %u now\n", lowest); | 1232 | res->lockname.len, |
1244 | goto next; | 1233 | res->lockname.name, |
1234 | lowest); | ||
1235 | } else { | ||
1236 | /* mle is an MLE_BLOCK, but | ||
1237 | * there is now nothing left to | ||
1238 | * block on. we need to return | ||
1239 | * all the way back out and try | ||
1240 | * again with an MLE_MASTER. | ||
1241 | * dlm_do_local_recovery_cleanup | ||
1242 | * has already run, so the mle | ||
1243 | * refcount is ok */ | ||
1244 | mlog(0, "%s:%.*s: no " | ||
1245 | "longer blocking. try to " | ||
1246 | "master this here\n", | ||
1247 | dlm->name, | ||
1248 | res->lockname.len, | ||
1249 | res->lockname.name); | ||
1250 | mle->type = DLM_MLE_MASTER; | ||
1251 | mle->u.res = res; | ||
1252 | } | ||
1245 | } | 1253 | } |
1246 | |||
1247 | /* mle is an MLE_BLOCK, but there is now | ||
1248 | * nothing left to block on. we need to return | ||
1249 | * all the way back out and try again with | ||
1250 | * an MLE_MASTER. dlm_do_local_recovery_cleanup | ||
1251 | * has already run, so the mle refcount is ok */ | ||
1252 | mlog(0, "no longer blocking. we can " | ||
1253 | "try to master this here\n"); | ||
1254 | mle->type = DLM_MLE_MASTER; | ||
1255 | memset(mle->maybe_map, 0, | ||
1256 | sizeof(mle->maybe_map)); | ||
1257 | memset(mle->response_map, 0, | ||
1258 | sizeof(mle->maybe_map)); | ||
1259 | memcpy(mle->vote_map, mle->node_map, | ||
1260 | sizeof(mle->node_map)); | ||
1261 | mle->u.res = res; | ||
1262 | set_bit(dlm->node_num, mle->maybe_map); | ||
1263 | |||
1264 | ret = -EAGAIN; | ||
1265 | goto next; | ||
1266 | } | 1254 | } |
1267 | 1255 | ||
1268 | clear_bit(node, mle->maybe_map); | 1256 | /* now blank out everything, as if we had never |
1269 | if (node > dlm->node_num) | 1257 | * contacted anyone */ |
1270 | goto next; | 1258 | memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); |
1271 | 1259 | memset(mle->response_map, 0, sizeof(mle->response_map)); | |
1272 | mlog(0, "dead node in map!\n"); | 1260 | /* reset the vote_map to the current node_map */ |
1273 | /* yuck. go back and re-contact all nodes | 1261 | memcpy(mle->vote_map, mle->node_map, |
1274 | * in the vote_map, removing this node. */ | 1262 | sizeof(mle->node_map)); |
1275 | memset(mle->response_map, 0, | 1263 | /* put myself into the maybe map */ |
1276 | sizeof(mle->response_map)); | 1264 | if (mle->type != DLM_MLE_BLOCK) |
1265 | set_bit(dlm->node_num, mle->maybe_map); | ||
1277 | } | 1266 | } |
1278 | ret = -EAGAIN; | 1267 | ret = -EAGAIN; |
1279 | next: | ||
1280 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); | 1268 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); |
1281 | } | 1269 | } |
1282 | return ret; | 1270 | return ret; |