diff options
| author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-05-01 14:49:52 -0400 |
|---|---|---|
| committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-06-26 17:43:03 -0400 |
| commit | e7e69eb38946ebef86e27442d01514fcf9c854ee (patch) | |
| tree | a17cfb1edba8e3c897778808eec641a97c51005c /fs/ocfs2 | |
| parent | e4eb03681a8313168d99c2f93175331a898a2c16 (diff) | |
ocfs2: teach dlm_restart_lock_mastery() to wait on recovery
Change the behavior of dlm_restart_lock_mastery() when a node goes down:
discard all responses that have been collected so far, wait for recovery, and start the master request over.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2')
| -rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 100 |
1 file changed, 44 insertions, 56 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index e5d7271d5038..915283fb48c3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
| @@ -867,6 +867,7 @@ lookup: | |||
| 867 | spin_unlock(&dlm->master_lock); | 867 | spin_unlock(&dlm->master_lock); |
| 868 | spin_unlock(&dlm->spinlock); | 868 | spin_unlock(&dlm->spinlock); |
| 869 | 869 | ||
| 870 | redo_request: | ||
| 870 | while (wait_on_recovery) { | 871 | while (wait_on_recovery) { |
| 871 | /* any cluster changes that occurred after dropping the | 872 | /* any cluster changes that occurred after dropping the |
| 872 | * dlm spinlock would be detectable be a change on the mle, | 873 | * dlm spinlock would be detectable be a change on the mle, |
| @@ -904,7 +905,6 @@ lookup: | |||
| 904 | if (blocked) | 905 | if (blocked) |
| 905 | goto wait; | 906 | goto wait; |
| 906 | 907 | ||
| 907 | redo_request: | ||
| 908 | ret = -EINVAL; | 908 | ret = -EINVAL; |
| 909 | dlm_node_iter_init(mle->vote_map, &iter); | 909 | dlm_node_iter_init(mle->vote_map, &iter); |
| 910 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { | 910 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { |
| @@ -929,6 +929,7 @@ wait: | |||
| 929 | /* keep going until the response map includes all nodes */ | 929 | /* keep going until the response map includes all nodes */ |
| 930 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); | 930 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); |
| 931 | if (ret < 0) { | 931 | if (ret < 0) { |
| 932 | wait_on_recovery = 1; | ||
| 932 | mlog(0, "%s:%.*s: node map changed, redo the " | 933 | mlog(0, "%s:%.*s: node map changed, redo the " |
| 933 | "master request now, blocked=%d\n", | 934 | "master request now, blocked=%d\n", |
| 934 | dlm->name, res->lockname.len, | 935 | dlm->name, res->lockname.len, |
| @@ -1210,18 +1211,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
| 1210 | set_bit(node, mle->vote_map); | 1211 | set_bit(node, mle->vote_map); |
| 1211 | } else { | 1212 | } else { |
| 1212 | mlog(ML_ERROR, "node down! %d\n", node); | 1213 | mlog(ML_ERROR, "node down! %d\n", node); |
| 1213 | |||
| 1214 | /* if the node wasn't involved in mastery skip it, | ||
| 1215 | * but clear it out from the maps so that it will | ||
| 1216 | * not affect mastery of this lockres */ | ||
| 1217 | clear_bit(node, mle->response_map); | ||
| 1218 | clear_bit(node, mle->vote_map); | ||
| 1219 | if (!test_bit(node, mle->maybe_map)) | ||
| 1220 | goto next; | ||
| 1221 | |||
| 1222 | /* if we're already blocked on lock mastery, and the | ||
| 1223 | * dead node wasn't the expected master, or there is | ||
| 1224 | * another node in the maybe_map, keep waiting */ | ||
| 1225 | if (blocked) { | 1214 | if (blocked) { |
| 1226 | int lowest = find_next_bit(mle->maybe_map, | 1215 | int lowest = find_next_bit(mle->maybe_map, |
| 1227 | O2NM_MAX_NODES, 0); | 1216 | O2NM_MAX_NODES, 0); |
| @@ -1229,54 +1218,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
| 1229 | /* act like it was never there */ | 1218 | /* act like it was never there */ |
| 1230 | clear_bit(node, mle->maybe_map); | 1219 | clear_bit(node, mle->maybe_map); |
| 1231 | 1220 | ||
| 1232 | if (node != lowest) | 1221 | if (node == lowest) { |
| 1233 | goto next; | 1222 | mlog(0, "expected master %u died" |
| 1234 | 1223 | " while this node was blocked " | |
| 1235 | mlog(ML_ERROR, "expected master %u died while " | 1224 | "waiting on it!\n", node); |
| 1236 | "this node was blocked waiting on it!\n", | 1225 | lowest = find_next_bit(mle->maybe_map, |
| 1237 | node); | 1226 | O2NM_MAX_NODES, |
| 1238 | lowest = find_next_bit(mle->maybe_map, | 1227 | lowest+1); |
| 1239 | O2NM_MAX_NODES, | 1228 | if (lowest < O2NM_MAX_NODES) { |
| 1240 | lowest+1); | 1229 | mlog(0, "%s:%.*s:still " |
| 1241 | if (lowest < O2NM_MAX_NODES) { | 1230 | "blocked. waiting on %u " |
| 1242 | mlog(0, "still blocked. waiting " | 1231 | "now\n", dlm->name, |
| 1243 | "on %u now\n", lowest); | 1232 | res->lockname.len, |
| 1244 | goto next; | 1233 | res->lockname.name, |
| 1234 | lowest); | ||
| 1235 | } else { | ||
| 1236 | /* mle is an MLE_BLOCK, but | ||
| 1237 | * there is now nothing left to | ||
| 1238 | * block on. we need to return | ||
| 1239 | * all the way back out and try | ||
| 1240 | * again with an MLE_MASTER. | ||
| 1241 | * dlm_do_local_recovery_cleanup | ||
| 1242 | * has already run, so the mle | ||
| 1243 | * refcount is ok */ | ||
| 1244 | mlog(0, "%s:%.*s: no " | ||
| 1245 | "longer blocking. try to " | ||
| 1246 | "master this here\n", | ||
| 1247 | dlm->name, | ||
| 1248 | res->lockname.len, | ||
| 1249 | res->lockname.name); | ||
| 1250 | mle->type = DLM_MLE_MASTER; | ||
| 1251 | mle->u.res = res; | ||
| 1252 | } | ||
| 1245 | } | 1253 | } |
| 1246 | |||
| 1247 | /* mle is an MLE_BLOCK, but there is now | ||
| 1248 | * nothing left to block on. we need to return | ||
| 1249 | * all the way back out and try again with | ||
| 1250 | * an MLE_MASTER. dlm_do_local_recovery_cleanup | ||
| 1251 | * has already run, so the mle refcount is ok */ | ||
| 1252 | mlog(0, "no longer blocking. we can " | ||
| 1253 | "try to master this here\n"); | ||
| 1254 | mle->type = DLM_MLE_MASTER; | ||
| 1255 | memset(mle->maybe_map, 0, | ||
| 1256 | sizeof(mle->maybe_map)); | ||
| 1257 | memset(mle->response_map, 0, | ||
| 1258 | sizeof(mle->maybe_map)); | ||
| 1259 | memcpy(mle->vote_map, mle->node_map, | ||
| 1260 | sizeof(mle->node_map)); | ||
| 1261 | mle->u.res = res; | ||
| 1262 | set_bit(dlm->node_num, mle->maybe_map); | ||
| 1263 | |||
| 1264 | ret = -EAGAIN; | ||
| 1265 | goto next; | ||
| 1266 | } | 1254 | } |
| 1267 | 1255 | ||
| 1268 | clear_bit(node, mle->maybe_map); | 1256 | /* now blank out everything, as if we had never |
| 1269 | if (node > dlm->node_num) | 1257 | * contacted anyone */ |
| 1270 | goto next; | 1258 | memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); |
| 1271 | 1259 | memset(mle->response_map, 0, sizeof(mle->response_map)); | |
| 1272 | mlog(0, "dead node in map!\n"); | 1260 | /* reset the vote_map to the current node_map */ |
| 1273 | /* yuck. go back and re-contact all nodes | 1261 | memcpy(mle->vote_map, mle->node_map, |
| 1274 | * in the vote_map, removing this node. */ | 1262 | sizeof(mle->node_map)); |
| 1275 | memset(mle->response_map, 0, | 1263 | /* put myself into the maybe map */ |
| 1276 | sizeof(mle->response_map)); | 1264 | if (mle->type != DLM_MLE_BLOCK) |
| 1265 | set_bit(dlm->node_num, mle->maybe_map); | ||
| 1277 | } | 1266 | } |
| 1278 | ret = -EAGAIN; | 1267 | ret = -EAGAIN; |
| 1279 | next: | ||
| 1280 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); | 1268 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); |
| 1281 | } | 1269 | } |
| 1282 | return ret; | 1270 | return ret; |
