aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Kurt Hackel <kurt.hackel@oracle.com> 2006-05-01 14:49:52 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-06-26 17:43:03 -0400
commit e7e69eb38946ebef86e27442d01514fcf9c854ee (patch)
tree a17cfb1edba8e3c897778808eec641a97c51005c /fs
parent e4eb03681a8313168d99c2f93175331a898a2c16 (diff)
ocfs2: teach dlm_restart_lock_mastery() to wait on recovery
Change behavior of dlm_restart_lock_mastery() when a node goes down. Dump all responses that have been collected and start over.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 100
1 files changed, 44 insertions, 56 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index e5d7271d5038..915283fb48c3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -867,6 +867,7 @@ lookup:
867 spin_unlock(&dlm->master_lock); 867 spin_unlock(&dlm->master_lock);
868 spin_unlock(&dlm->spinlock); 868 spin_unlock(&dlm->spinlock);
869 869
870redo_request:
870 while (wait_on_recovery) { 871 while (wait_on_recovery) {
871 /* any cluster changes that occurred after dropping the 872 /* any cluster changes that occurred after dropping the
872 * dlm spinlock would be detectable be a change on the mle, 873 * dlm spinlock would be detectable be a change on the mle,
@@ -904,7 +905,6 @@ lookup:
904 if (blocked) 905 if (blocked)
905 goto wait; 906 goto wait;
906 907
907redo_request:
908 ret = -EINVAL; 908 ret = -EINVAL;
909 dlm_node_iter_init(mle->vote_map, &iter); 909 dlm_node_iter_init(mle->vote_map, &iter);
910 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 910 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
@@ -929,6 +929,7 @@ wait:
929 /* keep going until the response map includes all nodes */ 929 /* keep going until the response map includes all nodes */
930 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); 930 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
931 if (ret < 0) { 931 if (ret < 0) {
932 wait_on_recovery = 1;
932 mlog(0, "%s:%.*s: node map changed, redo the " 933 mlog(0, "%s:%.*s: node map changed, redo the "
933 "master request now, blocked=%d\n", 934 "master request now, blocked=%d\n",
934 dlm->name, res->lockname.len, 935 dlm->name, res->lockname.len,
@@ -1210,18 +1211,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1210 set_bit(node, mle->vote_map); 1211 set_bit(node, mle->vote_map);
1211 } else { 1212 } else {
1212 mlog(ML_ERROR, "node down! %d\n", node); 1213 mlog(ML_ERROR, "node down! %d\n", node);
1213
1214 /* if the node wasn't involved in mastery skip it,
1215 * but clear it out from the maps so that it will
1216 * not affect mastery of this lockres */
1217 clear_bit(node, mle->response_map);
1218 clear_bit(node, mle->vote_map);
1219 if (!test_bit(node, mle->maybe_map))
1220 goto next;
1221
1222 /* if we're already blocked on lock mastery, and the
1223 * dead node wasn't the expected master, or there is
1224 * another node in the maybe_map, keep waiting */
1225 if (blocked) { 1214 if (blocked) {
1226 int lowest = find_next_bit(mle->maybe_map, 1215 int lowest = find_next_bit(mle->maybe_map,
1227 O2NM_MAX_NODES, 0); 1216 O2NM_MAX_NODES, 0);
@@ -1229,54 +1218,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1229 /* act like it was never there */ 1218 /* act like it was never there */
1230 clear_bit(node, mle->maybe_map); 1219 clear_bit(node, mle->maybe_map);
1231 1220
1232 if (node != lowest) 1221 if (node == lowest) {
1233 goto next; 1222 mlog(0, "expected master %u died"
1234 1223 " while this node was blocked "
1235 mlog(ML_ERROR, "expected master %u died while " 1224 "waiting on it!\n", node);
1236 "this node was blocked waiting on it!\n", 1225 lowest = find_next_bit(mle->maybe_map,
1237 node); 1226 O2NM_MAX_NODES,
1238 lowest = find_next_bit(mle->maybe_map, 1227 lowest+1);
1239 O2NM_MAX_NODES, 1228 if (lowest < O2NM_MAX_NODES) {
1240 lowest+1); 1229 mlog(0, "%s:%.*s:still "
1241 if (lowest < O2NM_MAX_NODES) { 1230 "blocked. waiting on %u "
1242 mlog(0, "still blocked. waiting " 1231 "now\n", dlm->name,
1243 "on %u now\n", lowest); 1232 res->lockname.len,
1244 goto next; 1233 res->lockname.name,
1234 lowest);
1235 } else {
1236 /* mle is an MLE_BLOCK, but
1237 * there is now nothing left to
1238 * block on. we need to return
1239 * all the way back out and try
1240 * again with an MLE_MASTER.
1241 * dlm_do_local_recovery_cleanup
1242 * has already run, so the mle
1243 * refcount is ok */
1244 mlog(0, "%s:%.*s: no "
1245 "longer blocking. try to "
1246 "master this here\n",
1247 dlm->name,
1248 res->lockname.len,
1249 res->lockname.name);
1250 mle->type = DLM_MLE_MASTER;
1251 mle->u.res = res;
1252 }
1245 } 1253 }
1246
1247 /* mle is an MLE_BLOCK, but there is now
1248 * nothing left to block on. we need to return
1249 * all the way back out and try again with
1250 * an MLE_MASTER. dlm_do_local_recovery_cleanup
1251 * has already run, so the mle refcount is ok */
1252 mlog(0, "no longer blocking. we can "
1253 "try to master this here\n");
1254 mle->type = DLM_MLE_MASTER;
1255 memset(mle->maybe_map, 0,
1256 sizeof(mle->maybe_map));
1257 memset(mle->response_map, 0,
1258 sizeof(mle->maybe_map));
1259 memcpy(mle->vote_map, mle->node_map,
1260 sizeof(mle->node_map));
1261 mle->u.res = res;
1262 set_bit(dlm->node_num, mle->maybe_map);
1263
1264 ret = -EAGAIN;
1265 goto next;
1266 } 1254 }
1267 1255
1268 clear_bit(node, mle->maybe_map); 1256 /* now blank out everything, as if we had never
1269 if (node > dlm->node_num) 1257 * contacted anyone */
1270 goto next; 1258 memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
1271 1259 memset(mle->response_map, 0, sizeof(mle->response_map));
1272 mlog(0, "dead node in map!\n"); 1260 /* reset the vote_map to the current node_map */
1273 /* yuck. go back and re-contact all nodes 1261 memcpy(mle->vote_map, mle->node_map,
1274 * in the vote_map, removing this node. */ 1262 sizeof(mle->node_map));
1275 memset(mle->response_map, 0, 1263 /* put myself into the maybe map */
1276 sizeof(mle->response_map)); 1264 if (mle->type != DLM_MLE_BLOCK)
1265 set_bit(dlm->node_num, mle->maybe_map);
1277 } 1266 }
1278 ret = -EAGAIN; 1267 ret = -EAGAIN;
1279next:
1280 node = dlm_bitmap_diff_iter_next(&bdi, &sc); 1268 node = dlm_bitmap_diff_iter_next(&bdi, &sc);
1281 } 1269 }
1282 return ret; 1270 return ret;