aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm
diff options
context:
space:
mode:
author: Sunil Mushran <sunil.mushran@oracle.com> 2007-01-29 18:44:27 -0500
committer: Mark Fasheh <mark.fasheh@oracle.com> 2007-02-07 15:10:39 -0500
commit: 0dd82141b236ce36253e3056c6068ee3d5732196 (patch)
tree: 51c4c4746ffa390d4dba6a342aeaa526a35cb4eb /fs/ocfs2/dlm
parent: e4968476a9bc5a6b30076076b4f3ce3e692e0d79 (diff)
ocfs2_dlm: Add timeout to dlm join domain
Currently the ocfs2 dlm has no timeout when joining a dlm domain. While this is not a problem in normal operation, it does become an issue if, say, another node is refusing to let this node join the domain because of a stuck recovery. This patch adds a 90-second timeout.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c 14
1 file changed, 13 insertions, 1 deletion
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index e8ecf8c3dbe7..6087c4749fee 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1264,6 +1264,8 @@ bail:
1264static int dlm_join_domain(struct dlm_ctxt *dlm) 1264static int dlm_join_domain(struct dlm_ctxt *dlm)
1265{ 1265{
1266 int status; 1266 int status;
1267 unsigned int backoff;
1268 unsigned int total_backoff = 0;
1267 1269
1268 BUG_ON(!dlm); 1270 BUG_ON(!dlm);
1269 1271
@@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1295 } 1297 }
1296 1298
1297 do { 1299 do {
1298 unsigned int backoff;
1299 status = dlm_try_to_join_domain(dlm); 1300 status = dlm_try_to_join_domain(dlm);
1300 1301
1301 /* If we're racing another node to the join, then we 1302 /* If we're racing another node to the join, then we
1302 * need to back off temporarily and let them 1303 * need to back off temporarily and let them
1303 * complete. */ 1304 * complete. */
1305#define DLM_JOIN_TIMEOUT_MSECS 90000
1304 if (status == -EAGAIN) { 1306 if (status == -EAGAIN) {
1305 if (signal_pending(current)) { 1307 if (signal_pending(current)) {
1306 status = -ERESTARTSYS; 1308 status = -ERESTARTSYS;
1307 goto bail; 1309 goto bail;
1308 } 1310 }
1309 1311
1312 if (total_backoff >
1313 msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
1314 status = -ERESTARTSYS;
1315 mlog(ML_NOTICE, "Timed out joining dlm domain "
1316 "%s after %u msecs\n", dlm->name,
1317 jiffies_to_msecs(total_backoff));
1318 goto bail;
1319 }
1320
1310 /* 1321 /*
1311 * <chip> After you! 1322 * <chip> After you!
1312 * <dale> No, after you! 1323 * <dale> No, after you!
@@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1316 */ 1327 */
1317 backoff = (unsigned int)(jiffies & 0x3); 1328 backoff = (unsigned int)(jiffies & 0x3);
1318 backoff *= DLM_DOMAIN_BACKOFF_MS; 1329 backoff *= DLM_DOMAIN_BACKOFF_MS;
1330 total_backoff += backoff;
1319 mlog(0, "backoff %d\n", backoff); 1331 mlog(0, "backoff %d\n", backoff);
1320 msleep(backoff); 1332 msleep(backoff);
1321 } 1333 }