diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2007-01-29 18:44:27 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-02-07 15:10:39 -0500 |
commit | 0dd82141b236ce36253e3056c6068ee3d5732196 (patch) | |
tree | 51c4c4746ffa390d4dba6a342aeaa526a35cb4eb /fs/ocfs2/dlm | |
parent | e4968476a9bc5a6b30076076b4f3ce3e692e0d79 (diff) |
ocfs2_dlm: Add timeout to dlm join domain
Currently the ocfs2 dlm has no timeout when joining a dlm domain. While this is
not a problem in normal operation, it does become an issue if, say, another
node is refusing to let this node join the domain because of a stuck
recovery. This patch adds a 90-second timeout.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 14 |
1 file changed, 13 insertions, 1 deletion
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index e8ecf8c3dbe7..6087c4749fee 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -1264,6 +1264,8 @@ bail: | |||
1264 | static int dlm_join_domain(struct dlm_ctxt *dlm) | 1264 | static int dlm_join_domain(struct dlm_ctxt *dlm) |
1265 | { | 1265 | { |
1266 | int status; | 1266 | int status; |
1267 | unsigned int backoff; | ||
1268 | unsigned int total_backoff = 0; | ||
1267 | 1269 | ||
1268 | BUG_ON(!dlm); | 1270 | BUG_ON(!dlm); |
1269 | 1271 | ||
@@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1295 | } | 1297 | } |
1296 | 1298 | ||
1297 | do { | 1299 | do { |
1298 | unsigned int backoff; | ||
1299 | status = dlm_try_to_join_domain(dlm); | 1300 | status = dlm_try_to_join_domain(dlm); |
1300 | 1301 | ||
1301 | /* If we're racing another node to the join, then we | 1302 | /* If we're racing another node to the join, then we |
1302 | * need to back off temporarily and let them | 1303 | * need to back off temporarily and let them |
1303 | * complete. */ | 1304 | * complete. */ |
1305 | #define DLM_JOIN_TIMEOUT_MSECS 90000 | ||
1304 | if (status == -EAGAIN) { | 1306 | if (status == -EAGAIN) { |
1305 | if (signal_pending(current)) { | 1307 | if (signal_pending(current)) { |
1306 | status = -ERESTARTSYS; | 1308 | status = -ERESTARTSYS; |
1307 | goto bail; | 1309 | goto bail; |
1308 | } | 1310 | } |
1309 | 1311 | ||
1312 | if (total_backoff > | ||
1313 | msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) { | ||
1314 | status = -ERESTARTSYS; | ||
1315 | mlog(ML_NOTICE, "Timed out joining dlm domain " | ||
1316 | "%s after %u msecs\n", dlm->name, | ||
1317 | jiffies_to_msecs(total_backoff)); | ||
1318 | goto bail; | ||
1319 | } | ||
1320 | |||
1310 | /* | 1321 | /* |
1311 | * <chip> After you! | 1322 | * <chip> After you! |
1312 | * <dale> No, after you! | 1323 | * <dale> No, after you! |
@@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1316 | */ | 1327 | */ |
1317 | backoff = (unsigned int)(jiffies & 0x3); | 1328 | backoff = (unsigned int)(jiffies & 0x3); |
1318 | backoff *= DLM_DOMAIN_BACKOFF_MS; | 1329 | backoff *= DLM_DOMAIN_BACKOFF_MS; |
1330 | total_backoff += backoff; | ||
1319 | mlog(0, "backoff %d\n", backoff); | 1331 | mlog(0, "backoff %d\n", backoff); |
1320 | msleep(backoff); | 1332 | msleep(backoff); |
1321 | } | 1333 | } |