aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSunil Mushran <sunil.mushran@oracle.com>2010-01-21 13:50:03 -0500
committerJoel Becker <joel.becker@oracle.com>2010-02-03 02:51:13 -0500
commita19128260107f951d1b4c421cf98b92f8092b069 (patch)
tree0e98386142589c986fe433e1dd390f4a21db7bc9 /fs
parent0b94a909eb2e2f6990d05fd486a0cb4902ef1ae7 (diff)
ocfs2: Prevent a livelock in dlmglue
There is possibility of a livelock in __ocfs2_cluster_lock(). If a node were to get an ast for an upconvert request, followed immediately by a bast, there is a small window where the fs may downconvert the lock before the process requesting the upconvert is able to take the lock. This patch adds a new flag to indicate that the upconvert is still in progress and that the dc thread should not downconvert it right now. Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker <joel.becker@oracle.com> contributed heavily to this patch. Reported-by: David Teigland <teigland@redhat.com> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/dlmglue.c49
-rw-r--r--fs/ocfs2/ocfs2.h4
2 files changed, 50 insertions, 3 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 0cdf63042b76..85d7c490755b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
876 876
877 lockres->l_level = lockres->l_requested; 877 lockres->l_level = lockres->l_requested;
878
879 /*
880 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
881 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
882 * downconverting the lock before the upconvert has fully completed.
883 */
884 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
885
878 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 886 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
879 887
880 mlog_exit_void(); 888 mlog_exit_void();
@@ -1134,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1134 mlog_entry_void(); 1142 mlog_entry_void();
1135 spin_lock_irqsave(&lockres->l_lock, flags); 1143 spin_lock_irqsave(&lockres->l_lock, flags);
1136 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1144 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1145 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1137 if (convert) 1146 if (convert)
1138 lockres->l_action = OCFS2_AST_INVALID; 1147 lockres->l_action = OCFS2_AST_INVALID;
1139 else 1148 else
@@ -1324,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1324again: 1333again:
1325 wait = 0; 1334 wait = 0;
1326 1335
1336 spin_lock_irqsave(&lockres->l_lock, flags);
1337
1327 if (catch_signals && signal_pending(current)) { 1338 if (catch_signals && signal_pending(current)) {
1328 ret = -ERESTARTSYS; 1339 ret = -ERESTARTSYS;
1329 goto out; 1340 goto unlock;
1330 } 1341 }
1331 1342
1332 spin_lock_irqsave(&lockres->l_lock, flags);
1333
1334 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1343 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1335 "Cluster lock called on freeing lockres %s! flags " 1344 "Cluster lock called on freeing lockres %s! flags "
1336 "0x%lx\n", lockres->l_name, lockres->l_flags); 1345 "0x%lx\n", lockres->l_name, lockres->l_flags);
@@ -1347,6 +1356,25 @@ again:
1347 goto unlock; 1356 goto unlock;
1348 } 1357 }
1349 1358
1359 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1360 /*
1361 * We've upconverted. If the lock now has a level we can
1362 * work with, we take it. If, however, the lock is not at the
1363 * required level, we go thru the full cycle. One way this could
1364 * happen is if a process requesting an upconvert to PR is
1365 * closely followed by another requesting upconvert to an EX.
1366 * If the process requesting EX lands here, we want it to
1367 * continue attempting to upconvert and let the process
1368 * requesting PR take the lock.
1369 * If multiple processes request upconvert to PR, the first one
1370 * here will take the lock. The others will have to go thru the
1371 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1372 * downconvert request.
1373 */
1374 if (level <= lockres->l_level)
1375 goto update_holders;
1376 }
1377
1350 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1378 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1351 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1379 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1352 /* is the lock is currently blocked on behalf of 1380 /* is the lock is currently blocked on behalf of
@@ -1417,11 +1445,14 @@ again:
1417 goto again; 1445 goto again;
1418 } 1446 }
1419 1447
1448update_holders:
1420 /* Ok, if we get here then we're good to go. */ 1449 /* Ok, if we get here then we're good to go. */
1421 ocfs2_inc_holders(lockres, level); 1450 ocfs2_inc_holders(lockres, level);
1422 1451
1423 ret = 0; 1452 ret = 0;
1424unlock: 1453unlock:
1454 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1455
1425 spin_unlock_irqrestore(&lockres->l_lock, flags); 1456 spin_unlock_irqrestore(&lockres->l_lock, flags);
1426out: 1457out:
1427 /* 1458 /*
@@ -3402,6 +3433,18 @@ recheck:
3402 goto leave; 3433 goto leave;
3403 } 3434 }
3404 3435
3436 /*
3437 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3438 * set when the ast is received for an upconvert just before the
3439 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3440 * on the heels of the ast, we want to delay the downconvert just
3441 * enough to allow the up requestor to do its task. Because this
3442 * lock is in the blocked queue, the lock will be downconverted
3443 * as soon as the requestor is done with the lock.
3444 */
3445 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3446 goto leave_requeue;
3447
3405 /* if we're blocking an exclusive and we have *any* holders, 3448 /* if we're blocking an exclusive and we have *any* holders,
3406 * then requeue. */ 3449 * then requeue. */
3407 if ((lockres->l_blocking == DLM_LOCK_EX) 3450 if ((lockres->l_blocking == DLM_LOCK_EX)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9362eea7424b..740f448041e2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a 136#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
137 call to dlm_lock. Only 137 call to dlm_lock. Only
138 exists with BUSY set. */ 138 exists with BUSY set. */
139#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
140 * from downconverting
141 * before the upconvert
142 * has completed */
139 143
140struct ocfs2_lock_res_ops; 144struct ocfs2_lock_res_ops;
141 145