diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2010-01-21 13:50:03 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2010-02-03 02:51:13 -0500 |
commit | a19128260107f951d1b4c421cf98b92f8092b069 (patch) | |
tree | 0e98386142589c986fe433e1dd390f4a21db7bc9 /fs | |
parent | 0b94a909eb2e2f6990d05fd486a0cb4902ef1ae7 (diff) |
ocfs2: Prevent a livelock in dlmglue
There is a possibility of a livelock in __ocfs2_cluster_lock(). If a node were
to get an ast for an upconvert request, followed immediately by a bast,
there is a small window where the fs may downconvert the lock before the
process requesting the upconvert is able to take the lock.
This patch adds a new flag to indicate that the upconvert is still in
progress and that the dc thread should not downconvert it right now.
Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker
<joel.becker@oracle.com> contributed heavily to this patch.
Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 49 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 4 |
2 files changed, 50 insertions, 3 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0cdf63042b76..85d7c490755b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
876 | 876 | ||
877 | lockres->l_level = lockres->l_requested; | 877 | lockres->l_level = lockres->l_requested; |
878 | |||
879 | /* | ||
880 | * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | ||
881 | * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | ||
882 | * downconverting the lock before the upconvert has fully completed. | ||
883 | */ | ||
884 | lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
885 | |||
878 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 886 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
879 | 887 | ||
880 | mlog_exit_void(); | 888 | mlog_exit_void(); |
@@ -1134,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
1134 | mlog_entry_void(); | 1142 | mlog_entry_void(); |
1135 | spin_lock_irqsave(&lockres->l_lock, flags); | 1143 | spin_lock_irqsave(&lockres->l_lock, flags); |
1136 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 1144 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
1145 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
1137 | if (convert) | 1146 | if (convert) |
1138 | lockres->l_action = OCFS2_AST_INVALID; | 1147 | lockres->l_action = OCFS2_AST_INVALID; |
1139 | else | 1148 | else |
@@ -1324,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
1324 | again: | 1333 | again: |
1325 | wait = 0; | 1334 | wait = 0; |
1326 | 1335 | ||
1336 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1337 | |||
1327 | if (catch_signals && signal_pending(current)) { | 1338 | if (catch_signals && signal_pending(current)) { |
1328 | ret = -ERESTARTSYS; | 1339 | ret = -ERESTARTSYS; |
1329 | goto out; | 1340 | goto unlock; |
1330 | } | 1341 | } |
1331 | 1342 | ||
1332 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1333 | |||
1334 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, | 1343 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, |
1335 | "Cluster lock called on freeing lockres %s! flags " | 1344 | "Cluster lock called on freeing lockres %s! flags " |
1336 | "0x%lx\n", lockres->l_name, lockres->l_flags); | 1345 | "0x%lx\n", lockres->l_name, lockres->l_flags); |
@@ -1347,6 +1356,25 @@ again: | |||
1347 | goto unlock; | 1356 | goto unlock; |
1348 | } | 1357 | } |
1349 | 1358 | ||
1359 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { | ||
1360 | /* | ||
1361 | * We've upconverted. If the lock now has a level we can | ||
1362 | * work with, we take it. If, however, the lock is not at the | ||
1363 | * required level, we go thru the full cycle. One way this could | ||
1364 | * happen is if a process requesting an upconvert to PR is | ||
1365 | * closely followed by another requesting upconvert to an EX. | ||
1366 | * If the process requesting EX lands here, we want it to | ||
1367 | * continue attempting to upconvert and let the process | ||
1368 | * requesting PR take the lock. | ||
1369 | * If multiple processes request upconvert to PR, the first one | ||
1370 | * here will take the lock. The others will have to go thru the | ||
1371 | * OCFS2_LOCK_BLOCKED check to ensure that there is no pending | ||
1372 | * downconvert request. | ||
1373 | */ | ||
1374 | if (level <= lockres->l_level) | ||
1375 | goto update_holders; | ||
1376 | } | ||
1377 | |||
1350 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 1378 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
1351 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 1379 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
1352 | /* is the lock is currently blocked on behalf of | 1380 | /* is the lock is currently blocked on behalf of |
@@ -1417,11 +1445,14 @@ again: | |||
1417 | goto again; | 1445 | goto again; |
1418 | } | 1446 | } |
1419 | 1447 | ||
1448 | update_holders: | ||
1420 | /* Ok, if we get here then we're good to go. */ | 1449 | /* Ok, if we get here then we're good to go. */ |
1421 | ocfs2_inc_holders(lockres, level); | 1450 | ocfs2_inc_holders(lockres, level); |
1422 | 1451 | ||
1423 | ret = 0; | 1452 | ret = 0; |
1424 | unlock: | 1453 | unlock: |
1454 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
1455 | |||
1425 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1456 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1426 | out: | 1457 | out: |
1427 | /* | 1458 | /* |
@@ -3402,6 +3433,18 @@ recheck: | |||
3402 | goto leave; | 3433 | goto leave; |
3403 | } | 3434 | } |
3404 | 3435 | ||
3436 | /* | ||
3437 | * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is | ||
3438 | * set when the ast is received for an upconvert just before the | ||
3439 | * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast | ||
3440 | * on the heels of the ast, we want to delay the downconvert just | ||
3441 | * enough to allow the up requestor to do its task. Because this | ||
3442 | * lock is in the blocked queue, the lock will be downconverted | ||
3443 | * as soon as the requestor is done with the lock. | ||
3444 | */ | ||
3445 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) | ||
3446 | goto leave_requeue; | ||
3447 | |||
3405 | /* if we're blocking an exclusive and we have *any* holders, | 3448 | /* if we're blocking an exclusive and we have *any* holders, |
3406 | * then requeue. */ | 3449 | * then requeue. */ |
3407 | if ((lockres->l_blocking == DLM_LOCK_EX) | 3450 | if ((lockres->l_blocking == DLM_LOCK_EX) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9362eea7424b..740f448041e2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action { | |||
136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | 136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a |
137 | call to dlm_lock. Only | 137 | call to dlm_lock. Only |
138 | exists with BUSY set. */ | 138 | exists with BUSY set. */ |
139 | #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread | ||
140 | * from downconverting | ||
141 | * before the upconvert | ||
142 | * has completed */ | ||
139 | 143 | ||
140 | struct ocfs2_lock_res_ops; | 144 | struct ocfs2_lock_res_ops; |
141 | 145 | ||