diff options
| author | Sunil Mushran <sunil.mushran@oracle.com> | 2010-01-21 13:50:03 -0500 |
|---|---|---|
| committer | Joel Becker <joel.becker@oracle.com> | 2010-02-03 02:51:13 -0500 |
| commit | a19128260107f951d1b4c421cf98b92f8092b069 (patch) | |
| tree | 0e98386142589c986fe433e1dd390f4a21db7bc9 | |
| parent | 0b94a909eb2e2f6990d05fd486a0cb4902ef1ae7 (diff) | |
ocfs2: Prevent a livelock in dlmglue
There is possibility of a livelock in __ocfs2_cluster_lock(). If a node were
to get an ast for an upconvert request, followed immediately by a bast,
there is a small window where the fs may downconvert the lock before the
process requesting the upconvert is able to take the lock.
This patch adds a new flag to indicate that the upconvert is still in
progress and that the dc thread should not downconvert it right now.
Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker
<joel.becker@oracle.com> contributed heavily to this patch.
Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
| -rw-r--r-- | fs/ocfs2/dlmglue.c | 49 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 4 |
2 files changed, 50 insertions, 3 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0cdf63042b76..85d7c490755b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
| 875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| 876 | 876 | ||
| 877 | lockres->l_level = lockres->l_requested; | 877 | lockres->l_level = lockres->l_requested; |
| 878 | |||
| 879 | /* | ||
| 880 | * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | ||
| 881 | * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | ||
| 882 | * downconverting the lock before the upconvert has fully completed. | ||
| 883 | */ | ||
| 884 | lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
| 885 | |||
| 878 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 886 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| 879 | 887 | ||
| 880 | mlog_exit_void(); | 888 | mlog_exit_void(); |
| @@ -1134,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
| 1134 | mlog_entry_void(); | 1142 | mlog_entry_void(); |
| 1135 | spin_lock_irqsave(&lockres->l_lock, flags); | 1143 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 1136 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 1144 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| 1145 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
| 1137 | if (convert) | 1146 | if (convert) |
| 1138 | lockres->l_action = OCFS2_AST_INVALID; | 1147 | lockres->l_action = OCFS2_AST_INVALID; |
| 1139 | else | 1148 | else |
| @@ -1324,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
| 1324 | again: | 1333 | again: |
| 1325 | wait = 0; | 1334 | wait = 0; |
| 1326 | 1335 | ||
| 1336 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1337 | |||
| 1327 | if (catch_signals && signal_pending(current)) { | 1338 | if (catch_signals && signal_pending(current)) { |
| 1328 | ret = -ERESTARTSYS; | 1339 | ret = -ERESTARTSYS; |
| 1329 | goto out; | 1340 | goto unlock; |
| 1330 | } | 1341 | } |
| 1331 | 1342 | ||
| 1332 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1333 | |||
| 1334 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, | 1343 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, |
| 1335 | "Cluster lock called on freeing lockres %s! flags " | 1344 | "Cluster lock called on freeing lockres %s! flags " |
| 1336 | "0x%lx\n", lockres->l_name, lockres->l_flags); | 1345 | "0x%lx\n", lockres->l_name, lockres->l_flags); |
| @@ -1347,6 +1356,25 @@ again: | |||
| 1347 | goto unlock; | 1356 | goto unlock; |
| 1348 | } | 1357 | } |
| 1349 | 1358 | ||
| 1359 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { | ||
| 1360 | /* | ||
| 1361 | * We've upconverted. If the lock now has a level we can | ||
| 1362 | * work with, we take it. If, however, the lock is not at the | ||
| 1363 | * required level, we go thru the full cycle. One way this could | ||
| 1364 | * happen is if a process requesting an upconvert to PR is | ||
| 1365 | * closely followed by another requesting upconvert to an EX. | ||
| 1366 | * If the process requesting EX lands here, we want it to | ||
| 1367 | * continue attempting to upconvert and let the process | ||
| 1368 | * requesting PR take the lock. | ||
| 1369 | * If multiple processes request upconvert to PR, the first one | ||
| 1370 | * here will take the lock. The others will have to go thru the | ||
| 1371 | * OCFS2_LOCK_BLOCKED check to ensure that there is no pending | ||
| 1372 | * downconvert request. | ||
| 1373 | */ | ||
| 1374 | if (level <= lockres->l_level) | ||
| 1375 | goto update_holders; | ||
| 1376 | } | ||
| 1377 | |||
| 1350 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 1378 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
| 1351 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 1379 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
| 1352 | /* is the lock is currently blocked on behalf of | 1380 | /* is the lock is currently blocked on behalf of |
| @@ -1417,11 +1445,14 @@ again: | |||
| 1417 | goto again; | 1445 | goto again; |
| 1418 | } | 1446 | } |
| 1419 | 1447 | ||
| 1448 | update_holders: | ||
| 1420 | /* Ok, if we get here then we're good to go. */ | 1449 | /* Ok, if we get here then we're good to go. */ |
| 1421 | ocfs2_inc_holders(lockres, level); | 1450 | ocfs2_inc_holders(lockres, level); |
| 1422 | 1451 | ||
| 1423 | ret = 0; | 1452 | ret = 0; |
| 1424 | unlock: | 1453 | unlock: |
| 1454 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
| 1455 | |||
| 1425 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1456 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1426 | out: | 1457 | out: |
| 1427 | /* | 1458 | /* |
| @@ -3402,6 +3433,18 @@ recheck: | |||
| 3402 | goto leave; | 3433 | goto leave; |
| 3403 | } | 3434 | } |
| 3404 | 3435 | ||
| 3436 | /* | ||
| 3437 | * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is | ||
| 3438 | * set when the ast is received for an upconvert just before the | ||
| 3439 | * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast | ||
| 3440 | * on the heels of the ast, we want to delay the downconvert just | ||
| 3441 | * enough to allow the up requestor to do its task. Because this | ||
| 3442 | * lock is in the blocked queue, the lock will be downconverted | ||
| 3443 | * as soon as the requestor is done with the lock. | ||
| 3444 | */ | ||
| 3445 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) | ||
| 3446 | goto leave_requeue; | ||
| 3447 | |||
| 3405 | /* if we're blocking an exclusive and we have *any* holders, | 3448 | /* if we're blocking an exclusive and we have *any* holders, |
| 3406 | * then requeue. */ | 3449 | * then requeue. */ |
| 3407 | if ((lockres->l_blocking == DLM_LOCK_EX) | 3450 | if ((lockres->l_blocking == DLM_LOCK_EX) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9362eea7424b..740f448041e2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -136,6 +136,10 @@ enum ocfs2_unlock_action { | |||
| 136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | 136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a |
| 137 | call to dlm_lock. Only | 137 | call to dlm_lock. Only |
| 138 | exists with BUSY set. */ | 138 | exists with BUSY set. */ |
| 139 | #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread | ||
| 140 | * from downconverting | ||
| 141 | * before the upconvert | ||
| 142 | * has completed */ | ||
| 139 | 143 | ||
| 140 | struct ocfs2_lock_res_ops; | 144 | struct ocfs2_lock_res_ops; |
| 141 | 145 | ||
