diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2010-02-26 22:42:44 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2010-02-27 22:57:06 -0500 |
commit | 9b915181af0a99fe94ef0152e6a4ca9990c3b6d0 (patch) | |
tree | ecbca6ed6fd749c6130a7a1f28a9fe7f705ce029 /fs/ocfs2 | |
parent | bc9838c4d44a1713ab1bf24aa6675bc3a02b6a88 (diff) |
ocfs2: Use a separate masklog for AST and BASTs
This patch adds a new masklog and uses it allow tracing ASTs and BASTs
in the dlmglue layer. This has been found to be very useful in debugging
cluster locking issues.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/cluster/masklog.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/cluster/masklog.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 90 |
3 files changed, 67 insertions, 25 deletions
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 1cd2934de615..b39da877b12f 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -112,6 +112,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
112 | define_mask(XATTR), | 112 | define_mask(XATTR), |
113 | define_mask(QUOTA), | 113 | define_mask(QUOTA), |
114 | define_mask(REFCOUNT), | 114 | define_mask(REFCOUNT), |
115 | define_mask(BASTS), | ||
115 | define_mask(ERROR), | 116 | define_mask(ERROR), |
116 | define_mask(NOTICE), | 117 | define_mask(NOTICE), |
117 | define_mask(KTHREAD), | 118 | define_mask(KTHREAD), |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 0442366b3060..3dfddbec32f2 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -114,6 +114,7 @@ | |||
114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | 114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ |
115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ | 115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ |
116 | #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ | 116 | #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ |
117 | #define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ | ||
117 | /* bits that are infrequently given and frequently matched in the high word */ | 118 | /* bits that are infrequently given and frequently matched in the high word */ |
118 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
119 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index d009d7744d63..8298608d4165 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -932,6 +932,10 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
932 | lockres->l_blocking = level; | 932 | lockres->l_blocking = level; |
933 | } | 933 | } |
934 | 934 | ||
935 | mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", | ||
936 | lockres->l_name, level, lockres->l_level, lockres->l_blocking, | ||
937 | needs_downconvert); | ||
938 | |||
935 | if (needs_downconvert) | 939 | if (needs_downconvert) |
936 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 940 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
937 | 941 | ||
@@ -1054,8 +1058,8 @@ static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) | |||
1054 | 1058 | ||
1055 | BUG_ON(level <= DLM_LOCK_NL); | 1059 | BUG_ON(level <= DLM_LOCK_NL); |
1056 | 1060 | ||
1057 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 1061 | mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " |
1058 | lockres->l_name, level, lockres->l_level, | 1062 | "type %s\n", lockres->l_name, level, lockres->l_level, |
1059 | ocfs2_lock_type_string(lockres->l_type)); | 1063 | ocfs2_lock_type_string(lockres->l_type)); |
1060 | 1064 | ||
1061 | /* | 1065 | /* |
@@ -1099,6 +1103,10 @@ static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) | |||
1099 | return; | 1103 | return; |
1100 | } | 1104 | } |
1101 | 1105 | ||
1106 | mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " | ||
1107 | "level %d => %d\n", lockres->l_name, lockres->l_action, | ||
1108 | lockres->l_unlock_action, lockres->l_level, lockres->l_requested); | ||
1109 | |||
1102 | switch(lockres->l_action) { | 1110 | switch(lockres->l_action) { |
1103 | case OCFS2_AST_ATTACH: | 1111 | case OCFS2_AST_ATTACH: |
1104 | ocfs2_generic_handle_attach_action(lockres); | 1112 | ocfs2_generic_handle_attach_action(lockres); |
@@ -1111,8 +1119,8 @@ static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) | |||
1111 | ocfs2_generic_handle_downconvert_action(lockres); | 1119 | ocfs2_generic_handle_downconvert_action(lockres); |
1112 | break; | 1120 | break; |
1113 | default: | 1121 | default: |
1114 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | 1122 | mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " |
1115 | "lockres flags = 0x%lx, unlock action: %u\n", | 1123 | "flags 0x%lx, unlock: %u\n", |
1116 | lockres->l_name, lockres->l_action, lockres->l_flags, | 1124 | lockres->l_name, lockres->l_action, lockres->l_flags, |
1117 | lockres->l_unlock_action); | 1125 | lockres->l_unlock_action); |
1118 | BUG(); | 1126 | BUG(); |
@@ -1145,8 +1153,8 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) | |||
1145 | 1153 | ||
1146 | mlog_entry_void(); | 1154 | mlog_entry_void(); |
1147 | 1155 | ||
1148 | mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, | 1156 | mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", |
1149 | lockres->l_unlock_action); | 1157 | lockres->l_name, lockres->l_unlock_action); |
1150 | 1158 | ||
1151 | spin_lock_irqsave(&lockres->l_lock, flags); | 1159 | spin_lock_irqsave(&lockres->l_lock, flags); |
1152 | if (error) { | 1160 | if (error) { |
@@ -1497,7 +1505,7 @@ again: | |||
1497 | BUG_ON(level == DLM_LOCK_IV); | 1505 | BUG_ON(level == DLM_LOCK_IV); |
1498 | BUG_ON(level == DLM_LOCK_NL); | 1506 | BUG_ON(level == DLM_LOCK_NL); |
1499 | 1507 | ||
1500 | mlog(0, "lock %s, convert from %d to level = %d\n", | 1508 | mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", |
1501 | lockres->l_name, lockres->l_level, level); | 1509 | lockres->l_name, lockres->l_level, level); |
1502 | 1510 | ||
1503 | /* call dlm_lock to upgrade lock now */ | 1511 | /* call dlm_lock to upgrade lock now */ |
@@ -3314,13 +3322,20 @@ static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | |||
3314 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 3322 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
3315 | 3323 | ||
3316 | if (lockres->l_level <= new_level) { | 3324 | if (lockres->l_level <= new_level) { |
3317 | mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", | 3325 | mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " |
3318 | lockres->l_level, new_level); | 3326 | "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " |
3327 | "block %d, pgen %d\n", lockres->l_name, lockres->l_level, | ||
3328 | new_level, list_empty(&lockres->l_blocked_list), | ||
3329 | list_empty(&lockres->l_mask_waiters), lockres->l_type, | ||
3330 | lockres->l_flags, lockres->l_ro_holders, | ||
3331 | lockres->l_ex_holders, lockres->l_action, | ||
3332 | lockres->l_unlock_action, lockres->l_requested, | ||
3333 | lockres->l_blocking, lockres->l_pending_gen); | ||
3319 | BUG(); | 3334 | BUG(); |
3320 | } | 3335 | } |
3321 | 3336 | ||
3322 | mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", | 3337 | mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", |
3323 | lockres->l_name, new_level, lockres->l_blocking); | 3338 | lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); |
3324 | 3339 | ||
3325 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 3340 | lockres->l_action = OCFS2_AST_DOWNCONVERT; |
3326 | lockres->l_requested = new_level; | 3341 | lockres->l_requested = new_level; |
@@ -3339,6 +3354,9 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
3339 | 3354 | ||
3340 | mlog_entry_void(); | 3355 | mlog_entry_void(); |
3341 | 3356 | ||
3357 | mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, | ||
3358 | lockres->l_level, new_level); | ||
3359 | |||
3342 | if (lvb) | 3360 | if (lvb) |
3343 | dlm_flags |= DLM_LKF_VALBLK; | 3361 | dlm_flags |= DLM_LKF_VALBLK; |
3344 | 3362 | ||
@@ -3368,14 +3386,12 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | |||
3368 | assert_spin_locked(&lockres->l_lock); | 3386 | assert_spin_locked(&lockres->l_lock); |
3369 | 3387 | ||
3370 | mlog_entry_void(); | 3388 | mlog_entry_void(); |
3371 | mlog(0, "lock %s\n", lockres->l_name); | ||
3372 | 3389 | ||
3373 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 3390 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { |
3374 | /* If we're already trying to cancel a lock conversion | 3391 | /* If we're already trying to cancel a lock conversion |
3375 | * then just drop the spinlock and allow the caller to | 3392 | * then just drop the spinlock and allow the caller to |
3376 | * requeue this lock. */ | 3393 | * requeue this lock. */ |
3377 | 3394 | mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); | |
3378 | mlog(0, "Lockres %s, skip convert\n", lockres->l_name); | ||
3379 | return 0; | 3395 | return 0; |
3380 | } | 3396 | } |
3381 | 3397 | ||
@@ -3390,6 +3406,8 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | |||
3390 | "lock %s, invalid flags: 0x%lx\n", | 3406 | "lock %s, invalid flags: 0x%lx\n", |
3391 | lockres->l_name, lockres->l_flags); | 3407 | lockres->l_name, lockres->l_flags); |
3392 | 3408 | ||
3409 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | ||
3410 | |||
3393 | return 1; | 3411 | return 1; |
3394 | } | 3412 | } |
3395 | 3413 | ||
@@ -3399,7 +3417,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
3399 | int ret; | 3417 | int ret; |
3400 | 3418 | ||
3401 | mlog_entry_void(); | 3419 | mlog_entry_void(); |
3402 | mlog(0, "lock %s\n", lockres->l_name); | ||
3403 | 3420 | ||
3404 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, | 3421 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
3405 | DLM_LKF_CANCEL); | 3422 | DLM_LKF_CANCEL); |
@@ -3408,7 +3425,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
3408 | ocfs2_recover_from_dlm_error(lockres, 0); | 3425 | ocfs2_recover_from_dlm_error(lockres, 0); |
3409 | } | 3426 | } |
3410 | 3427 | ||
3411 | mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); | 3428 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); |
3412 | 3429 | ||
3413 | mlog_exit(ret); | 3430 | mlog_exit(ret); |
3414 | return ret; | 3431 | return ret; |
@@ -3465,8 +3482,11 @@ recheck: | |||
3465 | * at the same time they set OCFS2_DLM_BUSY. They must | 3482 | * at the same time they set OCFS2_DLM_BUSY. They must |
3466 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. | 3483 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. |
3467 | */ | 3484 | */ |
3468 | if (lockres->l_flags & OCFS2_LOCK_PENDING) | 3485 | if (lockres->l_flags & OCFS2_LOCK_PENDING) { |
3486 | mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", | ||
3487 | lockres->l_name); | ||
3469 | goto leave_requeue; | 3488 | goto leave_requeue; |
3489 | } | ||
3470 | 3490 | ||
3471 | ctl->requeue = 1; | 3491 | ctl->requeue = 1; |
3472 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 3492 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
@@ -3498,6 +3518,7 @@ recheck: | |||
3498 | */ | 3518 | */ |
3499 | if (lockres->l_level == DLM_LOCK_NL) { | 3519 | if (lockres->l_level == DLM_LOCK_NL) { |
3500 | BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); | 3520 | BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); |
3521 | mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); | ||
3501 | lockres->l_blocking = DLM_LOCK_NL; | 3522 | lockres->l_blocking = DLM_LOCK_NL; |
3502 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 3523 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); |
3503 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3524 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
@@ -3507,28 +3528,41 @@ recheck: | |||
3507 | /* if we're blocking an exclusive and we have *any* holders, | 3528 | /* if we're blocking an exclusive and we have *any* holders, |
3508 | * then requeue. */ | 3529 | * then requeue. */ |
3509 | if ((lockres->l_blocking == DLM_LOCK_EX) | 3530 | if ((lockres->l_blocking == DLM_LOCK_EX) |
3510 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 3531 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { |
3532 | mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", | ||
3533 | lockres->l_name, lockres->l_ex_holders, | ||
3534 | lockres->l_ro_holders); | ||
3511 | goto leave_requeue; | 3535 | goto leave_requeue; |
3536 | } | ||
3512 | 3537 | ||
3513 | /* If it's a PR we're blocking, then only | 3538 | /* If it's a PR we're blocking, then only |
3514 | * requeue if we've got any EX holders */ | 3539 | * requeue if we've got any EX holders */ |
3515 | if (lockres->l_blocking == DLM_LOCK_PR && | 3540 | if (lockres->l_blocking == DLM_LOCK_PR && |
3516 | lockres->l_ex_holders) | 3541 | lockres->l_ex_holders) { |
3542 | mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", | ||
3543 | lockres->l_name, lockres->l_ex_holders); | ||
3517 | goto leave_requeue; | 3544 | goto leave_requeue; |
3545 | } | ||
3518 | 3546 | ||
3519 | /* | 3547 | /* |
3520 | * Can we get a lock in this state if the holder counts are | 3548 | * Can we get a lock in this state if the holder counts are |
3521 | * zero? The meta data unblock code used to check this. | 3549 | * zero? The meta data unblock code used to check this. |
3522 | */ | 3550 | */ |
3523 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 3551 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
3524 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) | 3552 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { |
3553 | mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", | ||
3554 | lockres->l_name); | ||
3525 | goto leave_requeue; | 3555 | goto leave_requeue; |
3556 | } | ||
3526 | 3557 | ||
3527 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 3558 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); |
3528 | 3559 | ||
3529 | if (lockres->l_ops->check_downconvert | 3560 | if (lockres->l_ops->check_downconvert |
3530 | && !lockres->l_ops->check_downconvert(lockres, new_level)) | 3561 | && !lockres->l_ops->check_downconvert(lockres, new_level)) { |
3562 | mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", | ||
3563 | lockres->l_name); | ||
3531 | goto leave_requeue; | 3564 | goto leave_requeue; |
3565 | } | ||
3532 | 3566 | ||
3533 | /* If we get here, then we know that there are no more | 3567 | /* If we get here, then we know that there are no more |
3534 | * incompatible holders (and anyone asking for an incompatible | 3568 | * incompatible holders (and anyone asking for an incompatible |
@@ -3546,13 +3580,19 @@ recheck: | |||
3546 | 3580 | ||
3547 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); | 3581 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); |
3548 | 3582 | ||
3549 | if (ctl->unblock_action == UNBLOCK_STOP_POST) | 3583 | if (ctl->unblock_action == UNBLOCK_STOP_POST) { |
3584 | mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", | ||
3585 | lockres->l_name); | ||
3550 | goto leave; | 3586 | goto leave; |
3587 | } | ||
3551 | 3588 | ||
3552 | spin_lock_irqsave(&lockres->l_lock, flags); | 3589 | spin_lock_irqsave(&lockres->l_lock, flags); |
3553 | if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { | 3590 | if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { |
3554 | /* If this changed underneath us, then we can't drop | 3591 | /* If this changed underneath us, then we can't drop |
3555 | * it just yet. */ | 3592 | * it just yet. */ |
3593 | mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " | ||
3594 | "Recheck\n", lockres->l_name, blocking, | ||
3595 | lockres->l_blocking, level, lockres->l_level); | ||
3556 | goto recheck; | 3596 | goto recheck; |
3557 | } | 3597 | } |
3558 | 3598 | ||
@@ -3963,7 +4003,7 @@ static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3963 | BUG_ON(!lockres); | 4003 | BUG_ON(!lockres); |
3964 | BUG_ON(!lockres->l_ops); | 4004 | BUG_ON(!lockres->l_ops); |
3965 | 4005 | ||
3966 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 4006 | mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); |
3967 | 4007 | ||
3968 | /* Detect whether a lock has been marked as going away while | 4008 | /* Detect whether a lock has been marked as going away while |
3969 | * the downconvert thread was processing other things. A lock can | 4009 | * the downconvert thread was processing other things. A lock can |
@@ -3986,7 +4026,7 @@ unqueue: | |||
3986 | } else | 4026 | } else |
3987 | ocfs2_schedule_blocked_lock(osb, lockres); | 4027 | ocfs2_schedule_blocked_lock(osb, lockres); |
3988 | 4028 | ||
3989 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, | 4029 | mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, |
3990 | ctl.requeue ? "yes" : "no"); | 4030 | ctl.requeue ? "yes" : "no"); |
3991 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 4031 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3992 | 4032 | ||
@@ -4008,7 +4048,7 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
4008 | /* Do not schedule a lock for downconvert when it's on | 4048 | /* Do not schedule a lock for downconvert when it's on |
4009 | * the way to destruction - any nodes wanting access | 4049 | * the way to destruction - any nodes wanting access |
4010 | * to the resource will get it soon. */ | 4050 | * to the resource will get it soon. */ |
4011 | mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", | 4051 | mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", |
4012 | lockres->l_name, lockres->l_flags); | 4052 | lockres->l_name, lockres->l_flags); |
4013 | return; | 4053 | return; |
4014 | } | 4054 | } |