aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlmglue.c
diff options
context:
space:
mode:
authorSunil Mushran <sunil.mushran@oracle.com>2010-02-26 22:42:44 -0500
committerJoel Becker <joel.becker@oracle.com>2010-02-27 22:57:06 -0500
commit9b915181af0a99fe94ef0152e6a4ca9990c3b6d0 (patch)
treeecbca6ed6fd749c6130a7a1f28a9fe7f705ce029 /fs/ocfs2/dlmglue.c
parentbc9838c4d44a1713ab1bf24aa6675bc3a02b6a88 (diff)
ocfs2: Use a separate masklog for AST and BASTs
This patch adds a new masklog and uses it allow tracing ASTs and BASTs in the dlmglue layer. This has been found to be very useful in debugging cluster locking issues. Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlmglue.c')
-rw-r--r--fs/ocfs2/dlmglue.c90
1 files changed, 65 insertions, 25 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index d009d7744d63..8298608d4165 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -932,6 +932,10 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
932 lockres->l_blocking = level; 932 lockres->l_blocking = level;
933 } 933 }
934 934
935 mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
936 lockres->l_name, level, lockres->l_level, lockres->l_blocking,
937 needs_downconvert);
938
935 if (needs_downconvert) 939 if (needs_downconvert)
936 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 940 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
937 941
@@ -1054,8 +1058,8 @@ static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1054 1058
1055 BUG_ON(level <= DLM_LOCK_NL); 1059 BUG_ON(level <= DLM_LOCK_NL);
1056 1060
1057 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 1061 mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
1058 lockres->l_name, level, lockres->l_level, 1062 "type %s\n", lockres->l_name, level, lockres->l_level,
1059 ocfs2_lock_type_string(lockres->l_type)); 1063 ocfs2_lock_type_string(lockres->l_type));
1060 1064
1061 /* 1065 /*
@@ -1099,6 +1103,10 @@ static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1099 return; 1103 return;
1100 } 1104 }
1101 1105
1106 mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
1107 "level %d => %d\n", lockres->l_name, lockres->l_action,
1108 lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
1109
1102 switch(lockres->l_action) { 1110 switch(lockres->l_action) {
1103 case OCFS2_AST_ATTACH: 1111 case OCFS2_AST_ATTACH:
1104 ocfs2_generic_handle_attach_action(lockres); 1112 ocfs2_generic_handle_attach_action(lockres);
@@ -1111,8 +1119,8 @@ static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1111 ocfs2_generic_handle_downconvert_action(lockres); 1119 ocfs2_generic_handle_downconvert_action(lockres);
1112 break; 1120 break;
1113 default: 1121 default:
1114 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 1122 mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
1115 "lockres flags = 0x%lx, unlock action: %u\n", 1123 "flags 0x%lx, unlock: %u\n",
1116 lockres->l_name, lockres->l_action, lockres->l_flags, 1124 lockres->l_name, lockres->l_action, lockres->l_flags,
1117 lockres->l_unlock_action); 1125 lockres->l_unlock_action);
1118 BUG(); 1126 BUG();
@@ -1145,8 +1153,8 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1145 1153
1146 mlog_entry_void(); 1154 mlog_entry_void();
1147 1155
1148 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 1156 mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
1149 lockres->l_unlock_action); 1157 lockres->l_name, lockres->l_unlock_action);
1150 1158
1151 spin_lock_irqsave(&lockres->l_lock, flags); 1159 spin_lock_irqsave(&lockres->l_lock, flags);
1152 if (error) { 1160 if (error) {
@@ -1497,7 +1505,7 @@ again:
1497 BUG_ON(level == DLM_LOCK_IV); 1505 BUG_ON(level == DLM_LOCK_IV);
1498 BUG_ON(level == DLM_LOCK_NL); 1506 BUG_ON(level == DLM_LOCK_NL);
1499 1507
1500 mlog(0, "lock %s, convert from %d to level = %d\n", 1508 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1501 lockres->l_name, lockres->l_level, level); 1509 lockres->l_name, lockres->l_level, level);
1502 1510
1503 /* call dlm_lock to upgrade lock now */ 1511 /* call dlm_lock to upgrade lock now */
@@ -3314,13 +3322,20 @@ static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3314 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3322 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3315 3323
3316 if (lockres->l_level <= new_level) { 3324 if (lockres->l_level <= new_level) {
3317 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", 3325 mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
3318 lockres->l_level, new_level); 3326 "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
3327 "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
3328 new_level, list_empty(&lockres->l_blocked_list),
3329 list_empty(&lockres->l_mask_waiters), lockres->l_type,
3330 lockres->l_flags, lockres->l_ro_holders,
3331 lockres->l_ex_holders, lockres->l_action,
3332 lockres->l_unlock_action, lockres->l_requested,
3333 lockres->l_blocking, lockres->l_pending_gen);
3319 BUG(); 3334 BUG();
3320 } 3335 }
3321 3336
3322 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3337 mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
3323 lockres->l_name, new_level, lockres->l_blocking); 3338 lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3324 3339
3325 lockres->l_action = OCFS2_AST_DOWNCONVERT; 3340 lockres->l_action = OCFS2_AST_DOWNCONVERT;
3326 lockres->l_requested = new_level; 3341 lockres->l_requested = new_level;
@@ -3339,6 +3354,9 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3339 3354
3340 mlog_entry_void(); 3355 mlog_entry_void();
3341 3356
3357 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
3358 lockres->l_level, new_level);
3359
3342 if (lvb) 3360 if (lvb)
3343 dlm_flags |= DLM_LKF_VALBLK; 3361 dlm_flags |= DLM_LKF_VALBLK;
3344 3362
@@ -3368,14 +3386,12 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3368 assert_spin_locked(&lockres->l_lock); 3386 assert_spin_locked(&lockres->l_lock);
3369 3387
3370 mlog_entry_void(); 3388 mlog_entry_void();
3371 mlog(0, "lock %s\n", lockres->l_name);
3372 3389
3373 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3390 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3374 /* If we're already trying to cancel a lock conversion 3391 /* If we're already trying to cancel a lock conversion
3375 * then just drop the spinlock and allow the caller to 3392 * then just drop the spinlock and allow the caller to
3376 * requeue this lock. */ 3393 * requeue this lock. */
3377 3394 mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3378 mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
3379 return 0; 3395 return 0;
3380 } 3396 }
3381 3397
@@ -3390,6 +3406,8 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3390 "lock %s, invalid flags: 0x%lx\n", 3406 "lock %s, invalid flags: 0x%lx\n",
3391 lockres->l_name, lockres->l_flags); 3407 lockres->l_name, lockres->l_flags);
3392 3408
3409 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3410
3393 return 1; 3411 return 1;
3394} 3412}
3395 3413
@@ -3399,7 +3417,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3399 int ret; 3417 int ret;
3400 3418
3401 mlog_entry_void(); 3419 mlog_entry_void();
3402 mlog(0, "lock %s\n", lockres->l_name);
3403 3420
3404 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3421 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3405 DLM_LKF_CANCEL); 3422 DLM_LKF_CANCEL);
@@ -3408,7 +3425,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3408 ocfs2_recover_from_dlm_error(lockres, 0); 3425 ocfs2_recover_from_dlm_error(lockres, 0);
3409 } 3426 }
3410 3427
3411 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3428 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3412 3429
3413 mlog_exit(ret); 3430 mlog_exit(ret);
3414 return ret; 3431 return ret;
@@ -3465,8 +3482,11 @@ recheck:
3465 * at the same time they set OCFS2_DLM_BUSY. They must 3482 * at the same time they set OCFS2_DLM_BUSY. They must
3466 * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3483 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3467 */ 3484 */
3468 if (lockres->l_flags & OCFS2_LOCK_PENDING) 3485 if (lockres->l_flags & OCFS2_LOCK_PENDING) {
3486 mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
3487 lockres->l_name);
3469 goto leave_requeue; 3488 goto leave_requeue;
3489 }
3470 3490
3471 ctl->requeue = 1; 3491 ctl->requeue = 1;
3472 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3492 ret = ocfs2_prepare_cancel_convert(osb, lockres);
@@ -3498,6 +3518,7 @@ recheck:
3498 */ 3518 */
3499 if (lockres->l_level == DLM_LOCK_NL) { 3519 if (lockres->l_level == DLM_LOCK_NL) {
3500 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 3520 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
3521 mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
3501 lockres->l_blocking = DLM_LOCK_NL; 3522 lockres->l_blocking = DLM_LOCK_NL;
3502 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 3523 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
3503 spin_unlock_irqrestore(&lockres->l_lock, flags); 3524 spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3507,28 +3528,41 @@ recheck:
3507 /* if we're blocking an exclusive and we have *any* holders, 3528 /* if we're blocking an exclusive and we have *any* holders,
3508 * then requeue. */ 3529 * then requeue. */
3509 if ((lockres->l_blocking == DLM_LOCK_EX) 3530 if ((lockres->l_blocking == DLM_LOCK_EX)
3510 && (lockres->l_ex_holders || lockres->l_ro_holders)) 3531 && (lockres->l_ex_holders || lockres->l_ro_holders)) {
3532 mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
3533 lockres->l_name, lockres->l_ex_holders,
3534 lockres->l_ro_holders);
3511 goto leave_requeue; 3535 goto leave_requeue;
3536 }
3512 3537
3513 /* If it's a PR we're blocking, then only 3538 /* If it's a PR we're blocking, then only
3514 * requeue if we've got any EX holders */ 3539 * requeue if we've got any EX holders */
3515 if (lockres->l_blocking == DLM_LOCK_PR && 3540 if (lockres->l_blocking == DLM_LOCK_PR &&
3516 lockres->l_ex_holders) 3541 lockres->l_ex_holders) {
3542 mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
3543 lockres->l_name, lockres->l_ex_holders);
3517 goto leave_requeue; 3544 goto leave_requeue;
3545 }
3518 3546
3519 /* 3547 /*
3520 * Can we get a lock in this state if the holder counts are 3548 * Can we get a lock in this state if the holder counts are
3521 * zero? The meta data unblock code used to check this. 3549 * zero? The meta data unblock code used to check this.
3522 */ 3550 */
3523 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3551 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3524 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3552 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
3553 mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
3554 lockres->l_name);
3525 goto leave_requeue; 3555 goto leave_requeue;
3556 }
3526 3557
3527 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 3558 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3528 3559
3529 if (lockres->l_ops->check_downconvert 3560 if (lockres->l_ops->check_downconvert
3530 && !lockres->l_ops->check_downconvert(lockres, new_level)) 3561 && !lockres->l_ops->check_downconvert(lockres, new_level)) {
3562 mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
3563 lockres->l_name);
3531 goto leave_requeue; 3564 goto leave_requeue;
3565 }
3532 3566
3533 /* If we get here, then we know that there are no more 3567 /* If we get here, then we know that there are no more
3534 * incompatible holders (and anyone asking for an incompatible 3568 * incompatible holders (and anyone asking for an incompatible
@@ -3546,13 +3580,19 @@ recheck:
3546 3580
3547 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3581 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3548 3582
3549 if (ctl->unblock_action == UNBLOCK_STOP_POST) 3583 if (ctl->unblock_action == UNBLOCK_STOP_POST) {
3584 mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
3585 lockres->l_name);
3550 goto leave; 3586 goto leave;
3587 }
3551 3588
3552 spin_lock_irqsave(&lockres->l_lock, flags); 3589 spin_lock_irqsave(&lockres->l_lock, flags);
3553 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3590 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3554 /* If this changed underneath us, then we can't drop 3591 /* If this changed underneath us, then we can't drop
3555 * it just yet. */ 3592 * it just yet. */
3593 mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
3594 "Recheck\n", lockres->l_name, blocking,
3595 lockres->l_blocking, level, lockres->l_level);
3556 goto recheck; 3596 goto recheck;
3557 } 3597 }
3558 3598
@@ -3963,7 +4003,7 @@ static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3963 BUG_ON(!lockres); 4003 BUG_ON(!lockres);
3964 BUG_ON(!lockres->l_ops); 4004 BUG_ON(!lockres->l_ops);
3965 4005
3966 mlog(0, "lockres %s blocked.\n", lockres->l_name); 4006 mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
3967 4007
3968 /* Detect whether a lock has been marked as going away while 4008 /* Detect whether a lock has been marked as going away while
3969 * the downconvert thread was processing other things. A lock can 4009 * the downconvert thread was processing other things. A lock can
@@ -3986,7 +4026,7 @@ unqueue:
3986 } else 4026 } else
3987 ocfs2_schedule_blocked_lock(osb, lockres); 4027 ocfs2_schedule_blocked_lock(osb, lockres);
3988 4028
3989 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 4029 mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
3990 ctl.requeue ? "yes" : "no"); 4030 ctl.requeue ? "yes" : "no");
3991 spin_unlock_irqrestore(&lockres->l_lock, flags); 4031 spin_unlock_irqrestore(&lockres->l_lock, flags);
3992 4032
@@ -4008,7 +4048,7 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4008 /* Do not schedule a lock for downconvert when it's on 4048 /* Do not schedule a lock for downconvert when it's on
4009 * the way to destruction - any nodes wanting access 4049 * the way to destruction - any nodes wanting access
4010 * to the resource will get it soon. */ 4050 * to the resource will get it soon. */
4011 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 4051 mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
4012 lockres->l_name, lockres->l_flags); 4052 lockres->l_name, lockres->l_flags);
4013 return; 4053 return;
4014 } 4054 }