aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2009-07-13 16:16:20 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-07-13 16:16:20 -0400
commitf91d1d04171026e56c7e343ee3cdcc801dd85cfb (patch)
tree282341868e06cb09ecd6c0838ea027feec514c2c /fs/jbd2/transaction.c
parent3e03f9ca6a2599db1823bb0ea24e0845219a0e69 (diff)
jbd2: Fix a race between checkpointing code and journal_get_write_access()
The following race can happen: CPU1 CPU2 checkpointing code checks the buffer, adds it to an array for writeback do_get_write_access() ... lock_buffer() unlock_buffer() flush_batch() submits the buffer for IO __jbd2_journal_file_buffer() So a buffer under writeout is returned from do_get_write_access(). Since the filesystem code relies on the fact that journaled buffers cannot be written out, it does not take the buffer lock and so it can modify buffer while it is under writeout. That can lead to a filesystem corruption if we crash at the right moment. We fix the problem by clearing the buffer dirty bit under buffer_lock even if the buffer is on BJ_None list. Actually, we clear the dirty bit regardless the list the buffer is in and warn about the fact if the buffer is already journalled. Thanks for spotting the problem goes to dingdinghua <dingdinghua85@gmail.com>. Reported-by: dingdinghua <dingdinghua85@gmail.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r--fs/jbd2/transaction.c68
1 files changed, 35 insertions, 33 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..6213ac728f30 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
499 wake_up(&journal->j_wait_transaction_locked); 499 wake_up(&journal->j_wait_transaction_locked);
500} 500}
501 501
502/* 502static void warn_dirty_buffer(struct buffer_head *bh)
503 * Report any unexpected dirty buffers which turn up. Normally those
504 * indicate an error, but they can occur if the user is running (say)
505 * tune2fs to modify the live filesystem, so we need the option of
506 * continuing as gracefully as possible. #
507 *
508 * The caller should already hold the journal lock and
509 * j_list_lock spinlock: most callers will need those anyway
510 * in order to probe the buffer's journaling state safely.
511 */
512static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
513{ 503{
514 int jlist; 504 char b[BDEVNAME_SIZE];
515
516 /* If this buffer is one which might reasonably be dirty
517 * --- ie. data, or not part of this journal --- then
518 * we're OK to leave it alone, but otherwise we need to
519 * move the dirty bit to the journal's own internal
520 * JBDDirty bit. */
521 jlist = jh->b_jlist;
522 505
523 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 506 printk(KERN_WARNING
524 jlist == BJ_Shadow || jlist == BJ_Forget) { 507 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
525 struct buffer_head *bh = jh2bh(jh); 508 "There's a risk of filesystem corruption in case of system "
526 509 "crash.\n",
527 if (test_clear_buffer_dirty(bh)) 510 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
528 set_buffer_jbddirty(bh);
529 }
530} 511}
531 512
532/* 513/*
@@ -593,14 +574,16 @@ repeat:
593 if (jh->b_next_transaction) 574 if (jh->b_next_transaction)
594 J_ASSERT_JH(jh, jh->b_next_transaction == 575 J_ASSERT_JH(jh, jh->b_next_transaction ==
595 transaction); 576 transaction);
577 warn_dirty_buffer(bh);
596 } 578 }
597 /* 579 /*
598 * In any case we need to clean the dirty flag and we must 580 * In any case we need to clean the dirty flag and we must
599 * do it under the buffer lock to be sure we don't race 581 * do it under the buffer lock to be sure we don't race
600 * with running write-out. 582 * with running write-out.
601 */ 583 */
602 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 584 JBUFFER_TRACE(jh, "Journalling dirty buffer");
603 jbd_unexpected_dirty_buffer(jh); 585 clear_buffer_dirty(bh);
586 set_buffer_jbddirty(bh);
604 } 587 }
605 588
606 unlock_buffer(bh); 589 unlock_buffer(bh);
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
843 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); 826 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
844 827
845 if (jh->b_transaction == NULL) { 828 if (jh->b_transaction == NULL) {
829 /*
830 * Previous jbd2_journal_forget() could have left the buffer
831 * with jbddirty bit set because it was being committed. When
832 * the commit finished, we've filed the buffer for
833 * checkpointing and marked it dirty. Now we are reallocating
834 * the buffer so the transaction freeing it must have
835 * committed and so it's safe to clear the dirty bit.
836 */
837 clear_buffer_dirty(jh2bh(jh));
846 jh->b_transaction = transaction; 838 jh->b_transaction = transaction;
847 839
848 /* first access by this transaction */ 840 /* first access by this transaction */
@@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1644 1636
1645 if (jh->b_cp_transaction) { 1637 if (jh->b_cp_transaction) {
1646 JBUFFER_TRACE(jh, "on running+cp transaction"); 1638 JBUFFER_TRACE(jh, "on running+cp transaction");
1639 /*
1640 * We don't want to write the buffer anymore, clear the
1641 * bit so that we don't confuse checks in
1642 * __journal_file_buffer
1643 */
1644 clear_buffer_dirty(bh);
1647 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1645 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1648 clear_buffer_jbddirty(bh);
1649 may_free = 0; 1646 may_free = 0;
1650 } else { 1647 } else {
1651 JBUFFER_TRACE(jh, "on running transaction"); 1648 JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1896 if (jh->b_transaction && jh->b_jlist == jlist) 1893 if (jh->b_transaction && jh->b_jlist == jlist)
1897 return; 1894 return;
1898 1895
1899 /* The following list of buffer states needs to be consistent
1900 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1901 * state. */
1902
1903 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1896 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1904 jlist == BJ_Shadow || jlist == BJ_Forget) { 1897 jlist == BJ_Shadow || jlist == BJ_Forget) {
1898 /*
1899 * For metadata buffers, we track dirty bit in buffer_jbddirty
1900 * instead of buffer_dirty. We should not see a dirty bit set
1901 * here because we clear it in do_get_write_access but e.g.
1902 * tune2fs can modify the sb and set the dirty bit at any time
1903 * so we try to gracefully handle that.
1904 */
1905 if (buffer_dirty(bh))
1906 warn_dirty_buffer(bh);
1905 if (test_clear_buffer_dirty(bh) || 1907 if (test_clear_buffer_dirty(bh) ||
1906 test_clear_buffer_jbddirty(bh)) 1908 test_clear_buffer_jbddirty(bh))
1907 was_dirty = 1; 1909 was_dirty = 1;