aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2012-04-07 05:05:19 -0400
committer: Jan Kara <jack@suse.cz> 2012-05-15 17:34:37 -0400
commit: fd2cbd4dfa3db477dd6226d387d3f1911d36a6a9 (patch)
tree: b0ada946d14cdcf5db6da2d177be9590a3449e9a /fs/jbd
parent: 1ce8486dcc00c1e095af8d155fa4451936b89013 (diff)
jbd: Write journal superblock with WRITE_FUA after checkpointing
If the journal superblock is written only to the disk's caches and another transaction starts reusing the space of a transaction cleaned from the log, it can happen that blocks of the new transaction reach the disk before the journal superblock does. When a power failure happens in such a case, the subsequent journal replay would still try to replay the old transaction, but some of its blocks may already have been overwritten by the new transaction. For this reason we must use WRITE_FUA when updating the log tail, and we must first write the new log tail to disk and update the in-memory information only after that. Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/checkpoint.c23
-rw-r--r--fs/jbd/commit.c9
-rw-r--r--fs/jbd/journal.c60
3 files changed, 57 insertions, 35 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 80c85f3e087f..08c03044abdd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal)
508 /* 508 /*
509 * We need to make sure that any blocks that were recently written out 509 * We need to make sure that any blocks that were recently written out
510 * --- perhaps by log_do_checkpoint() --- are flushed out before we 510 * --- perhaps by log_do_checkpoint() --- are flushed out before we
511 * drop the transactions from the journal. It's unlikely this will be 511 * drop the transactions from the journal. Similarly we need to be sure
512 * necessary, especially with an appropriately sized journal, but we 512 * superblock makes it to disk before next transaction starts reusing
513 * need this to guarantee correctness. Fortunately 513 * freed space (otherwise we could replay some blocks of the new
514 * cleanup_journal_tail() doesn't get called all that often. 514 * transaction thinking they belong to the old one). So we use
515 * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially
516 * with an appropriately sized journal, but we need this to guarantee
517 * correctness. Fortunately cleanup_journal_tail() doesn't get called
518 * all that often.
515 */ 519 */
516 if (journal->j_flags & JFS_BARRIER) 520 journal_update_sb_log_tail(journal, first_tid, blocknr,
517 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 521 WRITE_FLUSH_FUA);
518 522
519 spin_lock(&journal->j_state_lock); 523 spin_lock(&journal->j_state_lock);
520 if (!tid_gt(first_tid, journal->j_tail_sequence)) {
521 spin_unlock(&journal->j_state_lock);
522 /* Someone else cleaned up journal so return 0 */
523 return 0;
524 }
525 /* OK, update the superblock to recover the freed space. 524 /* OK, update the superblock to recover the freed space.
526 * Physical blocks come first: have we wrapped beyond the end of 525 * Physical blocks come first: have we wrapped beyond the end of
527 * the log? */ 526 * the log? */
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal)
539 journal->j_tail_sequence = first_tid; 538 journal->j_tail_sequence = first_tid;
540 journal->j_tail = blocknr; 539 journal->j_tail = blocknr;
541 spin_unlock(&journal->j_state_lock); 540 spin_unlock(&journal->j_state_lock);
542 if (!(journal->j_flags & JFS_ABORT))
543 journal_update_sb_log_tail(journal);
544 return 0; 541 return 0;
545} 542}
546 543
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 1b27f46e6108..52c15c776029 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -309,7 +309,14 @@ void journal_commit_transaction(journal_t *journal)
309 if (journal->j_flags & JFS_FLUSHED) { 309 if (journal->j_flags & JFS_FLUSHED) {
310 jbd_debug(3, "super block updated\n"); 310 jbd_debug(3, "super block updated\n");
311 mutex_lock(&journal->j_checkpoint_mutex); 311 mutex_lock(&journal->j_checkpoint_mutex);
312 journal_update_sb_log_tail(journal); 312 /*
313 * We hold j_checkpoint_mutex so tail cannot change under us.
314 * We don't need any special data guarantees for writing sb
315 * since journal is empty and it is ok for write to be
316 * flushed only with transaction commit.
317 */
318 journal_update_sb_log_tail(journal, journal->j_tail_sequence,
319 journal->j_tail, WRITE_SYNC);
313 mutex_unlock(&journal->j_checkpoint_mutex); 320 mutex_unlock(&journal->j_checkpoint_mutex);
314 } else { 321 } else {
315 jbd_debug(3, "superblock not updated\n"); 322 jbd_debug(3, "superblock not updated\n");
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b29c7678525d..425c2f2cf170 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -938,8 +938,16 @@ static int journal_reset(journal_t *journal)
938 } else { 938 } else {
939 /* Lock here to make assertions happy... */ 939 /* Lock here to make assertions happy... */
940 mutex_lock(&journal->j_checkpoint_mutex); 940 mutex_lock(&journal->j_checkpoint_mutex);
941 /* Add the dynamic fields and write it to disk. */ 941 /*
942 journal_update_sb_log_tail(journal); 942 * Update log tail information. We use WRITE_FUA since new
943 * transaction will start reusing journal space and so we
944 * must make sure information about current log tail is on
945 * disk before that.
946 */
947 journal_update_sb_log_tail(journal,
948 journal->j_tail_sequence,
949 journal->j_tail,
950 WRITE_FUA);
943 mutex_unlock(&journal->j_checkpoint_mutex); 951 mutex_unlock(&journal->j_checkpoint_mutex);
944 } 952 }
945 return journal_start_thread(journal); 953 return journal_start_thread(journal);
@@ -1018,11 +1026,15 @@ int journal_create(journal_t *journal)
1018 return journal_reset(journal); 1026 return journal_reset(journal);
1019} 1027}
1020 1028
1021static void journal_write_superblock(journal_t *journal) 1029static void journal_write_superblock(journal_t *journal, int write_op)
1022{ 1030{
1023 struct buffer_head *bh = journal->j_sb_buffer; 1031 struct buffer_head *bh = journal->j_sb_buffer;
1032 int ret;
1024 1033
1025 trace_journal_write_superblock(journal); 1034 trace_journal_write_superblock(journal, write_op);
1035 if (!(journal->j_flags & JFS_BARRIER))
1036 write_op &= ~(REQ_FUA | REQ_FLUSH);
1037 lock_buffer(bh);
1026 if (buffer_write_io_error(bh)) { 1038 if (buffer_write_io_error(bh)) {
1027 char b[BDEVNAME_SIZE]; 1039 char b[BDEVNAME_SIZE];
1028 /* 1040 /*
@@ -1040,40 +1052,46 @@ static void journal_write_superblock(journal_t *journal)
1040 set_buffer_uptodate(bh); 1052 set_buffer_uptodate(bh);
1041 } 1053 }
1042 1054
1043 BUFFER_TRACE(bh, "marking dirty"); 1055 get_bh(bh);
1044 mark_buffer_dirty(bh); 1056 bh->b_end_io = end_buffer_write_sync;
1045 sync_dirty_buffer(bh); 1057 ret = submit_bh(write_op, bh);
1058 wait_on_buffer(bh);
1046 if (buffer_write_io_error(bh)) { 1059 if (buffer_write_io_error(bh)) {
1047 char b[BDEVNAME_SIZE];
1048 printk(KERN_ERR "JBD: I/O error detected "
1049 "when updating journal superblock for %s.\n",
1050 journal_dev_name(journal, b));
1051 clear_buffer_write_io_error(bh); 1060 clear_buffer_write_io_error(bh);
1052 set_buffer_uptodate(bh); 1061 set_buffer_uptodate(bh);
1062 ret = -EIO;
1063 }
1064 if (ret) {
1065 char b[BDEVNAME_SIZE];
1066 printk(KERN_ERR "JBD: Error %d detected "
1067 "when updating journal superblock for %s.\n",
1068 ret, journal_dev_name(journal, b));
1053 } 1069 }
1054} 1070}
1055 1071
1056/** 1072/**
1057 * journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1073 * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1058 * @journal: The journal to update. 1074 * @journal: The journal to update.
1075 * @tail_tid: TID of the new transaction at the tail of the log
1076 * @tail_block: The first block of the transaction at the tail of the log
1077 * @write_op: With which operation should we write the journal sb
1059 * 1078 *
1060 * Update a journal's superblock information about log tail and write it to 1079 * Update a journal's superblock information about log tail and write it to
1061 * disk, waiting for the IO to complete. 1080 * disk, waiting for the IO to complete.
1062 */ 1081 */
1063void journal_update_sb_log_tail(journal_t *journal) 1082void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1083 unsigned int tail_block, int write_op)
1064{ 1084{
1065 journal_superblock_t *sb = journal->j_superblock; 1085 journal_superblock_t *sb = journal->j_superblock;
1066 1086
1067 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1087 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1068 spin_lock(&journal->j_state_lock); 1088 jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
1069 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", 1089 tail_block, tail_tid);
1070 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1071 1090
1072 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1091 sb->s_sequence = cpu_to_be32(tail_tid);
1073 sb->s_start = cpu_to_be32(journal->j_tail); 1092 sb->s_start = cpu_to_be32(tail_block);
1074 spin_unlock(&journal->j_state_lock);
1075 1093
1076 journal_write_superblock(journal); 1094 journal_write_superblock(journal, write_op);
1077 1095
1078 /* Log is no longer empty */ 1096 /* Log is no longer empty */
1079 spin_lock(&journal->j_state_lock); 1097 spin_lock(&journal->j_state_lock);
@@ -1102,7 +1120,7 @@ static void mark_journal_empty(journal_t *journal)
1102 sb->s_start = cpu_to_be32(0); 1120 sb->s_start = cpu_to_be32(0);
1103 spin_unlock(&journal->j_state_lock); 1121 spin_unlock(&journal->j_state_lock);
1104 1122
1105 journal_write_superblock(journal); 1123 journal_write_superblock(journal, WRITE_FUA);
1106 1124
1107 spin_lock(&journal->j_state_lock); 1125 spin_lock(&journal->j_state_lock);
1108 /* Log is empty */ 1126 /* Log is empty */
@@ -1127,7 +1145,7 @@ static void journal_update_sb_errno(journal_t *journal)
1127 sb->s_errno = cpu_to_be32(journal->j_errno); 1145 sb->s_errno = cpu_to_be32(journal->j_errno);
1128 spin_unlock(&journal->j_state_lock); 1146 spin_unlock(&journal->j_state_lock);
1129 1147
1130 journal_write_superblock(journal); 1148 journal_write_superblock(journal, WRITE_SYNC);
1131} 1149}
1132 1150
1133/* 1151/*