diff options
-rw-r--r-- | fs/jbd/checkpoint.c | 23 | ||||
-rw-r--r-- | fs/jbd/commit.c | 9 | ||||
-rw-r--r-- | fs/jbd/journal.c | 60 | ||||
-rw-r--r-- | include/linux/jbd.h | 3 | ||||
-rw-r--r-- | include/trace/events/jbd.h | 9 |
5 files changed, 65 insertions, 39 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 80c85f3e087f..08c03044abdd 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal) | |||
508 | /* | 508 | /* |
509 | * We need to make sure that any blocks that were recently written out | 509 | * We need to make sure that any blocks that were recently written out |
510 | * --- perhaps by log_do_checkpoint() --- are flushed out before we | 510 | * --- perhaps by log_do_checkpoint() --- are flushed out before we |
511 | * drop the transactions from the journal. It's unlikely this will be | 511 | * drop the transactions from the journal. Similarly we need to be sure |
512 | * necessary, especially with an appropriately sized journal, but we | 512 | * superblock makes it to disk before next transaction starts reusing |
513 | * need this to guarantee correctness. Fortunately | 513 | * freed space (otherwise we could replay some blocks of the new |
514 | * cleanup_journal_tail() doesn't get called all that often. | 514 | * transaction thinking they belong to the old one). So we use |
515 | * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially | ||
516 | * with an appropriately sized journal, but we need this to guarantee | ||
517 | * correctness. Fortunately cleanup_journal_tail() doesn't get called | ||
518 | * all that often. | ||
515 | */ | 519 | */ |
516 | if (journal->j_flags & JFS_BARRIER) | 520 | journal_update_sb_log_tail(journal, first_tid, blocknr, |
517 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 521 | WRITE_FLUSH_FUA); |
518 | 522 | ||
519 | spin_lock(&journal->j_state_lock); | 523 | spin_lock(&journal->j_state_lock); |
520 | if (!tid_gt(first_tid, journal->j_tail_sequence)) { | ||
521 | spin_unlock(&journal->j_state_lock); | ||
522 | /* Someone else cleaned up journal so return 0 */ | ||
523 | return 0; | ||
524 | } | ||
525 | /* OK, update the superblock to recover the freed space. | 524 | /* OK, update the superblock to recover the freed space. |
526 | * Physical blocks come first: have we wrapped beyond the end of | 525 | * Physical blocks come first: have we wrapped beyond the end of |
527 | * the log? */ | 526 | * the log? */ |
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal) | |||
539 | journal->j_tail_sequence = first_tid; | 538 | journal->j_tail_sequence = first_tid; |
540 | journal->j_tail = blocknr; | 539 | journal->j_tail = blocknr; |
541 | spin_unlock(&journal->j_state_lock); | 540 | spin_unlock(&journal->j_state_lock); |
542 | if (!(journal->j_flags & JFS_ABORT)) | ||
543 | journal_update_sb_log_tail(journal); | ||
544 | return 0; | 541 | return 0; |
545 | } | 542 | } |
546 | 543 | ||
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 1b27f46e6108..52c15c776029 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -309,7 +309,14 @@ void journal_commit_transaction(journal_t *journal) | |||
309 | if (journal->j_flags & JFS_FLUSHED) { | 309 | if (journal->j_flags & JFS_FLUSHED) { |
310 | jbd_debug(3, "super block updated\n"); | 310 | jbd_debug(3, "super block updated\n"); |
311 | mutex_lock(&journal->j_checkpoint_mutex); | 311 | mutex_lock(&journal->j_checkpoint_mutex); |
312 | journal_update_sb_log_tail(journal); | 312 | /* |
313 | * We hold j_checkpoint_mutex so tail cannot change under us. | ||
314 | * We don't need any special data guarantees for writing sb | ||
315 | * since journal is empty and it is ok for write to be | ||
316 | * flushed only with transaction commit. | ||
317 | */ | ||
318 | journal_update_sb_log_tail(journal, journal->j_tail_sequence, | ||
319 | journal->j_tail, WRITE_SYNC); | ||
313 | mutex_unlock(&journal->j_checkpoint_mutex); | 320 | mutex_unlock(&journal->j_checkpoint_mutex); |
314 | } else { | 321 | } else { |
315 | jbd_debug(3, "superblock not updated\n"); | 322 | jbd_debug(3, "superblock not updated\n"); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b29c7678525d..425c2f2cf170 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -938,8 +938,16 @@ static int journal_reset(journal_t *journal) | |||
938 | } else { | 938 | } else { |
939 | /* Lock here to make assertions happy... */ | 939 | /* Lock here to make assertions happy... */ |
940 | mutex_lock(&journal->j_checkpoint_mutex); | 940 | mutex_lock(&journal->j_checkpoint_mutex); |
941 | /* Add the dynamic fields and write it to disk. */ | 941 | /* |
942 | journal_update_sb_log_tail(journal); | 942 | * Update log tail information. We use WRITE_FUA since new |
943 | * transaction will start reusing journal space and so we | ||
944 | * must make sure information about current log tail is on | ||
945 | * disk before that. | ||
946 | */ | ||
947 | journal_update_sb_log_tail(journal, | ||
948 | journal->j_tail_sequence, | ||
949 | journal->j_tail, | ||
950 | WRITE_FUA); | ||
943 | mutex_unlock(&journal->j_checkpoint_mutex); | 951 | mutex_unlock(&journal->j_checkpoint_mutex); |
944 | } | 952 | } |
945 | return journal_start_thread(journal); | 953 | return journal_start_thread(journal); |
@@ -1018,11 +1026,15 @@ int journal_create(journal_t *journal) | |||
1018 | return journal_reset(journal); | 1026 | return journal_reset(journal); |
1019 | } | 1027 | } |
1020 | 1028 | ||
1021 | static void journal_write_superblock(journal_t *journal) | 1029 | static void journal_write_superblock(journal_t *journal, int write_op) |
1022 | { | 1030 | { |
1023 | struct buffer_head *bh = journal->j_sb_buffer; | 1031 | struct buffer_head *bh = journal->j_sb_buffer; |
1032 | int ret; | ||
1024 | 1033 | ||
1025 | trace_journal_write_superblock(journal); | 1034 | trace_journal_write_superblock(journal, write_op); |
1035 | if (!(journal->j_flags & JFS_BARRIER)) | ||
1036 | write_op &= ~(REQ_FUA | REQ_FLUSH); | ||
1037 | lock_buffer(bh); | ||
1026 | if (buffer_write_io_error(bh)) { | 1038 | if (buffer_write_io_error(bh)) { |
1027 | char b[BDEVNAME_SIZE]; | 1039 | char b[BDEVNAME_SIZE]; |
1028 | /* | 1040 | /* |
@@ -1040,40 +1052,46 @@ static void journal_write_superblock(journal_t *journal) | |||
1040 | set_buffer_uptodate(bh); | 1052 | set_buffer_uptodate(bh); |
1041 | } | 1053 | } |
1042 | 1054 | ||
1043 | BUFFER_TRACE(bh, "marking dirty"); | 1055 | get_bh(bh); |
1044 | mark_buffer_dirty(bh); | 1056 | bh->b_end_io = end_buffer_write_sync; |
1045 | sync_dirty_buffer(bh); | 1057 | ret = submit_bh(write_op, bh); |
1058 | wait_on_buffer(bh); | ||
1046 | if (buffer_write_io_error(bh)) { | 1059 | if (buffer_write_io_error(bh)) { |
1047 | char b[BDEVNAME_SIZE]; | ||
1048 | printk(KERN_ERR "JBD: I/O error detected " | ||
1049 | "when updating journal superblock for %s.\n", | ||
1050 | journal_dev_name(journal, b)); | ||
1051 | clear_buffer_write_io_error(bh); | 1060 | clear_buffer_write_io_error(bh); |
1052 | set_buffer_uptodate(bh); | 1061 | set_buffer_uptodate(bh); |
1062 | ret = -EIO; | ||
1063 | } | ||
1064 | if (ret) { | ||
1065 | char b[BDEVNAME_SIZE]; | ||
1066 | printk(KERN_ERR "JBD: Error %d detected " | ||
1067 | "when updating journal superblock for %s.\n", | ||
1068 | ret, journal_dev_name(journal, b)); | ||
1053 | } | 1069 | } |
1054 | } | 1070 | } |
1055 | 1071 | ||
1056 | /** | 1072 | /** |
1057 | * journal_update_sb_log_tail() - Update log tail in journal sb on disk. | 1073 | * journal_update_sb_log_tail() - Update log tail in journal sb on disk. |
1058 | * @journal: The journal to update. | 1074 | * @journal: The journal to update. |
1075 | * @tail_tid: TID of the new transaction at the tail of the log | ||
1076 | * @tail_block: The first block of the transaction at the tail of the log | ||
1077 | * @write_op: With which operation should we write the journal sb | ||
1059 | * | 1078 | * |
1060 | * Update a journal's superblock information about log tail and write it to | 1079 | * Update a journal's superblock information about log tail and write it to |
1061 | * disk, waiting for the IO to complete. | 1080 | * disk, waiting for the IO to complete. |
1062 | */ | 1081 | */ |
1063 | void journal_update_sb_log_tail(journal_t *journal) | 1082 | void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, |
1083 | unsigned int tail_block, int write_op) | ||
1064 | { | 1084 | { |
1065 | journal_superblock_t *sb = journal->j_superblock; | 1085 | journal_superblock_t *sb = journal->j_superblock; |
1066 | 1086 | ||
1067 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | 1087 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); |
1068 | spin_lock(&journal->j_state_lock); | 1088 | jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n", |
1069 | jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", | 1089 | tail_block, tail_tid); |
1070 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | ||
1071 | 1090 | ||
1072 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1091 | sb->s_sequence = cpu_to_be32(tail_tid); |
1073 | sb->s_start = cpu_to_be32(journal->j_tail); | 1092 | sb->s_start = cpu_to_be32(tail_block); |
1074 | spin_unlock(&journal->j_state_lock); | ||
1075 | 1093 | ||
1076 | journal_write_superblock(journal); | 1094 | journal_write_superblock(journal, write_op); |
1077 | 1095 | ||
1078 | /* Log is no longer empty */ | 1096 | /* Log is no longer empty */ |
1079 | spin_lock(&journal->j_state_lock); | 1097 | spin_lock(&journal->j_state_lock); |
@@ -1102,7 +1120,7 @@ static void mark_journal_empty(journal_t *journal) | |||
1102 | sb->s_start = cpu_to_be32(0); | 1120 | sb->s_start = cpu_to_be32(0); |
1103 | spin_unlock(&journal->j_state_lock); | 1121 | spin_unlock(&journal->j_state_lock); |
1104 | 1122 | ||
1105 | journal_write_superblock(journal); | 1123 | journal_write_superblock(journal, WRITE_FUA); |
1106 | 1124 | ||
1107 | spin_lock(&journal->j_state_lock); | 1125 | spin_lock(&journal->j_state_lock); |
1108 | /* Log is empty */ | 1126 | /* Log is empty */ |
@@ -1127,7 +1145,7 @@ static void journal_update_sb_errno(journal_t *journal) | |||
1127 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1145 | sb->s_errno = cpu_to_be32(journal->j_errno); |
1128 | spin_unlock(&journal->j_state_lock); | 1146 | spin_unlock(&journal->j_state_lock); |
1129 | 1147 | ||
1130 | journal_write_superblock(journal); | 1148 | journal_write_superblock(journal, WRITE_SYNC); |
1131 | } | 1149 | } |
1132 | 1150 | ||
1133 | /* | 1151 | /* |
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 9716d370c501..c8f32975f0e4 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
@@ -864,7 +864,8 @@ extern int journal_destroy (journal_t *); | |||
864 | extern int journal_recover (journal_t *journal); | 864 | extern int journal_recover (journal_t *journal); |
865 | extern int journal_wipe (journal_t *, int); | 865 | extern int journal_wipe (journal_t *, int); |
866 | extern int journal_skip_recovery (journal_t *); | 866 | extern int journal_skip_recovery (journal_t *); |
867 | extern void journal_update_sb_log_tail (journal_t *); | 867 | extern void journal_update_sb_log_tail (journal_t *, tid_t, unsigned int, |
868 | int); | ||
868 | extern void journal_abort (journal_t *, int); | 869 | extern void journal_abort (journal_t *, int); |
869 | extern int journal_errno (journal_t *); | 870 | extern int journal_errno (journal_t *); |
870 | extern void journal_ack_err (journal_t *); | 871 | extern void journal_ack_err (journal_t *); |
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h index d9658a940a39..da6f2591c25e 100644 --- a/include/trace/events/jbd.h +++ b/include/trace/events/jbd.h | |||
@@ -170,19 +170,22 @@ TRACE_EVENT(jbd_cleanup_journal_tail, | |||
170 | ); | 170 | ); |
171 | 171 | ||
172 | TRACE_EVENT(journal_write_superblock, | 172 | TRACE_EVENT(journal_write_superblock, |
173 | TP_PROTO(journal_t *journal), | 173 | TP_PROTO(journal_t *journal, int write_op), |
174 | 174 | ||
175 | TP_ARGS(journal), | 175 | TP_ARGS(journal, write_op), |
176 | 176 | ||
177 | TP_STRUCT__entry( | 177 | TP_STRUCT__entry( |
178 | __field( dev_t, dev ) | 178 | __field( dev_t, dev ) |
179 | __field( int, write_op ) | ||
179 | ), | 180 | ), |
180 | 181 | ||
181 | TP_fast_assign( | 182 | TP_fast_assign( |
182 | __entry->dev = journal->j_fs_dev->bd_dev; | 183 | __entry->dev = journal->j_fs_dev->bd_dev; |
184 | __entry->write_op = write_op; | ||
183 | ), | 185 | ), |
184 | 186 | ||
185 | TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) | 187 | TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), |
188 | MINOR(__entry->dev), __entry->write_op) | ||
186 | ); | 189 | ); |
187 | 190 | ||
188 | #endif /* _TRACE_JBD_H */ | 191 | #endif /* _TRACE_JBD_H */ |