diff options
author | Jan Kara <jack@suse.cz> | 2012-03-13 22:22:54 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-03-13 22:22:54 -0400 |
commit | 79feb521a44705262d15cc819a4117a447b11ea7 (patch) | |
tree | a4de6ed084b7a68c0885049d94841ce8334b64a7 /include | |
parent | a78bb11d7acd525623c6a0c2ff4e213d527573fa (diff) |
jbd2: issue cache flush after checkpointing even with internal journal
When we reach jbd2_cleanup_journal_tail(), there is no guarantee that
checkpointed buffers are on stable storage - especially if the buffers were
written out by jbd2_log_do_checkpoint(), they are likely to be only in the disk's
caches. Thus when we update the journal superblock, effectively removing the old
transaction from the journal, this write of the superblock can get to stable storage
before those checkpointed buffers, which can result in filesystem corruption
after a crash. Thus we must unconditionally issue a cache flush before we
update the journal superblock in these cases.
A similar problem can also occur if the journal superblock is written only to the
disk's caches, another transaction starts reusing the space of the transaction
cleaned from the log, and a power failure happens. Subsequent journal replay would
still try to replay the old transaction, but some of its blocks may already be
overwritten by the new transaction. For this reason we must use WRITE_FUA when
updating the log tail, and we must first write the new log tail to disk and update
the in-memory information only after that.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/jbd2.h | 6 | ||||
-rw-r--r-- | include/trace/events/jbd2.h | 11 |
2 files changed, 12 insertions, 5 deletions
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f05c77438e5..876a7d87192b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -971,6 +971,9 @@ extern void __journal_clean_data_list(transaction_t *transaction); | |||
971 | /* Log buffer allocation */ | 971 | /* Log buffer allocation */ |
972 | extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); | 972 | extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); |
973 | int jbd2_journal_next_log_block(journal_t *, unsigned long long *); | 973 | int jbd2_journal_next_log_block(journal_t *, unsigned long long *); |
974 | int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, | ||
975 | unsigned long *block); | ||
976 | void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); | ||
974 | 977 | ||
975 | /* Commit management */ | 978 | /* Commit management */ |
976 | extern void jbd2_journal_commit_transaction(journal_t *); | 979 | extern void jbd2_journal_commit_transaction(journal_t *); |
@@ -1087,7 +1090,8 @@ extern int jbd2_journal_destroy (journal_t *); | |||
1087 | extern int jbd2_journal_recover (journal_t *journal); | 1090 | extern int jbd2_journal_recover (journal_t *journal); |
1088 | extern int jbd2_journal_wipe (journal_t *, int); | 1091 | extern int jbd2_journal_wipe (journal_t *, int); |
1089 | extern int jbd2_journal_skip_recovery (journal_t *); | 1092 | extern int jbd2_journal_skip_recovery (journal_t *); |
1090 | extern void jbd2_journal_update_sb_log_tail (journal_t *); | 1093 | extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, |
1094 | unsigned long, int); | ||
1091 | extern void __jbd2_journal_abort_hard (journal_t *); | 1095 | extern void __jbd2_journal_abort_hard (journal_t *); |
1092 | extern void jbd2_journal_abort (journal_t *, int); | 1096 | extern void jbd2_journal_abort (journal_t *, int); |
1093 | extern int jbd2_journal_errno (journal_t *); | 1097 | extern int jbd2_journal_errno (journal_t *); |
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index e05a362bf3f1..127993dbf322 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h | |||
@@ -207,7 +207,7 @@ TRACE_EVENT(jbd2_checkpoint_stats, | |||
207 | __entry->forced_to_close, __entry->written, __entry->dropped) | 207 | __entry->forced_to_close, __entry->written, __entry->dropped) |
208 | ); | 208 | ); |
209 | 209 | ||
210 | TRACE_EVENT(jbd2_cleanup_journal_tail, | 210 | TRACE_EVENT(jbd2_update_log_tail, |
211 | 211 | ||
212 | TP_PROTO(journal_t *journal, tid_t first_tid, | 212 | TP_PROTO(journal_t *journal, tid_t first_tid, |
213 | unsigned long block_nr, unsigned long freed), | 213 | unsigned long block_nr, unsigned long freed), |
@@ -238,19 +238,22 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, | |||
238 | 238 | ||
239 | TRACE_EVENT(jbd2_write_superblock, | 239 | TRACE_EVENT(jbd2_write_superblock, |
240 | 240 | ||
241 | TP_PROTO(journal_t *journal), | 241 | TP_PROTO(journal_t *journal, int write_op), |
242 | 242 | ||
243 | TP_ARGS(journal), | 243 | TP_ARGS(journal, write_op), |
244 | 244 | ||
245 | TP_STRUCT__entry( | 245 | TP_STRUCT__entry( |
246 | __field( dev_t, dev ) | 246 | __field( dev_t, dev ) |
247 | __field( int, write_op ) | ||
247 | ), | 248 | ), |
248 | 249 | ||
249 | TP_fast_assign( | 250 | TP_fast_assign( |
250 | __entry->dev = journal->j_fs_dev->bd_dev; | 251 | __entry->dev = journal->j_fs_dev->bd_dev; |
252 | __entry->write_op = write_op; | ||
251 | ), | 253 | ), |
252 | 254 | ||
253 | TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) | 255 | TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), |
256 | MINOR(__entry->dev), __entry->write_op) | ||
254 | ); | 257 | ); |
255 | 258 | ||
256 | #endif /* _TRACE_JBD2_H */ | 259 | #endif /* _TRACE_JBD2_H */ |