aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/jbd2/checkpoint.c75
-rw-r--r--fs/jbd2/commit.c11
-rw-r--r--fs/jbd2/journal.c138
-rw-r--r--fs/jbd2/recovery.c5
-rw-r--r--include/linux/jbd2.h6
-rw-r--r--include/trace/events/jbd2.h11
6 files changed, 155 insertions, 91 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 19dcd0b86bca..7f7ee5b90402 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -478,79 +478,28 @@ out:
478 478
479int jbd2_cleanup_journal_tail(journal_t *journal) 479int jbd2_cleanup_journal_tail(journal_t *journal)
480{ 480{
481 transaction_t * transaction;
482 tid_t first_tid; 481 tid_t first_tid;
483 unsigned long blocknr, freed; 482 unsigned long blocknr;
484 483
485 if (is_journal_aborted(journal)) 484 if (is_journal_aborted(journal))
486 return 1; 485 return 1;
487 486
488 /* OK, work out the oldest transaction remaining in the log, and 487 if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
489 * the log block it starts at.
490 *
491 * If the log is now empty, we need to work out which is the
492 * next transaction ID we will write, and where it will
493 * start. */
494
495 write_lock(&journal->j_state_lock);
496 spin_lock(&journal->j_list_lock);
497 transaction = journal->j_checkpoint_transactions;
498 if (transaction) {
499 first_tid = transaction->t_tid;
500 blocknr = transaction->t_log_start;
501 } else if ((transaction = journal->j_committing_transaction) != NULL) {
502 first_tid = transaction->t_tid;
503 blocknr = transaction->t_log_start;
504 } else if ((transaction = journal->j_running_transaction) != NULL) {
505 first_tid = transaction->t_tid;
506 blocknr = journal->j_head;
507 } else {
508 first_tid = journal->j_transaction_sequence;
509 blocknr = journal->j_head;
510 }
511 spin_unlock(&journal->j_list_lock);
512 J_ASSERT(blocknr != 0);
513
514 /* If the oldest pinned transaction is at the tail of the log
515 already then there's not much we can do right now. */
516 if (journal->j_tail_sequence == first_tid) {
517 write_unlock(&journal->j_state_lock);
518 return 1; 488 return 1;
519 } 489 J_ASSERT(blocknr != 0);
520
521 /* OK, update the superblock to recover the freed space.
522 * Physical blocks come first: have we wrapped beyond the end of
523 * the log? */
524 freed = blocknr - journal->j_tail;
525 if (blocknr < journal->j_tail)
526 freed = freed + journal->j_last - journal->j_first;
527
528 trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
529 jbd_debug(1,
530 "Cleaning journal tail from %d to %d (offset %lu), "
531 "freeing %lu\n",
532 journal->j_tail_sequence, first_tid, blocknr, freed);
533
534 journal->j_free += freed;
535 journal->j_tail_sequence = first_tid;
536 journal->j_tail = blocknr;
537 write_unlock(&journal->j_state_lock);
538 490
539 /* 491 /*
540 * If there is an external journal, we need to make sure that 492 * We need to make sure that any blocks that were recently written out
541 * any data blocks that were recently written out --- perhaps 493 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
542 * by jbd2_log_do_checkpoint() --- are flushed out before we 494 * we drop the transactions from the journal. It's unlikely this will
543 * drop the transactions from the external journal. It's 495 * be necessary, especially with an appropriately sized journal, but we
544 * unlikely this will be necessary, especially with a 496 * need this to guarantee correctness. Fortunately
545 * appropriately sized journal, but we need this to guarantee 497 * jbd2_cleanup_journal_tail() doesn't get called all that often.
546 * correctness. Fortunately jbd2_cleanup_journal_tail()
547 * doesn't get called all that often.
548 */ 498 */
549 if ((journal->j_fs_dev != journal->j_dev) && 499 if (journal->j_flags & JBD2_BARRIER)
550 (journal->j_flags & JBD2_BARRIER))
551 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 500 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
552 if (!(journal->j_flags & JBD2_ABORT)) 501
553 jbd2_journal_update_sb_log_tail(journal); 502 __jbd2_update_log_tail(journal, first_tid, blocknr);
554 return 0; 503 return 0;
555} 504}
556 505
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6705717d9b7f..b89ef84786a7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -341,7 +341,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
341 if (journal->j_flags & JBD2_FLUSHED) { 341 if (journal->j_flags & JBD2_FLUSHED) {
342 jbd_debug(3, "super block updated\n"); 342 jbd_debug(3, "super block updated\n");
343 mutex_lock(&journal->j_checkpoint_mutex); 343 mutex_lock(&journal->j_checkpoint_mutex);
344 jbd2_journal_update_sb_log_tail(journal); 344 /*
345 * We hold j_checkpoint_mutex so tail cannot change under us.
346 * We don't need any special data guarantees for writing sb
347 * since journal is empty and it is ok for write to be
348 * flushed only with transaction commit.
349 */
350 jbd2_journal_update_sb_log_tail(journal,
351 journal->j_tail_sequence,
352 journal->j_tail,
353 WRITE_SYNC);
345 mutex_unlock(&journal->j_checkpoint_mutex); 354 mutex_unlock(&journal->j_checkpoint_mutex);
346 } else { 355 } else {
347 jbd_debug(3, "superblock not updated\n"); 356 jbd_debug(3, "superblock not updated\n");
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fc5f2acc9f18..c5ff177400ff 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -742,6 +742,85 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
742 return jbd2_journal_add_journal_head(bh); 742 return jbd2_journal_add_journal_head(bh);
743} 743}
744 744
745/*
746 * Return the tid of the oldest transaction in the journal and the journal
747 * block where that transaction starts.
748 *
749 * If the journal is now empty, return the ID of the next transaction we will
750 * write and the block where that transaction will start.
751 *
752 * The return value is 0 if the journal tail cannot be pushed any further, 1 if
753 * it can.
754 */
755int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
756 unsigned long *block)
757{
758 transaction_t *transaction;
759 int ret;
760
761 read_lock(&journal->j_state_lock);
762 spin_lock(&journal->j_list_lock);
763 transaction = journal->j_checkpoint_transactions;
764 if (transaction) {
765 *tid = transaction->t_tid;
766 *block = transaction->t_log_start;
767 } else if ((transaction = journal->j_committing_transaction) != NULL) {
768 *tid = transaction->t_tid;
769 *block = transaction->t_log_start;
770 } else if ((transaction = journal->j_running_transaction) != NULL) {
771 *tid = transaction->t_tid;
772 *block = journal->j_head;
773 } else {
774 *tid = journal->j_transaction_sequence;
775 *block = journal->j_head;
776 }
777 ret = tid_gt(*tid, journal->j_tail_sequence);
778 spin_unlock(&journal->j_list_lock);
779 read_unlock(&journal->j_state_lock);
780
781 return ret;
782}
783
784/*
785 * Update the log tail information in the journal structure and in the on-disk
786 * journal superblock. This function does not check whether the information
787 * passed in really pushes the log tail further. It is the responsibility of
788 * the caller to make sure the provided log tail information is valid (e.g. by
789 * holding j_checkpoint_mutex all the time between computing the log tail and
790 * calling this function, as is the case with jbd2_cleanup_journal_tail()).
791 *
792 * Requires j_checkpoint_mutex.
793 */
794void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
795{
796 unsigned long freed;
797
798 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
799
800 /*
801 * We cannot afford for the write to remain in the drive's caches: as
802 * soon as we update j_tail, the next transaction can start reusing
803 * journal space, and if we lose the sb update during a power failure
804 * we'd replay an old transaction with possibly newly overwritten data.
805 */
806 jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
807 write_lock(&journal->j_state_lock);
808 freed = block - journal->j_tail;
809 if (block < journal->j_tail)
810 freed += journal->j_last - journal->j_first;
811
812 trace_jbd2_update_log_tail(journal, tid, block, freed);
813 jbd_debug(1,
814 "Cleaning journal tail from %d to %d (offset %lu), "
815 "freeing %lu\n",
816 journal->j_tail_sequence, tid, block, freed);
817
818 journal->j_free += freed;
819 journal->j_tail_sequence = tid;
820 journal->j_tail = block;
821 write_unlock(&journal->j_state_lock);
822}
823
745struct jbd2_stats_proc_session { 824struct jbd2_stats_proc_session {
746 journal_t *journal; 825 journal_t *journal;
747 struct transaction_stats_s *stats; 826 struct transaction_stats_s *stats;
@@ -1125,18 +1204,30 @@ static int journal_reset(journal_t *journal)
1125 } else { 1204 } else {
1126 /* Lock here to make assertions happy... */ 1205 /* Lock here to make assertions happy... */
1127 mutex_lock(&journal->j_checkpoint_mutex); 1206 mutex_lock(&journal->j_checkpoint_mutex);
1128 /* Add the dynamic fields and write it to disk. */ 1207 /*
1129 jbd2_journal_update_sb_log_tail(journal); 1208 * Update log tail information. We use WRITE_FUA since new
1209 * transaction will start reusing journal space and so we
1210 * must make sure information about current log tail is on
1211 * disk before that.
1212 */
1213 jbd2_journal_update_sb_log_tail(journal,
1214 journal->j_tail_sequence,
1215 journal->j_tail,
1216 WRITE_FUA);
1130 mutex_unlock(&journal->j_checkpoint_mutex); 1217 mutex_unlock(&journal->j_checkpoint_mutex);
1131 } 1218 }
1132 return jbd2_journal_start_thread(journal); 1219 return jbd2_journal_start_thread(journal);
1133} 1220}
1134 1221
1135static void jbd2_write_superblock(journal_t *journal) 1222static void jbd2_write_superblock(journal_t *journal, int write_op)
1136{ 1223{
1137 struct buffer_head *bh = journal->j_sb_buffer; 1224 struct buffer_head *bh = journal->j_sb_buffer;
1225 int ret;
1138 1226
1139 trace_jbd2_write_superblock(journal); 1227 trace_jbd2_write_superblock(journal, write_op);
1228 if (!(journal->j_flags & JBD2_BARRIER))
1229 write_op &= ~(REQ_FUA | REQ_FLUSH);
1230 lock_buffer(bh);
1140 if (buffer_write_io_error(bh)) { 1231 if (buffer_write_io_error(bh)) {
1141 /* 1232 /*
1142 * Oh, dear. A previous attempt to write the journal 1233 * Oh, dear. A previous attempt to write the journal
@@ -1152,40 +1243,45 @@ static void jbd2_write_superblock(journal_t *journal)
1152 clear_buffer_write_io_error(bh); 1243 clear_buffer_write_io_error(bh);
1153 set_buffer_uptodate(bh); 1244 set_buffer_uptodate(bh);
1154 } 1245 }
1155 1246 get_bh(bh);
1156 BUFFER_TRACE(bh, "marking dirty"); 1247 bh->b_end_io = end_buffer_write_sync;
1157 mark_buffer_dirty(bh); 1248 ret = submit_bh(write_op, bh);
1158 sync_dirty_buffer(bh); 1249 wait_on_buffer(bh);
1159 if (buffer_write_io_error(bh)) { 1250 if (buffer_write_io_error(bh)) {
1160 printk(KERN_ERR "JBD2: I/O error detected "
1161 "when updating journal superblock for %s.\n",
1162 journal->j_devname);
1163 clear_buffer_write_io_error(bh); 1251 clear_buffer_write_io_error(bh);
1164 set_buffer_uptodate(bh); 1252 set_buffer_uptodate(bh);
1253 ret = -EIO;
1254 }
1255 if (ret) {
1256 printk(KERN_ERR "JBD2: Error %d detected when updating "
1257 "journal superblock for %s.\n", ret,
1258 journal->j_devname);
1165 } 1259 }
1166} 1260}
1167 1261
1168/** 1262/**
1169 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1263 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1170 * @journal: The journal to update. 1264 * @journal: The journal to update.
1265 * @tail_tid: TID of the new transaction at the tail of the log
1266 * @tail_block: The first block of the transaction at the tail of the log
1267 * @write_op: With which operation should we write the journal sb
1171 * 1268 *
1172 * Update a journal's superblock information about log tail and write it to 1269 * Update a journal's superblock information about log tail and write it to
1173 * disk, waiting for the IO to complete. 1270 * disk, waiting for the IO to complete.
1174 */ 1271 */
1175void jbd2_journal_update_sb_log_tail(journal_t *journal) 1272void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1273 unsigned long tail_block, int write_op)
1176{ 1274{
1177 journal_superblock_t *sb = journal->j_superblock; 1275 journal_superblock_t *sb = journal->j_superblock;
1178 1276
1179 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1277 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1180 read_lock(&journal->j_state_lock); 1278 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1181 jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d)\n", 1279 tail_block, tail_tid);
1182 journal->j_tail, journal->j_tail_sequence);
1183 1280
1184 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1281 sb->s_sequence = cpu_to_be32(tail_tid);
1185 sb->s_start = cpu_to_be32(journal->j_tail); 1282 sb->s_start = cpu_to_be32(tail_block);
1186 read_unlock(&journal->j_state_lock);
1187 1283
1188 jbd2_write_superblock(journal); 1284 jbd2_write_superblock(journal, write_op);
1189 1285
1190 /* Log is no longer empty */ 1286 /* Log is no longer empty */
1191 write_lock(&journal->j_state_lock); 1287 write_lock(&journal->j_state_lock);
@@ -1214,7 +1310,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1214 sb->s_start = cpu_to_be32(0); 1310 sb->s_start = cpu_to_be32(0);
1215 read_unlock(&journal->j_state_lock); 1311 read_unlock(&journal->j_state_lock);
1216 1312
1217 jbd2_write_superblock(journal); 1313 jbd2_write_superblock(journal, WRITE_FUA);
1218 1314
1219 /* Log is no longer empty */ 1315 /* Log is no longer empty */
1220 write_lock(&journal->j_state_lock); 1316 write_lock(&journal->j_state_lock);
@@ -1240,7 +1336,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
1240 sb->s_errno = cpu_to_be32(journal->j_errno); 1336 sb->s_errno = cpu_to_be32(journal->j_errno);
1241 read_unlock(&journal->j_state_lock); 1337 read_unlock(&journal->j_state_lock);
1242 1338
1243 jbd2_write_superblock(journal); 1339 jbd2_write_superblock(journal, WRITE_SYNC);
1244} 1340}
1245 1341
1246/* 1342/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf1390..c1a03354a22f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/crc32.h> 23#include <linux/crc32.h>
24#include <linux/blkdev.h>
24#endif 25#endif
25 26
26/* 27/*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
265 err2 = sync_blockdev(journal->j_fs_dev); 266 err2 = sync_blockdev(journal->j_fs_dev);
266 if (!err) 267 if (!err)
267 err = err2; 268 err = err2;
268 269 /* Make sure all replayed data is on permanent storage */
270 if (journal->j_flags & JBD2_BARRIER)
271 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
269 return err; 272 return err;
270} 273}
271 274
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 5f05c77438e5..876a7d87192b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -971,6 +971,9 @@ extern void __journal_clean_data_list(transaction_t *transaction);
971/* Log buffer allocation */ 971/* Log buffer allocation */
972extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); 972extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *);
973int jbd2_journal_next_log_block(journal_t *, unsigned long long *); 973int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
974int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
975 unsigned long *block);
976void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
974 977
975/* Commit management */ 978/* Commit management */
976extern void jbd2_journal_commit_transaction(journal_t *); 979extern void jbd2_journal_commit_transaction(journal_t *);
@@ -1087,7 +1090,8 @@ extern int jbd2_journal_destroy (journal_t *);
1087extern int jbd2_journal_recover (journal_t *journal); 1090extern int jbd2_journal_recover (journal_t *journal);
1088extern int jbd2_journal_wipe (journal_t *, int); 1091extern int jbd2_journal_wipe (journal_t *, int);
1089extern int jbd2_journal_skip_recovery (journal_t *); 1092extern int jbd2_journal_skip_recovery (journal_t *);
1090extern void jbd2_journal_update_sb_log_tail (journal_t *); 1093extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
1094 unsigned long, int);
1091extern void __jbd2_journal_abort_hard (journal_t *); 1095extern void __jbd2_journal_abort_hard (journal_t *);
1092extern void jbd2_journal_abort (journal_t *, int); 1096extern void jbd2_journal_abort (journal_t *, int);
1093extern int jbd2_journal_errno (journal_t *); 1097extern int jbd2_journal_errno (journal_t *);
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index e05a362bf3f1..127993dbf322 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -207,7 +207,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
207 __entry->forced_to_close, __entry->written, __entry->dropped) 207 __entry->forced_to_close, __entry->written, __entry->dropped)
208); 208);
209 209
210TRACE_EVENT(jbd2_cleanup_journal_tail, 210TRACE_EVENT(jbd2_update_log_tail,
211 211
212 TP_PROTO(journal_t *journal, tid_t first_tid, 212 TP_PROTO(journal_t *journal, tid_t first_tid,
213 unsigned long block_nr, unsigned long freed), 213 unsigned long block_nr, unsigned long freed),
@@ -238,19 +238,22 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
238 238
239TRACE_EVENT(jbd2_write_superblock, 239TRACE_EVENT(jbd2_write_superblock,
240 240
241 TP_PROTO(journal_t *journal), 241 TP_PROTO(journal_t *journal, int write_op),
242 242
243 TP_ARGS(journal), 243 TP_ARGS(journal, write_op),
244 244
245 TP_STRUCT__entry( 245 TP_STRUCT__entry(
246 __field( dev_t, dev ) 246 __field( dev_t, dev )
247 __field( int, write_op )
247 ), 248 ),
248 249
249 TP_fast_assign( 250 TP_fast_assign(
250 __entry->dev = journal->j_fs_dev->bd_dev; 251 __entry->dev = journal->j_fs_dev->bd_dev;
252 __entry->write_op = write_op;
251 ), 253 ),
252 254
253 TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) 255 TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev),
256 MINOR(__entry->dev), __entry->write_op)
254); 257);
255 258
256#endif /* _TRACE_JBD2_H */ 259#endif /* _TRACE_JBD2_H */