Diffstat (limited to 'fs/jbd')
-rw-r--r--   fs/jbd/checkpoint.c  |  33
-rw-r--r--   fs/jbd/commit.c      |   2
-rw-r--r--   fs/jbd/journal.c     | 106
-rw-r--r--   fs/jbd/recovery.c    |  58
-rw-r--r--   fs/jbd/revoke.c      |  74
-rw-r--r--   fs/jbd/transaction.c | 160
6 files changed, 222 insertions(+), 211 deletions(-)
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 47678a26c13b..0208cc7ac5d0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/checkpoint.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  *
  * Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
  *
  * Checkpoint routines for the generic filesystem journaling code.
  * Part of the ext2fs journaling system.
  *
  * Checkpointing is the process of ensuring that a section of the log is
  * committed fully to disk, so that that portion of the log can be
@@ -145,6 +145,7 @@ void __log_wait_for_space(journal_t *journal)
  * jbd_unlock_bh_state().
  */
 static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
+	__releases(journal->j_list_lock)
 {
 	get_bh(bh);
 	spin_unlock(&journal->j_list_lock);
@@ -225,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Try to flush one buffer from the checkpoint list to disk.
  *
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.
  *
  * Called with j_list_lock held and drops it if 1 is returned
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -269,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 	 * possibly block, while still holding the journal lock.
 	 * We cannot afford to let the transaction logic start
 	 * messing around with this buffer before we write it to
 	 * disk, as that would break recoverability.
 	 */
 	BUFFER_TRACE(bh, "queue");
 	get_bh(bh);
@@ -292,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  * Perform an actual checkpoint. We take the first transaction on the
  * list of transactions to be checkpointed and send all its buffers
  * to disk. We submit larger chunks of data at once.
  *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
@@ -303,10 +304,10 @@ int log_do_checkpoint(journal_t *journal)
 
 	jbd_debug(1, "Start checkpoint\n");
 
 	/*
 	 * First thing: if there are any transactions in the log which
 	 * don't need checkpointing, just eliminate them from the
 	 * journal straight away.
 	 */
 	result = cleanup_journal_tail(journal);
 	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -384,9 +385,9 @@ out:
  * we have already got rid of any since the last update of the log tail
  * in the journal superblock. If so, we can instantly roll the
  * superblock forward to remove those transactions from the log.
  *
  * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
  *
  * Called with the journal lock held.
  *
  * This is the only part of the journaling code which really needs to be
@@ -403,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal)
 	unsigned long blocknr, freed;
 
 	/* OK, work out the oldest transaction remaining in the log, and
 	 * the log block it starts at.
 	 *
 	 * If the log is now empty, we need to work out which is the
 	 * next transaction ID we will write, and where it will
 	 * start. */
@@ -479,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 	if (!jh)
 		return 0;
 
 	last_jh = jh->b_cpprev;
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
@@ -557,7 +558,7 @@ out:
 	return ret;
 }
 
 /*
  * journal_remove_checkpoint: called after a buffer has been committed
  * to disk (either by being write-back flushed to disk, or being
  * committed to the log).
@@ -635,7 +636,7 @@ out:
  * Called with the journal locked.
  * Called with j_list_lock held.
  */
 void __journal_insert_checkpoint(struct journal_head *jh,
 			       transaction_t *transaction)
 {
 	JBUFFER_TRACE(jh, "entry");
@@ -657,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
 
 /*
  * We've finished with this transaction structure: adios...
  *
  * The transaction must have no links except for the checkpoint by this
  * point.
  *
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 32a8caf0c41e..10be51290a27 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/commit.c
+ * linux/fs/jbd/commit.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f66724ce443a..10fff9443938 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1,5 +1,5 @@
 /*
- * linux/fs/journal.c
+ * linux/fs/jbd/journal.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
@@ -181,7 +181,7 @@ loop:
 					transaction->t_expires))
 			should_sleep = 0;
 		if (journal->j_flags & JFS_UNMOUNT)
 			should_sleep = 0;
 		if (should_sleep) {
 			spin_unlock(&journal->j_state_lock);
 			schedule();
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
 int journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head *jh_in,
 				  struct journal_head **jh_out,
-				  int blocknr)
+				  unsigned long blocknr)
 {
 	int need_copy_out = 0;
 	int done_copy_out = 0;
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
  * this is a no-op. If needed, we can use j_blk_offset - everything is
  * ready.
  */
 int journal_bmap(journal_t *journal, unsigned long blocknr,
 		 unsigned long *retp)
 {
 	int err = 0;
@@ -696,13 +696,13 @@ fail:
  * @bdev: Block device on which to create the journal
  * @fs_dev: Device which hold journalled filesystem for this journal.
  * @start: Block nr Start of journal.
- * @len: Lenght of the journal in blocks.
+ * @len: Length of the journal in blocks.
  * @blocksize: blocksize of journalling device
  * @returns: a newly created journal_t *
  *
  * journal_init_dev creates a journal which maps a fixed contiguous
  * range of blocks on an arbitrary block device.
  *
  */
 journal_t * journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
@@ -715,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev,
 	if (!journal)
 		return NULL;
 
-	journal->j_dev = bdev;
-	journal->j_fs_dev = fs_dev;
-	journal->j_blk_offset = start;
-	journal->j_maxlen = len;
-	journal->j_blocksize = blocksize;
-
-	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-	J_ASSERT(bh != NULL);
-	journal->j_sb_buffer = bh;
-	journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
 	/* journal descriptor can store up to n blocks -bzzz */
+	journal->j_blocksize = blocksize;
 	n = journal->j_blocksize / sizeof(journal_block_tag_t);
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
@@ -735,15 +725,25 @@ journal_t * journal_init_dev(struct block_device *bdev,
 			__FUNCTION__);
 		kfree(journal);
 		journal = NULL;
+		goto out;
 	}
+	journal->j_dev = bdev;
+	journal->j_fs_dev = fs_dev;
+	journal->j_blk_offset = start;
+	journal->j_maxlen = len;
 
+	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+	J_ASSERT(bh != NULL);
+	journal->j_sb_buffer = bh;
+	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+out:
 	return journal;
 }
 
 /**
  * journal_t * journal_init_inode () - creates a journal which maps to a inode.
  * @inode: An inode to create the journal in
  *
  * journal_init_inode creates a journal which maps an on-disk inode as
  * the journal. The inode must exist already, must support bmap() and
  * must have all data blocks preallocated.
@@ -763,7 +763,7 @@ journal_t * journal_init_inode (struct inode *inode)
 	journal->j_inode = inode;
 	jbd_debug(1,
 		  "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
 		  journal, inode->i_sb->s_id, inode->i_ino,
 		  (long long) inode->i_size,
 		  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
 
@@ -798,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode)
 	return journal;
 }
 
 /*
  * If the journal init or create aborts, we need to mark the journal
  * superblock as being NULL to prevent the journal destroy from writing
  * back a bogus superblock.
  */
 static void journal_fail_superblock (journal_t *journal)
 {
@@ -820,7 +820,7 @@ static void journal_fail_superblock (journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
-	unsigned int first, last;
+	unsigned long first, last;
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
@@ -844,13 +844,13 @@ static int journal_reset(journal_t *journal)
 	return 0;
 }
 
 /**
  * int journal_create() - Initialise the new journal file
  * @journal: Journal to create. This structure must have been initialised
  *
  * Given a journal_t structure which tells us which disk blocks we can
  * use, create a new journal superblock and initialise all of the
  * journal fields from scratch.
  **/
 int journal_create(journal_t *journal)
 {
@@ -915,7 +915,7 @@ int journal_create(journal_t *journal)
 	return journal_reset(journal);
 }
 
 /**
  * void journal_update_superblock() - Update journal sb on disk.
  * @journal: The journal to update.
  * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -939,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 	    journal->j_transaction_sequence) {
 		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
 			"(start %ld, seq %d, errno %d)\n",
 			journal->j_tail, journal->j_tail_sequence,
 			journal->j_errno);
 		goto out;
 	}
@@ -1062,7 +1062,7 @@ static int load_superblock(journal_t *journal)
 /**
  * int journal_load() - Read journal from disk.
  * @journal: Journal to act on.
  *
  * Given a journal_t structure which tells us which disk blocks contain
  * a journal, read the journal from disk to initialise the in-memory
  * structures.
@@ -1094,7 +1094,7 @@ int journal_load(journal_t *journal)
 	/*
 	 * Create a slab for this blocksize
 	 */
-	err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
 	if (err)
 		return err;
 
@@ -1172,9 +1172,9 @@ void journal_destroy(journal_t *journal)
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
  * @incompat: bitmask of incompatible features
  *
  * Check whether the journal uses all of a given set of
  * features. Return true (non-zero) if it does.
  **/
 
 int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1203,7 +1203,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
  * @incompat: bitmask of incompatible features
  *
  * Check whether the journaling code supports the use of
  * all of a given set of features on this journal. Return true
  * (non-zero) if it can. */
@@ -1241,7 +1241,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
  * @incompat: bitmask of incompatible features
  *
  * Mark a given journal feature as present on the
  * superblock. Returns true if the requested features could be set.
  *
  */
 
@@ -1327,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
 /**
  * int journal_flush () - Flush journal
  * @journal: Journal to act on.
  *
  * Flush all data for a given journal to disk and empty the journal.
  * Filesystems can use this when remounting readonly to ensure that
  * recovery does not need to happen on remount.
@@ -1394,7 +1394,7 @@ int journal_flush(journal_t *journal)
  * int journal_wipe() - Wipe journal contents
  * @journal: Journal to act on.
  * @write: flag (see below)
  *
  * Wipe out all of the contents of a journal, safely. This will produce
  * a warning if the journal contains any valid recovery information.
  * Must be called between journal_init_*() and journal_load().
@@ -1449,7 +1449,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
 
 /*
  * Journal abort has very specific semantics, which we describe
  * for journal abort.
  *
  * Two internal function, which provide abort to te jbd layer
  * itself are here.
@@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  * Perform a complete, immediate shutdown of the ENTIRE
  * journal (not of a single transaction). This operation cannot be
  * undone without closing and reopening the journal.
  *
  * The journal_abort function is intended to support higher level error
  * recovery mechanisms such as the ext2/ext3 remount-readonly error
  * mode.
@@ -1538,7 +1538,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  * supply an errno; a null errno implies that absolutely no further
  * writes are done to the journal (unless there are any already in
  * progress).
  *
  */
 
 void journal_abort(journal_t *journal, int errno)
@@ -1546,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno)
 	__journal_abort_soft(journal, errno);
 }
 
 /**
  * int journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
@@ -1570,7 +1570,7 @@ int journal_errno(journal_t *journal)
 	return err;
 }
 
 /**
  * int journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
@@ -1590,7 +1590,7 @@ int journal_clear_err(journal_t *journal)
 	return err;
 }
 
 /**
  * void journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
@@ -1612,7 +1612,7 @@ int journal_blocks_per_page(struct inode *inode)
 
 /*
  * Simple support for retrying memory allocations. Introduced to help to
  * debug different VM deadlock avoidance strategies.
  */
 void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
@@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size) (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
 };
@@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr, size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *journal_head_cache;
+static struct kmem_cache *journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd_handle_cache;
+struct kmem_cache *jbd_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
@@ -2047,13 +2047,7 @@ static int __init journal_init(void)
 {
 	int ret;
 
-/* Static check for data structure consistency. There's no code
- * invoked --- we'll just get a linker failure if things aren't right.
- */
-	extern void journal_bad_superblock_size(void);
-	if (sizeof(struct journal_superblock_s) != 1024)
-		journal_bad_superblock_size();
-
+	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
 
 	ret = journal_init_caches();
 	if (ret != 0)
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index de5bafb4e853..11563fe2a52b 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/recovery.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  *
  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
  * option, any later version, incorporated herein by reference.
  *
  * Journal recovery routines for the generic filesystem journaling code;
  * part of the ext2fs journaling system.
  */
 
 #ifndef __KERNEL__
@@ -25,9 +25,9 @@
 
 /*
  * Maintain information about the progress of the recovery job, so that
  * the different passes can carry information between them.
  */
 struct recovery_info
 {
 	tid_t start_transaction;
 	tid_t end_transaction;
@@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *,
 #ifdef __KERNEL__
 
 /* Release readahead buffers after use */
-void journal_brelse_array(struct buffer_head *b[], int n)
+static void journal_brelse_array(struct buffer_head *b[], int n)
 {
 	while (--n >= 0)
 		brelse (b[n]);
@@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 	err = 0;
 
 failed:
 	if (nbufs)
 		journal_brelse_array(bufs, nbufs);
 	return err;
 }
@@ -128,7 +128,7 @@ failed:
  * Read a block from the journal
  */
 
 static int jread(struct buffer_head **bhp, journal_t *journal,
 		 unsigned int offset)
 {
 	int err;
@@ -212,14 +212,14 @@ do { \
 /**
  * journal_recover - recovers a on-disk journal
  * @journal: the journal to recover
  *
  * The primary function for recovering the log contents when mounting a
  * journaled device.
  *
  * Recovery is done in three passes. In the first pass, we look for the
  * end of the log. In the second, we assemble the list of revoke
  * blocks. In the third and final pass, we replay any un-revoked blocks
  * in the log.
  */
 int journal_recover(journal_t *journal)
 {
@@ -231,10 +231,10 @@ int journal_recover(journal_t *journal)
 	memset(&info, 0, sizeof(info));
 	sb = journal->j_superblock;
 
 	/*
 	 * The journal superblock's s_start field (the current log head)
 	 * is always zero if, and only if, the journal was cleanly
 	 * unmounted.
 	 */
 
 	if (!sb->s_start) {
@@ -253,7 +253,7 @@ int journal_recover(journal_t *journal)
 	jbd_debug(0, "JBD: recovery, exit status %d, "
 		  "recovered transactions %u to %u\n",
 		  err, info.start_transaction, info.end_transaction);
 	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
 	/* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@ int journal_recover(journal_t *journal)
 /**
  * journal_skip_recovery - Start journal and wipe exiting records
  * @journal: journal to startup
  *
  * Locate any valid recovery information from the journal and set up the
  * journal structures in memory to ignore it (presumably because the
  * caller has evidence that it is out of date).
  * This function does'nt appear to be exorted..
  *
  * We perform one pass over the journal to allow us to tell the user how
  * much recovery information is being erased, and to let us initialise
  * the journal transaction sequence numbers to the next unused ID.
  */
 int journal_skip_recovery(journal_t *journal)
 {
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
 		int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
 #endif
 		jbd_debug(0,
 			  "JBD: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
 		journal->j_transaction_sequence = ++info.end_transaction;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
 	unsigned long next_log_block;
 	int err, success = 0;
 	journal_superblock_t *sb;
 	journal_header_t *tmp;
 	struct buffer_head *bh;
 	unsigned int sequence;
 	int blocktype;
@@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal,
 	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
 			       / sizeof(journal_block_tag_t));
 
 	/*
 	 * First thing is to establish what we expect to find in the log
 	 * (in terms of transaction IDs), and where (in terms of log
 	 * block offsets): query the superblock.
 	 */
 
 	sb = journal->j_superblock;
@@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal,
 	 * Now we walk through the log, transaction by transaction,
 	 * making sure that each transaction has a commit block in the
 	 * expected place. Each complete transaction gets replayed back
 	 * into the main filesystem.
 	 */
 
 	while (1) {
@@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal,
 		next_log_block++;
 		wrap(journal, next_log_block);
 
 		/* What kind of buffer is it?
 		 *
 		 * If it is a descriptor block, check that it has the
 		 * expected sequence number. Otherwise, we're all done
 		 * here. */
@@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal,
 
 		blocktype = be32_to_cpu(tmp->h_blocktype);
 		sequence = be32_to_cpu(tmp->h_sequence);
 		jbd_debug(3, "Found magic %d, sequence %d\n",
 			  blocktype, sequence);
 
 		if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal,
 					/* Recover what we can, but
 					 * report failure at the end. */
 					success = err;
 					printk (KERN_ERR
 						"JBD: IO error %d recovering "
 						"block %ld in log\n",
 						err, io_block);
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
 					 * revoked, then we're all done
 					 * here. */
 					if (journal_test_revoke
 					    (journal, blocknr,
 					     next_commit_ID)) {
 						brelse(obh);
 						++info->nr_revoke_hits;
@@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal,
 							blocknr,
 							journal->j_blocksize);
 					if (nbh == NULL) {
 						printk(KERN_ERR
 						       "JBD: Out of memory "
 						       "during recovery.\n");
 						err = -ENOMEM;
@@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal,
 	}
 
  done:
 	/*
 	 * We broke out of the log scan loop: either we came to the
 	 * known end of the log or we found an unexpected block in the
 	 * log. If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal,
 
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
 
 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 			       tid_t sequence, struct recovery_info *info)
 {
 	journal_revoke_header_t *header;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index a56144183462..d204ab394f36 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/revoke.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
  *
  * Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
  * Revoke is the mechanism used to prevent old log records for deleted
  * metadata from being replayed on top of newer data using the same
  * blocks. The revoke mechanism is used in two separate places:
  *
  * + Commit: during commit we write the entire list of the current
  *   transaction's revoked blocks to the journal
  *
  * + Recovery: during recovery we record the transaction ID of all
  *   revoked blocks. If there are multiple revoke records in the log
  *   for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
  * single transaction:
  *
  * Block is revoked and then journaled:
  *   The desired end result is the journaling of the new block, so we
  *   cancel the revoke before the transaction commits.
  *
  * Block is journaled and then revoked:
@@ -41,7 +41,7 @@
  *   transaction must have happened after the block was journaled and so
  *   the revoke must take precedence.
  *
  * Block is revoked and then written as data:
  *   The data write is allowed to succeed, but the revoke is _not_
  *   cancelled. We still need to prevent old log records from
  *   overwriting the new data. We don't even need to clear the revoke
@@ -54,7 +54,7 @@
  *			buffer has not been revoked, and cancel_revoke
  *			need do nothing.
  * RevokeValid set, Revoked set:
  *			buffer has been revoked.
  */
 
 #ifndef __KERNEL__
@@ -70,14 +70,14 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *revoke_record_cache;
-static kmem_cache_t *revoke_table_cache;
+static struct kmem_cache *revoke_record_cache;
+static struct kmem_cache *revoke_table_cache;
 
 /* Each revoke record represents one single revoked block. During
    journal replay, this involves recording the transaction ID of the
    last transaction to revoke this block. */
 
 struct jbd_revoke_record_s
 {
 	struct list_head hash;
 	tid_t sequence;		/* Used for recovery only */
@@ -90,8 +90,8 @@ struct jbd_revoke_table_s
 {
 	/* It is conceivable that we might want a larger hash table
 	 * for recovery. Must be a power of two. */
 	int hash_size;
 	int hash_shift;
 	struct list_head *hash_table;
 };
 
@@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal)
 
 #ifdef __KERNEL__
 
 /*
  * journal_revoke: revoke a given buffer_head from the journal. This
  * prevents the block from being replayed during recovery if we take a
  * crash after this current transaction commits. Any subsequent
  * metadata writes of the buffer in this transaction cancel the
  * revoke.
  *
  * Note that this call may block --- it is up to the caller to make
  * sure that there are no further calls to journal_write_metadata
  * before the revoke is complete. In ext3, this implies calling the
  * revoke before clearing the block bitmap when we are deleting
  * metadata.
  *
  * Revoke performs a journal_forget on any buffer_head passed in as a
  * parameter, but does _not_ forget the buffer_head if the bh was only
  * found implicitly.
  *
  * bh_in may not be a journalled buffer - it may have come off
  * the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal)
  * by one.
  */
 
 int journal_revoke(handle_t *handle, unsigned long blocknr,
 		   struct buffer_head *bh_in)
 {
 	struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal)
 	else
 		journal->j_revoke = journal->j_revoke_table[0];
 
 	for (i = 0; i < journal->j_revoke->hash_size; i++)
 		INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
 }
 
@@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
  * Called with the journal lock held.
  */
 
 void journal_write_revoke_records(journal_t *journal,
 				  transaction_t *transaction)
 {
 	struct journal_head *descriptor;
@@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal,
 	struct list_head *hash_list;
 	int i, offset, count;
 
 	descriptor = NULL;
 	offset = 0;
 	count = 0;
 
@@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal,
 		hash_list = &revoke->hash_table[i];
 
 		while (!list_empty(hash_list)) {
 			record = (struct jbd_revoke_record_s *)
 				hash_list->next;
 			write_one_revoke_record(journal, transaction,
 						&descriptor, &offset,
 						record);
 			count++;
 			list_del(&record->hash);
@@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal,
 	jbd_debug(1, "Wrote %d revoke records\n", count);
 }
 
 /*
  * Write out one revoke record. We need to create a new descriptor
  * block if the old one is full or if we have not already created one.
  */
 
 static void write_one_revoke_record(journal_t *journal,
 				    transaction_t *transaction,
 				    struct journal_head **descriptorp,
 				    int *offsetp,
 				    struct jbd_revoke_record_s *record)
 {
@@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal,
 		*descriptorp = descriptor;
 	}
 
 	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
 		cpu_to_be32(record->blocknr);
 	offset += 4;
 	*offsetp = offset;
 }
 
 /*
  * Flush a revoke descriptor out to the journal. If we are aborting,
  * this is a noop; otherwise we are generating a buffer which needs to
  * be waited for during commit, so it has to go onto the appropriate
  * journal buffer list.
  */
 
 static void flush_descriptor(journal_t *journal,
 			     struct journal_head *descriptor,
 			     int offset)
 {
 	journal_revoke_header_t *header;
@@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal,
 }
 #endif
 
 /*
  * Revoke support for recovery.
  *
  * Recovery needs to be able to:
@@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal,
  *  check whether a given block in a given transaction should be replayed
  *  (ie. has not been revoked by a revoke record in that or a subsequent
  *  transaction)
  *
  *  empty the revoke table after recovery.
  */
 
@@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal,
  * First, setting revoke records. We create a new revoke record for
  * every block ever revoked in the log as we scan it for recovery, and
  * we update the existing records if we find multiple revokes for a
  * single block.
  */
 
 int journal_set_revoke(journal_t *journal,
 		       unsigned long blocknr,
 		       tid_t sequence)
 {
 	struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal,
 		if (tid_gt(sequence, record->sequence))
 			record->sequence = sequence;
 		return 0;
 	}
 	return insert_revoke_hash(journal, blocknr, sequence);
 }
 
 /*
  * Test revoke records. For a given block referenced in the log, has
  * that block been revoked? A revoke record with a given transaction
  * sequence number revokes all blocks in that transaction and earlier
  * ones, but later transactions still need replayed.
  */
 
 int journal_test_revoke(journal_t *journal,
 			unsigned long blocknr,
 			tid_t sequence)
 {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f5169a96260e..cceaf57e3778 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/transaction.c
  *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
  * Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
  * option, any later version, incorporated herein by reference.
  *
  * Generic filesystem transaction handling code; part of the ext2fs
  * journaling system.
  *
  * This file manages transactions (compound commits managed by the
  * journaling code) and handles (individual atomic operations by the
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 
+static void __journal_temp_unlink_buffer(struct journal_head *jh);
+
 /*
  * get_transaction: obtain a new transaction_t object.
  *
@@ -53,7 +55,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer.expires = transaction->t_expires;
+	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
 	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
@@ -74,7 +76,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
  * start_this_handle: Given a handle, deal with any locking or stalling
  * needed to make sure that there is enough journal space for the handle
  * to begin. Attach the handle to a transaction and set up the
  * transaction's buffer credits.
  */
 
 static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +119,7 @@ repeat_locked:
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
 		spin_unlock(&journal->j_state_lock);
 		ret = -EROFS;
 		goto out;
 	}
 
@@ -182,7 +184,7 @@ repeat_locked:
 		goto repeat;
 	}
 
 	/*
 	 * The commit code assumes that it can get enough log space
 	 * without forcing a checkpoint. This is *critical* for
 	 * correctness: a checkpoint of a buffer which is also
@@ -191,7 +193,7 @@ repeat_locked:
 	 *
 	 * We must therefore ensure the necessary space in the journal
 	 * *before* starting to dirty potentially checkpointed buffers
 	 * in the new transaction.
 	 *
 	 * The worst part is, any transaction currently committing can
 	 * reduce the free space arbitrarily. Be careful to account for
@@ -246,13 +248,13 @@ static handle_t *new_handle(int nblocks)
246} 248}
247 249
248/** 250/**
249 * handle_t *journal_start() - Obtain a new handle. 251 * handle_t *journal_start() - Obtain a new handle.
250 * @journal: Journal to start transaction on. 252 * @journal: Journal to start transaction on.
251 * @nblocks: number of block buffers we might modify 253 * @nblocks: number of block buffers we might modify
252 * 254 *
253 * We make sure that the transaction can guarantee at least nblocks of 255 * We make sure that the transaction can guarantee at least nblocks of
254 * modified buffers in the log. We block until the log can guarantee 256 * modified buffers in the log. We block until the log can guarantee
255 * that much space. 257 * that much space.
256 * 258 *
257 * This function is visible to journal users (like ext3fs), so is not 259 * This function is visible to journal users (like ext3fs), so is not
258 * called with the journal already locked. 260 * called with the journal already locked.
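
A rough usage sketch of the API documented above (not part of this patch; the credit count and helper name are made up): a client filesystem brackets each atomic update with journal_start()/journal_stop(), reserving its worst-case number of buffers up front.

    #include <linux/err.h>
    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    #define UPDATE_CREDITS 8  /* assumed worst-case block count for this update */

    static int update_one_block(journal_t *journal, struct buffer_head *bh)
    {
            handle_t *handle;
            int err, err2;

            /* Blocks until the log can guarantee UPDATE_CREDITS buffers. */
            handle = journal_start(journal, UPDATE_CREDITS);
            if (IS_ERR(handle))
                    return PTR_ERR(handle);

            err = journal_get_write_access(handle, bh);
            if (!err) {
                    /* ... modify bh->b_data under the handle ... */
                    err = journal_dirty_metadata(handle, bh);
            }

            /* journal_stop() can itself fail, e.g. with -EIO after an abort. */
            err2 = journal_stop(handle);
            return err ? err : err2;
    }
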
@@ -292,11 +294,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
292 * int journal_extend() - extend buffer credits. 294 * int journal_extend() - extend buffer credits.
293 * @handle: handle to 'extend' 295 * @handle: handle to 'extend'
294 * @nblocks: nr blocks to try to extend by. 296 * @nblocks: nr blocks to try to extend by.
295 * 297 *
296 * Some transactions, such as large extends and truncates, can be done 298 * Some transactions, such as large extends and truncates, can be done
297 * atomically all at once or in several stages. The operation requests 299 * atomically all at once or in several stages. The operation requests
298 * a credit for a number of buffer modifications in advance, but can 300 * a credit for a number of buffer modifications in advance, but can
299 * extend its credit if it needs more. 301 * extend its credit if it needs more.
300 * 302 *
301 * journal_extend tries to give the running handle more buffer credits. 303 * journal_extend tries to give the running handle more buffer credits.
302 * It does not guarantee that allocation - this is a best-effort only. 304 * It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +365,7 @@ out:
363 * int journal_restart() - restart a handle. 365 * int journal_restart() - restart a handle.
364 * @handle: handle to restart 366 * @handle: handle to restart
365 * @nblocks: nr credits requested 367 * @nblocks: nr credits requested
366 * 368 *
367 * Restart a handle for a multi-transaction filesystem 369 * Restart a handle for a multi-transaction filesystem
368 * operation. 370 * operation.
369 * 371 *
@@ -462,7 +464,7 @@ void journal_lock_updates(journal_t *journal)
462/** 464/**
463 * void journal_unlock_updates (journal_t* journal) - release barrier 465 * void journal_unlock_updates (journal_t* journal) - release barrier
464 * @journal: Journal to release the barrier on. 466 * @journal: Journal to release the barrier on.
465 * 467 *
466 * Release a transaction barrier obtained with journal_lock_updates(). 468 * Release a transaction barrier obtained with journal_lock_updates().
467 * 469 *
468 * Should be called without the journal lock held. 470 * Should be called without the journal lock held.
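
A small sketch of using the barrier documented above (the journal-wide operation shown here, a full flush, is just one plausible example and is not taken from this patch):

    #include <linux/jbd.h>

    /* Quiesce the journal: wait for every running handle to finish and keep
     * new ones from starting while a journal-wide operation runs. */
    static int with_updates_blocked(journal_t *journal)
    {
            int err;

            journal_lock_updates(journal);   /* waits for t_updates to drain */
            err = journal_flush(journal);    /* example: force everything out */
            journal_unlock_updates(journal); /* drop the barrier */
            return err;
    }
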
@@ -547,8 +549,8 @@ repeat:
547 jbd_lock_bh_state(bh); 549 jbd_lock_bh_state(bh);
548 550
549 /* We now hold the buffer lock so it is safe to query the buffer 551 /* We now hold the buffer lock so it is safe to query the buffer
550 * state. Is the buffer dirty? 552 * state. Is the buffer dirty?
551 * 553 *
552 * If so, there are two possibilities. The buffer may be 554 * If so, there are two possibilities. The buffer may be
553 * non-journaled, and undergoing a quite legitimate writeback. 555 * non-journaled, and undergoing a quite legitimate writeback.
554 * Otherwise, it is journaled, and we don't expect dirty buffers 556 * Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +568,7 @@ repeat:
566 */ 568 */
567 if (jh->b_transaction) { 569 if (jh->b_transaction) {
568 J_ASSERT_JH(jh, 570 J_ASSERT_JH(jh,
569 jh->b_transaction == transaction || 571 jh->b_transaction == transaction ||
570 jh->b_transaction == 572 jh->b_transaction ==
571 journal->j_committing_transaction); 573 journal->j_committing_transaction);
572 if (jh->b_next_transaction) 574 if (jh->b_next_transaction)
@@ -580,7 +582,7 @@ repeat:
580 */ 582 */
581 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 583 JBUFFER_TRACE(jh, "Unexpected dirty buffer");
582 jbd_unexpected_dirty_buffer(jh); 584 jbd_unexpected_dirty_buffer(jh);
583 } 585 }
584 586
585 unlock_buffer(bh); 587 unlock_buffer(bh);
586 588
@@ -653,7 +655,7 @@ repeat:
653 * buffer had better remain locked during the kmalloc, 655 * buffer had better remain locked during the kmalloc,
654 * but that should be true --- we hold the journal lock 656 * but that should be true --- we hold the journal lock
655 * still and the buffer is already on the BUF_JOURNAL 657 * still and the buffer is already on the BUF_JOURNAL
656 * list so won't be flushed. 658 * list so won't be flushed.
657 * 659 *
658 * Subtle point, though: if this is a get_undo_access, 660 * Subtle point, though: if this is a get_undo_access,
659 * then we will be relying on the frozen_data to contain 661 * then we will be relying on the frozen_data to contain
@@ -765,8 +767,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
765 * manually rather than reading off disk), then we need to keep the 767 * manually rather than reading off disk), then we need to keep the
766 * buffer_head locked until it has been completely filled with new 768 * buffer_head locked until it has been completely filled with new
767 * data. In this case, we should be able to make the assertion that 769 * data. In this case, we should be able to make the assertion that
768 * the bh is not already part of an existing transaction. 770 * the bh is not already part of an existing transaction.
769 * 771 *
770 * The buffer should already be locked by the caller by this point. 772 * The buffer should already be locked by the caller by this point.
771 * There is no lock ranking violation: it was a newly created, 773 * There is no lock ranking violation: it was a newly created,
772 * unlocked buffer beforehand. */ 774 * unlocked buffer beforehand. */
@@ -778,7 +780,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
778 * 780 *
779 * Call this if you create a new bh. 781 * Call this if you create a new bh.
780 */ 782 */
781int journal_get_create_access(handle_t *handle, struct buffer_head *bh) 783int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
782{ 784{
783 transaction_t *transaction = handle->h_transaction; 785 transaction_t *transaction = handle->h_transaction;
784 journal_t *journal = transaction->t_journal; 786 journal_t *journal = transaction->t_journal;
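
A hedged sketch of the call sequence for a freshly allocated metadata block, following the locking rule spelled out in the comment above (the allocation of blocknr itself is elided; the helper is invented):

    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    static struct buffer_head *init_new_block(handle_t *handle,
                                              struct super_block *sb,
                                              sector_t blocknr)
    {
            struct buffer_head *bh;
            int err;

            bh = sb_getblk(sb, blocknr);     /* new block: no disk read needed */
            if (!bh)
                    return NULL;

            lock_buffer(bh);                 /* keep it locked while we fill it */
            err = journal_get_create_access(handle, bh);
            if (!err) {
                    memset(bh->b_data, 0, bh->b_size);
                    set_buffer_uptodate(bh);
            }
            unlock_buffer(bh);

            if (!err)
                    err = journal_dirty_metadata(handle, bh);
            if (err) {
                    brelse(bh);
                    return NULL;
            }
            return bh;
    }
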
@@ -847,13 +849,13 @@ out:
847 * do not reuse freed space until the deallocation has been committed, 849 * do not reuse freed space until the deallocation has been committed,
848 * since if we overwrote that space we would make the delete 850 * since if we overwrote that space we would make the delete
849 * un-rewindable in case of a crash. 851 * un-rewindable in case of a crash.
850 * 852 *
851 * To deal with that, journal_get_undo_access requests write access to a 853 * To deal with that, journal_get_undo_access requests write access to a
852 * buffer for parts of non-rewindable operations such as delete 854 * buffer for parts of non-rewindable operations such as delete
853 * operations on the bitmaps. The journaling code must keep a copy of 855 * operations on the bitmaps. The journaling code must keep a copy of
854 * the buffer's contents prior to the undo_access call until such time 856 * the buffer's contents prior to the undo_access call until such time
855 * as we know that the buffer has definitely been committed to disk. 857 * as we know that the buffer has definitely been committed to disk.
856 * 858 *
857 * We never need to know which transaction the committed data is part 859 * We never need to know which transaction the committed data is part
858 * of, buffers touched here are guaranteed to be dirtied later and so 860 * of, buffers touched here are guaranteed to be dirtied later and so
859 * will be committed to a new transaction in due course, at which point 861 * will be committed to a new transaction in due course, at which point
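
A minimal sketch of the delete path this describes, assuming a bitmap-style allocator (the function itself is invented; only the JBD calls follow the comment above): undo access makes the journal keep the committed copy of the bitmap so a crash cannot expose the half-finished deallocation.

    #include <linux/bitops.h>
    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    /* Clear one bit in a block bitmap as part of freeing a block. */
    static int free_bit_in_bitmap(handle_t *handle,
                                  struct buffer_head *bitmap_bh, int bit)
    {
            int err;

            err = journal_get_undo_access(handle, bitmap_bh);
            if (err)
                    return err;

            clear_bit(bit, (unsigned long *)bitmap_bh->b_data);

            /* The bitmap itself is ordinary metadata from here on. */
            return journal_dirty_metadata(handle, bitmap_bh);
    }
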
@@ -911,13 +913,13 @@ out:
911 return err; 913 return err;
912} 914}
913 915
914/** 916/**
915 * int journal_dirty_data() - mark a buffer as containing dirty data which 917 * int journal_dirty_data() - mark a buffer as containing dirty data which
916 * needs to be flushed before we can commit the 918 * needs to be flushed before we can commit the
917 * current transaction. 919 * current transaction.
918 * @handle: transaction 920 * @handle: transaction
919 * @bh: bufferhead to mark 921 * @bh: bufferhead to mark
920 * 922 *
921 * The buffer is placed on the transaction's data list and is marked as 923 * The buffer is placed on the transaction's data list and is marked as
922 * belonging to the transaction. 924 * belonging to the transaction.
923 * 925 *
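
A short sketch of the ordered-mode write path that ends in this call (the caller that walks the page's buffers is assumed; nothing here is from the patch itself):

    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    /* Ordered data: the buffer never enters the journal, but it must reach
     * disk before the transaction that references it is allowed to commit. */
    static int order_data_buffer(handle_t *handle, struct buffer_head *bh)
    {
            if (!buffer_mapped(bh))
                    return 0;               /* nothing to order for a hole */

            /* Files bh on the running transaction's data list. */
            return journal_dirty_data(handle, bh);
    }
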
@@ -946,15 +948,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
946 948
947 /* 949 /*
948 * What if the buffer is already part of a running transaction? 950 * What if the buffer is already part of a running transaction?
949 * 951 *
950 * There are two cases: 952 * There are two cases:
951 * 1) It is part of the current running transaction. Refile it, 953 * 1) It is part of the current running transaction. Refile it,
952 * just in case we have allocated it as metadata, deallocated 954 * just in case we have allocated it as metadata, deallocated
953 * it, then reallocated it as data. 955 * it, then reallocated it as data.
954 * 2) It is part of the previous, still-committing transaction. 956 * 2) It is part of the previous, still-committing transaction.
955 * If all we want to do is to guarantee that the buffer will be 957 * If all we want to do is to guarantee that the buffer will be
956 * written to disk before this new transaction commits, then 958 * written to disk before this new transaction commits, then
957 * being sure that the *previous* transaction has this same 959 * being sure that the *previous* transaction has this same
958 * property is sufficient for us! Just leave it on its old 960 * property is sufficient for us! Just leave it on its old
959 * transaction. 961 * transaction.
960 * 962 *
@@ -967,6 +969,13 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
967 */ 969 */
968 jbd_lock_bh_state(bh); 970 jbd_lock_bh_state(bh);
969 spin_lock(&journal->j_list_lock); 971 spin_lock(&journal->j_list_lock);
972
973 /* Now that we have bh_state locked, are we really still mapped? */
974 if (!buffer_mapped(bh)) {
975 JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
976 goto no_journal;
977 }
978
970 if (jh->b_transaction) { 979 if (jh->b_transaction) {
971 JBUFFER_TRACE(jh, "has transaction"); 980 JBUFFER_TRACE(jh, "has transaction");
972 if (jh->b_transaction != handle->h_transaction) { 981 if (jh->b_transaction != handle->h_transaction) {
@@ -1028,6 +1037,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1028 sync_dirty_buffer(bh); 1037 sync_dirty_buffer(bh);
1029 jbd_lock_bh_state(bh); 1038 jbd_lock_bh_state(bh);
1030 spin_lock(&journal->j_list_lock); 1039 spin_lock(&journal->j_list_lock);
1040 /* Since we dropped the lock... */
1041 if (!buffer_mapped(bh)) {
1042 JBUFFER_TRACE(jh, "buffer got unmapped");
1043 goto no_journal;
1044 }
1031 /* The buffer may become locked again at any 1045 /* The buffer may become locked again at any
1032 time if it is redirtied */ 1046 time if it is redirtied */
1033 } 1047 }
@@ -1076,18 +1090,18 @@ no_journal:
1076 return 0; 1090 return 0;
1077} 1091}
1078 1092
1079/** 1093/**
1080 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata 1094 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
1081 * @handle: transaction to add buffer to. 1095 * @handle: transaction to add buffer to.
1082 * @bh: buffer to mark 1096 * @bh: buffer to mark
1083 * 1097 *
1084 * mark dirty metadata which needs to be journaled as part of the current 1098 * mark dirty metadata which needs to be journaled as part of the current
1085 * transaction. 1099 * transaction.
1086 * 1100 *
1087 * The buffer is placed on the transaction's metadata list and is marked 1101 * The buffer is placed on the transaction's metadata list and is marked
1088 * as belonging to the transaction. 1102 * as belonging to the transaction.
1089 * 1103 *
1090 * Returns error number or 0 on success. 1104 * Returns error number or 0 on success.
1091 * 1105 *
1092 * Special care needs to be taken if the buffer already belongs to the 1106 * Special care needs to be taken if the buffer already belongs to the
1093 * current committing transaction (in which case we should have frozen 1107 * current committing transaction (in which case we should have frozen
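
The canonical sequence implied by this comment block, as a sketch (the helper and the field being updated are invented): reserve write access before touching the buffer, modify it, then mark it dirty through the journal rather than with mark_buffer_dirty().

    #include <linux/types.h>
    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    static int bump_counter(handle_t *handle, struct buffer_head *bh,
                            __le32 *counter /* points into bh->b_data */)
    {
            int err;

            err = journal_get_write_access(handle, bh);
            if (err)
                    return err;

            *counter = cpu_to_le32(le32_to_cpu(*counter) + 1);

            return journal_dirty_metadata(handle, bh);
    }
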
@@ -1135,11 +1149,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1135 1149
1136 set_buffer_jbddirty(bh); 1150 set_buffer_jbddirty(bh);
1137 1151
1138 /* 1152 /*
1139 * Metadata already on the current transaction list doesn't 1153 * Metadata already on the current transaction list doesn't
1140 * need to be filed. Metadata on another transaction's list must 1154 * need to be filed. Metadata on another transaction's list must
1141 * be committing, and will be refiled once the commit completes: 1155 * be committing, and will be refiled once the commit completes:
1142 * leave it alone for now. 1156 * leave it alone for now.
1143 */ 1157 */
1144 if (jh->b_transaction != transaction) { 1158 if (jh->b_transaction != transaction) {
1145 JBUFFER_TRACE(jh, "already on other transaction"); 1159 JBUFFER_TRACE(jh, "already on other transaction");
@@ -1165,7 +1179,7 @@ out:
1165 return 0; 1179 return 0;
1166} 1180}
1167 1181
1168/* 1182/*
1169 * journal_release_buffer: undo a get_write_access without any buffer 1183 * journal_release_buffer: undo a get_write_access without any buffer
1170 * updates, if the update decided in the end that it didn't need access. 1184 * updates, if the update decided in the end that it didn't need access.
1171 * 1185 *
@@ -1176,20 +1190,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
1176 BUFFER_TRACE(bh, "entry"); 1190 BUFFER_TRACE(bh, "entry");
1177} 1191}
1178 1192
1179/** 1193/**
1180 * void journal_forget() - bforget() for potentially-journaled buffers. 1194 * void journal_forget() - bforget() for potentially-journaled buffers.
1181 * @handle: transaction handle 1195 * @handle: transaction handle
1182 * @bh: bh to 'forget' 1196 * @bh: bh to 'forget'
1183 * 1197 *
1184 * We can only do the bforget if there are no commits pending against the 1198 * We can only do the bforget if there are no commits pending against the
1185 * buffer. If the buffer is dirty in the current running transaction we 1199 * buffer. If the buffer is dirty in the current running transaction we
1186 * can safely unlink it. 1200 * can safely unlink it.
1187 * 1201 *
1188 * bh may not be a journalled buffer at all - it may be a non-JBD 1202 * bh may not be a journalled buffer at all - it may be a non-JBD
1189 * buffer which came off the hashtable. Check for this. 1203 * buffer which came off the hashtable. Check for this.
1190 * 1204 *
1191 * Decrements bh->b_count by one. 1205 * Decrements bh->b_count by one.
1192 * 1206 *
1193 * Allow this call even if the handle has aborted --- it may be part of 1207 * Allow this call even if the handle has aborted --- it may be part of
1194 * the caller's cleanup after an abort. 1208 * the caller's cleanup after an abort.
1195 */ 1209 */
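
A hedged sketch of how a delete path would use this (the surrounding handle management is assumed):

    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    /* 'bh' was journaled earlier under this same handle and its block is now
     * being freed again, so stop the commit from writing its stale contents.
     * Per the comment above, journal_forget() itself drops one b_count
     * reference. */
    static int drop_deleted_metadata(handle_t *handle, struct buffer_head *bh)
    {
            return journal_forget(handle, bh);
    }
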
@@ -1237,7 +1251,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1237 1251
1238 drop_reserve = 1; 1252 drop_reserve = 1;
1239 1253
1240 /* 1254 /*
1241 * We are no longer going to journal this buffer. 1255 * We are no longer going to journal this buffer.
1242 * However, the commit of this transaction is still 1256 * However, the commit of this transaction is still
1243 * important to the buffer: the delete that we are now 1257 * important to the buffer: the delete that we are now
@@ -1246,7 +1260,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1246 * 1260 *
1247 * So, if we have a checkpoint on the buffer, we should 1261 * So, if we have a checkpoint on the buffer, we should
1248 * now refile the buffer on our BJ_Forget list so that 1262 * now refile the buffer on our BJ_Forget list so that
1249 * we know to remove the checkpoint after we commit. 1263 * we know to remove the checkpoint after we commit.
1250 */ 1264 */
1251 1265
1252 if (jh->b_cp_transaction) { 1266 if (jh->b_cp_transaction) {
@@ -1264,7 +1278,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1264 } 1278 }
1265 } 1279 }
1266 } else if (jh->b_transaction) { 1280 } else if (jh->b_transaction) {
1267 J_ASSERT_JH(jh, (jh->b_transaction == 1281 J_ASSERT_JH(jh, (jh->b_transaction ==
1268 journal->j_committing_transaction)); 1282 journal->j_committing_transaction));
1269 /* However, if the buffer is still owned by a prior 1283 /* However, if the buffer is still owned by a prior
1270 * (committing) transaction, we can't drop it yet... */ 1284 * (committing) transaction, we can't drop it yet... */
@@ -1294,7 +1308,7 @@ drop:
1294/** 1308/**
1295 * int journal_stop() - complete a transaction 1309 * int journal_stop() - complete a transaction
1296 * @handle: transaction to complete. 1310 * @handle: transaction to complete.
1297 * 1311 *
1298 * All done for a particular handle. 1312 * All done for a particular handle.
1299 * 1313 *
1300 * There is not much action needed here. We just return any remaining 1314 * There is not much action needed here. We just return any remaining
@@ -1303,7 +1317,7 @@ drop:
1303 * filesystem is marked for synchronous update. 1317 * filesystem is marked for synchronous update.
1304 * 1318 *
1305 * journal_stop itself will not usually return an error, but it may 1319 * journal_stop itself will not usually return an error, but it may
1306 * do so in unusual circumstances. In particular, expect it to 1320 * do so in unusual circumstances. In particular, expect it to
1307 * return -EIO if a journal_abort has been executed since the 1321 * return -EIO if a journal_abort has been executed since the
1308 * transaction began. 1322 * transaction began.
1309 */ 1323 */
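
A small sketch of the synchronous-update case mentioned above (the decision that the update must be synchronous is assumed to have been made by the caller):

    #include <linux/jbd.h>

    static int finish_sync_update(handle_t *handle)
    {
            handle->h_sync = 1;           /* ask journal_stop() to force and
                                           * wait for the commit */
            return journal_stop(handle);  /* may return -EIO after an abort */
    }
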
@@ -1314,13 +1328,14 @@ int journal_stop(handle_t *handle)
1314 int old_handle_count, err; 1328 int old_handle_count, err;
1315 pid_t pid; 1329 pid_t pid;
1316 1330
1317 J_ASSERT(transaction->t_updates > 0);
1318 J_ASSERT(journal_current_handle() == handle); 1331 J_ASSERT(journal_current_handle() == handle);
1319 1332
1320 if (is_handle_aborted(handle)) 1333 if (is_handle_aborted(handle))
1321 err = -EIO; 1334 err = -EIO;
1322 else 1335 else {
1336 J_ASSERT(transaction->t_updates > 0);
1323 err = 0; 1337 err = 0;
1338 }
1324 1339
1325 if (--handle->h_ref > 0) { 1340 if (--handle->h_ref > 0) {
1326 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, 1341 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
@@ -1373,7 +1388,7 @@ int journal_stop(handle_t *handle)
1373 if (handle->h_sync || 1388 if (handle->h_sync ||
1374 transaction->t_outstanding_credits > 1389 transaction->t_outstanding_credits >
1375 journal->j_max_transaction_buffers || 1390 journal->j_max_transaction_buffers ||
1376 time_after_eq(jiffies, transaction->t_expires)) { 1391 time_after_eq(jiffies, transaction->t_expires)) {
1377 /* Do this even for aborted journals: an abort still 1392 /* Do this even for aborted journals: an abort still
1378 * completes the commit thread, it just doesn't write 1393 * completes the commit thread, it just doesn't write
1379 * anything to disk. */ 1394 * anything to disk. */
@@ -1388,7 +1403,7 @@ int journal_stop(handle_t *handle)
1388 1403
1389 /* 1404 /*
1390 * Special case: JFS_SYNC synchronous updates require us 1405 * Special case: JFS_SYNC synchronous updates require us
1391 * to wait for the commit to complete. 1406 * to wait for the commit to complete.
1392 */ 1407 */
1393 if (handle->h_sync && !(current->flags & PF_MEMALLOC)) 1408 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1394 err = log_wait_commit(journal, tid); 1409 err = log_wait_commit(journal, tid);
@@ -1439,7 +1454,7 @@ int journal_force_commit(journal_t *journal)
1439 * jbd_lock_bh_state(jh2bh(jh)) is held. 1454 * jbd_lock_bh_state(jh2bh(jh)) is held.
1440 */ 1455 */
1441 1456
1442static inline void 1457static inline void
1443__blist_add_buffer(struct journal_head **list, struct journal_head *jh) 1458__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1444{ 1459{
1445 if (!*list) { 1460 if (!*list) {
@@ -1454,7 +1469,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1454 } 1469 }
1455} 1470}
1456 1471
1457/* 1472/*
1458 * Remove a buffer from a transaction list, given the transaction's list 1473 * Remove a buffer from a transaction list, given the transaction's list
1459 * head pointer. 1474 * head pointer.
1460 * 1475 *
@@ -1475,7 +1490,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1475 jh->b_tnext->b_tprev = jh->b_tprev; 1490 jh->b_tnext->b_tprev = jh->b_tprev;
1476} 1491}
1477 1492
1478/* 1493/*
1479 * Remove a buffer from the appropriate transaction list. 1494 * Remove a buffer from the appropriate transaction list.
1480 * 1495 *
1481 * Note that this function can *change* the value of 1496 * Note that this function can *change* the value of
@@ -1486,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1486 * 1501 *
1487 * Called under j_list_lock. The journal may not be locked. 1502 * Called under j_list_lock. The journal may not be locked.
1488 */ 1503 */
1489void __journal_temp_unlink_buffer(struct journal_head *jh) 1504static void __journal_temp_unlink_buffer(struct journal_head *jh)
1490{ 1505{
1491 struct journal_head **list = NULL; 1506 struct journal_head **list = NULL;
1492 transaction_t *transaction; 1507 transaction_t *transaction;
@@ -1595,17 +1610,17 @@ out:
1595} 1610}
1596 1611
1597 1612
1598/** 1613/**
1599 * int journal_try_to_free_buffers() - try to free page buffers. 1614 * int journal_try_to_free_buffers() - try to free page buffers.
1600 * @journal: journal for operation 1615 * @journal: journal for operation
1601 * @page: to try and free 1616 * @page: to try and free
1602 * @unused_gfp_mask: unused 1617 * @unused_gfp_mask: unused
1603 * 1618 *
1604 * 1619 *
1605 * For all the buffers on this page, 1620 * For all the buffers on this page,
1606 * if they are fully written out ordered data, move them onto BUF_CLEAN 1621 * if they are fully written out ordered data, move them onto BUF_CLEAN
1607 * so try_to_free_buffers() can reap them. 1622 * so try_to_free_buffers() can reap them.
1608 * 1623 *
1609 * This function returns non-zero if we wish try_to_free_buffers() 1624 * This function returns non-zero if we wish try_to_free_buffers()
1610 * to be called. We do this if the page is releasable by try_to_free_buffers(). 1625 * to be called. We do this if the page is releasable by try_to_free_buffers().
1611 * We also do it if the page has locked or dirty buffers and the caller wants 1626 * We also do it if the page has locked or dirty buffers and the caller wants
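
For context, this is normally reached through the filesystem's ->releasepage() hook; a hedged sketch follows, in which the journal accessor MYFS_JOURNAL() is hypothetical:

    #include <linux/fs.h>
    #include <linux/mm.h>
    #include <linux/buffer_head.h>
    #include <linux/jbd.h>

    static int myfs_releasepage(struct page *page, gfp_t wait)
    {
            journal_t *journal = MYFS_JOURNAL(page->mapping->host);

            if (!page_has_buffers(page))
                    return 0;
            return journal_try_to_free_buffers(journal, page, wait);
    }
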
@@ -1629,7 +1644,7 @@ out:
1629 * cannot happen because we never reallocate freed data as metadata 1644 * cannot happen because we never reallocate freed data as metadata
1630 * while the data is part of a transaction. Yes? 1645 * while the data is part of a transaction. Yes?
1631 */ 1646 */
1632int journal_try_to_free_buffers(journal_t *journal, 1647int journal_try_to_free_buffers(journal_t *journal,
1633 struct page *page, gfp_t unused_gfp_mask) 1648 struct page *page, gfp_t unused_gfp_mask)
1634{ 1649{
1635 struct buffer_head *head; 1650 struct buffer_head *head;
@@ -1697,7 +1712,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1697} 1712}
1698 1713
1699/* 1714/*
1700 * journal_invalidatepage 1715 * journal_invalidatepage
1701 * 1716 *
1702 * This code is tricky. It has a number of cases to deal with. 1717 * This code is tricky. It has a number of cases to deal with.
1703 * 1718 *
@@ -1705,15 +1720,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1705 * 1720 *
1706 * i_size must be updated on disk before we start calling invalidatepage on the 1721 * i_size must be updated on disk before we start calling invalidatepage on the
1707 * data. 1722 * data.
1708 * 1723 *
1709 * This is done in ext3 by defining an ext3_setattr method which 1724 * This is done in ext3 by defining an ext3_setattr method which
1710 * updates i_size before truncate gets going. By maintaining this 1725 * updates i_size before truncate gets going. By maintaining this
1711 * invariant, we can be sure that it is safe to throw away any buffers 1726 * invariant, we can be sure that it is safe to throw away any buffers
1712 * attached to the current transaction: once the transaction commits, 1727 * attached to the current transaction: once the transaction commits,
1713 * we know that the data will not be needed. 1728 * we know that the data will not be needed.
1714 * 1729 *
1715 * Note however that we can *not* throw away data belonging to the 1730 * Note however that we can *not* throw away data belonging to the
1716 * previous, committing transaction! 1731 * previous, committing transaction!
1717 * 1732 *
1718 * Any disk blocks which *are* part of the previous, committing 1733 * Any disk blocks which *are* part of the previous, committing
1719 * transaction (and which therefore cannot be discarded immediately) are 1734 * transaction (and which therefore cannot be discarded immediately) are
@@ -1732,7 +1747,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1732 * don't make guarantees about the order in which data hits disk --- in 1747 * don't make guarantees about the order in which data hits disk --- in
1733 * particular we don't guarantee that new dirty data is flushed before 1748 * particular we don't guarantee that new dirty data is flushed before
1734 * transaction commit --- so it is always safe just to discard data 1749 * transaction commit --- so it is always safe just to discard data
1735 * immediately in that mode. --sct 1750 * immediately in that mode. --sct
1736 */ 1751 */
1737 1752
1738/* 1753/*
@@ -1823,6 +1838,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1823 } 1838 }
1824 } 1839 }
1825 } else if (transaction == journal->j_committing_transaction) { 1840 } else if (transaction == journal->j_committing_transaction) {
1841 JBUFFER_TRACE(jh, "on committing transaction");
1826 if (jh->b_jlist == BJ_Locked) { 1842 if (jh->b_jlist == BJ_Locked) {
1827 /* 1843 /*
1828 * The buffer is on the committing transaction's locked 1844 * The buffer is on the committing transaction's locked
@@ -1837,7 +1853,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1837 * can remove its next_transaction pointer from the 1853 * can remove its next_transaction pointer from the
1838 * running transaction if that is set, but nothing 1854 * running transaction if that is set, but nothing
1839 * else. */ 1855 * else. */
1840 JBUFFER_TRACE(jh, "on committing transaction");
1841 set_buffer_freed(bh); 1856 set_buffer_freed(bh);
1842 if (jh->b_next_transaction) { 1857 if (jh->b_next_transaction) {
1843 J_ASSERT(jh->b_next_transaction == 1858 J_ASSERT(jh->b_next_transaction ==
@@ -1857,6 +1872,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1857 * i_size already for this truncate so recovery will not 1872 * i_size already for this truncate so recovery will not
1858 * expose the disk blocks we are discarding here.) */ 1873 * expose the disk blocks we are discarding here.) */
1859 J_ASSERT_JH(jh, transaction == journal->j_running_transaction); 1874 J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
1875 JBUFFER_TRACE(jh, "on running transaction");
1860 may_free = __dispose_buffer(jh, transaction); 1876 may_free = __dispose_buffer(jh, transaction);
1861 } 1877 }
1862 1878
@@ -1876,9 +1892,9 @@ zap_buffer_unlocked:
1876 return may_free; 1892 return may_free;
1877} 1893}
1878 1894
1879/** 1895/**
1880 * void journal_invalidatepage() 1896 * void journal_invalidatepage()
1881 * @journal: journal to use for flush... 1897 * @journal: journal to use for flush...
1882 * @page: page to flush 1898 * @page: page to flush
1883 * @offset: start offset of the range to invalidate. 1899 * @offset: start offset of the range to invalidate.
1884 * 1900 *
@@ -1886,7 +1902,7 @@ zap_buffer_unlocked:
1886 * 1902 *
1887 */ 1903 */
1888void journal_invalidatepage(journal_t *journal, 1904void journal_invalidatepage(journal_t *journal,
1889 struct page *page, 1905 struct page *page,
1890 unsigned long offset) 1906 unsigned long offset)
1891{ 1907{
1892 struct buffer_head *head, *bh, *next; 1908 struct buffer_head *head, *bh, *next;
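
For completeness, a sketch of the usual caller: the filesystem's ->invalidatepage() hook forwards the page and the byte offset at which truncation starts (MYFS_JOURNAL() is again a hypothetical accessor):

    #include <linux/fs.h>
    #include <linux/mm.h>
    #include <linux/jbd.h>

    static void myfs_invalidatepage(struct page *page, unsigned long offset)
    {
            journal_t *journal = MYFS_JOURNAL(page->mapping->host);

            /* offset == 0 means the whole page is going away. */
            if (offset == 0)
                    ClearPageChecked(page);

            journal_invalidatepage(journal, page, offset);
    }
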
@@ -1908,7 +1924,7 @@ void journal_invalidatepage(journal_t *journal,
1908 next = bh->b_this_page; 1924 next = bh->b_this_page;
1909 1925
1910 if (offset <= curr_off) { 1926 if (offset <= curr_off) {
1911 /* This block is wholly outside the truncation point */ 1927 /* This block is wholly outside the truncation point */
1912 lock_buffer(bh); 1928 lock_buffer(bh);
1913 may_free &= journal_unmap_buffer(journal, bh); 1929 may_free &= journal_unmap_buffer(journal, bh);
1914 unlock_buffer(bh); 1930 unlock_buffer(bh);
@@ -1924,8 +1940,8 @@ void journal_invalidatepage(journal_t *journal,
1924 } 1940 }
1925} 1941}
1926 1942
1927/* 1943/*
1928 * File a buffer on the given transaction list. 1944 * File a buffer on the given transaction list.
1929 */ 1945 */
1930void __journal_file_buffer(struct journal_head *jh, 1946void __journal_file_buffer(struct journal_head *jh,
1931 transaction_t *transaction, int jlist) 1947 transaction_t *transaction, int jlist)
@@ -1948,7 +1964,7 @@ void __journal_file_buffer(struct journal_head *jh,
1948 * with __jbd_unexpected_dirty_buffer()'s handling of dirty 1964 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1949 * state. */ 1965 * state. */
1950 1966
1951 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1967 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1952 jlist == BJ_Shadow || jlist == BJ_Forget) { 1968 jlist == BJ_Shadow || jlist == BJ_Forget) {
1953 if (test_clear_buffer_dirty(bh) || 1969 if (test_clear_buffer_dirty(bh) ||
1954 test_clear_buffer_jbddirty(bh)) 1970 test_clear_buffer_jbddirty(bh))
@@ -2008,7 +2024,7 @@ void journal_file_buffer(struct journal_head *jh,
2008 jbd_unlock_bh_state(jh2bh(jh)); 2024 jbd_unlock_bh_state(jh2bh(jh));
2009} 2025}
2010 2026
2011/* 2027/*
2012 * Remove a buffer from its current buffer list in preparation for 2028 * Remove a buffer from its current buffer list in preparation for
2013 * dropping it from its current transaction entirely. If the buffer has 2029 * dropping it from its current transaction entirely. If the buffer has
2014 * already started to be used by a subsequent transaction, refile the 2030 * already started to be used by a subsequent transaction, refile the
@@ -2060,7 +2076,7 @@ void __journal_refile_buffer(struct journal_head *jh)
2060 * to the caller to remove the journal_head if necessary. For the 2076 * to the caller to remove the journal_head if necessary. For the
2061 * unlocked journal_refile_buffer call, the caller isn't going to be 2077 * unlocked journal_refile_buffer call, the caller isn't going to be
2062 * doing anything else to the buffer so we need to do the cleanup 2078 * doing anything else to the buffer so we need to do the cleanup
2063 * ourselves to avoid a jh leak. 2079 * ourselves to avoid a jh leak.
2064 * 2080 *
2065 * *** The journal_head may be freed by this call! *** 2081 * *** The journal_head may be freed by this call! ***
2066 */ 2082 */