about summary refs log tree commit diff stats
path: root/fs/jbd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd')
-rw-r--r-- fs/jbd/checkpoint.c 33
-rw-r--r-- fs/jbd/commit.c 182
-rw-r--r-- fs/jbd/journal.c 74
-rw-r--r-- fs/jbd/recovery.c 56
-rw-r--r-- fs/jbd/revoke.c 70
-rw-r--r-- fs/jbd/transaction.c 134
6 files changed, 294 insertions, 255 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 47678a26c13b..0208cc7ac5d0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/checkpoint.c 2 * linux/fs/checkpoint.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999 Red Hat Software --- All Rights Reserved 6 * Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
9 * the terms of the GNU General Public License, version 2, or at your 9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Checkpoint routines for the generic filesystem journaling code. 12 * Checkpoint routines for the generic filesystem journaling code.
13 * Part of the ext2fs journaling system. 13 * Part of the ext2fs journaling system.
14 * 14 *
15 * Checkpointing is the process of ensuring that a section of the log is 15 * Checkpointing is the process of ensuring that a section of the log is
16 * committed fully to disk, so that that portion of the log can be 16 * committed fully to disk, so that that portion of the log can be
@@ -145,6 +145,7 @@ void __log_wait_for_space(journal_t *journal)
145 * jbd_unlock_bh_state(). 145 * jbd_unlock_bh_state().
146 */ 146 */
147static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) 147static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
148 __releases(journal->j_list_lock)
148{ 149{
149 get_bh(bh); 150 get_bh(bh);
150 spin_unlock(&journal->j_list_lock); 151 spin_unlock(&journal->j_list_lock);
@@ -225,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
225 * Try to flush one buffer from the checkpoint list to disk. 226 * Try to flush one buffer from the checkpoint list to disk.
226 * 227 *
227 * Return 1 if something happened which requires us to abort the current 228 * Return 1 if something happened which requires us to abort the current
228 * scan of the checkpoint list. 229 * scan of the checkpoint list.
229 * 230 *
230 * Called with j_list_lock held and drops it if 1 is returned 231 * Called with j_list_lock held and drops it if 1 is returned
231 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -269,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
269 * possibly block, while still holding the journal lock. 270 * possibly block, while still holding the journal lock.
270 * We cannot afford to let the transaction logic start 271 * We cannot afford to let the transaction logic start
271 * messing around with this buffer before we write it to 272 * messing around with this buffer before we write it to
272 * disk, as that would break recoverability. 273 * disk, as that would break recoverability.
273 */ 274 */
274 BUFFER_TRACE(bh, "queue"); 275 BUFFER_TRACE(bh, "queue");
275 get_bh(bh); 276 get_bh(bh);
@@ -292,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
292 * Perform an actual checkpoint. We take the first transaction on the 293 * Perform an actual checkpoint. We take the first transaction on the
293 * list of transactions to be checkpointed and send all its buffers 294 * list of transactions to be checkpointed and send all its buffers
294 * to disk. We submit larger chunks of data at once. 295 * to disk. We submit larger chunks of data at once.
295 * 296 *
296 * The journal should be locked before calling this function. 297 * The journal should be locked before calling this function.
297 */ 298 */
298int log_do_checkpoint(journal_t *journal) 299int log_do_checkpoint(journal_t *journal)
@@ -303,10 +304,10 @@ int log_do_checkpoint(journal_t *journal)
303 304
304 jbd_debug(1, "Start checkpoint\n"); 305 jbd_debug(1, "Start checkpoint\n");
305 306
306 /* 307 /*
307 * First thing: if there are any transactions in the log which 308 * First thing: if there are any transactions in the log which
308 * don't need checkpointing, just eliminate them from the 309 * don't need checkpointing, just eliminate them from the
309 * journal straight away. 310 * journal straight away.
310 */ 311 */
311 result = cleanup_journal_tail(journal); 312 result = cleanup_journal_tail(journal);
312 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 313 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -384,9 +385,9 @@ out:
384 * we have already got rid of any since the last update of the log tail 385 * we have already got rid of any since the last update of the log tail
385 * in the journal superblock. If so, we can instantly roll the 386 * in the journal superblock. If so, we can instantly roll the
386 * superblock forward to remove those transactions from the log. 387 * superblock forward to remove those transactions from the log.
387 * 388 *
388 * Return <0 on error, 0 on success, 1 if there was nothing to clean up. 389 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
389 * 390 *
390 * Called with the journal lock held. 391 * Called with the journal lock held.
391 * 392 *
392 * This is the only part of the journaling code which really needs to be 393 * This is the only part of the journaling code which really needs to be
@@ -403,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal)
403 unsigned long blocknr, freed; 404 unsigned long blocknr, freed;
404 405
405 /* OK, work out the oldest transaction remaining in the log, and 406 /* OK, work out the oldest transaction remaining in the log, and
406 * the log block it starts at. 407 * the log block it starts at.
407 * 408 *
408 * If the log is now empty, we need to work out which is the 409 * If the log is now empty, we need to work out which is the
409 * next transaction ID we will write, and where it will 410 * next transaction ID we will write, and where it will
410 * start. */ 411 * start. */
@@ -479,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
479 if (!jh) 480 if (!jh)
480 return 0; 481 return 0;
481 482
482 last_jh = jh->b_cpprev; 483 last_jh = jh->b_cpprev;
483 do { 484 do {
484 jh = next_jh; 485 jh = next_jh;
485 next_jh = jh->b_cpnext; 486 next_jh = jh->b_cpnext;
@@ -557,7 +558,7 @@ out:
557 return ret; 558 return ret;
558} 559}
559 560
560/* 561/*
561 * journal_remove_checkpoint: called after a buffer has been committed 562 * journal_remove_checkpoint: called after a buffer has been committed
562 * to disk (either by being write-back flushed to disk, or being 563 * to disk (either by being write-back flushed to disk, or being
563 * committed to the log). 564 * committed to the log).
@@ -635,7 +636,7 @@ out:
635 * Called with the journal locked. 636 * Called with the journal locked.
636 * Called with j_list_lock held. 637 * Called with j_list_lock held.
637 */ 638 */
638void __journal_insert_checkpoint(struct journal_head *jh, 639void __journal_insert_checkpoint(struct journal_head *jh,
639 transaction_t *transaction) 640 transaction_t *transaction)
640{ 641{
641 JBUFFER_TRACE(jh, "entry"); 642 JBUFFER_TRACE(jh, "entry");
@@ -657,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
657 658
658/* 659/*
659 * We've finished with this transaction structure: adios... 660 * We've finished with this transaction structure: adios...
660 * 661 *
661 * The transaction must have no links except for the checkpoint by this 662 * The transaction must have no links except for the checkpoint by this
662 * point. 663 * point.
663 * 664 *
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 42da60784311..32a8caf0c41e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
160 return (ret == -EIO); 160 return (ret == -EIO);
161} 161}
162 162
163static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
164{
165 int i;
166
167 for (i = 0; i < bufs; i++) {
168 wbuf[i]->b_end_io = end_buffer_write_sync;
169 /* We use-up our safety reference in submit_bh() */
170 submit_bh(WRITE, wbuf[i]);
171 }
172}
173
174/*
175 * Submit all the data buffers to disk
176 */
177static void journal_submit_data_buffers(journal_t *journal,
178 transaction_t *commit_transaction)
179{
180 struct journal_head *jh;
181 struct buffer_head *bh;
182 int locked;
183 int bufs = 0;
184 struct buffer_head **wbuf = journal->j_wbuf;
185
186 /*
187 * Whenever we unlock the journal and sleep, things can get added
188 * onto ->t_sync_datalist, so we have to keep looping back to
189 * write_out_data until we *know* that the list is empty.
190 *
191 * Cleanup any flushed data buffers from the data list. Even in
192 * abort mode, we want to flush this out as soon as possible.
193 */
194write_out_data:
195 cond_resched();
196 spin_lock(&journal->j_list_lock);
197
198 while (commit_transaction->t_sync_datalist) {
199 jh = commit_transaction->t_sync_datalist;
200 bh = jh2bh(jh);
201 locked = 0;
202
203 /* Get reference just to make sure buffer does not disappear
204 * when we are forced to drop various locks */
205 get_bh(bh);
206 /* If the buffer is dirty, we need to submit IO and hence
207 * we need the buffer lock. We try to lock the buffer without
208 * blocking. If we fail, we need to drop j_list_lock and do
209 * blocking lock_buffer().
210 */
211 if (buffer_dirty(bh)) {
212 if (test_set_buffer_locked(bh)) {
213 BUFFER_TRACE(bh, "needs blocking lock");
214 spin_unlock(&journal->j_list_lock);
215 /* Write out all data to prevent deadlocks */
216 journal_do_submit_data(wbuf, bufs);
217 bufs = 0;
218 lock_buffer(bh);
219 spin_lock(&journal->j_list_lock);
220 }
221 locked = 1;
222 }
223 /* We have to get bh_state lock. Again out of order, sigh. */
224 if (!inverted_lock(journal, bh)) {
225 jbd_lock_bh_state(bh);
226 spin_lock(&journal->j_list_lock);
227 }
228 /* Someone already cleaned up the buffer? */
229 if (!buffer_jbd(bh)
230 || jh->b_transaction != commit_transaction
231 || jh->b_jlist != BJ_SyncData) {
232 jbd_unlock_bh_state(bh);
233 if (locked)
234 unlock_buffer(bh);
235 BUFFER_TRACE(bh, "already cleaned up");
236 put_bh(bh);
237 continue;
238 }
239 if (locked && test_clear_buffer_dirty(bh)) {
240 BUFFER_TRACE(bh, "needs writeout, adding to array");
241 wbuf[bufs++] = bh;
242 __journal_file_buffer(jh, commit_transaction,
243 BJ_Locked);
244 jbd_unlock_bh_state(bh);
245 if (bufs == journal->j_wbufsize) {
246 spin_unlock(&journal->j_list_lock);
247 journal_do_submit_data(wbuf, bufs);
248 bufs = 0;
249 goto write_out_data;
250 }
251 }
252 else {
253 BUFFER_TRACE(bh, "writeout complete: unfile");
254 __journal_unfile_buffer(jh);
255 jbd_unlock_bh_state(bh);
256 if (locked)
257 unlock_buffer(bh);
258 journal_remove_journal_head(bh);
259 /* Once for our safety reference, once for
260 * journal_remove_journal_head() */
261 put_bh(bh);
262 put_bh(bh);
263 }
264
265 if (lock_need_resched(&journal->j_list_lock)) {
266 spin_unlock(&journal->j_list_lock);
267 goto write_out_data;
268 }
269 }
270 spin_unlock(&journal->j_list_lock);
271 journal_do_submit_data(wbuf, bufs);
272}
273
163/* 274/*
164 * journal_commit_transaction 275 * journal_commit_transaction
165 * 276 *
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
313 * Now start flushing things to disk, in the order they appear 424 * Now start flushing things to disk, in the order they appear
314 * on the transaction lists. Data blocks go first. 425 * on the transaction lists. Data blocks go first.
315 */ 426 */
316
317 err = 0; 427 err = 0;
318 /* 428 journal_submit_data_buffers(journal, commit_transaction);
319 * Whenever we unlock the journal and sleep, things can get added
320 * onto ->t_sync_datalist, so we have to keep looping back to
321 * write_out_data until we *know* that the list is empty.
322 */
323 bufs = 0;
324 /*
325 * Cleanup any flushed data buffers from the data list. Even in
326 * abort mode, we want to flush this out as soon as possible.
327 */
328write_out_data:
329 cond_resched();
330 spin_lock(&journal->j_list_lock);
331
332 while (commit_transaction->t_sync_datalist) {
333 struct buffer_head *bh;
334
335 jh = commit_transaction->t_sync_datalist;
336 commit_transaction->t_sync_datalist = jh->b_tnext;
337 bh = jh2bh(jh);
338 if (buffer_locked(bh)) {
339 BUFFER_TRACE(bh, "locked");
340 if (!inverted_lock(journal, bh))
341 goto write_out_data;
342 __journal_temp_unlink_buffer(jh);
343 __journal_file_buffer(jh, commit_transaction,
344 BJ_Locked);
345 jbd_unlock_bh_state(bh);
346 if (lock_need_resched(&journal->j_list_lock)) {
347 spin_unlock(&journal->j_list_lock);
348 goto write_out_data;
349 }
350 } else {
351 if (buffer_dirty(bh)) {
352 BUFFER_TRACE(bh, "start journal writeout");
353 get_bh(bh);
354 wbuf[bufs++] = bh;
355 if (bufs == journal->j_wbufsize) {
356 jbd_debug(2, "submit %d writes\n",
357 bufs);
358 spin_unlock(&journal->j_list_lock);
359 ll_rw_block(SWRITE, bufs, wbuf);
360 journal_brelse_array(wbuf, bufs);
361 bufs = 0;
362 goto write_out_data;
363 }
364 } else {
365 BUFFER_TRACE(bh, "writeout complete: unfile");
366 if (!inverted_lock(journal, bh))
367 goto write_out_data;
368 __journal_unfile_buffer(jh);
369 jbd_unlock_bh_state(bh);
370 journal_remove_journal_head(bh);
371 put_bh(bh);
372 if (lock_need_resched(&journal->j_list_lock)) {
373 spin_unlock(&journal->j_list_lock);
374 goto write_out_data;
375 }
376 }
377 }
378 }
379
380 if (bufs) {
381 spin_unlock(&journal->j_list_lock);
382 ll_rw_block(SWRITE, bufs, wbuf);
383 journal_brelse_array(wbuf, bufs);
384 spin_lock(&journal->j_list_lock);
385 }
386 429
387 /* 430 /*
388 * Wait for all previously submitted IO to complete. 431 * Wait for all previously submitted IO to complete.
389 */ 432 */
433 spin_lock(&journal->j_list_lock);
390 while (commit_transaction->t_locked_list) { 434 while (commit_transaction->t_locked_list) {
391 struct buffer_head *bh; 435 struct buffer_head *bh;
392 436
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f66724ce443a..2fc66c3e6681 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -181,7 +181,7 @@ loop:
181 transaction->t_expires)) 181 transaction->t_expires))
182 should_sleep = 0; 182 should_sleep = 0;
183 if (journal->j_flags & JFS_UNMOUNT) 183 if (journal->j_flags & JFS_UNMOUNT)
184 should_sleep = 0; 184 should_sleep = 0;
185 if (should_sleep) { 185 if (should_sleep) {
186 spin_unlock(&journal->j_state_lock); 186 spin_unlock(&journal->j_state_lock);
187 schedule(); 187 schedule();
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
271int journal_write_metadata_buffer(transaction_t *transaction, 271int journal_write_metadata_buffer(transaction_t *transaction,
272 struct journal_head *jh_in, 272 struct journal_head *jh_in,
273 struct journal_head **jh_out, 273 struct journal_head **jh_out,
274 int blocknr) 274 unsigned long blocknr)
275{ 275{
276 int need_copy_out = 0; 276 int need_copy_out = 0;
277 int done_copy_out = 0; 277 int done_copy_out = 0;
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
578 * this is a no-op. If needed, we can use j_blk_offset - everything is 578 * this is a no-op. If needed, we can use j_blk_offset - everything is
579 * ready. 579 * ready.
580 */ 580 */
581int journal_bmap(journal_t *journal, unsigned long blocknr, 581int journal_bmap(journal_t *journal, unsigned long blocknr,
582 unsigned long *retp) 582 unsigned long *retp)
583{ 583{
584 int err = 0; 584 int err = 0;
@@ -696,13 +696,13 @@ fail:
696 * @bdev: Block device on which to create the journal 696 * @bdev: Block device on which to create the journal
697 * @fs_dev: Device which hold journalled filesystem for this journal. 697 * @fs_dev: Device which hold journalled filesystem for this journal.
698 * @start: Block nr Start of journal. 698 * @start: Block nr Start of journal.
699 * @len: Lenght of the journal in blocks. 699 * @len: Length of the journal in blocks.
700 * @blocksize: blocksize of journalling device 700 * @blocksize: blocksize of journalling device
701 * @returns: a newly created journal_t * 701 * @returns: a newly created journal_t *
702 * 702 *
703 * journal_init_dev creates a journal which maps a fixed contiguous 703 * journal_init_dev creates a journal which maps a fixed contiguous
704 * range of blocks on an arbitrary block device. 704 * range of blocks on an arbitrary block device.
705 * 705 *
706 */ 706 */
707journal_t * journal_init_dev(struct block_device *bdev, 707journal_t * journal_init_dev(struct block_device *bdev,
708 struct block_device *fs_dev, 708 struct block_device *fs_dev,
@@ -739,11 +739,11 @@ journal_t * journal_init_dev(struct block_device *bdev,
739 739
740 return journal; 740 return journal;
741} 741}
742 742
743/** 743/**
744 * journal_t * journal_init_inode () - creates a journal which maps to a inode. 744 * journal_t * journal_init_inode () - creates a journal which maps to a inode.
745 * @inode: An inode to create the journal in 745 * @inode: An inode to create the journal in
746 * 746 *
747 * journal_init_inode creates a journal which maps an on-disk inode as 747 * journal_init_inode creates a journal which maps an on-disk inode as
748 * the journal. The inode must exist already, must support bmap() and 748 * the journal. The inode must exist already, must support bmap() and
749 * must have all data blocks preallocated. 749 * must have all data blocks preallocated.
@@ -763,7 +763,7 @@ journal_t * journal_init_inode (struct inode *inode)
763 journal->j_inode = inode; 763 journal->j_inode = inode;
764 jbd_debug(1, 764 jbd_debug(1,
765 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 765 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
766 journal, inode->i_sb->s_id, inode->i_ino, 766 journal, inode->i_sb->s_id, inode->i_ino,
767 (long long) inode->i_size, 767 (long long) inode->i_size,
768 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 768 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
769 769
@@ -798,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode)
798 return journal; 798 return journal;
799} 799}
800 800
801/* 801/*
802 * If the journal init or create aborts, we need to mark the journal 802 * If the journal init or create aborts, we need to mark the journal
803 * superblock as being NULL to prevent the journal destroy from writing 803 * superblock as being NULL to prevent the journal destroy from writing
804 * back a bogus superblock. 804 * back a bogus superblock.
805 */ 805 */
806static void journal_fail_superblock (journal_t *journal) 806static void journal_fail_superblock (journal_t *journal)
807{ 807{
@@ -820,7 +820,7 @@ static void journal_fail_superblock (journal_t *journal)
820static int journal_reset(journal_t *journal) 820static int journal_reset(journal_t *journal)
821{ 821{
822 journal_superblock_t *sb = journal->j_superblock; 822 journal_superblock_t *sb = journal->j_superblock;
823 unsigned int first, last; 823 unsigned long first, last;
824 824
825 first = be32_to_cpu(sb->s_first); 825 first = be32_to_cpu(sb->s_first);
826 last = be32_to_cpu(sb->s_maxlen); 826 last = be32_to_cpu(sb->s_maxlen);
@@ -844,13 +844,13 @@ static int journal_reset(journal_t *journal)
844 return 0; 844 return 0;
845} 845}
846 846
847/** 847/**
848 * int journal_create() - Initialise the new journal file 848 * int journal_create() - Initialise the new journal file
849 * @journal: Journal to create. This structure must have been initialised 849 * @journal: Journal to create. This structure must have been initialised
850 * 850 *
851 * Given a journal_t structure which tells us which disk blocks we can 851 * Given a journal_t structure which tells us which disk blocks we can
852 * use, create a new journal superblock and initialise all of the 852 * use, create a new journal superblock and initialise all of the
853 * journal fields from scratch. 853 * journal fields from scratch.
854 **/ 854 **/
855int journal_create(journal_t *journal) 855int journal_create(journal_t *journal)
856{ 856{
@@ -915,7 +915,7 @@ int journal_create(journal_t *journal)
915 return journal_reset(journal); 915 return journal_reset(journal);
916} 916}
917 917
918/** 918/**
919 * void journal_update_superblock() - Update journal sb on disk. 919 * void journal_update_superblock() - Update journal sb on disk.
920 * @journal: The journal to update. 920 * @journal: The journal to update.
921 * @wait: Set to '0' if you don't want to wait for IO completion. 921 * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -939,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait)
939 journal->j_transaction_sequence) { 939 journal->j_transaction_sequence) {
940 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 940 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
941 "(start %ld, seq %d, errno %d)\n", 941 "(start %ld, seq %d, errno %d)\n",
942 journal->j_tail, journal->j_tail_sequence, 942 journal->j_tail, journal->j_tail_sequence,
943 journal->j_errno); 943 journal->j_errno);
944 goto out; 944 goto out;
945 } 945 }
@@ -1062,7 +1062,7 @@ static int load_superblock(journal_t *journal)
1062/** 1062/**
1063 * int journal_load() - Read journal from disk. 1063 * int journal_load() - Read journal from disk.
1064 * @journal: Journal to act on. 1064 * @journal: Journal to act on.
1065 * 1065 *
1066 * Given a journal_t structure which tells us which disk blocks contain 1066 * Given a journal_t structure which tells us which disk blocks contain
1067 * a journal, read the journal from disk to initialise the in-memory 1067 * a journal, read the journal from disk to initialise the in-memory
1068 * structures. 1068 * structures.
@@ -1094,7 +1094,7 @@ int journal_load(journal_t *journal)
1094 /* 1094 /*
1095 * Create a slab for this blocksize 1095 * Create a slab for this blocksize
1096 */ 1096 */
1097 err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); 1097 err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
1098 if (err) 1098 if (err)
1099 return err; 1099 return err;
1100 1100
@@ -1172,9 +1172,9 @@ void journal_destroy(journal_t *journal)
1172 * @compat: bitmask of compatible features 1172 * @compat: bitmask of compatible features
1173 * @ro: bitmask of features that force read-only mount 1173 * @ro: bitmask of features that force read-only mount
1174 * @incompat: bitmask of incompatible features 1174 * @incompat: bitmask of incompatible features
1175 * 1175 *
1176 * Check whether the journal uses all of a given set of 1176 * Check whether the journal uses all of a given set of
1177 * features. Return true (non-zero) if it does. 1177 * features. Return true (non-zero) if it does.
1178 **/ 1178 **/
1179 1179
1180int journal_check_used_features (journal_t *journal, unsigned long compat, 1180int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1203,7 +1203,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
1203 * @compat: bitmask of compatible features 1203 * @compat: bitmask of compatible features
1204 * @ro: bitmask of features that force read-only mount 1204 * @ro: bitmask of features that force read-only mount
1205 * @incompat: bitmask of incompatible features 1205 * @incompat: bitmask of incompatible features
1206 * 1206 *
1207 * Check whether the journaling code supports the use of 1207 * Check whether the journaling code supports the use of
1208 * all of a given set of features on this journal. Return true 1208 * all of a given set of features on this journal. Return true
1209 * (non-zero) if it can. */ 1209 * (non-zero) if it can. */
@@ -1241,7 +1241,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
1241 * @incompat: bitmask of incompatible features 1241 * @incompat: bitmask of incompatible features
1242 * 1242 *
1243 * Mark a given journal feature as present on the 1243 * Mark a given journal feature as present on the
1244 * superblock. Returns true if the requested features could be set. 1244 * superblock. Returns true if the requested features could be set.
1245 * 1245 *
1246 */ 1246 */
1247 1247
@@ -1327,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
1327/** 1327/**
1328 * int journal_flush () - Flush journal 1328 * int journal_flush () - Flush journal
1329 * @journal: Journal to act on. 1329 * @journal: Journal to act on.
1330 * 1330 *
1331 * Flush all data for a given journal to disk and empty the journal. 1331 * Flush all data for a given journal to disk and empty the journal.
1332 * Filesystems can use this when remounting readonly to ensure that 1332 * Filesystems can use this when remounting readonly to ensure that
1333 * recovery does not need to happen on remount. 1333 * recovery does not need to happen on remount.
@@ -1394,7 +1394,7 @@ int journal_flush(journal_t *journal)
1394 * int journal_wipe() - Wipe journal contents 1394 * int journal_wipe() - Wipe journal contents
1395 * @journal: Journal to act on. 1395 * @journal: Journal to act on.
1396 * @write: flag (see below) 1396 * @write: flag (see below)
1397 * 1397 *
1398 * Wipe out all of the contents of a journal, safely. This will produce 1398 * Wipe out all of the contents of a journal, safely. This will produce
1399 * a warning if the journal contains any valid recovery information. 1399 * a warning if the journal contains any valid recovery information.
1400 * Must be called between journal_init_*() and journal_load(). 1400 * Must be called between journal_init_*() and journal_load().
@@ -1449,7 +1449,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
1449 1449
1450/* 1450/*
1451 * Journal abort has very specific semantics, which we describe 1451 * Journal abort has very specific semantics, which we describe
1452 * for journal abort. 1452 * for journal abort.
1453 * 1453 *
1454 * Two internal function, which provide abort to te jbd layer 1454 * Two internal function, which provide abort to te jbd layer
1455 * itself are here. 1455 * itself are here.
@@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1504 * Perform a complete, immediate shutdown of the ENTIRE 1504 * Perform a complete, immediate shutdown of the ENTIRE
1505 * journal (not of a single transaction). This operation cannot be 1505 * journal (not of a single transaction). This operation cannot be
1506 * undone without closing and reopening the journal. 1506 * undone without closing and reopening the journal.
1507 * 1507 *
1508 * The journal_abort function is intended to support higher level error 1508 * The journal_abort function is intended to support higher level error
1509 * recovery mechanisms such as the ext2/ext3 remount-readonly error 1509 * recovery mechanisms such as the ext2/ext3 remount-readonly error
1510 * mode. 1510 * mode.
@@ -1538,7 +1538,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1538 * supply an errno; a null errno implies that absolutely no further 1538 * supply an errno; a null errno implies that absolutely no further
1539 * writes are done to the journal (unless there are any already in 1539 * writes are done to the journal (unless there are any already in
1540 * progress). 1540 * progress).
1541 * 1541 *
1542 */ 1542 */
1543 1543
1544void journal_abort(journal_t *journal, int errno) 1544void journal_abort(journal_t *journal, int errno)
@@ -1546,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno)
1546 __journal_abort_soft(journal, errno); 1546 __journal_abort_soft(journal, errno);
1547} 1547}
1548 1548
1549/** 1549/**
1550 * int journal_errno () - returns the journal's error state. 1550 * int journal_errno () - returns the journal's error state.
1551 * @journal: journal to examine. 1551 * @journal: journal to examine.
1552 * 1552 *
@@ -1570,7 +1570,7 @@ int journal_errno(journal_t *journal)
1570 return err; 1570 return err;
1571} 1571}
1572 1572
1573/** 1573/**
1574 * int journal_clear_err () - clears the journal's error state 1574 * int journal_clear_err () - clears the journal's error state
1575 * @journal: journal to act on. 1575 * @journal: journal to act on.
1576 * 1576 *
@@ -1590,7 +1590,7 @@ int journal_clear_err(journal_t *journal)
1590 return err; 1590 return err;
1591} 1591}
1592 1592
1593/** 1593/**
1594 * void journal_ack_err() - Ack journal err. 1594 * void journal_ack_err() - Ack journal err.
1595 * @journal: journal to act on. 1595 * @journal: journal to act on.
1596 * 1596 *
@@ -1612,7 +1612,7 @@ int journal_blocks_per_page(struct inode *inode)
1612 1612
1613/* 1613/*
1614 * Simple support for retrying memory allocations. Introduced to help to 1614 * Simple support for retrying memory allocations. Introduced to help to
1615 * debug different VM deadlock avoidance strategies. 1615 * debug different VM deadlock avoidance strategies.
1616 */ 1616 */
1617void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) 1617void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
1618{ 1618{
@@ -2047,13 +2047,7 @@ static int __init journal_init(void)
2047{ 2047{
2048 int ret; 2048 int ret;
2049 2049
2050/* Static check for data structure consistency. There's no code 2050 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2051 * invoked --- we'll just get a linker failure if things aren't right.
2052 */
2053 extern void journal_bad_superblock_size(void);
2054 if (sizeof(struct journal_superblock_s) != 1024)
2055 journal_bad_superblock_size();
2056
2057 2051
2058 ret = journal_init_caches(); 2052 ret = journal_init_caches();
2059 if (ret != 0) 2053 if (ret != 0)
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index de5bafb4e853..445eed6ce5dc 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/recovery.c 2 * linux/fs/recovery.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Journal recovery routines for the generic filesystem journaling code; 12 * Journal recovery routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 13 * part of the ext2fs journaling system.
14 */ 14 */
15 15
16#ifndef __KERNEL__ 16#ifndef __KERNEL__
@@ -25,9 +25,9 @@
25 25
26/* 26/*
27 * Maintain information about the progress of the recovery job, so that 27 * Maintain information about the progress of the recovery job, so that
28 * the different passes can carry information between them. 28 * the different passes can carry information between them.
29 */ 29 */
30struct recovery_info 30struct recovery_info
31{ 31{
32 tid_t start_transaction; 32 tid_t start_transaction;
33 tid_t end_transaction; 33 tid_t end_transaction;
@@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
116 err = 0; 116 err = 0;
117 117
118failed: 118failed:
119 if (nbufs) 119 if (nbufs)
120 journal_brelse_array(bufs, nbufs); 120 journal_brelse_array(bufs, nbufs);
121 return err; 121 return err;
122} 122}
@@ -128,7 +128,7 @@ failed:
128 * Read a block from the journal 128 * Read a block from the journal
129 */ 129 */
130 130
131static int jread(struct buffer_head **bhp, journal_t *journal, 131static int jread(struct buffer_head **bhp, journal_t *journal,
132 unsigned int offset) 132 unsigned int offset)
133{ 133{
134 int err; 134 int err;
@@ -212,14 +212,14 @@ do { \
212/** 212/**
213 * journal_recover - recovers an on-disk journal 213 * journal_recover - recovers an on-disk journal
214 * @journal: the journal to recover 214 * @journal: the journal to recover
215 * 215 *
216 * The primary function for recovering the log contents when mounting a 216 * The primary function for recovering the log contents when mounting a
217 * journaled device. 217 * journaled device.
218 * 218 *
219 * Recovery is done in three passes. In the first pass, we look for the 219 * Recovery is done in three passes. In the first pass, we look for the
220 * end of the log. In the second, we assemble the list of revoke 220 * end of the log. In the second, we assemble the list of revoke
221 * blocks. In the third and final pass, we replay any un-revoked blocks 221 * blocks. In the third and final pass, we replay any un-revoked blocks
222 * in the log. 222 * in the log.
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
@@ -231,10 +231,10 @@ int journal_recover(journal_t *journal)
231 memset(&info, 0, sizeof(info)); 231 memset(&info, 0, sizeof(info));
232 sb = journal->j_superblock; 232 sb = journal->j_superblock;
233 233
234 /* 234 /*
235 * The journal superblock's s_start field (the current log head) 235 * The journal superblock's s_start field (the current log head)
236 * is always zero if, and only if, the journal was cleanly 236 * is always zero if, and only if, the journal was cleanly
237 * unmounted. 237 * unmounted.
238 */ 238 */
239 239
240 if (!sb->s_start) { 240 if (!sb->s_start) {
@@ -253,7 +253,7 @@ int journal_recover(journal_t *journal)
253 jbd_debug(0, "JBD: recovery, exit status %d, " 253 jbd_debug(0, "JBD: recovery, exit status %d, "
254 "recovered transactions %u to %u\n", 254 "recovered transactions %u to %u\n",
255 err, info.start_transaction, info.end_transaction); 255 err, info.start_transaction, info.end_transaction);
256 jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 256 jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
258 258
259 /* Restart the log at the next transaction ID, thus invalidating 259 /* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@ int journal_recover(journal_t *journal)
268/** 268/**
269 * journal_skip_recovery - Start journal and wipe existing records 269 * journal_skip_recovery - Start journal and wipe existing records
270 * @journal: journal to startup 270 * @journal: journal to startup
271 * 271 *
272 * Locate any valid recovery information from the journal and set up the 272 * Locate any valid recovery information from the journal and set up the
273 * journal structures in memory to ignore it (presumably because the 273 * journal structures in memory to ignore it (presumably because the
274 * caller has evidence that it is out of date). 274 * caller has evidence that it is out of date).
275 * This function doesn't appear to be exported. 275 * This function doesn't appear to be exported.
276 * 276 *
277 * We perform one pass over the journal to allow us to tell the user how 277 * We perform one pass over the journal to allow us to tell the user how
278 * much recovery information is being erased, and to let us initialise 278 * much recovery information is being erased, and to let us initialise
279 * the journal transaction sequence numbers to the next unused ID. 279 * the journal transaction sequence numbers to the next unused ID.
280 */ 280 */
281int journal_skip_recovery(journal_t *journal) 281int journal_skip_recovery(journal_t *journal)
282{ 282{
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
297#ifdef CONFIG_JBD_DEBUG 297#ifdef CONFIG_JBD_DEBUG
298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
299#endif 299#endif
300 jbd_debug(0, 300 jbd_debug(0,
301 "JBD: ignoring %d transaction%s from the journal.\n", 301 "JBD: ignoring %d transaction%s from the journal.\n",
302 dropped, (dropped == 1) ? "" : "s"); 302 dropped, (dropped == 1) ? "" : "s");
303 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
314 unsigned long next_log_block; 314 unsigned long next_log_block;
315 int err, success = 0; 315 int err, success = 0;
316 journal_superblock_t * sb; 316 journal_superblock_t * sb;
317 journal_header_t * tmp; 317 journal_header_t * tmp;
318 struct buffer_head * bh; 318 struct buffer_head * bh;
319 unsigned int sequence; 319 unsigned int sequence;
320 int blocktype; 320 int blocktype;
@@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal,
324 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) 324 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
325 / sizeof(journal_block_tag_t)); 325 / sizeof(journal_block_tag_t));
326 326
327 /* 327 /*
328 * First thing is to establish what we expect to find in the log 328 * First thing is to establish what we expect to find in the log
329 * (in terms of transaction IDs), and where (in terms of log 329 * (in terms of transaction IDs), and where (in terms of log
330 * block offsets): query the superblock. 330 * block offsets): query the superblock.
331 */ 331 */
332 332
333 sb = journal->j_superblock; 333 sb = journal->j_superblock;
@@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal,
344 * Now we walk through the log, transaction by transaction, 344 * Now we walk through the log, transaction by transaction,
345 * making sure that each transaction has a commit block in the 345 * making sure that each transaction has a commit block in the
346 * expected place. Each complete transaction gets replayed back 346 * expected place. Each complete transaction gets replayed back
347 * into the main filesystem. 347 * into the main filesystem.
348 */ 348 */
349 349
350 while (1) { 350 while (1) {
@@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal,
379 next_log_block++; 379 next_log_block++;
380 wrap(journal, next_log_block); 380 wrap(journal, next_log_block);
381 381
382 /* What kind of buffer is it? 382 /* What kind of buffer is it?
383 * 383 *
384 * If it is a descriptor block, check that it has the 384 * If it is a descriptor block, check that it has the
385 * expected sequence number. Otherwise, we're all done 385 * expected sequence number. Otherwise, we're all done
386 * here. */ 386 * here. */
@@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal,
394 394
395 blocktype = be32_to_cpu(tmp->h_blocktype); 395 blocktype = be32_to_cpu(tmp->h_blocktype);
396 sequence = be32_to_cpu(tmp->h_sequence); 396 sequence = be32_to_cpu(tmp->h_sequence);
397 jbd_debug(3, "Found magic %d, sequence %d\n", 397 jbd_debug(3, "Found magic %d, sequence %d\n",
398 blocktype, sequence); 398 blocktype, sequence);
399 399
400 if (sequence != next_commit_ID) { 400 if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal,
438 /* Recover what we can, but 438 /* Recover what we can, but
439 * report failure at the end. */ 439 * report failure at the end. */
440 success = err; 440 success = err;
441 printk (KERN_ERR 441 printk (KERN_ERR
442 "JBD: IO error %d recovering " 442 "JBD: IO error %d recovering "
443 "block %ld in log\n", 443 "block %ld in log\n",
444 err, io_block); 444 err, io_block);
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
452 * revoked, then we're all done 452 * revoked, then we're all done
453 * here. */ 453 * here. */
454 if (journal_test_revoke 454 if (journal_test_revoke
455 (journal, blocknr, 455 (journal, blocknr,
456 next_commit_ID)) { 456 next_commit_ID)) {
457 brelse(obh); 457 brelse(obh);
458 ++info->nr_revoke_hits; 458 ++info->nr_revoke_hits;
@@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal,
465 blocknr, 465 blocknr,
466 journal->j_blocksize); 466 journal->j_blocksize);
467 if (nbh == NULL) { 467 if (nbh == NULL) {
468 printk(KERN_ERR 468 printk(KERN_ERR
469 "JBD: Out of memory " 469 "JBD: Out of memory "
470 "during recovery.\n"); 470 "during recovery.\n");
471 err = -ENOMEM; 471 err = -ENOMEM;
@@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal,
537 } 537 }
538 538
539 done: 539 done:
540 /* 540 /*
541 * We broke out of the log scan loop: either we came to the 541 * We broke out of the log scan loop: either we came to the
542 * known end of the log or we found an unexpected block in the 542 * known end of the log or we found an unexpected block in the
543 * log. If the latter happened, then we know that the "current" 543 * log. If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal,
567 567
568/* Scan a revoke record, marking all blocks mentioned as revoked. */ 568/* Scan a revoke record, marking all blocks mentioned as revoked. */
569 569
570static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 570static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
571 tid_t sequence, struct recovery_info *info) 571 tid_t sequence, struct recovery_info *info)
572{ 572{
573 journal_revoke_header_t *header; 573 journal_revoke_header_t *header;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index a56144183462..c532429d8d9b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/revoke.c 2 * linux/fs/revoke.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
5 * 5 *
6 * Copyright 2000 Red Hat corp --- All Rights Reserved 6 * Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
15 * Revoke is the mechanism used to prevent old log records for deleted 15 * Revoke is the mechanism used to prevent old log records for deleted
16 * metadata from being replayed on top of newer data using the same 16 * metadata from being replayed on top of newer data using the same
17 * blocks. The revoke mechanism is used in two separate places: 17 * blocks. The revoke mechanism is used in two separate places:
18 * 18 *
19 * + Commit: during commit we write the entire list of the current 19 * + Commit: during commit we write the entire list of the current
20 * transaction's revoked blocks to the journal 20 * transaction's revoked blocks to the journal
21 * 21 *
22 * + Recovery: during recovery we record the transaction ID of all 22 * + Recovery: during recovery we record the transaction ID of all
23 * revoked blocks. If there are multiple revoke records in the log 23 * revoked blocks. If there are multiple revoke records in the log
24 * for a single block, only the last one counts, and if there is a log 24 * for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
29 * single transaction: 29 * single transaction:
30 * 30 *
31 * Block is revoked and then journaled: 31 * Block is revoked and then journaled:
32 * The desired end result is the journaling of the new block, so we 32 * The desired end result is the journaling of the new block, so we
33 * cancel the revoke before the transaction commits. 33 * cancel the revoke before the transaction commits.
34 * 34 *
35 * Block is journaled and then revoked: 35 * Block is journaled and then revoked:
@@ -41,7 +41,7 @@
41 * transaction must have happened after the block was journaled and so 41 * transaction must have happened after the block was journaled and so
42 * the revoke must take precedence. 42 * the revoke must take precedence.
43 * 43 *
44 * Block is revoked and then written as data: 44 * Block is revoked and then written as data:
45 * The data write is allowed to succeed, but the revoke is _not_ 45 * The data write is allowed to succeed, but the revoke is _not_
46 * cancelled. We still need to prevent old log records from 46 * cancelled. We still need to prevent old log records from
47 * overwriting the new data. We don't even need to clear the revoke 47 * overwriting the new data. We don't even need to clear the revoke
@@ -54,7 +54,7 @@
54 * buffer has not been revoked, and cancel_revoke 54 * buffer has not been revoked, and cancel_revoke
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 */ 58 */
59 59
60#ifndef __KERNEL__ 60#ifndef __KERNEL__
@@ -77,7 +77,7 @@ static kmem_cache_t *revoke_table_cache;
77 journal replay, this involves recording the transaction ID of the 77 journal replay, this involves recording the transaction ID of the
78 last transaction to revoke this block. */ 78 last transaction to revoke this block. */
79 79
80struct jbd_revoke_record_s 80struct jbd_revoke_record_s
81{ 81{
82 struct list_head hash; 82 struct list_head hash;
83 tid_t sequence; /* Used for recovery only */ 83 tid_t sequence; /* Used for recovery only */
@@ -90,8 +90,8 @@ struct jbd_revoke_table_s
90{ 90{
91 /* It is conceivable that we might want a larger hash table 91 /* It is conceivable that we might want a larger hash table
92 * for recovery. Must be a power of two. */ 92 * for recovery. Must be a power of two. */
93 int hash_size; 93 int hash_size;
94 int hash_shift; 94 int hash_shift;
95 struct list_head *hash_table; 95 struct list_head *hash_table;
96}; 96};
97 97
@@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal)
301 301
302#ifdef __KERNEL__ 302#ifdef __KERNEL__
303 303
304/* 304/*
305 * journal_revoke: revoke a given buffer_head from the journal. This 305 * journal_revoke: revoke a given buffer_head from the journal. This
306 * prevents the block from being replayed during recovery if we take a 306 * prevents the block from being replayed during recovery if we take a
307 * crash after this current transaction commits. Any subsequent 307 * crash after this current transaction commits. Any subsequent
308 * metadata writes of the buffer in this transaction cancel the 308 * metadata writes of the buffer in this transaction cancel the
309 * revoke. 309 * revoke.
310 * 310 *
311 * Note that this call may block --- it is up to the caller to make 311 * Note that this call may block --- it is up to the caller to make
312 * sure that there are no further calls to journal_write_metadata 312 * sure that there are no further calls to journal_write_metadata
313 * before the revoke is complete. In ext3, this implies calling the 313 * before the revoke is complete. In ext3, this implies calling the
314 * revoke before clearing the block bitmap when we are deleting 314 * revoke before clearing the block bitmap when we are deleting
315 * metadata. 315 * metadata.
316 * 316 *
317 * Revoke performs a journal_forget on any buffer_head passed in as a 317 * Revoke performs a journal_forget on any buffer_head passed in as a
318 * parameter, but does _not_ forget the buffer_head if the bh was only 318 * parameter, but does _not_ forget the buffer_head if the bh was only
319 * found implicitly. 319 * found implicitly.
320 * 320 *
321 * bh_in may not be a journalled buffer - it may have come off 321 * bh_in may not be a journalled buffer - it may have come off
322 * the hash tables without an attached journal_head. 322 * the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal)
325 * by one. 325 * by one.
326 */ 326 */
327 327
328int journal_revoke(handle_t *handle, unsigned long blocknr, 328int journal_revoke(handle_t *handle, unsigned long blocknr,
329 struct buffer_head *bh_in) 329 struct buffer_head *bh_in)
330{ 330{
331 struct buffer_head *bh = NULL; 331 struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal)
487 else 487 else
488 journal->j_revoke = journal->j_revoke_table[0]; 488 journal->j_revoke = journal->j_revoke_table[0];
489 489
490 for (i = 0; i < journal->j_revoke->hash_size; i++) 490 for (i = 0; i < journal->j_revoke->hash_size; i++)
491 INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); 491 INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
492} 492}
493 493
@@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
498 * Called with the journal lock held. 498 * Called with the journal lock held.
499 */ 499 */
500 500
501void journal_write_revoke_records(journal_t *journal, 501void journal_write_revoke_records(journal_t *journal,
502 transaction_t *transaction) 502 transaction_t *transaction)
503{ 503{
504 struct journal_head *descriptor; 504 struct journal_head *descriptor;
@@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal,
507 struct list_head *hash_list; 507 struct list_head *hash_list;
508 int i, offset, count; 508 int i, offset, count;
509 509
510 descriptor = NULL; 510 descriptor = NULL;
511 offset = 0; 511 offset = 0;
512 count = 0; 512 count = 0;
513 513
@@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal,
519 hash_list = &revoke->hash_table[i]; 519 hash_list = &revoke->hash_table[i];
520 520
521 while (!list_empty(hash_list)) { 521 while (!list_empty(hash_list)) {
522 record = (struct jbd_revoke_record_s *) 522 record = (struct jbd_revoke_record_s *)
523 hash_list->next; 523 hash_list->next;
524 write_one_revoke_record(journal, transaction, 524 write_one_revoke_record(journal, transaction,
525 &descriptor, &offset, 525 &descriptor, &offset,
526 record); 526 record);
527 count++; 527 count++;
528 list_del(&record->hash); 528 list_del(&record->hash);
@@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal,
534 jbd_debug(1, "Wrote %d revoke records\n", count); 534 jbd_debug(1, "Wrote %d revoke records\n", count);
535} 535}
536 536
537/* 537/*
538 * Write out one revoke record. We need to create a new descriptor 538 * Write out one revoke record. We need to create a new descriptor
539 * block if the old one is full or if we have not already created one. 539 * block if the old one is full or if we have not already created one.
540 */ 540 */
541 541
542static void write_one_revoke_record(journal_t *journal, 542static void write_one_revoke_record(journal_t *journal,
543 transaction_t *transaction, 543 transaction_t *transaction,
544 struct journal_head **descriptorp, 544 struct journal_head **descriptorp,
545 int *offsetp, 545 int *offsetp,
546 struct jbd_revoke_record_s *record) 546 struct jbd_revoke_record_s *record)
547{ 547{
@@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal,
584 *descriptorp = descriptor; 584 *descriptorp = descriptor;
585 } 585 }
586 586
587 * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = 587 * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
588 cpu_to_be32(record->blocknr); 588 cpu_to_be32(record->blocknr);
589 offset += 4; 589 offset += 4;
590 *offsetp = offset; 590 *offsetp = offset;
591} 591}
592 592
593/* 593/*
594 * Flush a revoke descriptor out to the journal. If we are aborting, 594 * Flush a revoke descriptor out to the journal. If we are aborting,
595 * this is a noop; otherwise we are generating a buffer which needs to 595 * this is a noop; otherwise we are generating a buffer which needs to
596 * be waited for during commit, so it has to go onto the appropriate 596 * be waited for during commit, so it has to go onto the appropriate
597 * journal buffer list. 597 * journal buffer list.
598 */ 598 */
599 599
600static void flush_descriptor(journal_t *journal, 600static void flush_descriptor(journal_t *journal,
601 struct journal_head *descriptor, 601 struct journal_head *descriptor,
602 int offset) 602 int offset)
603{ 603{
604 journal_revoke_header_t *header; 604 journal_revoke_header_t *header;
@@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal,
618} 618}
619#endif 619#endif
620 620
621/* 621/*
622 * Revoke support for recovery. 622 * Revoke support for recovery.
623 * 623 *
624 * Recovery needs to be able to: 624 * Recovery needs to be able to:
@@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal,
629 * check whether a given block in a given transaction should be replayed 629 * check whether a given block in a given transaction should be replayed
630 * (ie. has not been revoked by a revoke record in that or a subsequent 630 * (ie. has not been revoked by a revoke record in that or a subsequent
631 * transaction) 631 * transaction)
632 * 632 *
633 * empty the revoke table after recovery. 633 * empty the revoke table after recovery.
634 */ 634 */
635 635
@@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal,
637 * First, setting revoke records. We create a new revoke record for 637 * First, setting revoke records. We create a new revoke record for
638 * every block ever revoked in the log as we scan it for recovery, and 638 * every block ever revoked in the log as we scan it for recovery, and
639 * we update the existing records if we find multiple revokes for a 639 * we update the existing records if we find multiple revokes for a
640 * single block. 640 * single block.
641 */ 641 */
642 642
643int journal_set_revoke(journal_t *journal, 643int journal_set_revoke(journal_t *journal,
644 unsigned long blocknr, 644 unsigned long blocknr,
645 tid_t sequence) 645 tid_t sequence)
646{ 646{
647 struct jbd_revoke_record_s *record; 647 struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal,
653 if (tid_gt(sequence, record->sequence)) 653 if (tid_gt(sequence, record->sequence))
654 record->sequence = sequence; 654 record->sequence = sequence;
655 return 0; 655 return 0;
656 } 656 }
657 return insert_revoke_hash(journal, blocknr, sequence); 657 return insert_revoke_hash(journal, blocknr, sequence);
658} 658}
659 659
660/* 660/*
661 * Test revoke records. For a given block referenced in the log, has 661 * Test revoke records. For a given block referenced in the log, has
662 * that block been revoked? A revoke record with a given transaction 662 * that block been revoked? A revoke record with a given transaction
663 * sequence number revokes all blocks in that transaction and earlier 663 * sequence number revokes all blocks in that transaction and earlier
664 * ones, but later transactions still need replayed. 664 * ones, but later transactions still need replayed.
665 */ 665 */
666 666
667int journal_test_revoke(journal_t *journal, 667int journal_test_revoke(journal_t *journal,
668 unsigned long blocknr, 668 unsigned long blocknr,
669 tid_t sequence) 669 tid_t sequence)
670{ 670{
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f5169a96260e..e1b3c8af4d17 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/transaction.c 2 * linux/fs/transaction.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
6 * Copyright 1998 Red Hat corp --- All Rights Reserved 6 * Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Generic filesystem transaction handling code; part of the ext2fs 12 * Generic filesystem transaction handling code; part of the ext2fs
13 * journaling system. 13 * journaling system.
14 * 14 *
15 * This file manages transactions (compound commits managed by the 15 * This file manages transactions (compound commits managed by the
16 * journaling code) and handles (individual atomic operations by the 16 * journaling code) and handles (individual atomic operations by the
@@ -74,7 +74,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
74 * start_this_handle: Given a handle, deal with any locking or stalling 74 * start_this_handle: Given a handle, deal with any locking or stalling
75 * needed to make sure that there is enough journal space for the handle 75 * needed to make sure that there is enough journal space for the handle
76 * to begin. Attach the handle to a transaction and set up the 76 * to begin. Attach the handle to a transaction and set up the
77 * transaction's buffer credits. 77 * transaction's buffer credits.
78 */ 78 */
79 79
80static int start_this_handle(journal_t *journal, handle_t *handle) 80static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +117,7 @@ repeat_locked:
117 if (is_journal_aborted(journal) || 117 if (is_journal_aborted(journal) ||
118 (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { 118 (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
119 spin_unlock(&journal->j_state_lock); 119 spin_unlock(&journal->j_state_lock);
120 ret = -EROFS; 120 ret = -EROFS;
121 goto out; 121 goto out;
122 } 122 }
123 123
@@ -182,7 +182,7 @@ repeat_locked:
182 goto repeat; 182 goto repeat;
183 } 183 }
184 184
185 /* 185 /*
186 * The commit code assumes that it can get enough log space 186 * The commit code assumes that it can get enough log space
187 * without forcing a checkpoint. This is *critical* for 187 * without forcing a checkpoint. This is *critical* for
188 * correctness: a checkpoint of a buffer which is also 188 * correctness: a checkpoint of a buffer which is also
@@ -191,7 +191,7 @@ repeat_locked:
191 * 191 *
192 * We must therefore ensure the necessary space in the journal 192 * We must therefore ensure the necessary space in the journal
193 * *before* starting to dirty potentially checkpointed buffers 193 * *before* starting to dirty potentially checkpointed buffers
194 * in the new transaction. 194 * in the new transaction.
195 * 195 *
196 * The worst part is, any transaction currently committing can 196 * The worst part is, any transaction currently committing can
197 * reduce the free space arbitrarily. Be careful to account for 197 * reduce the free space arbitrarily. Be careful to account for
@@ -246,13 +246,13 @@ static handle_t *new_handle(int nblocks)
246} 246}
247 247
248/** 248/**
249 * handle_t *journal_start() - Obtain a new handle. 249 * handle_t *journal_start() - Obtain a new handle.
250 * @journal: Journal to start transaction on. 250 * @journal: Journal to start transaction on.
251 * @nblocks: number of block buffer we might modify 251 * @nblocks: number of block buffer we might modify
252 * 252 *
253 * We make sure that the transaction can guarantee at least nblocks of 253 * We make sure that the transaction can guarantee at least nblocks of
254 * modified buffers in the log. We block until the log can guarantee 254 * modified buffers in the log. We block until the log can guarantee
255 * that much space. 255 * that much space.
256 * 256 *
257 * This function is visible to journal users (like ext3fs), so is not 257 * This function is visible to journal users (like ext3fs), so is not
258 * called with the journal already locked. 258 * called with the journal already locked.
@@ -292,11 +292,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
292 * int journal_extend() - extend buffer credits. 292 * int journal_extend() - extend buffer credits.
293 * @handle: handle to 'extend' 293 * @handle: handle to 'extend'
294 * @nblocks: nr blocks to try to extend by. 294 * @nblocks: nr blocks to try to extend by.
295 * 295 *
296 * Some transactions, such as large extends and truncates, can be done 296 * Some transactions, such as large extends and truncates, can be done
297 * atomically all at once or in several stages. The operation requests 297 * atomically all at once or in several stages. The operation requests
298 * a credit for a number of buffer modifications in advance, but can 298 * a credit for a number of buffer modifications in advance, but can
299 * extend its credit if it needs more. 299 * extend its credit if it needs more.
300 * 300 *
301 * journal_extend tries to give the running handle more buffer credits. 301 * journal_extend tries to give the running handle more buffer credits.
302 * It does not guarantee that allocation - this is a best-effort only. 302 * It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +363,7 @@ out:
363 * int journal_restart() - restart a handle . 363 * int journal_restart() - restart a handle .
364 * @handle: handle to restart 364 * @handle: handle to restart
365 * @nblocks: nr credits requested 365 * @nblocks: nr credits requested
366 * 366 *
367 * Restart a handle for a multi-transaction filesystem 367 * Restart a handle for a multi-transaction filesystem
368 * operation. 368 * operation.
369 * 369 *
@@ -462,7 +462,7 @@ void journal_lock_updates(journal_t *journal)
462/** 462/**
463 * void journal_unlock_updates (journal_t* journal) - release barrier 463 * void journal_unlock_updates (journal_t* journal) - release barrier
464 * @journal: Journal to release the barrier on. 464 * @journal: Journal to release the barrier on.
465 * 465 *
466 * Release a transaction barrier obtained with journal_lock_updates(). 466 * Release a transaction barrier obtained with journal_lock_updates().
467 * 467 *
468 * Should be called without the journal lock held. 468 * Should be called without the journal lock held.
@@ -547,8 +547,8 @@ repeat:
547 jbd_lock_bh_state(bh); 547 jbd_lock_bh_state(bh);
548 548
549 /* We now hold the buffer lock so it is safe to query the buffer 549 /* We now hold the buffer lock so it is safe to query the buffer
550 * state. Is the buffer dirty? 550 * state. Is the buffer dirty?
551 * 551 *
552 * If so, there are two possibilities. The buffer may be 552 * If so, there are two possibilities. The buffer may be
553 * non-journaled, and undergoing a quite legitimate writeback. 553 * non-journaled, and undergoing a quite legitimate writeback.
554 * Otherwise, it is journaled, and we don't expect dirty buffers 554 * Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +566,7 @@ repeat:
566 */ 566 */
567 if (jh->b_transaction) { 567 if (jh->b_transaction) {
568 J_ASSERT_JH(jh, 568 J_ASSERT_JH(jh,
569 jh->b_transaction == transaction || 569 jh->b_transaction == transaction ||
570 jh->b_transaction == 570 jh->b_transaction ==
571 journal->j_committing_transaction); 571 journal->j_committing_transaction);
572 if (jh->b_next_transaction) 572 if (jh->b_next_transaction)
@@ -580,7 +580,7 @@ repeat:
580 */ 580 */
581 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 581 JBUFFER_TRACE(jh, "Unexpected dirty buffer");
582 jbd_unexpected_dirty_buffer(jh); 582 jbd_unexpected_dirty_buffer(jh);
583 } 583 }
584 584
585 unlock_buffer(bh); 585 unlock_buffer(bh);
586 586
@@ -653,7 +653,7 @@ repeat:
653 * buffer had better remain locked during the kmalloc, 653 * buffer had better remain locked during the kmalloc,
654 * but that should be true --- we hold the journal lock 654 * but that should be true --- we hold the journal lock
655 * still and the buffer is already on the BUF_JOURNAL 655 * still and the buffer is already on the BUF_JOURNAL
656 * list so won't be flushed. 656 * list so won't be flushed.
657 * 657 *
658 * Subtle point, though: if this is a get_undo_access, 658 * Subtle point, though: if this is a get_undo_access,
659 * then we will be relying on the frozen_data to contain 659 * then we will be relying on the frozen_data to contain
@@ -765,8 +765,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
765 * manually rather than reading off disk), then we need to keep the 765 * manually rather than reading off disk), then we need to keep the
766 * buffer_head locked until it has been completely filled with new 766 * buffer_head locked until it has been completely filled with new
767 * data. In this case, we should be able to make the assertion that 767 * data. In this case, we should be able to make the assertion that
768 * the bh is not already part of an existing transaction. 768 * the bh is not already part of an existing transaction.
769 * 769 *
770 * The buffer should already be locked by the caller by this point. 770 * The buffer should already be locked by the caller by this point.
771 * There is no lock ranking violation: it was a newly created, 771 * There is no lock ranking violation: it was a newly created,
772 * unlocked buffer beforehand. */ 772 * unlocked buffer beforehand. */
@@ -778,7 +778,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
778 * 778 *
779 * Call this if you create a new bh. 779 * Call this if you create a new bh.
780 */ 780 */
781int journal_get_create_access(handle_t *handle, struct buffer_head *bh) 781int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
782{ 782{
783 transaction_t *transaction = handle->h_transaction; 783 transaction_t *transaction = handle->h_transaction;
784 journal_t *journal = transaction->t_journal; 784 journal_t *journal = transaction->t_journal;
@@ -847,13 +847,13 @@ out:
847 * do not reuse freed space until the deallocation has been committed, 847 * do not reuse freed space until the deallocation has been committed,
848 * since if we overwrote that space we would make the delete 848 * since if we overwrote that space we would make the delete
849 * un-rewindable in case of a crash. 849 * un-rewindable in case of a crash.
850 * 850 *
851 * To deal with that, journal_get_undo_access requests write access to a 851 * To deal with that, journal_get_undo_access requests write access to a
852 * buffer for parts of non-rewindable operations such as delete 852 * buffer for parts of non-rewindable operations such as delete
853 * operations on the bitmaps. The journaling code must keep a copy of 853 * operations on the bitmaps. The journaling code must keep a copy of
854 * the buffer's contents prior to the undo_access call until such time 854 * the buffer's contents prior to the undo_access call until such time
855 * as we know that the buffer has definitely been committed to disk. 855 * as we know that the buffer has definitely been committed to disk.
856 * 856 *
857 * We never need to know which transaction the committed data is part 857 * We never need to know which transaction the committed data is part
858 * of, buffers touched here are guaranteed to be dirtied later and so 858 * of, buffers touched here are guaranteed to be dirtied later and so
859 * will be committed to a new transaction in due course, at which point 859 * will be committed to a new transaction in due course, at which point
@@ -911,13 +911,13 @@ out:
911 return err; 911 return err;
912} 912}
913 913
914/** 914/**
915 * int journal_dirty_data() - mark a buffer as containing dirty data which 915 * int journal_dirty_data() - mark a buffer as containing dirty data which
916 * needs to be flushed before we can commit the 916 * needs to be flushed before we can commit the
917 * current transaction. 917 * current transaction.
918 * @handle: transaction 918 * @handle: transaction
919 * @bh: bufferhead to mark 919 * @bh: bufferhead to mark
920 * 920 *
921 * The buffer is placed on the transaction's data list and is marked as 921 * The buffer is placed on the transaction's data list and is marked as
922 * belonging to the transaction. 922 * belonging to the transaction.
923 * 923 *
@@ -946,15 +946,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
946 946
947 /* 947 /*
948 * What if the buffer is already part of a running transaction? 948 * What if the buffer is already part of a running transaction?
949 * 949 *
950 * There are two cases: 950 * There are two cases:
951 * 1) It is part of the current running transaction. Refile it, 951 * 1) It is part of the current running transaction. Refile it,
952 * just in case we have allocated it as metadata, deallocated 952 * just in case we have allocated it as metadata, deallocated
953 * it, then reallocated it as data. 953 * it, then reallocated it as data.
954 * 2) It is part of the previous, still-committing transaction. 954 * 2) It is part of the previous, still-committing transaction.
955 * If all we want to do is to guarantee that the buffer will be 955 * If all we want to do is to guarantee that the buffer will be
956 * written to disk before this new transaction commits, then 956 * written to disk before this new transaction commits, then
957 * being sure that the *previous* transaction has this same 957 * being sure that the *previous* transaction has this same
958 * property is sufficient for us! Just leave it on its old 958 * property is sufficient for us! Just leave it on its old
959 * transaction. 959 * transaction.
960 * 960 *
@@ -1076,18 +1076,18 @@ no_journal:
1076 return 0; 1076 return 0;
1077} 1077}
1078 1078
1079/** 1079/**
1080 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata 1080 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
1081 * @handle: transaction to add buffer to. 1081 * @handle: transaction to add buffer to.
1082 * @bh: buffer to mark 1082 * @bh: buffer to mark
1083 * 1083 *
1084 * mark dirty metadata which needs to be journaled as part of the current 1084 * mark dirty metadata which needs to be journaled as part of the current
1085 * transaction. 1085 * transaction.
1086 * 1086 *
1087 * The buffer is placed on the transaction's metadata list and is marked 1087 * The buffer is placed on the transaction's metadata list and is marked
1088 * as belonging to the transaction. 1088 * as belonging to the transaction.
1089 * 1089 *
1090 * Returns error number or 0 on success. 1090 * Returns error number or 0 on success.
1091 * 1091 *
1092 * Special care needs to be taken if the buffer already belongs to the 1092 * Special care needs to be taken if the buffer already belongs to the
1093 * current committing transaction (in which case we should have frozen 1093 * current committing transaction (in which case we should have frozen
@@ -1135,11 +1135,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1135 1135
1136 set_buffer_jbddirty(bh); 1136 set_buffer_jbddirty(bh);
1137 1137
1138 /* 1138 /*
1139 * Metadata already on the current transaction list doesn't 1139 * Metadata already on the current transaction list doesn't
1140 * need to be filed. Metadata on another transaction's list must 1140 * need to be filed. Metadata on another transaction's list must
1141 * be committing, and will be refiled once the commit completes: 1141 * be committing, and will be refiled once the commit completes:
1142 * leave it alone for now. 1142 * leave it alone for now.
1143 */ 1143 */
1144 if (jh->b_transaction != transaction) { 1144 if (jh->b_transaction != transaction) {
1145 JBUFFER_TRACE(jh, "already on other transaction"); 1145 JBUFFER_TRACE(jh, "already on other transaction");
@@ -1165,7 +1165,7 @@ out:
1165 return 0; 1165 return 0;
1166} 1166}
1167 1167
1168/* 1168/*
1169 * journal_release_buffer: undo a get_write_access without any buffer 1169 * journal_release_buffer: undo a get_write_access without any buffer
1170 * updates, if the update decided in the end that it didn't need access. 1170 * updates, if the update decided in the end that it didn't need access.
1171 * 1171 *
@@ -1176,20 +1176,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
1176 BUFFER_TRACE(bh, "entry"); 1176 BUFFER_TRACE(bh, "entry");
1177} 1177}
1178 1178
1179/** 1179/**
1180 * void journal_forget() - bforget() for potentially-journaled buffers. 1180 * void journal_forget() - bforget() for potentially-journaled buffers.
1181 * @handle: transaction handle 1181 * @handle: transaction handle
1182 * @bh: bh to 'forget' 1182 * @bh: bh to 'forget'
1183 * 1183 *
1184 * We can only do the bforget if there are no commits pending against the 1184 * We can only do the bforget if there are no commits pending against the
1185 * buffer. If the buffer is dirty in the current running transaction we 1185 * buffer. If the buffer is dirty in the current running transaction we
1186 * can safely unlink it. 1186 * can safely unlink it.
1187 * 1187 *
1188 * bh may not be a journalled buffer at all - it may be a non-JBD 1188 * bh may not be a journalled buffer at all - it may be a non-JBD
1189 * buffer which came off the hashtable. Check for this. 1189 * buffer which came off the hashtable. Check for this.
1190 * 1190 *
1191 * Decrements bh->b_count by one. 1191 * Decrements bh->b_count by one.
1192 * 1192 *
1193 * Allow this call even if the handle has aborted --- it may be part of 1193 * Allow this call even if the handle has aborted --- it may be part of
1194 * the caller's cleanup after an abort. 1194 * the caller's cleanup after an abort.
1195 */ 1195 */
@@ -1237,7 +1237,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1237 1237
1238 drop_reserve = 1; 1238 drop_reserve = 1;
1239 1239
1240 /* 1240 /*
1241 * We are no longer going to journal this buffer. 1241 * We are no longer going to journal this buffer.
1242 * However, the commit of this transaction is still 1242 * However, the commit of this transaction is still
1243 * important to the buffer: the delete that we are now 1243 * important to the buffer: the delete that we are now
@@ -1246,7 +1246,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1246 * 1246 *
1247 * So, if we have a checkpoint on the buffer, we should 1247 * So, if we have a checkpoint on the buffer, we should
1248 * now refile the buffer on our BJ_Forget list so that 1248 * now refile the buffer on our BJ_Forget list so that
1249 * we know to remove the checkpoint after we commit. 1249 * we know to remove the checkpoint after we commit.
1250 */ 1250 */
1251 1251
1252 if (jh->b_cp_transaction) { 1252 if (jh->b_cp_transaction) {
@@ -1264,7 +1264,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1264 } 1264 }
1265 } 1265 }
1266 } else if (jh->b_transaction) { 1266 } else if (jh->b_transaction) {
1267 J_ASSERT_JH(jh, (jh->b_transaction == 1267 J_ASSERT_JH(jh, (jh->b_transaction ==
1268 journal->j_committing_transaction)); 1268 journal->j_committing_transaction));
1269 /* However, if the buffer is still owned by a prior 1269 /* However, if the buffer is still owned by a prior
1270 * (committing) transaction, we can't drop it yet... */ 1270 * (committing) transaction, we can't drop it yet... */
@@ -1294,7 +1294,7 @@ drop:
1294/** 1294/**
1295 * int journal_stop() - complete a transaction 1295 * int journal_stop() - complete a transaction
1296 * @handle: transaction to complete. 1296 * @handle: transaction to complete.
1297 * 1297 *
1298 * All done for a particular handle. 1298 * All done for a particular handle.
1299 * 1299 *
1300 * There is not much action needed here. We just return any remaining 1300 * There is not much action needed here. We just return any remaining
@@ -1303,7 +1303,7 @@ drop:
1303 * filesystem is marked for synchronous update. 1303 * filesystem is marked for synchronous update.
1304 * 1304 *
1305 * journal_stop itself will not usually return an error, but it may 1305 * journal_stop itself will not usually return an error, but it may
1306 * do so in unusual circumstances. In particular, expect it to 1306 * do so in unusual circumstances. In particular, expect it to
1307 * return -EIO if a journal_abort has been executed since the 1307 * return -EIO if a journal_abort has been executed since the
1308 * transaction began. 1308 * transaction began.
1309 */ 1309 */
@@ -1373,7 +1373,7 @@ int journal_stop(handle_t *handle)
1373 if (handle->h_sync || 1373 if (handle->h_sync ||
1374 transaction->t_outstanding_credits > 1374 transaction->t_outstanding_credits >
1375 journal->j_max_transaction_buffers || 1375 journal->j_max_transaction_buffers ||
1376 time_after_eq(jiffies, transaction->t_expires)) { 1376 time_after_eq(jiffies, transaction->t_expires)) {
1377 /* Do this even for aborted journals: an abort still 1377 /* Do this even for aborted journals: an abort still
1378 * completes the commit thread, it just doesn't write 1378 * completes the commit thread, it just doesn't write
1379 * anything to disk. */ 1379 * anything to disk. */
@@ -1388,7 +1388,7 @@ int journal_stop(handle_t *handle)
1388 1388
1389 /* 1389 /*
1390 * Special case: JFS_SYNC synchronous updates require us 1390 * Special case: JFS_SYNC synchronous updates require us
1391 * to wait for the commit to complete. 1391 * to wait for the commit to complete.
1392 */ 1392 */
1393 if (handle->h_sync && !(current->flags & PF_MEMALLOC)) 1393 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1394 err = log_wait_commit(journal, tid); 1394 err = log_wait_commit(journal, tid);
@@ -1439,7 +1439,7 @@ int journal_force_commit(journal_t *journal)
1439 * jbd_lock_bh_state(jh2bh(jh)) is held. 1439 * jbd_lock_bh_state(jh2bh(jh)) is held.
1440 */ 1440 */
1441 1441
1442static inline void 1442static inline void
1443__blist_add_buffer(struct journal_head **list, struct journal_head *jh) 1443__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1444{ 1444{
1445 if (!*list) { 1445 if (!*list) {
@@ -1454,7 +1454,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1454 } 1454 }
1455} 1455}
1456 1456
1457/* 1457/*
1458 * Remove a buffer from a transaction list, given the transaction's list 1458 * Remove a buffer from a transaction list, given the transaction's list
1459 * head pointer. 1459 * head pointer.
1460 * 1460 *
@@ -1475,7 +1475,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1475 jh->b_tnext->b_tprev = jh->b_tprev; 1475 jh->b_tnext->b_tprev = jh->b_tprev;
1476} 1476}
1477 1477
1478/* 1478/*
1479 * Remove a buffer from the appropriate transaction list. 1479 * Remove a buffer from the appropriate transaction list.
1480 * 1480 *
1481 * Note that this function can *change* the value of 1481 * Note that this function can *change* the value of
@@ -1595,17 +1595,17 @@ out:
1595} 1595}
1596 1596
1597 1597
1598/** 1598/**
1599 * int journal_try_to_free_buffers() - try to free page buffers. 1599 * int journal_try_to_free_buffers() - try to free page buffers.
1600 * @journal: journal for operation 1600 * @journal: journal for operation
1601 * @page: to try and free 1601 * @page: to try and free
1602 * @unused_gfp_mask: unused 1602 * @unused_gfp_mask: unused
1603 * 1603 *
1604 * 1604 *
1605 * For all the buffers on this page, 1605 * For all the buffers on this page,
1606 * if they are fully written out ordered data, move them onto BUF_CLEAN 1606 * if they are fully written out ordered data, move them onto BUF_CLEAN
1607 * so try_to_free_buffers() can reap them. 1607 * so try_to_free_buffers() can reap them.
1608 * 1608 *
1609 * This function returns non-zero if we wish try_to_free_buffers() 1609 * This function returns non-zero if we wish try_to_free_buffers()
1610 * to be called. We do this if the page is releasable by try_to_free_buffers(). 1610 * to be called. We do this if the page is releasable by try_to_free_buffers().
1611 * We also do it if the page has locked or dirty buffers and the caller wants 1611 * We also do it if the page has locked or dirty buffers and the caller wants
@@ -1629,7 +1629,7 @@ out:
1629 * cannot happen because we never reallocate freed data as metadata 1629 * cannot happen because we never reallocate freed data as metadata
1630 * while the data is part of a transaction. Yes? 1630 * while the data is part of a transaction. Yes?
1631 */ 1631 */
1632int journal_try_to_free_buffers(journal_t *journal, 1632int journal_try_to_free_buffers(journal_t *journal,
1633 struct page *page, gfp_t unused_gfp_mask) 1633 struct page *page, gfp_t unused_gfp_mask)
1634{ 1634{
1635 struct buffer_head *head; 1635 struct buffer_head *head;
@@ -1697,7 +1697,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1697} 1697}
1698 1698
1699/* 1699/*
1700 * journal_invalidatepage 1700 * journal_invalidatepage
1701 * 1701 *
1702 * This code is tricky. It has a number of cases to deal with. 1702 * This code is tricky. It has a number of cases to deal with.
1703 * 1703 *
@@ -1705,15 +1705,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1705 * 1705 *
1706 * i_size must be updated on disk before we start calling invalidatepage on the 1706 * i_size must be updated on disk before we start calling invalidatepage on the
1707 * data. 1707 * data.
1708 * 1708 *
1709 * This is done in ext3 by defining an ext3_setattr method which 1709 * This is done in ext3 by defining an ext3_setattr method which
1710 * updates i_size before truncate gets going. By maintaining this 1710 * updates i_size before truncate gets going. By maintaining this
1711 * invariant, we can be sure that it is safe to throw away any buffers 1711 * invariant, we can be sure that it is safe to throw away any buffers
1712 * attached to the current transaction: once the transaction commits, 1712 * attached to the current transaction: once the transaction commits,
1713 * we know that the data will not be needed. 1713 * we know that the data will not be needed.
1714 * 1714 *
1715 * Note however that we can *not* throw away data belonging to the 1715 * Note however that we can *not* throw away data belonging to the
1716 * previous, committing transaction! 1716 * previous, committing transaction!
1717 * 1717 *
1718 * Any disk blocks which *are* part of the previous, committing 1718 * Any disk blocks which *are* part of the previous, committing
1719 * transaction (and which therefore cannot be discarded immediately) are 1719 * transaction (and which therefore cannot be discarded immediately) are
@@ -1732,7 +1732,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1732 * don't make guarantees about the order in which data hits disk --- in 1732 * don't make guarantees about the order in which data hits disk --- in
1733 * particular we don't guarantee that new dirty data is flushed before 1733 * particular we don't guarantee that new dirty data is flushed before
1734 * transaction commit --- so it is always safe just to discard data 1734 * transaction commit --- so it is always safe just to discard data
1735 * immediately in that mode. --sct 1735 * immediately in that mode. --sct
1736 */ 1736 */
1737 1737
1738/* 1738/*
@@ -1876,9 +1876,9 @@ zap_buffer_unlocked:
1876 return may_free; 1876 return may_free;
1877} 1877}
1878 1878
1879/** 1879/**
1880 * void journal_invalidatepage() 1880 * void journal_invalidatepage()
1881 * @journal: journal to use for flush... 1881 * @journal: journal to use for flush...
1882 * @page: page to flush 1882 * @page: page to flush
1883 * @offset: length of page to invalidate. 1883 * @offset: length of page to invalidate.
1884 * 1884 *
@@ -1886,7 +1886,7 @@ zap_buffer_unlocked:
1886 * 1886 *
1887 */ 1887 */
1888void journal_invalidatepage(journal_t *journal, 1888void journal_invalidatepage(journal_t *journal,
1889 struct page *page, 1889 struct page *page,
1890 unsigned long offset) 1890 unsigned long offset)
1891{ 1891{
1892 struct buffer_head *head, *bh, *next; 1892 struct buffer_head *head, *bh, *next;
@@ -1908,7 +1908,7 @@ void journal_invalidatepage(journal_t *journal,
1908 next = bh->b_this_page; 1908 next = bh->b_this_page;
1909 1909
1910 if (offset <= curr_off) { 1910 if (offset <= curr_off) {
1911 /* This block is wholly outside the truncation point */ 1911 /* This block is wholly outside the truncation point */
1912 lock_buffer(bh); 1912 lock_buffer(bh);
1913 may_free &= journal_unmap_buffer(journal, bh); 1913 may_free &= journal_unmap_buffer(journal, bh);
1914 unlock_buffer(bh); 1914 unlock_buffer(bh);
@@ -1924,8 +1924,8 @@ void journal_invalidatepage(journal_t *journal,
1924 } 1924 }
1925} 1925}
1926 1926
1927/* 1927/*
1928 * File a buffer on the given transaction list. 1928 * File a buffer on the given transaction list.
1929 */ 1929 */
1930void __journal_file_buffer(struct journal_head *jh, 1930void __journal_file_buffer(struct journal_head *jh,
1931 transaction_t *transaction, int jlist) 1931 transaction_t *transaction, int jlist)
@@ -1948,7 +1948,7 @@ void __journal_file_buffer(struct journal_head *jh,
1948 * with __jbd_unexpected_dirty_buffer()'s handling of dirty 1948 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1949 * state. */ 1949 * state. */
1950 1950
1951 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1951 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1952 jlist == BJ_Shadow || jlist == BJ_Forget) { 1952 jlist == BJ_Shadow || jlist == BJ_Forget) {
1953 if (test_clear_buffer_dirty(bh) || 1953 if (test_clear_buffer_dirty(bh) ||
1954 test_clear_buffer_jbddirty(bh)) 1954 test_clear_buffer_jbddirty(bh))
@@ -2008,7 +2008,7 @@ void journal_file_buffer(struct journal_head *jh,
2008 jbd_unlock_bh_state(jh2bh(jh)); 2008 jbd_unlock_bh_state(jh2bh(jh));
2009} 2009}
2010 2010
2011/* 2011/*
2012 * Remove a buffer from its current buffer list in preparation for 2012 * Remove a buffer from its current buffer list in preparation for
2013 * dropping it from its current transaction entirely. If the buffer has 2013 * dropping it from its current transaction entirely. If the buffer has
2014 * already started to be used by a subsequent transaction, refile the 2014 * already started to be used by a subsequent transaction, refile the
@@ -2060,7 +2060,7 @@ void __journal_refile_buffer(struct journal_head *jh)
2060 * to the caller to remove the journal_head if necessary. For the 2060 * to the caller to remove the journal_head if necessary. For the
2061 * unlocked journal_refile_buffer call, the caller isn't going to be 2061 * unlocked journal_refile_buffer call, the caller isn't going to be
2062 * doing anything else to the buffer so we need to do the cleanup 2062 * doing anything else to the buffer so we need to do the cleanup
2063 * ourselves to avoid a jh leak. 2063 * ourselves to avoid a jh leak.
2064 * 2064 *
2065 * *** The journal_head may be freed by this call! *** 2065 * *** The journal_head may be freed by this call! ***
2066 */ 2066 */