aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd
diff options
context:
space:
mode:
authorMichal Marek <mmarek@suse.cz>2011-03-09 10:15:44 -0500
committerMichal Marek <mmarek@suse.cz>2011-03-09 10:15:44 -0500
commit2d8ad8719591fa803b0d589ed057fa46f49b7155 (patch)
tree4ae051577dad1161c91dafbf4207bb10a9dc91bb /fs/jbd
parent9b4ce7bce5f30712fd926ab4599a803314a07719 (diff)
parentc56eb8fb6dccb83d9fe62fd4dc00c834de9bc470 (diff)
Merge commit 'v2.6.38-rc1' into kbuild/packaging
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/checkpoint.c8
-rw-r--r--fs/jbd/commit.c60
-rw-r--r--fs/jbd/journal.c86
-rw-r--r--fs/jbd/recovery.c14
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd/transaction.c53
6 files changed, 134 insertions, 89 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..e4b87bc1fa56 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
221 goto restart; 221 goto restart;
222 } 222 }
223 if (buffer_locked(bh)) { 223 if (buffer_locked(bh)) {
224 atomic_inc(&bh->b_count); 224 get_bh(bh);
225 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
226 jbd_unlock_bh_state(bh); 226 jbd_unlock_bh_state(bh);
227 wait_on_buffer(bh); 227 wait_on_buffer(bh);
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254{ 254{
255 int i; 255 int i;
256 256
257 ll_rw_block(SWRITE, *batch_count, bhs); 257 for (i = 0; i < *batch_count; i++)
258 write_dirty_buffer(bhs[i], WRITE);
259
258 for (i = 0; i < *batch_count; i++) { 260 for (i = 0; i < *batch_count; i++) {
259 struct buffer_head *bh = bhs[i]; 261 struct buffer_head *bh = bhs[i];
260 clear_buffer_jwrite(bh); 262 clear_buffer_jwrite(bh);
@@ -281,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
281 int ret = 0; 283 int ret = 0;
282 284
283 if (buffer_locked(bh)) { 285 if (buffer_locked(bh)) {
284 atomic_inc(&bh->b_count); 286 get_bh(bh);
285 spin_unlock(&journal->j_list_lock); 287 spin_unlock(&journal->j_list_lock);
286 jbd_unlock_bh_state(bh); 288 jbd_unlock_bh_state(bh);
287 wait_on_buffer(bh); 289 wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 4bd882548c45..34a4861c14b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -17,7 +17,6 @@
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd.h> 18#include <linux/jbd.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/slab.h>
21#include <linux/mm.h> 20#include <linux/mm.h>
22#include <linux/pagemap.h> 21#include <linux/pagemap.h>
23#include <linux/bio.h> 22#include <linux/bio.h>
@@ -120,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
120 struct buffer_head *bh; 119 struct buffer_head *bh;
121 journal_header_t *header; 120 journal_header_t *header;
122 int ret; 121 int ret;
123 int barrier_done = 0;
124 122
125 if (is_journal_aborted(journal)) 123 if (is_journal_aborted(journal))
126 return 0; 124 return 0;
@@ -138,34 +136,12 @@ static int journal_write_commit_record(journal_t *journal,
138 136
139 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "write commit block");
140 set_buffer_dirty(bh); 138 set_buffer_dirty(bh);
141 if (journal->j_flags & JFS_BARRIER) {
142 set_buffer_ordered(bh);
143 barrier_done = 1;
144 }
145 ret = sync_dirty_buffer(bh);
146 if (barrier_done)
147 clear_buffer_ordered(bh);
148 /* is it possible for another commit to fail at roughly
149 * the same time as this one? If so, we don't want to
150 * trust the barrier flag in the super, but instead want
151 * to remember if we sent a barrier request
152 */
153 if (ret == -EOPNOTSUPP && barrier_done) {
154 char b[BDEVNAME_SIZE];
155
156 printk(KERN_WARNING
157 "JBD: barrier-based sync failed on %s - "
158 "disabling barriers\n",
159 bdevname(journal->j_dev, b));
160 spin_lock(&journal->j_state_lock);
161 journal->j_flags &= ~JFS_BARRIER;
162 spin_unlock(&journal->j_state_lock);
163 139
164 /* And try again, without the barrier */ 140 if (journal->j_flags & JFS_BARRIER)
165 set_buffer_uptodate(bh); 141 ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
166 set_buffer_dirty(bh); 142 else
167 ret = sync_dirty_buffer(bh); 143 ret = sync_dirty_buffer(bh);
168 } 144
169 put_bh(bh); /* One for getblk() */ 145 put_bh(bh); /* One for getblk() */
170 journal_put_journal_head(descriptor); 146 journal_put_journal_head(descriptor);
171 147
@@ -318,7 +294,7 @@ void journal_commit_transaction(journal_t *journal)
318 int first_tag = 0; 294 int first_tag = 0;
319 int tag_flag; 295 int tag_flag;
320 int i; 296 int i;
321 int write_op = WRITE; 297 int write_op = WRITE_SYNC;
322 298
323 /* 299 /*
324 * First job: lock down the current transaction and wait for 300 * First job: lock down the current transaction and wait for
@@ -611,13 +587,13 @@ void journal_commit_transaction(journal_t *journal)
611 /* Bump b_count to prevent truncate from stumbling over 587 /* Bump b_count to prevent truncate from stumbling over
612 the shadowed buffer! @@@ This can go if we ever get 588 the shadowed buffer! @@@ This can go if we ever get
613 rid of the BJ_IO/BJ_Shadow pairing of buffers. */ 589 rid of the BJ_IO/BJ_Shadow pairing of buffers. */
614 atomic_inc(&jh2bh(jh)->b_count); 590 get_bh(jh2bh(jh));
615 591
616 /* Make a temporary IO buffer with which to write it out 592 /* Make a temporary IO buffer with which to write it out
617 (this will requeue both the metadata buffer and the 593 (this will requeue both the metadata buffer and the
618 temporary IO buffer). new_bh goes on BJ_IO*/ 594 temporary IO buffer). new_bh goes on BJ_IO*/
619 595
620 set_bit(BH_JWrite, &jh2bh(jh)->b_state); 596 set_buffer_jwrite(jh2bh(jh));
621 /* 597 /*
622 * akpm: journal_write_metadata_buffer() sets 598 * akpm: journal_write_metadata_buffer() sets
623 * new_bh->b_transaction to commit_transaction. 599 * new_bh->b_transaction to commit_transaction.
@@ -627,7 +603,7 @@ void journal_commit_transaction(journal_t *journal)
627 JBUFFER_TRACE(jh, "ph3: write metadata"); 603 JBUFFER_TRACE(jh, "ph3: write metadata");
628 flags = journal_write_metadata_buffer(commit_transaction, 604 flags = journal_write_metadata_buffer(commit_transaction,
629 jh, &new_jh, blocknr); 605 jh, &new_jh, blocknr);
630 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 606 set_buffer_jwrite(jh2bh(new_jh));
631 wbuf[bufs++] = jh2bh(new_jh); 607 wbuf[bufs++] = jh2bh(new_jh);
632 608
633 /* Record the new block's tag in the current descriptor 609 /* Record the new block's tag in the current descriptor
@@ -737,7 +713,7 @@ wait_for_iobuf:
737 shadowed buffer */ 713 shadowed buffer */
738 jh = commit_transaction->t_shadow_list->b_tprev; 714 jh = commit_transaction->t_shadow_list->b_tprev;
739 bh = jh2bh(jh); 715 bh = jh2bh(jh);
740 clear_bit(BH_JWrite, &bh->b_state); 716 clear_buffer_jwrite(bh);
741 J_ASSERT_BH(bh, buffer_jbddirty(bh)); 717 J_ASSERT_BH(bh, buffer_jbddirty(bh));
742 718
743 /* The metadata is now released for reuse, but we need 719 /* The metadata is now released for reuse, but we need
@@ -787,6 +763,12 @@ wait_for_iobuf:
787 763
788 jbd_debug(3, "JBD: commit phase 6\n"); 764 jbd_debug(3, "JBD: commit phase 6\n");
789 765
766 /* All metadata is written, now write commit record and do cleanup */
767 spin_lock(&journal->j_state_lock);
768 J_ASSERT(commit_transaction->t_state == T_COMMIT);
769 commit_transaction->t_state = T_COMMIT_RECORD;
770 spin_unlock(&journal->j_state_lock);
771
790 if (journal_write_commit_record(journal, commit_transaction)) 772 if (journal_write_commit_record(journal, commit_transaction))
791 err = -EIO; 773 err = -EIO;
792 774
@@ -862,12 +844,12 @@ restart_loop:
862 /* A buffer which has been freed while still being 844 /* A buffer which has been freed while still being
863 * journaled by a previous transaction may end up still 845 * journaled by a previous transaction may end up still
864 * being dirty here, but we want to avoid writing back 846 * being dirty here, but we want to avoid writing back
865 * that buffer in the future now that the last use has 847 * that buffer in the future after the "add to orphan"
866 * been committed. That's not only a performance gain, 848 * operation been committed, That's not only a performance
867 * it also stops aliasing problems if the buffer is left 849 * gain, it also stops aliasing problems if the buffer is
868 * behind for writeback and gets reallocated for another 850 * left behind for writeback and gets reallocated for another
869 * use in a different page. */ 851 * use in a different page. */
870 if (buffer_freed(bh)) { 852 if (buffer_freed(bh) && !jh->b_next_transaction) {
871 clear_buffer_freed(bh); 853 clear_buffer_freed(bh);
872 clear_buffer_jbddirty(bh); 854 clear_buffer_jbddirty(bh);
873 } 855 }
@@ -924,7 +906,7 @@ restart_loop:
924 906
925 jbd_debug(3, "JBD: commit phase 8\n"); 907 jbd_debug(3, "JBD: commit phase 8\n");
926 908
927 J_ASSERT(commit_transaction->t_state == T_COMMIT); 909 J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
928 910
929 commit_transaction->t_state = T_FINISHED; 911 commit_transaction->t_state = T_FINISHED;
930 J_ASSERT(commit_transaction == journal->j_committing_transaction); 912 J_ASSERT(commit_transaction == journal->j_committing_transaction);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index bd224eec9b07..da1b5e4ffce1 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -36,6 +36,7 @@
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/ratelimit.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/page.h> 42#include <asm/page.h>
@@ -84,6 +85,7 @@ EXPORT_SYMBOL(journal_force_commit);
84 85
85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 86static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
86static void __journal_abort_soft (journal_t *journal, int errno); 87static void __journal_abort_soft (journal_t *journal, int errno);
88static const char *journal_dev_name(journal_t *journal, char *buffer);
87 89
88/* 90/*
89 * Helper function used to manage commit timeouts 91 * Helper function used to manage commit timeouts
@@ -439,7 +441,7 @@ int __log_start_commit(journal_t *journal, tid_t target)
439 */ 441 */
440 if (!tid_geq(journal->j_commit_request, target)) { 442 if (!tid_geq(journal->j_commit_request, target)) {
441 /* 443 /*
442 * We want a new commit: OK, mark the request and wakup the 444 * We want a new commit: OK, mark the request and wakeup the
443 * commit thread. We do _not_ do the commit ourselves. 445 * commit thread. We do _not_ do the commit ourselves.
444 */ 446 */
445 447
@@ -565,6 +567,38 @@ int log_wait_commit(journal_t *journal, tid_t tid)
565} 567}
566 568
567/* 569/*
570 * Return 1 if a given transaction has not yet sent barrier request
571 * connected with a transaction commit. If 0 is returned, transaction
572 * may or may not have sent the barrier. Used to avoid sending barrier
573 * twice in common cases.
574 */
575int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
576{
577 int ret = 0;
578 transaction_t *commit_trans;
579
580 if (!(journal->j_flags & JFS_BARRIER))
581 return 0;
582 spin_lock(&journal->j_state_lock);
583 /* Transaction already committed? */
584 if (tid_geq(journal->j_commit_sequence, tid))
585 goto out;
586 /*
587 * Transaction is being committed and we already proceeded to
588 * writing commit record?
589 */
590 commit_trans = journal->j_committing_transaction;
591 if (commit_trans && commit_trans->t_tid == tid &&
592 commit_trans->t_state >= T_COMMIT_RECORD)
593 goto out;
594 ret = 1;
595out:
596 spin_unlock(&journal->j_state_lock);
597 return ret;
598}
599EXPORT_SYMBOL(journal_trans_will_send_data_barrier);
600
601/*
568 * Log buffer allocation routines: 602 * Log buffer allocation routines:
569 */ 603 */
570 604
@@ -918,6 +952,8 @@ int journal_create(journal_t *journal)
918 if (err) 952 if (err)
919 return err; 953 return err;
920 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 954 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
955 if (unlikely(!bh))
956 return -ENOMEM;
921 lock_buffer(bh); 957 lock_buffer(bh);
922 memset (bh->b_data, 0, journal->j_blocksize); 958 memset (bh->b_data, 0, journal->j_blocksize);
923 BUFFER_TRACE(bh, "marking dirty"); 959 BUFFER_TRACE(bh, "marking dirty");
@@ -978,6 +1014,23 @@ void journal_update_superblock(journal_t *journal, int wait)
978 goto out; 1014 goto out;
979 } 1015 }
980 1016
1017 if (buffer_write_io_error(bh)) {
1018 char b[BDEVNAME_SIZE];
1019 /*
1020 * Oh, dear. A previous attempt to write the journal
1021 * superblock failed. This could happen because the
1022 * USB device was yanked out. Or it could happen to
1023 * be a transient write error and maybe the block will
1024 * be remapped. Nothing we can do but to retry the
1025 * write and hope for the best.
1026 */
1027 printk(KERN_ERR "JBD: previous I/O error detected "
1028 "for journal superblock update for %s.\n",
1029 journal_dev_name(journal, b));
1030 clear_buffer_write_io_error(bh);
1031 set_buffer_uptodate(bh);
1032 }
1033
981 spin_lock(&journal->j_state_lock); 1034 spin_lock(&journal->j_state_lock);
982 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", 1035 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
983 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1036 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -989,10 +1042,18 @@ void journal_update_superblock(journal_t *journal, int wait)
989 1042
990 BUFFER_TRACE(bh, "marking dirty"); 1043 BUFFER_TRACE(bh, "marking dirty");
991 mark_buffer_dirty(bh); 1044 mark_buffer_dirty(bh);
992 if (wait) 1045 if (wait) {
993 sync_dirty_buffer(bh); 1046 sync_dirty_buffer(bh);
994 else 1047 if (buffer_write_io_error(bh)) {
995 ll_rw_block(SWRITE, 1, &bh); 1048 char b[BDEVNAME_SIZE];
1049 printk(KERN_ERR "JBD: I/O error detected "
1050 "when updating journal superblock for %s.\n",
1051 journal_dev_name(journal, b));
1052 clear_buffer_write_io_error(bh);
1053 set_buffer_uptodate(bh);
1054 }
1055 } else
1056 write_dirty_buffer(bh, WRITE);
996 1057
997out: 1058out:
998 /* If we have just flushed the log (by marking s_start==0), then 1059 /* If we have just flushed the log (by marking s_start==0), then
@@ -1157,6 +1218,7 @@ int journal_destroy(journal_t *journal)
1157{ 1218{
1158 int err = 0; 1219 int err = 0;
1159 1220
1221
1160 /* Wait for the commit thread to wake up and die. */ 1222 /* Wait for the commit thread to wake up and die. */
1161 journal_kill_thread(journal); 1223 journal_kill_thread(journal);
1162 1224
@@ -1248,13 +1310,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
1248int journal_check_available_features (journal_t *journal, unsigned long compat, 1310int journal_check_available_features (journal_t *journal, unsigned long compat,
1249 unsigned long ro, unsigned long incompat) 1311 unsigned long ro, unsigned long incompat)
1250{ 1312{
1251 journal_superblock_t *sb;
1252
1253 if (!compat && !ro && !incompat) 1313 if (!compat && !ro && !incompat)
1254 return 1; 1314 return 1;
1255 1315
1256 sb = journal->j_superblock;
1257
1258 /* We can support any known requested features iff the 1316 /* We can support any known requested features iff the
1259 * superblock is in version 2. Otherwise we fail to support any 1317 * superblock is in version 2. Otherwise we fail to support any
1260 * extended sb features. */ 1318 * extended sb features. */
@@ -1448,7 +1506,6 @@ int journal_flush(journal_t *journal)
1448 1506
1449int journal_wipe(journal_t *journal, int write) 1507int journal_wipe(journal_t *journal, int write)
1450{ 1508{
1451 journal_superblock_t *sb;
1452 int err = 0; 1509 int err = 0;
1453 1510
1454 J_ASSERT (!(journal->j_flags & JFS_LOADED)); 1511 J_ASSERT (!(journal->j_flags & JFS_LOADED));
@@ -1457,8 +1514,6 @@ int journal_wipe(journal_t *journal, int write)
1457 if (err) 1514 if (err)
1458 return err; 1515 return err;
1459 1516
1460 sb = journal->j_superblock;
1461
1462 if (!journal->j_tail) 1517 if (!journal->j_tail)
1463 goto no_recovery; 1518 goto no_recovery;
1464 1519
@@ -1693,7 +1748,6 @@ static void journal_destroy_journal_head_cache(void)
1693static struct journal_head *journal_alloc_journal_head(void) 1748static struct journal_head *journal_alloc_journal_head(void)
1694{ 1749{
1695 struct journal_head *ret; 1750 struct journal_head *ret;
1696 static unsigned long last_warning;
1697 1751
1698#ifdef CONFIG_JBD_DEBUG 1752#ifdef CONFIG_JBD_DEBUG
1699 atomic_inc(&nr_journal_heads); 1753 atomic_inc(&nr_journal_heads);
@@ -1701,11 +1755,9 @@ static struct journal_head *journal_alloc_journal_head(void)
1701 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1755 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
1702 if (ret == NULL) { 1756 if (ret == NULL) {
1703 jbd_debug(1, "out of memory for journal_head\n"); 1757 jbd_debug(1, "out of memory for journal_head\n");
1704 if (time_after(jiffies, last_warning + 5*HZ)) { 1758 printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1705 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1759 __func__);
1706 __func__); 1760
1707 last_warning = jiffies;
1708 }
1709 while (ret == NULL) { 1761 while (ret == NULL) {
1710 yield(); 1762 yield();
1711 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1763 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index cb1a49ae605e..5b43e96788e6 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -20,7 +20,6 @@
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd.h> 21#include <linux/jbd.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/slab.h>
24#endif 23#endif
25 24
26/* 25/*
@@ -284,12 +283,9 @@ int journal_recover(journal_t *journal)
284int journal_skip_recovery(journal_t *journal) 283int journal_skip_recovery(journal_t *journal)
285{ 284{
286 int err; 285 int err;
287 journal_superblock_t * sb;
288
289 struct recovery_info info; 286 struct recovery_info info;
290 287
291 memset (&info, 0, sizeof(info)); 288 memset (&info, 0, sizeof(info));
292 sb = journal->j_superblock;
293 289
294 err = do_one_pass(journal, &info, PASS_SCAN); 290 err = do_one_pass(journal, &info, PASS_SCAN);
295 291
@@ -298,11 +294,12 @@ int journal_skip_recovery(journal_t *journal)
298 ++journal->j_transaction_sequence; 294 ++journal->j_transaction_sequence;
299 } else { 295 } else {
300#ifdef CONFIG_JBD_DEBUG 296#ifdef CONFIG_JBD_DEBUG
301 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 297 int dropped = info.end_transaction -
302#endif 298 be32_to_cpu(journal->j_superblock->s_sequence);
303 jbd_debug(1, 299 jbd_debug(1,
304 "JBD: ignoring %d transaction%s from the journal.\n", 300 "JBD: ignoring %d transaction%s from the journal.\n",
305 dropped, (dropped == 1) ? "" : "s"); 301 dropped, (dropped == 1) ? "" : "s");
302#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
307 } 304 }
308 305
@@ -322,11 +319,6 @@ static int do_one_pass(journal_t *journal,
322 unsigned int sequence; 319 unsigned int sequence;
323 int blocktype; 320 int blocktype;
324 321
325 /* Precompute the maximum metadata descriptors in a descriptor block */
326 int MAX_BLOCKS_PER_DESC;
327 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
328 / sizeof(journal_block_tag_t));
329
330 /* 322 /*
331 * First thing is to establish what we expect to find in the log 323 * First thing is to establish what we expect to find in the log
332 * (in terms of transaction IDs), and where (in terms of log 324 * (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
617 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
618 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
619 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); 620 write_dirty_buffer(bh, write_op);
621} 621}
622#endif 622#endif
623 623
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 006f9ad838a2..5b2e4c30a2a1 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -207,7 +207,7 @@ repeat_locked:
207 * the committing transaction. Really, we only need to give it 207 * the committing transaction. Really, we only need to give it
208 * committing_transaction->t_outstanding_credits plus "enough" for 208 * committing_transaction->t_outstanding_credits plus "enough" for
209 * the log control blocks. 209 * the log control blocks.
210 * Also, this test is inconsitent with the matching one in 210 * Also, this test is inconsistent with the matching one in
211 * journal_extend(). 211 * journal_extend().
212 */ 212 */
213 if (__log_space_left(journal) < jbd_space_needed(journal)) { 213 if (__log_space_left(journal) < jbd_space_needed(journal)) {
@@ -293,9 +293,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
293 jbd_free_handle(handle); 293 jbd_free_handle(handle);
294 current->journal_info = NULL; 294 current->journal_info = NULL;
295 handle = ERR_PTR(err); 295 handle = ERR_PTR(err);
296 goto out;
297 } 296 }
298out:
299 return handle; 297 return handle;
300} 298}
301 299
@@ -528,7 +526,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
528 transaction = handle->h_transaction; 526 transaction = handle->h_transaction;
529 journal = transaction->t_journal; 527 journal = transaction->t_journal;
530 528
531 jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); 529 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
532 530
533 JBUFFER_TRACE(jh, "entry"); 531 JBUFFER_TRACE(jh, "entry");
534repeat: 532repeat:
@@ -713,7 +711,7 @@ done:
713 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), 711 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
714 "Possible IO failure.\n"); 712 "Possible IO failure.\n");
715 page = jh2bh(jh)->b_page; 713 page = jh2bh(jh)->b_page;
716 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 714 offset = offset_in_page(jh2bh(jh)->b_data);
717 source = kmap_atomic(page, KM_USER0); 715 source = kmap_atomic(page, KM_USER0);
718 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 716 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
719 kunmap_atomic(source, KM_USER0); 717 kunmap_atomic(source, KM_USER0);
@@ -1398,7 +1396,7 @@ int journal_stop(handle_t *handle)
1398 * the case where our storage is so fast that it is more optimal to go 1396 * the case where our storage is so fast that it is more optimal to go
1399 * ahead and force a flush and wait for the transaction to be committed 1397 * ahead and force a flush and wait for the transaction to be committed
1400 * than it is to wait for an arbitrary amount of time for new writers to 1398 * than it is to wait for an arbitrary amount of time for new writers to
1401 * join the transaction. We acheive this by measuring how long it takes 1399 * join the transaction. We achieve this by measuring how long it takes
1402 * to commit a transaction, and compare it with how long this 1400 * to commit a transaction, and compare it with how long this
1403 * transaction has been running, and if run time < commit time then we 1401 * transaction has been running, and if run time < commit time then we
1404 * sleep for the delta and commit. This greatly helps super fast disks 1402 * sleep for the delta and commit. This greatly helps super fast disks
@@ -1864,6 +1862,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1864 if (!jh) 1862 if (!jh)
1865 goto zap_buffer_no_jh; 1863 goto zap_buffer_no_jh;
1866 1864
1865 /*
1866 * We cannot remove the buffer from checkpoint lists until the
1867 * transaction adding inode to orphan list (let's call it T)
1868 * is committed. Otherwise if the transaction changing the
1869 * buffer would be cleaned from the journal before T is
1870 * committed, a crash will cause that the correct contents of
1871 * the buffer will be lost. On the other hand we have to
1872 * clear the buffer dirty bit at latest at the moment when the
1873 * transaction marking the buffer as freed in the filesystem
1874 * structures is committed because from that moment on the
1875 * buffer can be reallocated and used by a different page.
1876 * Since the block hasn't been freed yet but the inode has
1877 * already been added to orphan list, it is safe for us to add
1878 * the buffer to BJ_Forget list of the newest transaction.
1879 */
1867 transaction = jh->b_transaction; 1880 transaction = jh->b_transaction;
1868 if (transaction == NULL) { 1881 if (transaction == NULL) {
1869 /* First case: not on any transaction. If it 1882 /* First case: not on any transaction. If it
@@ -1929,16 +1942,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1929 goto zap_buffer; 1942 goto zap_buffer;
1930 } 1943 }
1931 /* 1944 /*
1932 * If it is committing, we simply cannot touch it. We 1945 * The buffer is committing, we simply cannot touch
1933 * can remove it's next_transaction pointer from the 1946 * it. So we just set j_next_transaction to the
1934 * running transaction if that is set, but nothing 1947 * running transaction (if there is one) and mark
1935 * else. */ 1948 * buffer as freed so that commit code knows it should
1949 * clear dirty bits when it is done with the buffer.
1950 */
1936 set_buffer_freed(bh); 1951 set_buffer_freed(bh);
1937 if (jh->b_next_transaction) { 1952 if (journal->j_running_transaction && buffer_jbddirty(bh))
1938 J_ASSERT(jh->b_next_transaction == 1953 jh->b_next_transaction = journal->j_running_transaction;
1939 journal->j_running_transaction);
1940 jh->b_next_transaction = NULL;
1941 }
1942 journal_put_journal_head(jh); 1954 journal_put_journal_head(jh);
1943 spin_unlock(&journal->j_list_lock); 1955 spin_unlock(&journal->j_list_lock);
1944 jbd_unlock_bh_state(bh); 1956 jbd_unlock_bh_state(bh);
@@ -2120,7 +2132,7 @@ void journal_file_buffer(struct journal_head *jh,
2120 */ 2132 */
2121void __journal_refile_buffer(struct journal_head *jh) 2133void __journal_refile_buffer(struct journal_head *jh)
2122{ 2134{
2123 int was_dirty; 2135 int was_dirty, jlist;
2124 struct buffer_head *bh = jh2bh(jh); 2136 struct buffer_head *bh = jh2bh(jh);
2125 2137
2126 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); 2138 J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -2142,8 +2154,13 @@ void __journal_refile_buffer(struct journal_head *jh)
2142 __journal_temp_unlink_buffer(jh); 2154 __journal_temp_unlink_buffer(jh);
2143 jh->b_transaction = jh->b_next_transaction; 2155 jh->b_transaction = jh->b_next_transaction;
2144 jh->b_next_transaction = NULL; 2156 jh->b_next_transaction = NULL;
2145 __journal_file_buffer(jh, jh->b_transaction, 2157 if (buffer_freed(bh))
2146 jh->b_modified ? BJ_Metadata : BJ_Reserved); 2158 jlist = BJ_Forget;
2159 else if (jh->b_modified)
2160 jlist = BJ_Metadata;
2161 else
2162 jlist = BJ_Reserved;
2163 __journal_file_buffer(jh, jh->b_transaction, jlist);
2147 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2164 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2148 2165
2149 if (was_dirty) 2166 if (was_dirty)