path: root/fs/jbd2
author		Dmitry Torokhov <dmitry.torokhov@gmail.com>	2013-11-14 20:38:05 -0500
committer	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2013-11-14 20:38:05 -0500
commit		42249094f79422fbf5ed4b54eeb48ff096809b8f (patch)
tree		91e6850c8c7e8cc284cf8bb6363f8662f84011f4 /fs/jbd2
parent		936816161978ca716a56c5e553c68f25972b1e3a (diff)
parent		2c027b7c48a888ab173ba45babb4525e278375d9 (diff)

Merge branch 'next' into for-linus

Merge first round of changes for 3.13 merge window.
Diffstat (limited to 'fs/jbd2')
-rw-r--r--	fs/jbd2/Kconfig		6
-rw-r--r--	fs/jbd2/checkpoint.c	22
-rw-r--r--	fs/jbd2/commit.c	186
-rw-r--r--	fs/jbd2/journal.c	171
-rw-r--r--	fs/jbd2/recovery.c	31
-rw-r--r--	fs/jbd2/revoke.c	49
-rw-r--r--	fs/jbd2/transaction.c	526
7 files changed, 552 insertions, 439 deletions
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 69a48c2944da..5a9f5534d57b 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -20,7 +20,7 @@ config JBD2
 
 config JBD2_DEBUG
 	bool "JBD2 (ext4) debugging support"
-	depends on JBD2 && DEBUG_FS
+	depends on JBD2
 	help
 	  If you are using the ext4 journaled file system (or
 	  potentially any other filesystem/device using JBD2), this option
@@ -29,7 +29,7 @@ config JBD2_DEBUG
 	  By default, the debugging output will be turned off.
 
 	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
+	  with "echo N > /sys/module/jbd2/parameters/jbd2_debug", where N is a
 	  number between 1 and 5. The higher the number, the more debugging
 	  output is generated. To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
+	  "echo 0 > /sys/module/jbd2/parameters/jbd2_debug".
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index c78841ee81cf..7f34f4716165 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 	int nblocks, space_left;
 	/* assert_spin_locked(&journal->j_state_lock); */
 
-	nblocks = jbd_space_needed(journal);
-	while (__jbd2_log_space_left(journal) < nblocks) {
+	nblocks = jbd2_space_needed(journal);
+	while (jbd2_log_space_left(journal) < nblocks) {
 		if (journal->j_flags & JBD2_ABORT)
 			return;
 		write_unlock(&journal->j_state_lock);
@@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 	 */
 	write_lock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
-	nblocks = jbd_space_needed(journal);
-	space_left = __jbd2_log_space_left(journal);
+	nblocks = jbd2_space_needed(journal);
+	space_left = jbd2_log_space_left(journal);
 	if (space_left < nblocks) {
 		int chkpt = journal->j_checkpoint_transactions != NULL;
 		tid_t tid = 0;
@@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 			/* We were able to recover space; yay! */
 			;
 		} else if (tid) {
+			/*
+			 * jbd2_journal_commit_transaction() may want
+			 * to take the checkpoint_mutex if JBD2_FLUSHED
+			 * is set. So we need to temporarily drop it.
+			 */
+			mutex_unlock(&journal->j_checkpoint_mutex);
 			jbd2_log_wait_commit(journal, tid);
+			write_lock(&journal->j_state_lock);
+			continue;
 		} else {
 			printk(KERN_ERR "%s: needed %d blocks and "
 			       "only had %d space available\n",
@@ -625,10 +633,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 
 	__jbd2_journal_drop_transaction(journal, transaction);
 	jbd2_journal_free_transaction(transaction);
-
-	/* Just in case anybody was waiting for more transactions to be
-	   checkpointed... */
-	wake_up(&journal->j_wait_logspace);
 	ret = 1;
 out:
 	return ret;
@@ -690,9 +694,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 	J_ASSERT(transaction->t_state == T_FINISHED);
 	J_ASSERT(transaction->t_buffers == NULL);
 	J_ASSERT(transaction->t_forget == NULL);
-	J_ASSERT(transaction->t_iobuf_list == NULL);
 	J_ASSERT(transaction->t_shadow_list == NULL);
-	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
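
Note: the extra unlock in __jbd2_log_wait_for_space() above is a deadlock fix;
jbd2_journal_commit_transaction() may itself need j_checkpoint_mutex when
JBD2_FLUSHED is set, so sleeping in jbd2_log_wait_commit() while holding that
mutex could stall the very commit being waited for. The shape of the fix,
paraphrased from the hunk above (the loop is expected to re-acquire the
checkpoint mutex on its next pass):

	mutex_unlock(&journal->j_checkpoint_mutex);	/* let commit take it */
	jbd2_log_wait_commit(journal, tid);		/* sleep without it */
	write_lock(&journal->j_state_lock);
	continue;					/* re-check free space */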
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0f53946f13c1..cf2fc0594063 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -30,15 +30,22 @@
 #include <trace/events/jbd2.h>
 
 /*
- * Default IO end handler for temporary BJ_IO buffer_heads.
+ * IO end handler for temporary buffer_heads handling writes to the journal.
  */
 static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 {
+	struct buffer_head *orig_bh = bh->b_private;
+
 	BUFFER_TRACE(bh, "");
 	if (uptodate)
 		set_buffer_uptodate(bh);
 	else
 		clear_buffer_uptodate(bh);
+	if (orig_bh) {
+		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
+		smp_mb__after_clear_bit();
+		wake_up_bit(&orig_bh->b_state, BH_Shadow);
+	}
 	unlock_buffer(bh);
 }
 
@@ -85,8 +92,7 @@ nope:
 	__brelse(bh);
 }
 
-static void jbd2_commit_block_csum_set(journal_t *j,
-				       struct journal_head *descriptor)
+static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
 {
 	struct commit_header *h;
 	__u32 csum;
@@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j,
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return;
 
-	h = (struct commit_header *)(jh2bh(descriptor)->b_data);
+	h = (struct commit_header *)(bh->b_data);
 	h->h_chksum_type = 0;
 	h->h_chksum_size = 0;
 	h->h_chksum[0] = 0;
-	csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
-			   j->j_blocksize);
+	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
 	h->h_chksum[0] = cpu_to_be32(csum);
 }
 
@@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal,
 					struct buffer_head **cbh,
 					__u32 crc32_sum)
 {
-	struct journal_head *descriptor;
 	struct commit_header *tmp;
 	struct buffer_head *bh;
 	int ret;
@@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal,
 	if (is_journal_aborted(journal))
 		return 0;
 
-	descriptor = jbd2_journal_get_descriptor_buffer(journal);
-	if (!descriptor)
+	bh = jbd2_journal_get_descriptor_buffer(journal);
+	if (!bh)
 		return 1;
 
-	bh = jh2bh(descriptor);
-
 	tmp = (struct commit_header *)bh->b_data;
 	tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
 	tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
@@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal,
 		tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
 		tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
 	}
-	jbd2_commit_block_csum_set(journal, descriptor);
+	jbd2_commit_block_csum_set(journal, bh);
 
-	JBUFFER_TRACE(descriptor, "submit commit block");
+	BUFFER_TRACE(bh, "submit commit block");
 	lock_buffer(bh);
 	clear_buffer_dirty(bh);
 	set_buffer_uptodate(bh);
@@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal,
 	if (unlikely(!buffer_uptodate(bh)))
 		ret = -EIO;
 	put_bh(bh);		/* One for getblk() */
-	jbd2_journal_put_journal_head(bh2jh(bh));
 
 	return ret;
 }
@@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
 }
 
 static void jbd2_descr_block_csum_set(journal_t *j,
-				      struct journal_head *descriptor)
+				      struct buffer_head *bh)
 {
 	struct jbd2_journal_block_tail *tail;
 	__u32 csum;
@@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j,
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return;
 
-	tail = (struct jbd2_journal_block_tail *)
-		(jh2bh(descriptor)->b_data + j->j_blocksize -
+	tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
 		sizeof(struct jbd2_journal_block_tail));
 	tail->t_checksum = 0;
-	csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
-			   j->j_blocksize);
+	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
 	tail->t_checksum = cpu_to_be32(csum);
 }
 
@@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
 {
 	struct page *page = bh->b_page;
 	__u8 *addr;
-	__u32 csum;
+	__u32 csum32;
+	__be32 seq;
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return;
 
-	sequence = cpu_to_be32(sequence);
+	seq = cpu_to_be32(sequence);
 	addr = kmap_atomic(page);
-	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
-			   sizeof(sequence));
-	csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data),
-			   bh->b_size);
+	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+	csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
+			     bh->b_size);
 	kunmap_atomic(addr);
 
-	tag->t_checksum = cpu_to_be32(csum);
+	/* We only have space to store the lower 16 bits of the crc32c. */
+	tag->t_checksum = cpu_to_be16(csum32);
 }
 /*
  * jbd2_journal_commit_transaction
@@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 {
 	struct transaction_stats_s stats;
 	transaction_t *commit_transaction;
-	struct journal_head *jh, *new_jh, *descriptor;
+	struct journal_head *jh;
+	struct buffer_head *descriptor;
 	struct buffer_head **wbuf = journal->j_wbuf;
 	int bufs;
 	int flags;
@@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	tid_t first_tid;
 	int update_tail;
 	int csum_size = 0;
+	LIST_HEAD(io_bufs);
+	LIST_HEAD(log_bufs);
 
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		csum_size = sizeof(struct jbd2_journal_block_tail);
@@ -424,13 +427,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	J_ASSERT(journal->j_committing_transaction == NULL);
 
 	commit_transaction = journal->j_running_transaction;
-	J_ASSERT(commit_transaction->t_state == T_RUNNING);
 
 	trace_jbd2_start_commit(journal, commit_transaction);
 	jbd_debug(1, "JBD2: starting commit of transaction %d\n",
 			commit_transaction->t_tid);
 
 	write_lock(&journal->j_state_lock);
+	J_ASSERT(commit_transaction->t_state == T_RUNNING);
 	commit_transaction->t_state = T_LOCKED;
 
 	trace_jbd2_commit_locking(journal, commit_transaction);
@@ -520,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 */
 	jbd2_journal_switch_revoke_table(journal);
 
+	/*
+	 * Reserved credits cannot be claimed anymore, free them
+	 */
+	atomic_sub(atomic_read(&journal->j_reserved_credits),
+		   &commit_transaction->t_outstanding_credits);
+
 	trace_jbd2_commit_flushing(journal, commit_transaction);
 	stats.run.rs_flushing = jiffies;
 	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
@@ -533,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	wake_up(&journal->j_wait_transaction_locked);
 	write_unlock(&journal->j_state_lock);
 
-	jbd_debug(3, "JBD2: commit phase 2\n");
+	jbd_debug(3, "JBD2: commit phase 2a\n");
 
 	/*
 	 * Now start flushing things to disk, in the order they appear
@@ -545,10 +554,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	blk_start_plug(&plug);
 	jbd2_journal_write_revoke_records(journal, commit_transaction,
-					  WRITE_SYNC);
+					  &log_bufs, WRITE_SYNC);
 	blk_finish_plug(&plug);
 
-	jbd_debug(3, "JBD2: commit phase 2\n");
+	jbd_debug(3, "JBD2: commit phase 2b\n");
 
 	/*
 	 * Way to go: we have now written out all of the data for a
@@ -571,8 +580,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		atomic_read(&commit_transaction->t_outstanding_credits));
 
 	err = 0;
-	descriptor = NULL;
 	bufs = 0;
+	descriptor = NULL;
 	blk_start_plug(&plug);
 	while (commit_transaction->t_buffers) {
 
@@ -604,8 +613,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		   record the metadata buffer. */
 
 		if (!descriptor) {
-			struct buffer_head *bh;
-
 			J_ASSERT (bufs == 0);
 
 			jbd_debug(4, "JBD2: get descriptor\n");
@@ -616,26 +623,26 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 				continue;
 			}
 
-			bh = jh2bh(descriptor);
 			jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
-				(unsigned long long)bh->b_blocknr, bh->b_data);
-			header = (journal_header_t *)&bh->b_data[0];
+				(unsigned long long)descriptor->b_blocknr,
+				descriptor->b_data);
+			header = (journal_header_t *)descriptor->b_data;
 			header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
 			header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
 			header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
 
-			tagp = &bh->b_data[sizeof(journal_header_t)];
-			space_left = bh->b_size - sizeof(journal_header_t);
+			tagp = &descriptor->b_data[sizeof(journal_header_t)];
+			space_left = descriptor->b_size -
+						sizeof(journal_header_t);
 			first_tag = 1;
-			set_buffer_jwrite(bh);
-			set_buffer_dirty(bh);
-			wbuf[bufs++] = bh;
+			set_buffer_jwrite(descriptor);
+			set_buffer_dirty(descriptor);
+			wbuf[bufs++] = descriptor;
 
 			/* Record it so that we can wait for IO
 			   completion later */
-			BUFFER_TRACE(bh, "ph3: file as descriptor");
-			jbd2_journal_file_buffer(descriptor, commit_transaction,
-					BJ_LogCtl);
+			BUFFER_TRACE(descriptor, "ph3: file as descriptor");
+			jbd2_file_log_bh(&log_bufs, descriptor);
 		}
 
 		/* Where is the buffer to be written? */
@@ -658,29 +665,22 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 		/* Bump b_count to prevent truncate from stumbling over
 		   the shadowed buffer! @@@ This can go if we ever get
-		   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
+		   rid of the shadow pairing of buffers. */
 		atomic_inc(&jh2bh(jh)->b_count);
 
-		/* Make a temporary IO buffer with which to write it out
-		   (this will requeue both the metadata buffer and the
-		   temporary IO buffer). new_bh goes on BJ_IO*/
-
-		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
 		/*
-		 * akpm: jbd2_journal_write_metadata_buffer() sets
-		 * new_bh->b_transaction to commit_transaction.
-		 * We need to clean this up before we release new_bh
-		 * (which is of type BJ_IO)
+		 * Make a temporary IO buffer with which to write it out
+		 * (this will requeue the metadata buffer to BJ_Shadow).
 		 */
+		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
 		JBUFFER_TRACE(jh, "ph3: write metadata");
 		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
-						      jh, &new_jh, blocknr);
+						jh, &wbuf[bufs], blocknr);
 		if (flags < 0) {
 			jbd2_journal_abort(journal, flags);
 			continue;
 		}
-		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
-		wbuf[bufs++] = jh2bh(new_jh);
+		jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
 
 		/* Record the new block's tag in the current descriptor
 		   buffer */
@@ -694,10 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		tag = (journal_block_tag_t *) tagp;
 		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
 		tag->t_flags = cpu_to_be16(tag_flag);
-		jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh),
+		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
 					commit_transaction->t_tid);
 		tagp += tag_bytes;
 		space_left -= tag_bytes;
+		bufs++;
 
 		if (first_tag) {
 			memcpy (tagp, journal->j_uuid, 16);
@@ -809,7 +810,7 @@ start_journal_io:
    the log.  Before we can commit it, wait for the IO so far to
    complete.  Control buffers being written are on the
    transaction's t_log_list queue, and metadata buffers are on
-   the t_iobuf_list queue.
+   the io_bufs list.
 
    Wait for the buffers in reverse order.  That way we are
    less likely to be woken up until all IOs have completed, and
@@ -818,47 +819,33 @@ start_journal_io:
 
 	jbd_debug(3, "JBD2: commit phase 3\n");
 
-	/*
-	 * akpm: these are BJ_IO, and j_list_lock is not needed.
-	 * See __journal_try_to_free_buffer.
-	 */
-wait_for_iobuf:
-	while (commit_transaction->t_iobuf_list != NULL) {
-		struct buffer_head *bh;
+	while (!list_empty(&io_bufs)) {
+		struct buffer_head *bh = list_entry(io_bufs.prev,
+						    struct buffer_head,
+						    b_assoc_buffers);
 
-		jh = commit_transaction->t_iobuf_list->b_tprev;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			wait_on_buffer(bh);
-			goto wait_for_iobuf;
-		}
-		if (cond_resched())
-			goto wait_for_iobuf;
+		wait_on_buffer(bh);
+		cond_resched();
 
 		if (unlikely(!buffer_uptodate(bh)))
 			err = -EIO;
-
-		clear_buffer_jwrite(bh);
-
-		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
-		jbd2_journal_unfile_buffer(journal, jh);
+		jbd2_unfile_log_bh(bh);
 
 		/*
-		 * ->t_iobuf_list should contain only dummy buffer_heads
-		 * which were created by jbd2_journal_write_metadata_buffer().
+		 * The list contains temporary buffer heads created by
+		 * jbd2_journal_write_metadata_buffer().
 		 */
 		BUFFER_TRACE(bh, "dumping temporary bh");
-		jbd2_journal_put_journal_head(jh);
 		__brelse(bh);
 		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
 		free_buffer_head(bh);
 
-		/* We also have to unlock and free the corresponding
-		   shadowed buffer */
+		/* We also have to refile the corresponding shadowed buffer */
 		jh = commit_transaction->t_shadow_list->b_tprev;
 		bh = jh2bh(jh);
-		clear_bit(BH_JWrite, &bh->b_state);
+		clear_buffer_jwrite(bh);
 		J_ASSERT_BH(bh, buffer_jbddirty(bh));
+		J_ASSERT_BH(bh, !buffer_shadow(bh));
 
 		/* The metadata is now released for reuse, but we need
 		   to remember it against this transaction so that when
@@ -866,14 +853,6 @@ wait_for_iobuf:
 		   required. */
 		JBUFFER_TRACE(jh, "file as BJ_Forget");
 		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
-		/*
-		 * Wake up any transactions which were waiting for this IO to
-		 * complete. The barrier must be here so that changes by
-		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
-		 * does the waitqueue check.
-		 */
-		smp_mb();
-		wake_up_bit(&bh->b_state, BH_Unshadow);
 		JBUFFER_TRACE(jh, "brelse shadowed buffer");
 		__brelse(bh);
 	}
@@ -883,26 +862,19 @@ wait_for_iobuf:
 	jbd_debug(3, "JBD2: commit phase 4\n");
 
 	/* Here we wait for the revoke record and descriptor record buffers */
- wait_for_ctlbuf:
-	while (commit_transaction->t_log_list != NULL) {
+	while (!list_empty(&log_bufs)) {
 		struct buffer_head *bh;
 
-		jh = commit_transaction->t_log_list->b_tprev;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			wait_on_buffer(bh);
-			goto wait_for_ctlbuf;
-		}
-		if (cond_resched())
-			goto wait_for_ctlbuf;
+		bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
+		wait_on_buffer(bh);
+		cond_resched();
 
 		if (unlikely(!buffer_uptodate(bh)))
 			err = -EIO;
 
 		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
 		clear_buffer_jwrite(bh);
-		jbd2_journal_unfile_buffer(journal, jh);
-		jbd2_journal_put_journal_head(jh);
+		jbd2_unfile_log_bh(bh);
 		__brelse(bh);		/* One for getblk */
 		/* AKPM: bforget here */
 	}
@@ -952,9 +924,7 @@ wait_for_iobuf:
 	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
 	J_ASSERT(commit_transaction->t_buffers == NULL);
 	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
-	J_ASSERT(commit_transaction->t_iobuf_list == NULL);
 	J_ASSERT(commit_transaction->t_shadow_list == NULL);
-	J_ASSERT(commit_transaction->t_log_list == NULL);
 
 restart_loop:
 	/*
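
Note: the BH_Shadow bit handled above replaces the old BJ_IO/BH_Unshadow
machinery — the temporary write buffer now carries a pointer to the shadowed
buffer in b_private, and journal_end_buffer_io_sync() clears BH_Shadow with
clear_bit_unlock() plus a barrier before wake_up_bit(). The waiting side lives
in do_get_write_access() and is not part of this diff; in the 3.13-era code it
looks roughly like this sketch (illustration only):

	static int sleep_on_shadow_bh(void *word)
	{
		io_schedule();
		return 0;
	}

	/* caller that wants to modify a buffer still being shadowed: */
	if (buffer_shadow(bh)) {
		jbd_unlock_bh_state(bh);
		wait_on_bit(&bh->b_state, BH_Shadow,
			    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
		goto repeat;	/* re-check state after wakeup */
	}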
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 95457576e434..52032647dd4a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache);
103static void __journal_abort_soft (journal_t *journal, int errno); 103static void __journal_abort_soft (journal_t *journal, int errno);
104static int jbd2_journal_create_slab(size_t slab_size); 104static int jbd2_journal_create_slab(size_t slab_size);
105 105
106#ifdef CONFIG_JBD2_DEBUG
107void __jbd2_debug(int level, const char *file, const char *func,
108 unsigned int line, const char *fmt, ...)
109{
110 struct va_format vaf;
111 va_list args;
112
113 if (level > jbd2_journal_enable_debug)
114 return;
115 va_start(args, fmt);
116 vaf.fmt = fmt;
117 vaf.va = &args;
118 printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
119 va_end(args);
120}
121EXPORT_SYMBOL(__jbd2_debug);
122#endif
123
106/* Checksumming functions */ 124/* Checksumming functions */
107int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
108{ 126{
@@ -112,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
112 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
113} 131}
114 132
115static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 133static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
116{ 134{
117 __u32 csum, old_csum; 135 __u32 csum;
136 __be32 old_csum;
118 137
119 old_csum = sb->s_checksum; 138 old_csum = sb->s_checksum;
120 sb->s_checksum = 0; 139 sb->s_checksum = 0;
@@ -310,14 +329,12 @@ static void journal_kill_thread(journal_t *journal)
310 * 329 *
311 * If the source buffer has already been modified by a new transaction 330 * If the source buffer has already been modified by a new transaction
312 * since we took the last commit snapshot, we use the frozen copy of 331 * since we took the last commit snapshot, we use the frozen copy of
313 * that data for IO. If we end up using the existing buffer_head's data 332 * that data for IO. If we end up using the existing buffer_head's data
314 * for the write, then we *have* to lock the buffer to prevent anyone 333 * for the write, then we have to make sure nobody modifies it while the
315 * else from using and possibly modifying it while the IO is in 334 * IO is in progress. do_get_write_access() handles this.
316 * progress.
317 * 335 *
318 * The function returns a pointer to the buffer_heads to be used for IO. 336 * The function returns a pointer to the buffer_head to be used for IO.
319 * 337 *
320 * We assume that the journal has already been locked in this function.
321 * 338 *
322 * Return value: 339 * Return value:
323 * <0: Error 340 * <0: Error
@@ -330,15 +347,14 @@ static void journal_kill_thread(journal_t *journal)
 
 int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head *jh_in,
-				  struct journal_head **jh_out,
-				  unsigned long long blocknr)
+				  struct buffer_head **bh_out,
+				  sector_t blocknr)
 {
 	int need_copy_out = 0;
 	int done_copy_out = 0;
 	int do_escape = 0;
 	char *mapped_data;
 	struct buffer_head *new_bh;
-	struct journal_head *new_jh;
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
@@ -368,14 +384,13 @@ retry_alloc:
 
 	/* keep subsequent assertions sane */
 	atomic_set(&new_bh->b_count, 1);
-	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
+	jbd_lock_bh_state(bh_in);
+repeat:
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
 	 * we use that version of the data for the commit.
 	 */
-	jbd_lock_bh_state(bh_in);
-repeat:
 	if (jh_in->b_frozen_data) {
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
@@ -415,7 +430,7 @@ repeat:
 		jbd_unlock_bh_state(bh_in);
 		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
 		if (!tmp) {
-			jbd2_journal_put_journal_head(new_jh);
+			brelse(new_bh);
 			return -ENOMEM;
 		}
 		jbd_lock_bh_state(bh_in);
@@ -426,7 +441,7 @@ repeat:
 
 		jh_in->b_frozen_data = tmp;
 		mapped_data = kmap_atomic(new_page);
-		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
+		memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
 		kunmap_atomic(mapped_data);
 
 		new_page = virt_to_page(tmp);
@@ -452,14 +467,14 @@ repeat:
 	}
 
 	set_bh_page(new_bh, new_page, new_offset);
-	new_jh->b_transaction = NULL;
-	new_bh->b_size = jh2bh(jh_in)->b_size;
-	new_bh->b_bdev = transaction->t_journal->j_dev;
+	new_bh->b_size = bh_in->b_size;
+	new_bh->b_bdev = journal->j_dev;
 	new_bh->b_blocknr = blocknr;
+	new_bh->b_private = bh_in;
 	set_buffer_mapped(new_bh);
 	set_buffer_dirty(new_bh);
 
-	*jh_out = new_jh;
+	*bh_out = new_bh;
 
 	/*
 	 * The to-be-written buffer needs to get moved to the io queue,
@@ -470,11 +485,9 @@ repeat:
 	spin_lock(&journal->j_list_lock);
 	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
 	spin_unlock(&journal->j_list_lock);
+	set_buffer_shadow(bh_in);
 	jbd_unlock_bh_state(bh_in);
 
-	JBUFFER_TRACE(new_jh, "file as BJ_IO");
-	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
-
 	return do_escape | (done_copy_out << 1);
 }
 
@@ -484,35 +497,6 @@ repeat:
484 */ 497 */
485 498
486/* 499/*
487 * __jbd2_log_space_left: Return the number of free blocks left in the journal.
488 *
489 * Called with the journal already locked.
490 *
491 * Called under j_state_lock
492 */
493
494int __jbd2_log_space_left(journal_t *journal)
495{
496 int left = journal->j_free;
497
498 /* assert_spin_locked(&journal->j_state_lock); */
499
500 /*
501 * Be pessimistic here about the number of those free blocks which
502 * might be required for log descriptor control blocks.
503 */
504
505#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
506
507 left -= MIN_LOG_RESERVED_BLOCKS;
508
509 if (left <= 0)
510 return 0;
511 left -= (left >> 3);
512 return left;
513}
514
515/*
516 * Called with j_state_lock locked for writing. 500 * Called with j_state_lock locked for writing.
517 * Returns true if a transaction commit was started. 501 * Returns true if a transaction commit was started.
518 */ 502 */
@@ -564,20 +548,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
 }
 
 /*
- * Force and wait upon a commit if the calling process is not within
- * transaction. This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
- *
- * We can only force the running transaction if we don't have an active handle;
- * otherwise, we will deadlock.
- *
- * Returns true if a transaction was started.
+ * Force and wait any uncommitted transactions. We can only force the running
+ * transaction if we don't have an active handle, otherwise, we will deadlock.
+ * Returns: <0 in case of error,
+ *           0 if nothing to commit,
+ *           1 if transaction was successfully committed.
  */
-int jbd2_journal_force_commit_nested(journal_t *journal)
+static int __jbd2_journal_force_commit(journal_t *journal)
 {
 	transaction_t *transaction = NULL;
 	tid_t tid;
-	int need_to_start = 0;
+	int need_to_start = 0, ret = 0;
 
 	read_lock(&journal->j_state_lock);
 	if (journal->j_running_transaction && !current->journal_info) {
@@ -588,16 +569,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 		transaction = journal->j_committing_transaction;
 
 	if (!transaction) {
+		/* Nothing to commit */
 		read_unlock(&journal->j_state_lock);
-		return 0;	/* Nothing to retry */
+		return 0;
 	}
-
 	tid = transaction->t_tid;
 	read_unlock(&journal->j_state_lock);
 	if (need_to_start)
 		jbd2_log_start_commit(journal, tid);
-	jbd2_log_wait_commit(journal, tid);
-	return 1;
+	ret = jbd2_log_wait_commit(journal, tid);
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
+
+/**
+ * Force and wait upon a commit if the calling process is not within
+ * transaction. This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
+ *
+ * @journal: journal to force
+ * Returns true if progress was made.
+ */
+int jbd2_journal_force_commit_nested(journal_t *journal)
+{
+	int ret;
+
+	ret = __jbd2_journal_force_commit(journal);
+	return ret > 0;
+}
+
+/**
+ * int journal_force_commit() - force any uncommitted transactions
+ * @journal: journal to force
+ *
+ * Caller want unconditional commit. We can only force the running transaction
+ * if we don't have an active handle, otherwise, we will deadlock.
+ */
+int jbd2_journal_force_commit(journal_t *journal)
+{
+	int ret;
+
+	J_ASSERT(!current->journal_info);
+	ret = __jbd2_journal_force_commit(journal);
+	if (ret > 0)
+		ret = 0;
+	return ret;
 }
 
 /*
@@ -798,7 +816,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
  * But we don't bother doing that, so there will be coherency problems with
  * mmaps of blockdevs which hold live JBD-controlled filesystems.
  */
-struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
+struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 {
 	struct buffer_head *bh;
 	unsigned long long blocknr;
@@ -817,7 +835,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	BUFFER_TRACE(bh, "return this buffer");
-	return jbd2_journal_add_journal_head(bh);
+	return bh;
 }
 
 /*
@@ -1062,11 +1080,10 @@ static journal_t * journal_init_common (void)
 		return NULL;
 
 	init_waitqueue_head(&journal->j_wait_transaction_locked);
-	init_waitqueue_head(&journal->j_wait_logspace);
 	init_waitqueue_head(&journal->j_wait_done_commit);
-	init_waitqueue_head(&journal->j_wait_checkpoint);
 	init_waitqueue_head(&journal->j_wait_commit);
 	init_waitqueue_head(&journal->j_wait_updates);
+	init_waitqueue_head(&journal->j_wait_reserved);
 	mutex_init(&journal->j_barrier);
 	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
@@ -1076,6 +1093,7 @@ static journal_t * journal_init_common (void)
 	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
 	journal->j_min_batch_time = 0;
 	journal->j_max_batch_time = 15000; /* 15ms */
+	atomic_set(&journal->j_reserved_credits, 0);
 
 	/* The journal is marked for error until we succeed with recovery! */
 	journal->j_flags = JBD2_ABORT;
@@ -1318,6 +1336,7 @@ static int journal_reset(journal_t *journal)
 static void jbd2_write_superblock(journal_t *journal, int write_op)
 {
 	struct buffer_head *bh = journal->j_sb_buffer;
+	journal_superblock_t *sb = journal->j_superblock;
 	int ret;
 
 	trace_jbd2_write_superblock(journal, write_op);
@@ -1339,6 +1358,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
 		clear_buffer_write_io_error(bh);
 		set_buffer_uptodate(bh);
 	}
+	jbd2_superblock_csum_set(journal, sb);
 	get_bh(bh);
 	bh->b_end_io = end_buffer_write_sync;
 	ret = submit_bh(write_op, bh);
@@ -1435,7 +1455,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
 	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
 		  journal->j_errno);
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
-	jbd2_superblock_csum_set(journal, sb);
 	read_unlock(&journal->j_state_lock);
 
 	jbd2_write_superblock(journal, WRITE_SYNC);
@@ -2325,13 +2344,13 @@ static struct journal_head *journal_alloc_journal_head(void)
 #ifdef CONFIG_JBD2_DEBUG
 	atomic_inc(&nr_journal_heads);
 #endif
-	ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
+	ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
 	if (!ret) {
 		jbd_debug(1, "out of memory for journal_head\n");
 		pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
 		while (!ret) {
 			yield();
-			ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
+			ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
 		}
 	}
 	return ret;
@@ -2393,10 +2412,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
 	struct journal_head *new_jh = NULL;
 
 repeat:
-	if (!buffer_jbd(bh)) {
+	if (!buffer_jbd(bh))
 		new_jh = journal_alloc_journal_head();
-		memset(new_jh, 0, sizeof(*new_jh));
-	}
 
 	jbd_lock_bh_journal_head(bh);
 	if (buffer_jbd(bh)) {
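
Note: __jbd2_debug() above is the out-of-line backend for the jbd_debug() calls
seen throughout this diff; routing them through a single printk("%pV") avoids a
format string per call site. The matching macro lives in include/linux/jbd2.h
and is along these lines (shown for context, not part of this hunk):

	extern ushort jbd2_journal_enable_debug;

	void __jbd2_debug(int level, const char *file, const char *func,
			  unsigned int line, const char *fmt, ...);

	#define jbd_debug(n, fmt, a...) \
		__jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)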
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 626846bac32f..3929c50428b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
 					 void *buf)
 {
 	struct jbd2_journal_block_tail *tail;
-	__u32 provided, calculated;
+	__be32 provided;
+	__u32 calculated;
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return 1;
@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
 	tail->t_checksum = provided;
 
-	provided = be32_to_cpu(provided);
-	return provided == calculated;
+	return provided == cpu_to_be32(calculated);
 }
 
 /*
@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
 {
 	struct commit_header *h;
-	__u32 provided, calculated;
+	__be32 provided;
+	__u32 calculated;
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return 1;
@@ -392,25 +393,23 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
 	h->h_chksum[0] = provided;
 
-	provided = be32_to_cpu(provided);
-	return provided == calculated;
+	return provided == cpu_to_be32(calculated);
 }
 
 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
 				      void *buf, __u32 sequence)
 {
-	__u32 provided, calculated;
+	__u32 csum32;
+	__be32 seq;
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return 1;
 
-	sequence = cpu_to_be32(sequence);
-	calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
-				 sizeof(sequence));
-	calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize);
-	provided = be32_to_cpu(tag->t_checksum);
+	seq = cpu_to_be32(sequence);
+	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
 
-	return provided == cpu_to_be32(calculated);
+	return tag->t_checksum == cpu_to_be16(csum32);
 }
 
 static int do_one_pass(journal_t *journal,
@@ -809,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
 					  void *buf)
 {
 	struct jbd2_journal_revoke_tail *tail;
-	__u32 provided, calculated;
+	__be32 provided;
+	__u32 calculated;
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return 1;
@@ -821,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
 	tail->r_checksum = provided;
 
-	provided = be32_to_cpu(provided);
-	return provided == calculated;
+	return provided == cpu_to_be32(calculated);
 }
 
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
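
Note on the verify rewrites above: be32_to_cpu(x) == y and x == cpu_to_be32(y)
are the same test, since the byte swap is a bijection, so behaviour is
unchanged; keeping the on-disk value in a __be32 variable merely gives sparse
correctly annotated operands. The block-tag case differs in substance:
journal_block_tag_t stores only a __be16 checksum, so both the writer in
commit.c and the verifier here truncate the computed crc32c to its low 16 bits:

	/* writer */	tag->t_checksum = cpu_to_be16(csum32);
	/* verifier */	return tag->t_checksum == cpu_to_be16(csum32);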
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index f30b80b4ce8b..198c9c10276d 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -122,9 +122,10 @@ struct jbd2_revoke_table_s
 
 #ifdef __KERNEL__
 static void write_one_revoke_record(journal_t *, transaction_t *,
-				    struct journal_head **, int *,
+				    struct list_head *,
+				    struct buffer_head **, int *,
 				    struct jbd2_revoke_record_s *, int);
-static void flush_descriptor(journal_t *, struct journal_head *, int, int);
+static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
 #endif
 
 /* Utility functions to maintain the revoke table */
@@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
  */
 void jbd2_journal_write_revoke_records(journal_t *journal,
 				       transaction_t *transaction,
+				       struct list_head *log_bufs,
 				       int write_op)
 {
-	struct journal_head *descriptor;
+	struct buffer_head *descriptor;
 	struct jbd2_revoke_record_s *record;
 	struct jbd2_revoke_table_s *revoke;
 	struct list_head *hash_list;
@@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
 	while (!list_empty(hash_list)) {
 		record = (struct jbd2_revoke_record_s *)
 			hash_list->next;
-		write_one_revoke_record(journal, transaction,
+		write_one_revoke_record(journal, transaction, log_bufs,
 					&descriptor, &offset,
 					record, write_op);
 		count++;
@@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
 
 static void write_one_revoke_record(journal_t *journal,
 				    transaction_t *transaction,
+				    struct list_head *log_bufs,
-				    struct journal_head **descriptorp,
+				    struct buffer_head **descriptorp,
 				    int *offsetp,
 				    struct jbd2_revoke_record_s *record,
 				    int write_op)
 {
 	int csum_size = 0;
-	struct journal_head *descriptor;
+	struct buffer_head *descriptor;
 	int offset;
 	journal_header_t *header;
 
@@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal,
 		descriptor = jbd2_journal_get_descriptor_buffer(journal);
 		if (!descriptor)
 			return;
-		header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
+		header = (journal_header_t *)descriptor->b_data;
 		header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
 		header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
 		header->h_sequence = cpu_to_be32(transaction->t_tid);
 
 		/* Record it so that we can wait for IO completion later */
-		JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
-		jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl);
+		BUFFER_TRACE(descriptor, "file in log_bufs");
+		jbd2_file_log_bh(log_bufs, descriptor);
 
 		offset = sizeof(jbd2_journal_revoke_header_t);
 		*descriptorp = descriptor;
 	}
 
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
-		* ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
+		* ((__be64 *)(&descriptor->b_data[offset])) =
 			cpu_to_be64(record->blocknr);
 		offset += 8;
 
 	} else {
-		* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+		* ((__be32 *)(&descriptor->b_data[offset])) =
 			cpu_to_be32(record->blocknr);
 		offset += 4;
 	}
@@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal,
 	*offsetp = offset;
 }
 
-static void jbd2_revoke_csum_set(journal_t *j,
-				 struct journal_head *descriptor)
+static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
 {
 	struct jbd2_journal_revoke_tail *tail;
 	__u32 csum;
@@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j,
 	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
 		return;
 
-	tail = (struct jbd2_journal_revoke_tail *)
-		(jh2bh(descriptor)->b_data + j->j_blocksize -
+	tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
 		sizeof(struct jbd2_journal_revoke_tail));
 	tail->r_checksum = 0;
-	csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
-			   j->j_blocksize);
+	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
 	tail->r_checksum = cpu_to_be32(csum);
 }
 
@@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j,
  */
 
 static void flush_descriptor(journal_t *journal,
-			     struct journal_head *descriptor,
+			     struct buffer_head *descriptor,
 			     int offset, int write_op)
 {
 	jbd2_journal_revoke_header_t *header;
-	struct buffer_head *bh = jh2bh(descriptor);
 
 	if (is_journal_aborted(journal)) {
-		put_bh(bh);
+		put_bh(descriptor);
 		return;
 	}
 
-	header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data;
+	header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
 	header->r_count = cpu_to_be32(offset);
 	jbd2_revoke_csum_set(journal, descriptor);
 
-	set_buffer_jwrite(bh);
-	BUFFER_TRACE(bh, "write");
-	set_buffer_dirty(bh);
-	write_dirty_buffer(bh, write_op);
+	set_buffer_jwrite(descriptor);
+	BUFFER_TRACE(descriptor, "write");
+	set_buffer_dirty(descriptor);
+	write_dirty_buffer(descriptor, write_op);
 }
 #endif
 
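
Note: jbd2_file_log_bh()/jbd2_unfile_log_bh(), used here and in commit.c,
replace the BJ_LogCtl journal_head list with a plain buffer_head list chained
through b_assoc_buffers (which is how commit.c's list_entry() calls walk it).
Assuming they are the trivial wrappers the call sites imply, they amount to:

	static inline void jbd2_file_log_bh(struct list_head *head,
					    struct buffer_head *bh)
	{
		list_add_tail(&bh->b_assoc_buffers, head);
	}

	static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
	{
		list_del_init(&bh->b_assoc_buffers);
	}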
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 10f524c59ea8..7aa9a32573bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
89 transaction->t_expires = jiffies + journal->j_commit_interval; 89 transaction->t_expires = jiffies + journal->j_commit_interval;
90 spin_lock_init(&transaction->t_handle_lock); 90 spin_lock_init(&transaction->t_handle_lock);
91 atomic_set(&transaction->t_updates, 0); 91 atomic_set(&transaction->t_updates, 0);
92 atomic_set(&transaction->t_outstanding_credits, 0); 92 atomic_set(&transaction->t_outstanding_credits,
93 atomic_read(&journal->j_reserved_credits));
93 atomic_set(&transaction->t_handle_count, 0); 94 atomic_set(&transaction->t_handle_count, 0);
94 INIT_LIST_HEAD(&transaction->t_inode_list); 95 INIT_LIST_HEAD(&transaction->t_inode_list);
95 INIT_LIST_HEAD(&transaction->t_private_list); 96 INIT_LIST_HEAD(&transaction->t_private_list);
@@ -141,6 +142,112 @@ static inline void update_t_max_wait(transaction_t *transaction,
141} 142}
142 143
143/* 144/*
145 * Wait until running transaction passes T_LOCKED state. Also starts the commit
146 * if needed. The function expects running transaction to exist and releases
147 * j_state_lock.
148 */
149static void wait_transaction_locked(journal_t *journal)
150 __releases(journal->j_state_lock)
151{
152 DEFINE_WAIT(wait);
153 int need_to_start;
154 tid_t tid = journal->j_running_transaction->t_tid;
155
156 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
157 TASK_UNINTERRUPTIBLE);
158 need_to_start = !tid_geq(journal->j_commit_request, tid);
159 read_unlock(&journal->j_state_lock);
160 if (need_to_start)
161 jbd2_log_start_commit(journal, tid);
162 schedule();
163 finish_wait(&journal->j_wait_transaction_locked, &wait);
164}
165
166static void sub_reserved_credits(journal_t *journal, int blocks)
167{
168 atomic_sub(blocks, &journal->j_reserved_credits);
169 wake_up(&journal->j_wait_reserved);
170}
171
172/*
173 * Wait until we can add credits for handle to the running transaction. Called
174 * with j_state_lock held for reading. Returns 0 if handle joined the running
175 * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
176 * caller must retry.
177 */
178static int add_transaction_credits(journal_t *journal, int blocks,
179 int rsv_blocks)
180{
181 transaction_t *t = journal->j_running_transaction;
182 int needed;
183 int total = blocks + rsv_blocks;
184
185 /*
186 * If the current transaction is locked down for commit, wait
187 * for the lock to be released.
188 */
189 if (t->t_state == T_LOCKED) {
190 wait_transaction_locked(journal);
191 return 1;
192 }
193
194 /*
195 * If there is not enough space left in the log to write all
196 * potential buffers requested by this operation, we need to
197 * stall pending a log checkpoint to free some more log space.
198 */
199 needed = atomic_add_return(total, &t->t_outstanding_credits);
200 if (needed > journal->j_max_transaction_buffers) {
201 /*
202 * If the current transaction is already too large,
203 * then start to commit it: we can then go back and
204 * attach this handle to a new transaction.
205 */
206 atomic_sub(total, &t->t_outstanding_credits);
207 wait_transaction_locked(journal);
208 return 1;
209 }
210
211 /*
212 * The commit code assumes that it can get enough log space
213 * without forcing a checkpoint. This is *critical* for
214 * correctness: a checkpoint of a buffer which is also
215 * associated with a committing transaction creates a deadlock,
216 * so commit simply cannot force through checkpoints.
217 *
218 * We must therefore ensure the necessary space in the journal
219 * *before* starting to dirty potentially checkpointed buffers
220 * in the new transaction.
221 */
222 if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
223 atomic_sub(total, &t->t_outstanding_credits);
224 read_unlock(&journal->j_state_lock);
225 write_lock(&journal->j_state_lock);
226 if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
227 __jbd2_log_wait_for_space(journal);
228 write_unlock(&journal->j_state_lock);
229 return 1;
230 }
231
232 /* No reservation? We are done... */
233 if (!rsv_blocks)
234 return 0;
235
236 needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
237 /* We allow at most half of a transaction to be reserved */
238 if (needed > journal->j_max_transaction_buffers / 2) {
239 sub_reserved_credits(journal, rsv_blocks);
240 atomic_sub(total, &t->t_outstanding_credits);
241 read_unlock(&journal->j_state_lock);
242 wait_event(journal->j_wait_reserved,
243 atomic_read(&journal->j_reserved_credits) + rsv_blocks
244 <= journal->j_max_transaction_buffers / 2);
245 return 1;
246 }
247 return 0;
248}
249
250/*
144 * start_this_handle: Given a handle, deal with any locking or stalling 251 * start_this_handle: Given a handle, deal with any locking or stalling
145 * needed to make sure that there is enough journal space for the handle 252 * needed to make sure that there is enough journal space for the handle
146 * to begin. Attach the handle to a transaction and set up the 253 * to begin. Attach the handle to a transaction and set up the
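
add_transaction_credits() keeps j_state_lock read-held on success and drops it after any wait, so its caller has to restart from the point where the lock was taken. A minimal sketch of the expected calling pattern (condensed from start_this_handle() further down; names are illustrative):

	repeat:
		read_lock(&journal->j_state_lock);
		/* ... re-check journal state, find the running transaction ... */
		if (add_transaction_credits(journal, blocks, rsv_blocks))
			goto repeat;	/* lock was dropped; state may have changed */
		/* joined: t_outstanding_credits (and j_reserved_credits) are charged */
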
@@ -151,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 			     gfp_t gfp_mask)
 {
 	transaction_t	*transaction, *new_transaction = NULL;
-	tid_t		tid;
-	int		needed, need_to_start;
-	int		nblocks = handle->h_buffer_credits;
+	int		blocks = handle->h_buffer_credits;
+	int		rsv_blocks = 0;
 	unsigned long ts = jiffies;
 
-	if (nblocks > journal->j_max_transaction_buffers) {
+	/*
+	 * 1/2 of transaction can be reserved so we can practically handle
+	 * only 1/2 of maximum transaction size per operation
+	 */
+	if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) {
 		printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
-		       current->comm, nblocks,
-		       journal->j_max_transaction_buffers);
+		       current->comm, blocks,
+		       journal->j_max_transaction_buffers / 2);
 		return -ENOSPC;
 	}
 
+	if (handle->h_rsv_handle)
+		rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
+
 alloc_transaction:
 	if (!journal->j_running_transaction) {
 		new_transaction = kmem_cache_zalloc(transaction_cache,
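
The WARN_ON enforces the new reservation invariant: because up to half of j_max_transaction_buffers may be parked in reservations, a single handle can ask for at most the other half. As a worked example, assuming j_max_transaction_buffers is 8192 (typically j_maxlen / 4 at this point in the code), a handle requesting 5000 credits now fails with -ENOSPC where the old check would have admitted it.
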
@@ -199,8 +312,12 @@ repeat:
 		return -EROFS;
 	}
 
-	/* Wait on the journal's transaction barrier if necessary */
-	if (journal->j_barrier_count) {
+	/*
+	 * Wait on the journal's transaction barrier if necessary. Specifically
+	 * we allow reserved handles to proceed because otherwise commit could
+	 * deadlock on page writeback not being able to complete.
+	 */
+	if (!handle->h_reserved && journal->j_barrier_count) {
 		read_unlock(&journal->j_state_lock);
 		wait_event(journal->j_wait_transaction_locked,
 			   journal->j_barrier_count == 0);
@@ -213,7 +330,7 @@ repeat:
 		goto alloc_transaction;
 	write_lock(&journal->j_state_lock);
 	if (!journal->j_running_transaction &&
-	    !journal->j_barrier_count) {
+	    (handle->h_reserved || !journal->j_barrier_count)) {
 		jbd2_get_transaction(journal, new_transaction);
 		new_transaction = NULL;
 	}
@@ -223,85 +340,18 @@ repeat:
 
 	transaction = journal->j_running_transaction;
 
-	/*
-	 * If the current transaction is locked down for commit, wait for the
-	 * lock to be released.
-	 */
-	if (transaction->t_state == T_LOCKED) {
-		DEFINE_WAIT(wait);
-
-		prepare_to_wait(&journal->j_wait_transaction_locked,
-					&wait, TASK_UNINTERRUPTIBLE);
-		read_unlock(&journal->j_state_lock);
-		schedule();
-		finish_wait(&journal->j_wait_transaction_locked, &wait);
-		goto repeat;
-	}
-
-	/*
-	 * If there is not enough space left in the log to write all potential
-	 * buffers requested by this operation, we need to stall pending a log
-	 * checkpoint to free some more log space.
-	 */
-	needed = atomic_add_return(nblocks,
-				   &transaction->t_outstanding_credits);
-
-	if (needed > journal->j_max_transaction_buffers) {
+	if (!handle->h_reserved) {
+		/* We may have dropped j_state_lock - restart in that case */
+		if (add_transaction_credits(journal, blocks, rsv_blocks))
+			goto repeat;
+	} else {
 		/*
-		 * If the current transaction is already too large, then start
-		 * to commit it: we can then go back and attach this handle to
-		 * a new transaction.
+		 * We have handle reserved so we are allowed to join T_LOCKED
+		 * transaction and we don't have to check for transaction size
+		 * and journal space.
 		 */
-		DEFINE_WAIT(wait);
-
-		jbd_debug(2, "Handle %p starting new commit...\n", handle);
-		atomic_sub(nblocks, &transaction->t_outstanding_credits);
-		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
-				TASK_UNINTERRUPTIBLE);
-		tid = transaction->t_tid;
-		need_to_start = !tid_geq(journal->j_commit_request, tid);
-		read_unlock(&journal->j_state_lock);
-		if (need_to_start)
-			jbd2_log_start_commit(journal, tid);
-		schedule();
-		finish_wait(&journal->j_wait_transaction_locked, &wait);
-		goto repeat;
-	}
-
-	/*
-	 * The commit code assumes that it can get enough log space
-	 * without forcing a checkpoint. This is *critical* for
-	 * correctness: a checkpoint of a buffer which is also
-	 * associated with a committing transaction creates a deadlock,
-	 * so commit simply cannot force through checkpoints.
-	 *
-	 * We must therefore ensure the necessary space in the journal
-	 * *before* starting to dirty potentially checkpointed buffers
-	 * in the new transaction.
-	 *
-	 * The worst part is, any transaction currently committing can
-	 * reduce the free space arbitrarily. Be careful to account for
-	 * those buffers when checkpointing.
-	 */
-
-	/*
-	 * @@@ AKPM: This seems rather over-defensive. We're giving commit
-	 * a _lot_ of headroom: 1/4 of the journal plus the size of
-	 * the committing transaction. Really, we only need to give it
-	 * committing_transaction->t_outstanding_credits plus "enough" for
-	 * the log control blocks.
-	 * Also, this test is inconsistent with the matching one in
-	 * jbd2_journal_extend().
-	 */
-	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
-		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
-		atomic_sub(nblocks, &transaction->t_outstanding_credits);
-		read_unlock(&journal->j_state_lock);
-		write_lock(&journal->j_state_lock);
-		if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
-			__jbd2_log_wait_for_space(journal);
-		write_unlock(&journal->j_state_lock);
-		goto repeat;
+		sub_reserved_credits(journal, blocks);
+		handle->h_reserved = 0;
 	}
 
 	/* OK, account for the buffers that this operation expects to
@@ -309,15 +359,16 @@ repeat:
 	 */
 	update_t_max_wait(transaction, ts);
 	handle->h_transaction = transaction;
-	handle->h_requested_credits = nblocks;
+	handle->h_requested_credits = blocks;
 	handle->h_start_jiffies = jiffies;
 	atomic_inc(&transaction->t_updates);
 	atomic_inc(&transaction->t_handle_count);
-	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-		  handle, nblocks,
+	jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
+		  handle, blocks,
 		  atomic_read(&transaction->t_outstanding_credits),
-		  __jbd2_log_space_left(journal));
+		  jbd2_log_space_left(journal));
 	read_unlock(&journal->j_state_lock);
+	current->journal_info = handle;
 
 	lock_map_acquire(&handle->h_lockdep_map);
 	jbd2_journal_free_transaction(new_transaction);
@@ -348,16 +399,21 @@ static handle_t *new_handle(int nblocks)
  *
  * We make sure that the transaction can guarantee at least nblocks of
  * modified buffers in the log. We block until the log can guarantee
- * that much space.
- *
- * This function is visible to journal users (like ext3fs), so is not
- * called with the journal already locked.
+ * that much space. Additionally, if rsv_blocks > 0, we also create another
+ * handle with rsv_blocks reserved blocks in the journal. This handle is
+ * stored in h_rsv_handle. It is not attached to any particular transaction
+ * and thus doesn't block transaction commit. If the caller uses this reserved
+ * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
+ * on the parent handle will dispose the reserved one. Reserved handle has to
+ * be converted to a normal handle using jbd2_journal_start_reserved() before
+ * it can be used.
  *
  * Return a pointer to a newly allocated handle, or an ERR_PTR() value
  * on failure.
  */
-handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
-			      unsigned int type, unsigned int line_no)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
+			      gfp_t gfp_mask, unsigned int type,
+			      unsigned int line_no)
 {
 	handle_t *handle = journal_current_handle();
 	int err;
@@ -374,13 +430,24 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
 	handle = new_handle(nblocks);
 	if (!handle)
 		return ERR_PTR(-ENOMEM);
+	if (rsv_blocks) {
+		handle_t *rsv_handle;
 
-	current->journal_info = handle;
+		rsv_handle = new_handle(rsv_blocks);
+		if (!rsv_handle) {
+			jbd2_free_handle(handle);
+			return ERR_PTR(-ENOMEM);
+		}
+		rsv_handle->h_reserved = 1;
+		rsv_handle->h_journal = journal;
+		handle->h_rsv_handle = rsv_handle;
+	}
 
 	err = start_this_handle(journal, handle, gfp_mask);
 	if (err < 0) {
+		if (handle->h_rsv_handle)
+			jbd2_free_handle(handle->h_rsv_handle);
 		jbd2_free_handle(handle);
-		current->journal_info = NULL;
 		return ERR_PTR(err);
 	}
 	handle->h_type = type;
@@ -395,10 +462,65 @@ EXPORT_SYMBOL(jbd2__journal_start);
 
 handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 {
-	return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0);
+	return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
 }
 EXPORT_SYMBOL(jbd2_journal_start);
 
+void jbd2_journal_free_reserved(handle_t *handle)
+{
+	journal_t *journal = handle->h_journal;
+
+	WARN_ON(!handle->h_reserved);
+	sub_reserved_credits(journal, handle->h_buffer_credits);
+	jbd2_free_handle(handle);
+}
+EXPORT_SYMBOL(jbd2_journal_free_reserved);
+
+/**
+ * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle
+ * @handle: handle to start
+ *
+ * Start handle that has been previously reserved with jbd2_journal_reserve().
+ * This attaches @handle to the running transaction (or creates one if there's
+ * no transaction running). Unlike jbd2_journal_start() this function cannot
+ * block on journal commit, checkpointing, or similar stuff. It can block on
+ * memory allocation or frozen journal though.
+ *
+ * Return 0 on success, non-zero on error - handle is freed in that case.
+ */
+int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
+				unsigned int line_no)
+{
+	journal_t *journal = handle->h_journal;
+	int ret = -EIO;
+
+	if (WARN_ON(!handle->h_reserved)) {
+		/* Someone passed in normal handle? Just stop it. */
+		jbd2_journal_stop(handle);
+		return ret;
+	}
+	/*
+	 * Usefulness of mixing of reserved and unreserved handles is
+	 * questionable. So far nobody seems to need it so just error out.
+	 */
+	if (WARN_ON(current->journal_info)) {
+		jbd2_journal_free_reserved(handle);
+		return ret;
+	}
+
+	handle->h_journal = NULL;
+	/*
+	 * GFP_NOFS is here because callers are likely from writeback or
+	 * similarly constrained call sites
+	 */
+	ret = start_this_handle(journal, handle, GFP_NOFS);
+	if (ret < 0)
+		jbd2_journal_free_reserved(handle);
+	handle->h_type = type;
+	handle->h_line_no = line_no;
+	return ret;
+}
+EXPORT_SYMBOL(jbd2_journal_start_reserved);
 
 /**
  * int jbd2_journal_extend() - extend buffer credits.
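
Putting the new exports together, a hypothetical caller (names and credit counts are illustrative, not taken from the patch) reserves log space up front and redeems it later, detaching the reserved handle so that jbd2_journal_stop() on the parent does not dispose of it:

	handle_t *handle, *rsv;

	handle = jbd2__journal_start(journal, 10, 20, GFP_NOFS, 0, 0);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	rsv = handle->h_rsv_handle;
	handle->h_rsv_handle = NULL;	/* keep jbd2_journal_stop() away from it */
	/* ... ordinary updates under "handle" ... */
	jbd2_journal_stop(handle);

	/* later, e.g. from writeback, where blocking on commit could deadlock: */
	if (jbd2_journal_start_reserved(rsv, 0, 0))
		return -EIO;		/* rsv has already been freed for us */
	/* ... rsv now behaves like any running handle ... */
	jbd2_journal_stop(rsv);
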
@@ -423,49 +545,53 @@ EXPORT_SYMBOL(jbd2_journal_start);
 int jbd2_journal_extend(handle_t *handle, int nblocks)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 	int result;
 	int wanted;
 
-	result = -EIO;
+	WARN_ON(!transaction);
 	if (is_handle_aborted(handle))
-		goto out;
+		return -EROFS;
+	journal = transaction->t_journal;
 
 	result = 1;
 
 	read_lock(&journal->j_state_lock);
 
 	/* Don't extend a locked-down transaction! */
-	if (handle->h_transaction->t_state != T_RUNNING) {
+	if (transaction->t_state != T_RUNNING) {
 		jbd_debug(3, "denied handle %p %d blocks: "
 			  "transaction not running\n", handle, nblocks);
 		goto error_out;
 	}
 
 	spin_lock(&transaction->t_handle_lock);
-	wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
+	wanted = atomic_add_return(nblocks,
+				   &transaction->t_outstanding_credits);
 
 	if (wanted > journal->j_max_transaction_buffers) {
 		jbd_debug(3, "denied handle %p %d blocks: "
 			  "transaction too large\n", handle, nblocks);
+		atomic_sub(nblocks, &transaction->t_outstanding_credits);
 		goto unlock;
 	}
 
-	if (wanted > __jbd2_log_space_left(journal)) {
+	if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
+	    jbd2_log_space_left(journal)) {
 		jbd_debug(3, "denied handle %p %d blocks: "
 			  "insufficient log space\n", handle, nblocks);
+		atomic_sub(nblocks, &transaction->t_outstanding_credits);
 		goto unlock;
 	}
 
 	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
-				 handle->h_transaction->t_tid,
+				 transaction->t_tid,
 				 handle->h_type, handle->h_line_no,
 				 handle->h_buffer_credits,
 				 nblocks);
 
 	handle->h_buffer_credits += nblocks;
 	handle->h_requested_credits += nblocks;
-	atomic_add(nblocks, &transaction->t_outstanding_credits);
 	result = 0;
 
 	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@@ -473,7 +599,6 @@ unlock:
 	spin_unlock(&transaction->t_handle_lock);
 error_out:
 	read_unlock(&journal->j_state_lock);
-out:
 	return result;
 }
 
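
The space check in jbd2_journal_extend() now pads the request with an estimate for journal control blocks: wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) must still fit in the free log space. Assuming the shift is 5, as defined alongside these helpers in jbd2.h, extending a transaction to wanted = 1024 credits requires 1024 + 32 = 1056 free blocks, where the old code compared the bare 1024 against __jbd2_log_space_left().
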
@@ -490,19 +615,22 @@ out:
  * to a running handle, a call to jbd2_journal_restart will commit the
  * handle's transaction so far and reattach the handle to a new
  * transaction capable of guaranteeing the requested number of
- * credits.
+ * credits. We preserve reserved handle if there's any attached to the
+ * passed in handle.
  */
 int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 	tid_t tid;
 	int need_to_start, ret;
 
+	WARN_ON(!transaction);
 	/* If we've had an abort of any type, don't even think about
 	 * actually doing the restart! */
 	if (is_handle_aborted(handle))
 		return 0;
+	journal = transaction->t_journal;
 
 	/*
 	 * First unlink the handle from its current transaction, and start the
@@ -515,12 +643,18 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 	spin_lock(&transaction->t_handle_lock);
 	atomic_sub(handle->h_buffer_credits,
 		   &transaction->t_outstanding_credits);
+	if (handle->h_rsv_handle) {
+		sub_reserved_credits(journal,
+				     handle->h_rsv_handle->h_buffer_credits);
+	}
 	if (atomic_dec_and_test(&transaction->t_updates))
 		wake_up(&journal->j_wait_updates);
+	tid = transaction->t_tid;
 	spin_unlock(&transaction->t_handle_lock);
+	handle->h_transaction = NULL;
+	current->journal_info = NULL;
 
 	jbd_debug(2, "restarting handle %p\n", handle);
-	tid = transaction->t_tid;
 	need_to_start = !tid_geq(journal->j_commit_request, tid);
 	read_unlock(&journal->j_state_lock);
 	if (need_to_start)
@@ -557,6 +691,14 @@ void jbd2_journal_lock_updates(journal_t *journal)
 	write_lock(&journal->j_state_lock);
 	++journal->j_barrier_count;
 
+	/* Wait until there are no reserved handles */
+	if (atomic_read(&journal->j_reserved_credits)) {
+		write_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_reserved,
+			   atomic_read(&journal->j_reserved_credits) == 0);
+		write_lock(&journal->j_state_lock);
+	}
+
 	/* Wait until there are no running updates */
 	while (1) {
 		transaction_t *transaction = journal->j_running_transaction;
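
This wait is what keeps the barrier honest: start_this_handle() above deliberately lets reserved handles slip past the j_barrier_count check, so jbd2_journal_lock_updates() must first drain j_reserved_credits to zero or a reserved handle could still attach to a transaction behind the barrier.
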
@@ -619,6 +761,12 @@ static void warn_dirty_buffer(struct buffer_head *bh)
 	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
+static int sleep_on_shadow_bh(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do. If it is already part of a prior
@@ -634,17 +782,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 			int force_copy)
 {
 	struct buffer_head *bh;
-	transaction_t *transaction;
+	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal;
 	int error;
 	char *frozen_buffer = NULL;
 	int need_copy = 0;
 	unsigned long start_lock, time_lock;
 
+	WARN_ON(!transaction);
 	if (is_handle_aborted(handle))
 		return -EROFS;
-
-	transaction = handle->h_transaction;
 	journal = transaction->t_journal;
 
 	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
@@ -754,41 +901,29 @@ repeat:
 	 * journaled. If the primary copy is already going to
 	 * disk then we cannot do copy-out here. */
 
-	if (jh->b_jlist == BJ_Shadow) {
-		DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
-		wait_queue_head_t *wqh;
-
-		wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
-
+	if (buffer_shadow(bh)) {
 		JBUFFER_TRACE(jh, "on shadow: sleep");
 		jbd_unlock_bh_state(bh);
-		/* commit wakes up all shadow buffers after IO */
-		for ( ; ; ) {
-			prepare_to_wait(wqh, &wait.wait,
-					TASK_UNINTERRUPTIBLE);
-			if (jh->b_jlist != BJ_Shadow)
-				break;
-			schedule();
-		}
-		finish_wait(wqh, &wait.wait);
+		wait_on_bit(&bh->b_state, BH_Shadow,
+			    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
 		goto repeat;
 	}
 
-	/* Only do the copy if the currently-owning transaction
-	 * still needs it. If it is on the Forget list, the
-	 * committing transaction is past that stage. The
-	 * buffer had better remain locked during the kmalloc,
-	 * but that should be true --- we hold the journal lock
-	 * still and the buffer is already on the BUF_JOURNAL
-	 * list so won't be flushed.
+	/*
+	 * Only do the copy if the currently-owning transaction still
+	 * needs it. If buffer isn't on BJ_Metadata list, the
+	 * committing transaction is past that stage (here we use the
+	 * fact that BH_Shadow is set under bh_state lock together with
+	 * refiling to BJ_Shadow list and at this point we know the
+	 * buffer doesn't have BH_Shadow set).
 	 *
 	 * Subtle point, though: if this is a get_undo_access,
 	 * then we will be relying on the frozen_data to contain
 	 * the new value of the committed_data record after the
 	 * transaction, so we HAVE to force the frozen_data copy
-	 * in that case. */
-
-	if (jh->b_jlist != BJ_Forget || force_copy) {
+	 * in that case.
+	 */
+	if (jh->b_jlist == BJ_Metadata || force_copy) {
 		JBUFFER_TRACE(jh, "generate frozen data");
 		if (!frozen_buffer) {
 			JBUFFER_TRACE(jh, "allocate memory for buffer");
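
wait_on_bit() parks the caller until BH_Shadow clears, with sleep_on_shadow_bh() as the action callback choosing the io_schedule() flavour of sleeping. The waker side lives in the commit path and is not shown in this hunk; the pairing presumably looks like this sketch:

	/* commit path, once IO on the shadow buffer has completed: */
	clear_buffer_shadow(bh);
	smp_mb();	/* order the clear before waking the bit waitqueue */
	wake_up_bit(&bh->b_state, BH_Shadow);
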
@@ -915,14 +1050,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 	int err;
 
 	jbd_debug(5, "journal_head %p\n", jh);
+	WARN_ON(!transaction);
 	err = -EROFS;
 	if (is_handle_aborted(handle))
 		goto out;
+	journal = transaction->t_journal;
 	err = 0;
 
 	JBUFFER_TRACE(jh, "entry");
@@ -1128,12 +1265,14 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
 int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 	struct journal_head *jh;
 	int ret = 0;
 
+	WARN_ON(!transaction);
 	if (is_handle_aborted(handle))
-		goto out;
+		return -EROFS;
+	journal = transaction->t_journal;
 	jh = jbd2_journal_grab_journal_head(bh);
 	if (!jh) {
 		ret = -EUCLEAN;
@@ -1227,7 +1366,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	JBUFFER_TRACE(jh, "file as BJ_Metadata");
 	spin_lock(&journal->j_list_lock);
-	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
+	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
 	spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
 	jbd_unlock_bh_state(bh);
@@ -1258,12 +1397,17 @@ out:
 int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 	struct journal_head *jh;
 	int drop_reserve = 0;
 	int err = 0;
 	int was_modified = 0;
 
+	WARN_ON(!transaction);
+	if (is_handle_aborted(handle))
+		return -EROFS;
+	journal = transaction->t_journal;
+
 	BUFFER_TRACE(bh, "entry");
 
 	jbd_lock_bh_state(bh);
@@ -1290,7 +1434,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 	 */
 	jh->b_modified = 0;
 
-	if (jh->b_transaction == handle->h_transaction) {
+	if (jh->b_transaction == transaction) {
 		J_ASSERT_JH(jh, !jh->b_frozen_data);
 
 		/* If we are forgetting a buffer which is already part
1385int jbd2_journal_stop(handle_t *handle) 1529int jbd2_journal_stop(handle_t *handle)
1386{ 1530{
1387 transaction_t *transaction = handle->h_transaction; 1531 transaction_t *transaction = handle->h_transaction;
1388 journal_t *journal = transaction->t_journal; 1532 journal_t *journal;
1389 int err, wait_for_commit = 0; 1533 int err = 0, wait_for_commit = 0;
1390 tid_t tid; 1534 tid_t tid;
1391 pid_t pid; 1535 pid_t pid;
1392 1536
1537 if (!transaction)
1538 goto free_and_exit;
1539 journal = transaction->t_journal;
1540
1393 J_ASSERT(journal_current_handle() == handle); 1541 J_ASSERT(journal_current_handle() == handle);
1394 1542
1395 if (is_handle_aborted(handle)) 1543 if (is_handle_aborted(handle))
1396 err = -EIO; 1544 err = -EIO;
1397 else { 1545 else
1398 J_ASSERT(atomic_read(&transaction->t_updates) > 0); 1546 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
1399 err = 0;
1400 }
1401 1547
1402 if (--handle->h_ref > 0) { 1548 if (--handle->h_ref > 0) {
1403 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, 1549 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
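
handle->h_transaction can now legitimately be NULL on entry: jbd2__journal_restart() above detaches the handle (handle->h_transaction = NULL) before re-starting it, so if that restart fails the eventual jbd2_journal_stop() finds no transaction and simply frees the handle via the new free_and_exit label.
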
@@ -1407,7 +1553,7 @@ int jbd2_journal_stop(handle_t *handle)
 
 	jbd_debug(4, "Handle %p going down\n", handle);
 	trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
-				handle->h_transaction->t_tid,
+				transaction->t_tid,
 				handle->h_type, handle->h_line_no,
 				jiffies - handle->h_start_jiffies,
 				handle->h_sync, handle->h_requested_credits,
@@ -1518,33 +1664,13 @@ int jbd2_journal_stop(handle_t *handle)
 
 	lock_map_release(&handle->h_lockdep_map);
 
+	if (handle->h_rsv_handle)
+		jbd2_journal_free_reserved(handle->h_rsv_handle);
+free_and_exit:
 	jbd2_free_handle(handle);
 	return err;
 }
 
-/**
- * int jbd2_journal_force_commit() - force any uncommitted transactions
- * @journal: journal to force
- *
- * For synchronous operations: force any uncommitted transactions
- * to disk. May seem kludgy, but it reuses all the handle batching
- * code in a very simple manner.
- */
-int jbd2_journal_force_commit(journal_t *journal)
-{
-	handle_t *handle;
-	int ret;
-
-	handle = jbd2_journal_start(journal, 1);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-	} else {
-		handle->h_sync = 1;
-		ret = jbd2_journal_stop(handle);
-	}
-	return ret;
-}
-
 /*
  *
  * List management code snippets: various functions for manipulating the
@@ -1601,10 +1727,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  * Remove a buffer from the appropriate transaction list.
  *
  * Note that this function can *change* the value of
- * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
- * t_log_list or t_reserved_list. If the caller is holding onto a copy of one
- * of these pointers, it could go bad. Generally the caller needs to re-read
- * the pointer from the transaction_t.
+ * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
+ * t_reserved_list. If the caller is holding onto a copy of one of these
+ * pointers, it could go bad. Generally the caller needs to re-read the
+ * pointer from the transaction_t.
  *
  * Called under j_list_lock.
  */
@@ -1634,15 +1760,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 	case BJ_Forget:
 		list = &transaction->t_forget;
 		break;
-	case BJ_IO:
-		list = &transaction->t_iobuf_list;
-		break;
 	case BJ_Shadow:
 		list = &transaction->t_shadow_list;
 		break;
-	case BJ_LogCtl:
-		list = &transaction->t_log_list;
-		break;
 	case BJ_Reserved:
 		list = &transaction->t_reserved_list;
 		break;
@@ -2034,18 +2154,23 @@ zap_buffer_unlocked:
  * void jbd2_journal_invalidatepage()
  * @journal: journal to use for flush...
  * @page:    page to flush
- * @offset:  length of page to invalidate.
+ * @offset:  start of the range to invalidate
+ * @length:  length of the range to invalidate
  *
- * Reap page buffers containing data after offset in page. Can return -EBUSY
- * if buffers are part of the committing transaction and the page is straddling
- * i_size. Caller then has to wait for current commit and try again.
+ * Reap page buffers containing data in the specified range in page.
+ * Can return -EBUSY if buffers are part of the committing transaction and
+ * the page is straddling i_size. Caller then has to wait for current commit
+ * and try again.
  */
 int jbd2_journal_invalidatepage(journal_t *journal,
 				struct page *page,
-				unsigned long offset)
+				unsigned int offset,
+				unsigned int length)
 {
 	struct buffer_head *head, *bh, *next;
+	unsigned int stop = offset + length;
 	unsigned int curr_off = 0;
+	int partial_page = (offset || length < PAGE_CACHE_SIZE);
 	int may_free = 1;
 	int ret = 0;
 
@@ -2054,6 +2179,8 @@ int jbd2_journal_invalidatepage(journal_t *journal,
 	if (!page_has_buffers(page))
 		return 0;
 
+	BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be
 	 * cautious in our locking. */
@@ -2063,10 +2190,13 @@ int jbd2_journal_invalidatepage(journal_t *journal,
 		unsigned int next_off = curr_off + bh->b_size;
 		next = bh->b_this_page;
 
+		if (next_off > stop)
+			return 0;
+
 		if (offset <= curr_off) {
 			/* This block is wholly outside the truncation point */
 			lock_buffer(bh);
-			ret = journal_unmap_buffer(journal, bh, offset > 0);
+			ret = journal_unmap_buffer(journal, bh, partial_page);
 			unlock_buffer(bh);
 			if (ret < 0)
 				return ret;
@@ -2077,7 +2207,7 @@ int jbd2_journal_invalidatepage(journal_t *journal,
 
 	} while (bh != head);
 
-	if (!offset) {
+	if (!partial_page) {
 		if (may_free && try_to_free_buffers(page))
 			J_ASSERT(!page_has_buffers(page));
 	}
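
With the offset/length pair the invalidate now has exact bounds: the walk stops at the first buffer whose end crosses offset + length, and the page's buffers are only freed when the whole page is gone (offset 0 and a full PAGE_CACHE_SIZE length, i.e. partial_page == 0). A hypothetical caller dropping the middle 1 KB of a 4 KB page would look like:

	/* buffers wholly inside [1024, 2048) are unmapped; the rest survive */
	ret = jbd2_journal_invalidatepage(journal, page, 1024, 1024);
	if (ret == -EBUSY) {
		/* a committing transaction still owns a buffer straddling
		 * i_size: wait for the commit to finish and retry */
	}
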
@@ -2138,15 +2268,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 	case BJ_Forget:
 		list = &transaction->t_forget;
 		break;
-	case BJ_IO:
-		list = &transaction->t_iobuf_list;
-		break;
 	case BJ_Shadow:
 		list = &transaction->t_shadow_list;
 		break;
-	case BJ_LogCtl:
-		list = &transaction->t_log_list;
-		break;
 	case BJ_Reserved:
 		list = &transaction->t_reserved_list;
 		break;
@@ -2248,10 +2372,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
+	journal_t *journal;
 
+	WARN_ON(!transaction);
 	if (is_handle_aborted(handle))
-		return -EIO;
+		return -EROFS;
+	journal = transaction->t_journal;
 
 	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
 		  transaction->t_tid);