diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-11-14 20:38:05 -0500 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-11-14 20:38:05 -0500 |
commit | 42249094f79422fbf5ed4b54eeb48ff096809b8f (patch) | |
tree | 91e6850c8c7e8cc284cf8bb6363f8662f84011f4 /fs/jbd2 | |
parent | 936816161978ca716a56c5e553c68f25972b1e3a (diff) | |
parent | 2c027b7c48a888ab173ba45babb4525e278375d9 (diff) |
Merge branch 'next' into for-linus
Merge first round of changes for 3.13 merge window.
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/Kconfig | 6 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 22 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 186 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 171 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 31 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 49 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 526 |
7 files changed, 552 insertions, 439 deletions
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 69a48c2944da..5a9f5534d57b 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig | |||
@@ -20,7 +20,7 @@ config JBD2 | |||
20 | 20 | ||
21 | config JBD2_DEBUG | 21 | config JBD2_DEBUG |
22 | bool "JBD2 (ext4) debugging support" | 22 | bool "JBD2 (ext4) debugging support" |
23 | depends on JBD2 && DEBUG_FS | 23 | depends on JBD2 |
24 | help | 24 | help |
25 | If you are using the ext4 journaled file system (or | 25 | If you are using the ext4 journaled file system (or |
26 | potentially any other filesystem/device using JBD2), this option | 26 | potentially any other filesystem/device using JBD2), this option |
@@ -29,7 +29,7 @@ config JBD2_DEBUG | |||
29 | By default, the debugging output will be turned off. | 29 | By default, the debugging output will be turned off. |
30 | 30 | ||
31 | If you select Y here, then you will be able to turn on debugging | 31 | If you select Y here, then you will be able to turn on debugging |
32 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | 32 | with "echo N > /sys/module/jbd2/parameters/jbd2_debug", where N is a |
33 | number between 1 and 5. The higher the number, the more debugging | 33 | number between 1 and 5. The higher the number, the more debugging |
34 | output is generated. To turn debugging off again, do | 34 | output is generated. To turn debugging off again, do |
35 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | 35 | "echo 0 > /sys/module/jbd2/parameters/jbd2_debug". |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..7f34f4716165 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
120 | int nblocks, space_left; | 120 | int nblocks, space_left; |
121 | /* assert_spin_locked(&journal->j_state_lock); */ | 121 | /* assert_spin_locked(&journal->j_state_lock); */ |
122 | 122 | ||
123 | nblocks = jbd_space_needed(journal); | 123 | nblocks = jbd2_space_needed(journal); |
124 | while (__jbd2_log_space_left(journal) < nblocks) { | 124 | while (jbd2_log_space_left(journal) < nblocks) { |
125 | if (journal->j_flags & JBD2_ABORT) | 125 | if (journal->j_flags & JBD2_ABORT) |
126 | return; | 126 | return; |
127 | write_unlock(&journal->j_state_lock); | 127 | write_unlock(&journal->j_state_lock); |
@@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
140 | */ | 140 | */ |
141 | write_lock(&journal->j_state_lock); | 141 | write_lock(&journal->j_state_lock); |
142 | spin_lock(&journal->j_list_lock); | 142 | spin_lock(&journal->j_list_lock); |
143 | nblocks = jbd_space_needed(journal); | 143 | nblocks = jbd2_space_needed(journal); |
144 | space_left = __jbd2_log_space_left(journal); | 144 | space_left = jbd2_log_space_left(journal); |
145 | if (space_left < nblocks) { | 145 | if (space_left < nblocks) { |
146 | int chkpt = journal->j_checkpoint_transactions != NULL; | 146 | int chkpt = journal->j_checkpoint_transactions != NULL; |
147 | tid_t tid = 0; | 147 | tid_t tid = 0; |
@@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
156 | /* We were able to recover space; yay! */ | 156 | /* We were able to recover space; yay! */ |
157 | ; | 157 | ; |
158 | } else if (tid) { | 158 | } else if (tid) { |
159 | /* | ||
160 | * jbd2_journal_commit_transaction() may want | ||
161 | * to take the checkpoint_mutex if JBD2_FLUSHED | ||
162 | * is set. So we need to temporarily drop it. | ||
163 | */ | ||
164 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
159 | jbd2_log_wait_commit(journal, tid); | 165 | jbd2_log_wait_commit(journal, tid); |
166 | write_lock(&journal->j_state_lock); | ||
167 | continue; | ||
160 | } else { | 168 | } else { |
161 | printk(KERN_ERR "%s: needed %d blocks and " | 169 | printk(KERN_ERR "%s: needed %d blocks and " |
162 | "only had %d space available\n", | 170 | "only had %d space available\n", |
@@ -625,10 +633,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
625 | 633 | ||
626 | __jbd2_journal_drop_transaction(journal, transaction); | 634 | __jbd2_journal_drop_transaction(journal, transaction); |
627 | jbd2_journal_free_transaction(transaction); | 635 | jbd2_journal_free_transaction(transaction); |
628 | |||
629 | /* Just in case anybody was waiting for more transactions to be | ||
630 | checkpointed... */ | ||
631 | wake_up(&journal->j_wait_logspace); | ||
632 | ret = 1; | 636 | ret = 1; |
633 | out: | 637 | out: |
634 | return ret; | 638 | return ret; |
@@ -690,9 +694,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
690 | J_ASSERT(transaction->t_state == T_FINISHED); | 694 | J_ASSERT(transaction->t_state == T_FINISHED); |
691 | J_ASSERT(transaction->t_buffers == NULL); | 695 | J_ASSERT(transaction->t_buffers == NULL); |
692 | J_ASSERT(transaction->t_forget == NULL); | 696 | J_ASSERT(transaction->t_forget == NULL); |
693 | J_ASSERT(transaction->t_iobuf_list == NULL); | ||
694 | J_ASSERT(transaction->t_shadow_list == NULL); | 697 | J_ASSERT(transaction->t_shadow_list == NULL); |
695 | J_ASSERT(transaction->t_log_list == NULL); | ||
696 | J_ASSERT(transaction->t_checkpoint_list == NULL); | 698 | J_ASSERT(transaction->t_checkpoint_list == NULL); |
697 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); | 699 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); |
698 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); | 700 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0f53946f13c1..cf2fc0594063 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -30,15 +30,22 @@ | |||
30 | #include <trace/events/jbd2.h> | 30 | #include <trace/events/jbd2.h> |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Default IO end handler for temporary BJ_IO buffer_heads. | 33 | * IO end handler for temporary buffer_heads handling writes to the journal. |
34 | */ | 34 | */ |
35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | 35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) |
36 | { | 36 | { |
37 | struct buffer_head *orig_bh = bh->b_private; | ||
38 | |||
37 | BUFFER_TRACE(bh, ""); | 39 | BUFFER_TRACE(bh, ""); |
38 | if (uptodate) | 40 | if (uptodate) |
39 | set_buffer_uptodate(bh); | 41 | set_buffer_uptodate(bh); |
40 | else | 42 | else |
41 | clear_buffer_uptodate(bh); | 43 | clear_buffer_uptodate(bh); |
44 | if (orig_bh) { | ||
45 | clear_bit_unlock(BH_Shadow, &orig_bh->b_state); | ||
46 | smp_mb__after_clear_bit(); | ||
47 | wake_up_bit(&orig_bh->b_state, BH_Shadow); | ||
48 | } | ||
42 | unlock_buffer(bh); | 49 | unlock_buffer(bh); |
43 | } | 50 | } |
44 | 51 | ||
@@ -85,8 +92,7 @@ nope: | |||
85 | __brelse(bh); | 92 | __brelse(bh); |
86 | } | 93 | } |
87 | 94 | ||
88 | static void jbd2_commit_block_csum_set(journal_t *j, | 95 | static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) |
89 | struct journal_head *descriptor) | ||
90 | { | 96 | { |
91 | struct commit_header *h; | 97 | struct commit_header *h; |
92 | __u32 csum; | 98 | __u32 csum; |
@@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, | |||
94 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 100 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
95 | return; | 101 | return; |
96 | 102 | ||
97 | h = (struct commit_header *)(jh2bh(descriptor)->b_data); | 103 | h = (struct commit_header *)(bh->b_data); |
98 | h->h_chksum_type = 0; | 104 | h->h_chksum_type = 0; |
99 | h->h_chksum_size = 0; | 105 | h->h_chksum_size = 0; |
100 | h->h_chksum[0] = 0; | 106 | h->h_chksum[0] = 0; |
101 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 107 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
102 | j->j_blocksize); | ||
103 | h->h_chksum[0] = cpu_to_be32(csum); | 108 | h->h_chksum[0] = cpu_to_be32(csum); |
104 | } | 109 | } |
105 | 110 | ||
@@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
116 | struct buffer_head **cbh, | 121 | struct buffer_head **cbh, |
117 | __u32 crc32_sum) | 122 | __u32 crc32_sum) |
118 | { | 123 | { |
119 | struct journal_head *descriptor; | ||
120 | struct commit_header *tmp; | 124 | struct commit_header *tmp; |
121 | struct buffer_head *bh; | 125 | struct buffer_head *bh; |
122 | int ret; | 126 | int ret; |
@@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal, | |||
127 | if (is_journal_aborted(journal)) | 131 | if (is_journal_aborted(journal)) |
128 | return 0; | 132 | return 0; |
129 | 133 | ||
130 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 134 | bh = jbd2_journal_get_descriptor_buffer(journal); |
131 | if (!descriptor) | 135 | if (!bh) |
132 | return 1; | 136 | return 1; |
133 | 137 | ||
134 | bh = jh2bh(descriptor); | ||
135 | |||
136 | tmp = (struct commit_header *)bh->b_data; | 138 | tmp = (struct commit_header *)bh->b_data; |
137 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 139 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
138 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 140 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
@@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
146 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | 148 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; |
147 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | 149 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); |
148 | } | 150 | } |
149 | jbd2_commit_block_csum_set(journal, descriptor); | 151 | jbd2_commit_block_csum_set(journal, bh); |
150 | 152 | ||
151 | JBUFFER_TRACE(descriptor, "submit commit block"); | 153 | BUFFER_TRACE(bh, "submit commit block"); |
152 | lock_buffer(bh); | 154 | lock_buffer(bh); |
153 | clear_buffer_dirty(bh); | 155 | clear_buffer_dirty(bh); |
154 | set_buffer_uptodate(bh); | 156 | set_buffer_uptodate(bh); |
@@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
180 | if (unlikely(!buffer_uptodate(bh))) | 182 | if (unlikely(!buffer_uptodate(bh))) |
181 | ret = -EIO; | 183 | ret = -EIO; |
182 | put_bh(bh); /* One for getblk() */ | 184 | put_bh(bh); /* One for getblk() */ |
183 | jbd2_journal_put_journal_head(bh2jh(bh)); | ||
184 | 185 | ||
185 | return ret; | 186 | return ret; |
186 | } | 187 | } |
@@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | |||
321 | } | 322 | } |
322 | 323 | ||
323 | static void jbd2_descr_block_csum_set(journal_t *j, | 324 | static void jbd2_descr_block_csum_set(journal_t *j, |
324 | struct journal_head *descriptor) | 325 | struct buffer_head *bh) |
325 | { | 326 | { |
326 | struct jbd2_journal_block_tail *tail; | 327 | struct jbd2_journal_block_tail *tail; |
327 | __u32 csum; | 328 | __u32 csum; |
@@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, | |||
329 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 330 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
330 | return; | 331 | return; |
331 | 332 | ||
332 | tail = (struct jbd2_journal_block_tail *) | 333 | tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - |
333 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
334 | sizeof(struct jbd2_journal_block_tail)); | 334 | sizeof(struct jbd2_journal_block_tail)); |
335 | tail->t_checksum = 0; | 335 | tail->t_checksum = 0; |
336 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 336 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
337 | j->j_blocksize); | ||
338 | tail->t_checksum = cpu_to_be32(csum); | 337 | tail->t_checksum = cpu_to_be32(csum); |
339 | } | 338 | } |
340 | 339 | ||
@@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | |||
343 | { | 342 | { |
344 | struct page *page = bh->b_page; | 343 | struct page *page = bh->b_page; |
345 | __u8 *addr; | 344 | __u8 *addr; |
346 | __u32 csum; | 345 | __u32 csum32; |
346 | __be32 seq; | ||
347 | 347 | ||
348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
349 | return; | 349 | return; |
350 | 350 | ||
351 | sequence = cpu_to_be32(sequence); | 351 | seq = cpu_to_be32(sequence); |
352 | addr = kmap_atomic(page); | 352 | addr = kmap_atomic(page); |
353 | csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 353 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
354 | sizeof(sequence)); | 354 | csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), |
355 | csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), | 355 | bh->b_size); |
356 | bh->b_size); | ||
357 | kunmap_atomic(addr); | 356 | kunmap_atomic(addr); |
358 | 357 | ||
359 | tag->t_checksum = cpu_to_be32(csum); | 358 | /* We only have space to store the lower 16 bits of the crc32c. */ |
359 | tag->t_checksum = cpu_to_be16(csum32); | ||
360 | } | 360 | } |
361 | /* | 361 | /* |
362 | * jbd2_journal_commit_transaction | 362 | * jbd2_journal_commit_transaction |
@@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
368 | { | 368 | { |
369 | struct transaction_stats_s stats; | 369 | struct transaction_stats_s stats; |
370 | transaction_t *commit_transaction; | 370 | transaction_t *commit_transaction; |
371 | struct journal_head *jh, *new_jh, *descriptor; | 371 | struct journal_head *jh; |
372 | struct buffer_head *descriptor; | ||
372 | struct buffer_head **wbuf = journal->j_wbuf; | 373 | struct buffer_head **wbuf = journal->j_wbuf; |
373 | int bufs; | 374 | int bufs; |
374 | int flags; | 375 | int flags; |
@@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
392 | tid_t first_tid; | 393 | tid_t first_tid; |
393 | int update_tail; | 394 | int update_tail; |
394 | int csum_size = 0; | 395 | int csum_size = 0; |
396 | LIST_HEAD(io_bufs); | ||
397 | LIST_HEAD(log_bufs); | ||
395 | 398 | ||
396 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 399 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
397 | csum_size = sizeof(struct jbd2_journal_block_tail); | 400 | csum_size = sizeof(struct jbd2_journal_block_tail); |
@@ -424,13 +427,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
424 | J_ASSERT(journal->j_committing_transaction == NULL); | 427 | J_ASSERT(journal->j_committing_transaction == NULL); |
425 | 428 | ||
426 | commit_transaction = journal->j_running_transaction; | 429 | commit_transaction = journal->j_running_transaction; |
427 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
428 | 430 | ||
429 | trace_jbd2_start_commit(journal, commit_transaction); | 431 | trace_jbd2_start_commit(journal, commit_transaction); |
430 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", | 432 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", |
431 | commit_transaction->t_tid); | 433 | commit_transaction->t_tid); |
432 | 434 | ||
433 | write_lock(&journal->j_state_lock); | 435 | write_lock(&journal->j_state_lock); |
436 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
434 | commit_transaction->t_state = T_LOCKED; | 437 | commit_transaction->t_state = T_LOCKED; |
435 | 438 | ||
436 | trace_jbd2_commit_locking(journal, commit_transaction); | 439 | trace_jbd2_commit_locking(journal, commit_transaction); |
@@ -520,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
520 | */ | 523 | */ |
521 | jbd2_journal_switch_revoke_table(journal); | 524 | jbd2_journal_switch_revoke_table(journal); |
522 | 525 | ||
526 | /* | ||
527 | * Reserved credits cannot be claimed anymore, free them | ||
528 | */ | ||
529 | atomic_sub(atomic_read(&journal->j_reserved_credits), | ||
530 | &commit_transaction->t_outstanding_credits); | ||
531 | |||
523 | trace_jbd2_commit_flushing(journal, commit_transaction); | 532 | trace_jbd2_commit_flushing(journal, commit_transaction); |
524 | stats.run.rs_flushing = jiffies; | 533 | stats.run.rs_flushing = jiffies; |
525 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, | 534 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, |
@@ -533,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
533 | wake_up(&journal->j_wait_transaction_locked); | 542 | wake_up(&journal->j_wait_transaction_locked); |
534 | write_unlock(&journal->j_state_lock); | 543 | write_unlock(&journal->j_state_lock); |
535 | 544 | ||
536 | jbd_debug(3, "JBD2: commit phase 2\n"); | 545 | jbd_debug(3, "JBD2: commit phase 2a\n"); |
537 | 546 | ||
538 | /* | 547 | /* |
539 | * Now start flushing things to disk, in the order they appear | 548 | * Now start flushing things to disk, in the order they appear |
@@ -545,10 +554,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
545 | 554 | ||
546 | blk_start_plug(&plug); | 555 | blk_start_plug(&plug); |
547 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
548 | WRITE_SYNC); | 557 | &log_bufs, WRITE_SYNC); |
549 | blk_finish_plug(&plug); | 558 | blk_finish_plug(&plug); |
550 | 559 | ||
551 | jbd_debug(3, "JBD2: commit phase 2\n"); | 560 | jbd_debug(3, "JBD2: commit phase 2b\n"); |
552 | 561 | ||
553 | /* | 562 | /* |
554 | * Way to go: we have now written out all of the data for a | 563 | * Way to go: we have now written out all of the data for a |
@@ -571,8 +580,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
571 | atomic_read(&commit_transaction->t_outstanding_credits)); | 580 | atomic_read(&commit_transaction->t_outstanding_credits)); |
572 | 581 | ||
573 | err = 0; | 582 | err = 0; |
574 | descriptor = NULL; | ||
575 | bufs = 0; | 583 | bufs = 0; |
584 | descriptor = NULL; | ||
576 | blk_start_plug(&plug); | 585 | blk_start_plug(&plug); |
577 | while (commit_transaction->t_buffers) { | 586 | while (commit_transaction->t_buffers) { |
578 | 587 | ||
@@ -604,8 +613,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
604 | record the metadata buffer. */ | 613 | record the metadata buffer. */ |
605 | 614 | ||
606 | if (!descriptor) { | 615 | if (!descriptor) { |
607 | struct buffer_head *bh; | ||
608 | |||
609 | J_ASSERT (bufs == 0); | 616 | J_ASSERT (bufs == 0); |
610 | 617 | ||
611 | jbd_debug(4, "JBD2: get descriptor\n"); | 618 | jbd_debug(4, "JBD2: get descriptor\n"); |
@@ -616,26 +623,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
616 | continue; | 623 | continue; |
617 | } | 624 | } |
618 | 625 | ||
619 | bh = jh2bh(descriptor); | ||
620 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", | 626 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", |
621 | (unsigned long long)bh->b_blocknr, bh->b_data); | 627 | (unsigned long long)descriptor->b_blocknr, |
622 | header = (journal_header_t *)&bh->b_data[0]; | 628 | descriptor->b_data); |
629 | header = (journal_header_t *)descriptor->b_data; | ||
623 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 630 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
624 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); | 631 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); |
625 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 632 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
626 | 633 | ||
627 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 634 | tagp = &descriptor->b_data[sizeof(journal_header_t)]; |
628 | space_left = bh->b_size - sizeof(journal_header_t); | 635 | space_left = descriptor->b_size - |
636 | sizeof(journal_header_t); | ||
629 | first_tag = 1; | 637 | first_tag = 1; |
630 | set_buffer_jwrite(bh); | 638 | set_buffer_jwrite(descriptor); |
631 | set_buffer_dirty(bh); | 639 | set_buffer_dirty(descriptor); |
632 | wbuf[bufs++] = bh; | 640 | wbuf[bufs++] = descriptor; |
633 | 641 | ||
634 | /* Record it so that we can wait for IO | 642 | /* Record it so that we can wait for IO |
635 | completion later */ | 643 | completion later */ |
636 | BUFFER_TRACE(bh, "ph3: file as descriptor"); | 644 | BUFFER_TRACE(descriptor, "ph3: file as descriptor"); |
637 | jbd2_journal_file_buffer(descriptor, commit_transaction, | 645 | jbd2_file_log_bh(&log_bufs, descriptor); |
638 | BJ_LogCtl); | ||
639 | } | 646 | } |
640 | 647 | ||
641 | /* Where is the buffer to be written? */ | 648 | /* Where is the buffer to be written? */ |
@@ -658,29 +665,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
658 | 665 | ||
659 | /* Bump b_count to prevent truncate from stumbling over | 666 | /* Bump b_count to prevent truncate from stumbling over |
660 | the shadowed buffer! @@@ This can go if we ever get | 667 | the shadowed buffer! @@@ This can go if we ever get |
661 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 668 | rid of the shadow pairing of buffers. */ |
662 | atomic_inc(&jh2bh(jh)->b_count); | 669 | atomic_inc(&jh2bh(jh)->b_count); |
663 | 670 | ||
664 | /* Make a temporary IO buffer with which to write it out | ||
665 | (this will requeue both the metadata buffer and the | ||
666 | temporary IO buffer). new_bh goes on BJ_IO*/ | ||
667 | |||
668 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
669 | /* | 671 | /* |
670 | * akpm: jbd2_journal_write_metadata_buffer() sets | 672 | * Make a temporary IO buffer with which to write it out |
671 | * new_bh->b_transaction to commit_transaction. | 673 | * (this will requeue the metadata buffer to BJ_Shadow). |
672 | * We need to clean this up before we release new_bh | ||
673 | * (which is of type BJ_IO) | ||
674 | */ | 674 | */ |
675 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
675 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 676 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
676 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, | 677 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
677 | jh, &new_jh, blocknr); | 678 | jh, &wbuf[bufs], blocknr); |
678 | if (flags < 0) { | 679 | if (flags < 0) { |
679 | jbd2_journal_abort(journal, flags); | 680 | jbd2_journal_abort(journal, flags); |
680 | continue; | 681 | continue; |
681 | } | 682 | } |
682 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 683 | jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
683 | wbuf[bufs++] = jh2bh(new_jh); | ||
684 | 684 | ||
685 | /* Record the new block's tag in the current descriptor | 685 | /* Record the new block's tag in the current descriptor |
686 | buffer */ | 686 | buffer */ |
@@ -694,10 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
694 | tag = (journal_block_tag_t *) tagp; | 694 | tag = (journal_block_tag_t *) tagp; |
695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
696 | tag->t_flags = cpu_to_be16(tag_flag); | 696 | tag->t_flags = cpu_to_be16(tag_flag); |
697 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | 697 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
698 | commit_transaction->t_tid); | 698 | commit_transaction->t_tid); |
699 | tagp += tag_bytes; | 699 | tagp += tag_bytes; |
700 | space_left -= tag_bytes; | 700 | space_left -= tag_bytes; |
701 | bufs++; | ||
701 | 702 | ||
702 | if (first_tag) { | 703 | if (first_tag) { |
703 | memcpy (tagp, journal->j_uuid, 16); | 704 | memcpy (tagp, journal->j_uuid, 16); |
@@ -809,7 +810,7 @@ start_journal_io: | |||
809 | the log. Before we can commit it, wait for the IO so far to | 810 | the log. Before we can commit it, wait for the IO so far to |
810 | complete. Control buffers being written are on the | 811 | complete. Control buffers being written are on the |
811 | transaction's t_log_list queue, and metadata buffers are on | 812 | transaction's t_log_list queue, and metadata buffers are on |
812 | the t_iobuf_list queue. | 813 | the io_bufs list. |
813 | 814 | ||
814 | Wait for the buffers in reverse order. That way we are | 815 | Wait for the buffers in reverse order. That way we are |
815 | less likely to be woken up until all IOs have completed, and | 816 | less likely to be woken up until all IOs have completed, and |
@@ -818,47 +819,33 @@ start_journal_io: | |||
818 | 819 | ||
819 | jbd_debug(3, "JBD2: commit phase 3\n"); | 820 | jbd_debug(3, "JBD2: commit phase 3\n"); |
820 | 821 | ||
821 | /* | 822 | while (!list_empty(&io_bufs)) { |
822 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 823 | struct buffer_head *bh = list_entry(io_bufs.prev, |
823 | * See __journal_try_to_free_buffer. | 824 | struct buffer_head, |
824 | */ | 825 | b_assoc_buffers); |
825 | wait_for_iobuf: | ||
826 | while (commit_transaction->t_iobuf_list != NULL) { | ||
827 | struct buffer_head *bh; | ||
828 | 826 | ||
829 | jh = commit_transaction->t_iobuf_list->b_tprev; | 827 | wait_on_buffer(bh); |
830 | bh = jh2bh(jh); | 828 | cond_resched(); |
831 | if (buffer_locked(bh)) { | ||
832 | wait_on_buffer(bh); | ||
833 | goto wait_for_iobuf; | ||
834 | } | ||
835 | if (cond_resched()) | ||
836 | goto wait_for_iobuf; | ||
837 | 829 | ||
838 | if (unlikely(!buffer_uptodate(bh))) | 830 | if (unlikely(!buffer_uptodate(bh))) |
839 | err = -EIO; | 831 | err = -EIO; |
840 | 832 | jbd2_unfile_log_bh(bh); | |
841 | clear_buffer_jwrite(bh); | ||
842 | |||
843 | JBUFFER_TRACE(jh, "ph4: unfile after journal write"); | ||
844 | jbd2_journal_unfile_buffer(journal, jh); | ||
845 | 833 | ||
846 | /* | 834 | /* |
847 | * ->t_iobuf_list should contain only dummy buffer_heads | 835 | * The list contains temporary buffer heads created by |
848 | * which were created by jbd2_journal_write_metadata_buffer(). | 836 | * jbd2_journal_write_metadata_buffer(). |
849 | */ | 837 | */ |
850 | BUFFER_TRACE(bh, "dumping temporary bh"); | 838 | BUFFER_TRACE(bh, "dumping temporary bh"); |
851 | jbd2_journal_put_journal_head(jh); | ||
852 | __brelse(bh); | 839 | __brelse(bh); |
853 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); | 840 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); |
854 | free_buffer_head(bh); | 841 | free_buffer_head(bh); |
855 | 842 | ||
856 | /* We also have to unlock and free the corresponding | 843 | /* We also have to refile the corresponding shadowed buffer */ |
857 | shadowed buffer */ | ||
858 | jh = commit_transaction->t_shadow_list->b_tprev; | 844 | jh = commit_transaction->t_shadow_list->b_tprev; |
859 | bh = jh2bh(jh); | 845 | bh = jh2bh(jh); |
860 | clear_bit(BH_JWrite, &bh->b_state); | 846 | clear_buffer_jwrite(bh); |
861 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 847 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
848 | J_ASSERT_BH(bh, !buffer_shadow(bh)); | ||
862 | 849 | ||
863 | /* The metadata is now released for reuse, but we need | 850 | /* The metadata is now released for reuse, but we need |
864 | to remember it against this transaction so that when | 851 | to remember it against this transaction so that when |
@@ -866,14 +853,6 @@ wait_for_iobuf: | |||
866 | required. */ | 853 | required. */ |
867 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 854 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
868 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); | 855 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); |
869 | /* | ||
870 | * Wake up any transactions which were waiting for this IO to | ||
871 | * complete. The barrier must be here so that changes by | ||
872 | * jbd2_journal_file_buffer() take effect before wake_up_bit() | ||
873 | * does the waitqueue check. | ||
874 | */ | ||
875 | smp_mb(); | ||
876 | wake_up_bit(&bh->b_state, BH_Unshadow); | ||
877 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 856 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
878 | __brelse(bh); | 857 | __brelse(bh); |
879 | } | 858 | } |
@@ -883,26 +862,19 @@ wait_for_iobuf: | |||
883 | jbd_debug(3, "JBD2: commit phase 4\n"); | 862 | jbd_debug(3, "JBD2: commit phase 4\n"); |
884 | 863 | ||
885 | /* Here we wait for the revoke record and descriptor record buffers */ | 864 | /* Here we wait for the revoke record and descriptor record buffers */ |
886 | wait_for_ctlbuf: | 865 | while (!list_empty(&log_bufs)) { |
887 | while (commit_transaction->t_log_list != NULL) { | ||
888 | struct buffer_head *bh; | 866 | struct buffer_head *bh; |
889 | 867 | ||
890 | jh = commit_transaction->t_log_list->b_tprev; | 868 | bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); |
891 | bh = jh2bh(jh); | 869 | wait_on_buffer(bh); |
892 | if (buffer_locked(bh)) { | 870 | cond_resched(); |
893 | wait_on_buffer(bh); | ||
894 | goto wait_for_ctlbuf; | ||
895 | } | ||
896 | if (cond_resched()) | ||
897 | goto wait_for_ctlbuf; | ||
898 | 871 | ||
899 | if (unlikely(!buffer_uptodate(bh))) | 872 | if (unlikely(!buffer_uptodate(bh))) |
900 | err = -EIO; | 873 | err = -EIO; |
901 | 874 | ||
902 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); | 875 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); |
903 | clear_buffer_jwrite(bh); | 876 | clear_buffer_jwrite(bh); |
904 | jbd2_journal_unfile_buffer(journal, jh); | 877 | jbd2_unfile_log_bh(bh); |
905 | jbd2_journal_put_journal_head(jh); | ||
906 | __brelse(bh); /* One for getblk */ | 878 | __brelse(bh); /* One for getblk */ |
907 | /* AKPM: bforget here */ | 879 | /* AKPM: bforget here */ |
908 | } | 880 | } |
@@ -952,9 +924,7 @@ wait_for_iobuf: | |||
952 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 924 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
953 | J_ASSERT(commit_transaction->t_buffers == NULL); | 925 | J_ASSERT(commit_transaction->t_buffers == NULL); |
954 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 926 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
955 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | ||
956 | J_ASSERT(commit_transaction->t_shadow_list == NULL); | 927 | J_ASSERT(commit_transaction->t_shadow_list == NULL); |
957 | J_ASSERT(commit_transaction->t_log_list == NULL); | ||
958 | 928 | ||
959 | restart_loop: | 929 | restart_loop: |
960 | /* | 930 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 95457576e434..52032647dd4a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache); | |||
103 | static void __journal_abort_soft (journal_t *journal, int errno); | 103 | static void __journal_abort_soft (journal_t *journal, int errno); |
104 | static int jbd2_journal_create_slab(size_t slab_size); | 104 | static int jbd2_journal_create_slab(size_t slab_size); |
105 | 105 | ||
106 | #ifdef CONFIG_JBD2_DEBUG | ||
107 | void __jbd2_debug(int level, const char *file, const char *func, | ||
108 | unsigned int line, const char *fmt, ...) | ||
109 | { | ||
110 | struct va_format vaf; | ||
111 | va_list args; | ||
112 | |||
113 | if (level > jbd2_journal_enable_debug) | ||
114 | return; | ||
115 | va_start(args, fmt); | ||
116 | vaf.fmt = fmt; | ||
117 | vaf.va = &args; | ||
118 | printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); | ||
119 | va_end(args); | ||
120 | } | ||
121 | EXPORT_SYMBOL(__jbd2_debug); | ||
122 | #endif | ||
123 | |||
106 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
107 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
108 | { | 126 | { |
@@ -112,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | |||
112 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; | 130 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; |
113 | } | 131 | } |
114 | 132 | ||
115 | static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | 133 | static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) |
116 | { | 134 | { |
117 | __u32 csum, old_csum; | 135 | __u32 csum; |
136 | __be32 old_csum; | ||
118 | 137 | ||
119 | old_csum = sb->s_checksum; | 138 | old_csum = sb->s_checksum; |
120 | sb->s_checksum = 0; | 139 | sb->s_checksum = 0; |
@@ -310,14 +329,12 @@ static void journal_kill_thread(journal_t *journal) | |||
310 | * | 329 | * |
311 | * If the source buffer has already been modified by a new transaction | 330 | * If the source buffer has already been modified by a new transaction |
312 | * since we took the last commit snapshot, we use the frozen copy of | 331 | * since we took the last commit snapshot, we use the frozen copy of |
313 | * that data for IO. If we end up using the existing buffer_head's data | 332 | * that data for IO. If we end up using the existing buffer_head's data |
314 | * for the write, then we *have* to lock the buffer to prevent anyone | 333 | * for the write, then we have to make sure nobody modifies it while the |
315 | * else from using and possibly modifying it while the IO is in | 334 | * IO is in progress. do_get_write_access() handles this. |
316 | * progress. | ||
317 | * | 335 | * |
318 | * The function returns a pointer to the buffer_heads to be used for IO. | 336 | * The function returns a pointer to the buffer_head to be used for IO. |
319 | * | 337 | * |
320 | * We assume that the journal has already been locked in this function. | ||
321 | * | 338 | * |
322 | * Return value: | 339 | * Return value: |
323 | * <0: Error | 340 | * <0: Error |
@@ -330,15 +347,14 @@ static void journal_kill_thread(journal_t *journal) | |||
330 | 347 | ||
331 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | 348 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, |
332 | struct journal_head *jh_in, | 349 | struct journal_head *jh_in, |
333 | struct journal_head **jh_out, | 350 | struct buffer_head **bh_out, |
334 | unsigned long long blocknr) | 351 | sector_t blocknr) |
335 | { | 352 | { |
336 | int need_copy_out = 0; | 353 | int need_copy_out = 0; |
337 | int done_copy_out = 0; | 354 | int done_copy_out = 0; |
338 | int do_escape = 0; | 355 | int do_escape = 0; |
339 | char *mapped_data; | 356 | char *mapped_data; |
340 | struct buffer_head *new_bh; | 357 | struct buffer_head *new_bh; |
341 | struct journal_head *new_jh; | ||
342 | struct page *new_page; | 358 | struct page *new_page; |
343 | unsigned int new_offset; | 359 | unsigned int new_offset; |
344 | struct buffer_head *bh_in = jh2bh(jh_in); | 360 | struct buffer_head *bh_in = jh2bh(jh_in); |
@@ -368,14 +384,13 @@ retry_alloc: | |||
368 | 384 | ||
369 | /* keep subsequent assertions sane */ | 385 | /* keep subsequent assertions sane */ |
370 | atomic_set(&new_bh->b_count, 1); | 386 | atomic_set(&new_bh->b_count, 1); |
371 | new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ | ||
372 | 387 | ||
388 | jbd_lock_bh_state(bh_in); | ||
389 | repeat: | ||
373 | /* | 390 | /* |
374 | * If a new transaction has already done a buffer copy-out, then | 391 | * If a new transaction has already done a buffer copy-out, then |
375 | * we use that version of the data for the commit. | 392 | * we use that version of the data for the commit. |
376 | */ | 393 | */ |
377 | jbd_lock_bh_state(bh_in); | ||
378 | repeat: | ||
379 | if (jh_in->b_frozen_data) { | 394 | if (jh_in->b_frozen_data) { |
380 | done_copy_out = 1; | 395 | done_copy_out = 1; |
381 | new_page = virt_to_page(jh_in->b_frozen_data); | 396 | new_page = virt_to_page(jh_in->b_frozen_data); |
@@ -415,7 +430,7 @@ repeat: | |||
415 | jbd_unlock_bh_state(bh_in); | 430 | jbd_unlock_bh_state(bh_in); |
416 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); | 431 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); |
417 | if (!tmp) { | 432 | if (!tmp) { |
418 | jbd2_journal_put_journal_head(new_jh); | 433 | brelse(new_bh); |
419 | return -ENOMEM; | 434 | return -ENOMEM; |
420 | } | 435 | } |
421 | jbd_lock_bh_state(bh_in); | 436 | jbd_lock_bh_state(bh_in); |
@@ -426,7 +441,7 @@ repeat: | |||
426 | 441 | ||
427 | jh_in->b_frozen_data = tmp; | 442 | jh_in->b_frozen_data = tmp; |
428 | mapped_data = kmap_atomic(new_page); | 443 | mapped_data = kmap_atomic(new_page); |
429 | memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); | 444 | memcpy(tmp, mapped_data + new_offset, bh_in->b_size); |
430 | kunmap_atomic(mapped_data); | 445 | kunmap_atomic(mapped_data); |
431 | 446 | ||
432 | new_page = virt_to_page(tmp); | 447 | new_page = virt_to_page(tmp); |
@@ -452,14 +467,14 @@ repeat: | |||
452 | } | 467 | } |
453 | 468 | ||
454 | set_bh_page(new_bh, new_page, new_offset); | 469 | set_bh_page(new_bh, new_page, new_offset); |
455 | new_jh->b_transaction = NULL; | 470 | new_bh->b_size = bh_in->b_size; |
456 | new_bh->b_size = jh2bh(jh_in)->b_size; | 471 | new_bh->b_bdev = journal->j_dev; |
457 | new_bh->b_bdev = transaction->t_journal->j_dev; | ||
458 | new_bh->b_blocknr = blocknr; | 472 | new_bh->b_blocknr = blocknr; |
473 | new_bh->b_private = bh_in; | ||
459 | set_buffer_mapped(new_bh); | 474 | set_buffer_mapped(new_bh); |
460 | set_buffer_dirty(new_bh); | 475 | set_buffer_dirty(new_bh); |
461 | 476 | ||
462 | *jh_out = new_jh; | 477 | *bh_out = new_bh; |
463 | 478 | ||
464 | /* | 479 | /* |
465 | * The to-be-written buffer needs to get moved to the io queue, | 480 | * The to-be-written buffer needs to get moved to the io queue, |
@@ -470,11 +485,9 @@ repeat: | |||
470 | spin_lock(&journal->j_list_lock); | 485 | spin_lock(&journal->j_list_lock); |
471 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); | 486 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); |
472 | spin_unlock(&journal->j_list_lock); | 487 | spin_unlock(&journal->j_list_lock); |
488 | set_buffer_shadow(bh_in); | ||
473 | jbd_unlock_bh_state(bh_in); | 489 | jbd_unlock_bh_state(bh_in); |
474 | 490 | ||
475 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); | ||
476 | jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); | ||
477 | |||
478 | return do_escape | (done_copy_out << 1); | 491 | return do_escape | (done_copy_out << 1); |
479 | } | 492 | } |
480 | 493 | ||
@@ -484,35 +497,6 @@ repeat: | |||
484 | */ | 497 | */ |
485 | 498 | ||
486 | /* | 499 | /* |
487 | * __jbd2_log_space_left: Return the number of free blocks left in the journal. | ||
488 | * | ||
489 | * Called with the journal already locked. | ||
490 | * | ||
491 | * Called under j_state_lock | ||
492 | */ | ||
493 | |||
494 | int __jbd2_log_space_left(journal_t *journal) | ||
495 | { | ||
496 | int left = journal->j_free; | ||
497 | |||
498 | /* assert_spin_locked(&journal->j_state_lock); */ | ||
499 | |||
500 | /* | ||
501 | * Be pessimistic here about the number of those free blocks which | ||
502 | * might be required for log descriptor control blocks. | ||
503 | */ | ||
504 | |||
505 | #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ | ||
506 | |||
507 | left -= MIN_LOG_RESERVED_BLOCKS; | ||
508 | |||
509 | if (left <= 0) | ||
510 | return 0; | ||
511 | left -= (left >> 3); | ||
512 | return left; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Called with j_state_lock locked for writing. | 500 | * Called with j_state_lock locked for writing. |
517 | * Returns true if a transaction commit was started. | 501 | * Returns true if a transaction commit was started. |
518 | */ | 502 | */ |
@@ -564,20 +548,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) | |||
564 | } | 548 | } |
565 | 549 | ||
566 | /* | 550 | /* |
567 | * Force and wait upon a commit if the calling process is not within | 551 | * Force and wait any uncommitted transactions. We can only force the running |
568 | * transaction. This is used for forcing out undo-protected data which contains | 552 | * transaction if we don't have an active handle, otherwise, we will deadlock. |
569 | * bitmaps, when the fs is running out of space. | 553 | * Returns: <0 in case of error, |
570 | * | 554 | * 0 if nothing to commit, |
571 | * We can only force the running transaction if we don't have an active handle; | 555 | * 1 if transaction was successfully committed. |
572 | * otherwise, we will deadlock. | ||
573 | * | ||
574 | * Returns true if a transaction was started. | ||
575 | */ | 556 | */ |
576 | int jbd2_journal_force_commit_nested(journal_t *journal) | 557 | static int __jbd2_journal_force_commit(journal_t *journal) |
577 | { | 558 | { |
578 | transaction_t *transaction = NULL; | 559 | transaction_t *transaction = NULL; |
579 | tid_t tid; | 560 | tid_t tid; |
580 | int need_to_start = 0; | 561 | int need_to_start = 0, ret = 0; |
581 | 562 | ||
582 | read_lock(&journal->j_state_lock); | 563 | read_lock(&journal->j_state_lock); |
583 | if (journal->j_running_transaction && !current->journal_info) { | 564 | if (journal->j_running_transaction && !current->journal_info) { |
@@ -588,16 +569,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
588 | transaction = journal->j_committing_transaction; | 569 | transaction = journal->j_committing_transaction; |
589 | 570 | ||
590 | if (!transaction) { | 571 | if (!transaction) { |
572 | /* Nothing to commit */ | ||
591 | read_unlock(&journal->j_state_lock); | 573 | read_unlock(&journal->j_state_lock); |
592 | return 0; /* Nothing to retry */ | 574 | return 0; |
593 | } | 575 | } |
594 | |||
595 | tid = transaction->t_tid; | 576 | tid = transaction->t_tid; |
596 | read_unlock(&journal->j_state_lock); | 577 | read_unlock(&journal->j_state_lock); |
597 | if (need_to_start) | 578 | if (need_to_start) |
598 | jbd2_log_start_commit(journal, tid); | 579 | jbd2_log_start_commit(journal, tid); |
599 | jbd2_log_wait_commit(journal, tid); | 580 | ret = jbd2_log_wait_commit(journal, tid); |
600 | return 1; | 581 | if (!ret) |
582 | ret = 1; | ||
583 | |||
584 | return ret; | ||
585 | } | ||
586 | |||
587 | /** | ||
588 | * Force and wait upon a commit if the calling process is not within | ||
589 | * transaction. This is used for forcing out undo-protected data which contains | ||
590 | * bitmaps, when the fs is running out of space. | ||
591 | * | ||
592 | * @journal: journal to force | ||
593 | * Returns true if progress was made. | ||
594 | */ | ||
595 | int jbd2_journal_force_commit_nested(journal_t *journal) | ||
596 | { | ||
597 | int ret; | ||
598 | |||
599 | ret = __jbd2_journal_force_commit(journal); | ||
600 | return ret > 0; | ||
601 | } | ||
602 | |||
603 | /** | ||
604 | * int journal_force_commit() - force any uncommitted transactions | ||
605 | * @journal: journal to force | ||
606 | * | ||
607 | * Caller want unconditional commit. We can only force the running transaction | ||
608 | * if we don't have an active handle, otherwise, we will deadlock. | ||
609 | */ | ||
610 | int jbd2_journal_force_commit(journal_t *journal) | ||
611 | { | ||
612 | int ret; | ||
613 | |||
614 | J_ASSERT(!current->journal_info); | ||
615 | ret = __jbd2_journal_force_commit(journal); | ||
616 | if (ret > 0) | ||
617 | ret = 0; | ||
618 | return ret; | ||
601 | } | 619 | } |
602 | 620 | ||
603 | /* | 621 | /* |
@@ -798,7 +816,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
798 | * But we don't bother doing that, so there will be coherency problems with | 816 | * But we don't bother doing that, so there will be coherency problems with |
799 | * mmaps of blockdevs which hold live JBD-controlled filesystems. | 817 | * mmaps of blockdevs which hold live JBD-controlled filesystems. |
800 | */ | 818 | */ |
801 | struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | 819 | struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) |
802 | { | 820 | { |
803 | struct buffer_head *bh; | 821 | struct buffer_head *bh; |
804 | unsigned long long blocknr; | 822 | unsigned long long blocknr; |
@@ -817,7 +835,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
817 | set_buffer_uptodate(bh); | 835 | set_buffer_uptodate(bh); |
818 | unlock_buffer(bh); | 836 | unlock_buffer(bh); |
819 | BUFFER_TRACE(bh, "return this buffer"); | 837 | BUFFER_TRACE(bh, "return this buffer"); |
820 | return jbd2_journal_add_journal_head(bh); | 838 | return bh; |
821 | } | 839 | } |
822 | 840 | ||
823 | /* | 841 | /* |
@@ -1062,11 +1080,10 @@ static journal_t * journal_init_common (void) | |||
1062 | return NULL; | 1080 | return NULL; |
1063 | 1081 | ||
1064 | init_waitqueue_head(&journal->j_wait_transaction_locked); | 1082 | init_waitqueue_head(&journal->j_wait_transaction_locked); |
1065 | init_waitqueue_head(&journal->j_wait_logspace); | ||
1066 | init_waitqueue_head(&journal->j_wait_done_commit); | 1083 | init_waitqueue_head(&journal->j_wait_done_commit); |
1067 | init_waitqueue_head(&journal->j_wait_checkpoint); | ||
1068 | init_waitqueue_head(&journal->j_wait_commit); | 1084 | init_waitqueue_head(&journal->j_wait_commit); |
1069 | init_waitqueue_head(&journal->j_wait_updates); | 1085 | init_waitqueue_head(&journal->j_wait_updates); |
1086 | init_waitqueue_head(&journal->j_wait_reserved); | ||
1070 | mutex_init(&journal->j_barrier); | 1087 | mutex_init(&journal->j_barrier); |
1071 | mutex_init(&journal->j_checkpoint_mutex); | 1088 | mutex_init(&journal->j_checkpoint_mutex); |
1072 | spin_lock_init(&journal->j_revoke_lock); | 1089 | spin_lock_init(&journal->j_revoke_lock); |
@@ -1076,6 +1093,7 @@ static journal_t * journal_init_common (void) | |||
1076 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 1093 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
1077 | journal->j_min_batch_time = 0; | 1094 | journal->j_min_batch_time = 0; |
1078 | journal->j_max_batch_time = 15000; /* 15ms */ | 1095 | journal->j_max_batch_time = 15000; /* 15ms */ |
1096 | atomic_set(&journal->j_reserved_credits, 0); | ||
1079 | 1097 | ||
1080 | /* The journal is marked for error until we succeed with recovery! */ | 1098 | /* The journal is marked for error until we succeed with recovery! */ |
1081 | journal->j_flags = JBD2_ABORT; | 1099 | journal->j_flags = JBD2_ABORT; |
@@ -1318,6 +1336,7 @@ static int journal_reset(journal_t *journal) | |||
1318 | static void jbd2_write_superblock(journal_t *journal, int write_op) | 1336 | static void jbd2_write_superblock(journal_t *journal, int write_op) |
1319 | { | 1337 | { |
1320 | struct buffer_head *bh = journal->j_sb_buffer; | 1338 | struct buffer_head *bh = journal->j_sb_buffer; |
1339 | journal_superblock_t *sb = journal->j_superblock; | ||
1321 | int ret; | 1340 | int ret; |
1322 | 1341 | ||
1323 | trace_jbd2_write_superblock(journal, write_op); | 1342 | trace_jbd2_write_superblock(journal, write_op); |
@@ -1339,6 +1358,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) | |||
1339 | clear_buffer_write_io_error(bh); | 1358 | clear_buffer_write_io_error(bh); |
1340 | set_buffer_uptodate(bh); | 1359 | set_buffer_uptodate(bh); |
1341 | } | 1360 | } |
1361 | jbd2_superblock_csum_set(journal, sb); | ||
1342 | get_bh(bh); | 1362 | get_bh(bh); |
1343 | bh->b_end_io = end_buffer_write_sync; | 1363 | bh->b_end_io = end_buffer_write_sync; |
1344 | ret = submit_bh(write_op, bh); | 1364 | ret = submit_bh(write_op, bh); |
@@ -1435,7 +1455,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) | |||
1435 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | 1455 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", |
1436 | journal->j_errno); | 1456 | journal->j_errno); |
1437 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1457 | sb->s_errno = cpu_to_be32(journal->j_errno); |
1438 | jbd2_superblock_csum_set(journal, sb); | ||
1439 | read_unlock(&journal->j_state_lock); | 1458 | read_unlock(&journal->j_state_lock); |
1440 | 1459 | ||
1441 | jbd2_write_superblock(journal, WRITE_SYNC); | 1460 | jbd2_write_superblock(journal, WRITE_SYNC); |
@@ -2325,13 +2344,13 @@ static struct journal_head *journal_alloc_journal_head(void) | |||
2325 | #ifdef CONFIG_JBD2_DEBUG | 2344 | #ifdef CONFIG_JBD2_DEBUG |
2326 | atomic_inc(&nr_journal_heads); | 2345 | atomic_inc(&nr_journal_heads); |
2327 | #endif | 2346 | #endif |
2328 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2347 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
2329 | if (!ret) { | 2348 | if (!ret) { |
2330 | jbd_debug(1, "out of memory for journal_head\n"); | 2349 | jbd_debug(1, "out of memory for journal_head\n"); |
2331 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); | 2350 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); |
2332 | while (!ret) { | 2351 | while (!ret) { |
2333 | yield(); | 2352 | yield(); |
2334 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2353 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
2335 | } | 2354 | } |
2336 | } | 2355 | } |
2337 | return ret; | 2356 | return ret; |
@@ -2393,10 +2412,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | |||
2393 | struct journal_head *new_jh = NULL; | 2412 | struct journal_head *new_jh = NULL; |
2394 | 2413 | ||
2395 | repeat: | 2414 | repeat: |
2396 | if (!buffer_jbd(bh)) { | 2415 | if (!buffer_jbd(bh)) |
2397 | new_jh = journal_alloc_journal_head(); | 2416 | new_jh = journal_alloc_journal_head(); |
2398 | memset(new_jh, 0, sizeof(*new_jh)); | ||
2399 | } | ||
2400 | 2417 | ||
2401 | jbd_lock_bh_journal_head(bh); | 2418 | jbd_lock_bh_journal_head(bh); |
2402 | if (buffer_jbd(bh)) { | 2419 | if (buffer_jbd(bh)) { |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 626846bac32f..3929c50428b1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j, | |||
178 | void *buf) | 178 | void *buf) |
179 | { | 179 | { |
180 | struct jbd2_journal_block_tail *tail; | 180 | struct jbd2_journal_block_tail *tail; |
181 | __u32 provided, calculated; | 181 | __be32 provided; |
182 | __u32 calculated; | ||
182 | 183 | ||
183 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 184 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
184 | return 1; | 185 | return 1; |
@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, | |||
190 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 191 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
191 | tail->t_checksum = provided; | 192 | tail->t_checksum = provided; |
192 | 193 | ||
193 | provided = be32_to_cpu(provided); | 194 | return provided == cpu_to_be32(calculated); |
194 | return provided == calculated; | ||
195 | } | 195 | } |
196 | 196 | ||
197 | /* | 197 | /* |
@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, | |||
381 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | 381 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) |
382 | { | 382 | { |
383 | struct commit_header *h; | 383 | struct commit_header *h; |
384 | __u32 provided, calculated; | 384 | __be32 provided; |
385 | __u32 calculated; | ||
385 | 386 | ||
386 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 387 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
387 | return 1; | 388 | return 1; |
@@ -392,25 +393,23 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
392 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 393 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
393 | h->h_chksum[0] = provided; | 394 | h->h_chksum[0] = provided; |
394 | 395 | ||
395 | provided = be32_to_cpu(provided); | 396 | return provided == cpu_to_be32(calculated); |
396 | return provided == calculated; | ||
397 | } | 397 | } |
398 | 398 | ||
399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
400 | void *buf, __u32 sequence) | 400 | void *buf, __u32 sequence) |
401 | { | 401 | { |
402 | __u32 provided, calculated; | 402 | __u32 csum32; |
403 | __be32 seq; | ||
403 | 404 | ||
404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 405 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
405 | return 1; | 406 | return 1; |
406 | 407 | ||
407 | sequence = cpu_to_be32(sequence); | 408 | seq = cpu_to_be32(sequence); |
408 | calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 409 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
409 | sizeof(sequence)); | 410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
410 | calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); | ||
411 | provided = be32_to_cpu(tag->t_checksum); | ||
412 | 411 | ||
413 | return provided == cpu_to_be32(calculated); | 412 | return tag->t_checksum == cpu_to_be16(csum32); |
414 | } | 413 | } |
415 | 414 | ||
416 | static int do_one_pass(journal_t *journal, | 415 | static int do_one_pass(journal_t *journal, |
@@ -809,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, | |||
809 | void *buf) | 808 | void *buf) |
810 | { | 809 | { |
811 | struct jbd2_journal_revoke_tail *tail; | 810 | struct jbd2_journal_revoke_tail *tail; |
812 | __u32 provided, calculated; | 811 | __be32 provided; |
812 | __u32 calculated; | ||
813 | 813 | ||
814 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 814 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
815 | return 1; | 815 | return 1; |
@@ -821,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, | |||
821 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 821 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
822 | tail->r_checksum = provided; | 822 | tail->r_checksum = provided; |
823 | 823 | ||
824 | provided = be32_to_cpu(provided); | 824 | return provided == cpu_to_be32(calculated); |
825 | return provided == calculated; | ||
826 | } | 825 | } |
827 | 826 | ||
828 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ | 827 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8b..198c9c10276d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -122,9 +122,10 @@ struct jbd2_revoke_table_s | |||
122 | 122 | ||
123 | #ifdef __KERNEL__ | 123 | #ifdef __KERNEL__ |
124 | static void write_one_revoke_record(journal_t *, transaction_t *, | 124 | static void write_one_revoke_record(journal_t *, transaction_t *, |
125 | struct journal_head **, int *, | 125 | struct list_head *, |
126 | struct buffer_head **, int *, | ||
126 | struct jbd2_revoke_record_s *, int); | 127 | struct jbd2_revoke_record_s *, int); |
127 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); | 128 | static void flush_descriptor(journal_t *, struct buffer_head *, int, int); |
128 | #endif | 129 | #endif |
129 | 130 | ||
130 | /* Utility functions to maintain the revoke table */ | 131 | /* Utility functions to maintain the revoke table */ |
@@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
531 | */ | 532 | */ |
532 | void jbd2_journal_write_revoke_records(journal_t *journal, | 533 | void jbd2_journal_write_revoke_records(journal_t *journal, |
533 | transaction_t *transaction, | 534 | transaction_t *transaction, |
535 | struct list_head *log_bufs, | ||
534 | int write_op) | 536 | int write_op) |
535 | { | 537 | { |
536 | struct journal_head *descriptor; | 538 | struct buffer_head *descriptor; |
537 | struct jbd2_revoke_record_s *record; | 539 | struct jbd2_revoke_record_s *record; |
538 | struct jbd2_revoke_table_s *revoke; | 540 | struct jbd2_revoke_table_s *revoke; |
539 | struct list_head *hash_list; | 541 | struct list_head *hash_list; |
@@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
553 | while (!list_empty(hash_list)) { | 555 | while (!list_empty(hash_list)) { |
554 | record = (struct jbd2_revoke_record_s *) | 556 | record = (struct jbd2_revoke_record_s *) |
555 | hash_list->next; | 557 | hash_list->next; |
556 | write_one_revoke_record(journal, transaction, | 558 | write_one_revoke_record(journal, transaction, log_bufs, |
557 | &descriptor, &offset, | 559 | &descriptor, &offset, |
558 | record, write_op); | 560 | record, write_op); |
559 | count++; | 561 | count++; |
@@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
573 | 575 | ||
574 | static void write_one_revoke_record(journal_t *journal, | 576 | static void write_one_revoke_record(journal_t *journal, |
575 | transaction_t *transaction, | 577 | transaction_t *transaction, |
576 | struct journal_head **descriptorp, | 578 | struct list_head *log_bufs, |
579 | struct buffer_head **descriptorp, | ||
577 | int *offsetp, | 580 | int *offsetp, |
578 | struct jbd2_revoke_record_s *record, | 581 | struct jbd2_revoke_record_s *record, |
579 | int write_op) | 582 | int write_op) |
580 | { | 583 | { |
581 | int csum_size = 0; | 584 | int csum_size = 0; |
582 | struct journal_head *descriptor; | 585 | struct buffer_head *descriptor; |
583 | int offset; | 586 | int offset; |
584 | journal_header_t *header; | 587 | journal_header_t *header; |
585 | 588 | ||
@@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, | |||
609 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 612 | descriptor = jbd2_journal_get_descriptor_buffer(journal); |
610 | if (!descriptor) | 613 | if (!descriptor) |
611 | return; | 614 | return; |
612 | header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; | 615 | header = (journal_header_t *)descriptor->b_data; |
613 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 616 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
614 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); | 617 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); |
615 | header->h_sequence = cpu_to_be32(transaction->t_tid); | 618 | header->h_sequence = cpu_to_be32(transaction->t_tid); |
616 | 619 | ||
617 | /* Record it so that we can wait for IO completion later */ | 620 | /* Record it so that we can wait for IO completion later */ |
618 | JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); | 621 | BUFFER_TRACE(descriptor, "file in log_bufs"); |
619 | jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); | 622 | jbd2_file_log_bh(log_bufs, descriptor); |
620 | 623 | ||
621 | offset = sizeof(jbd2_journal_revoke_header_t); | 624 | offset = sizeof(jbd2_journal_revoke_header_t); |
622 | *descriptorp = descriptor; | 625 | *descriptorp = descriptor; |
623 | } | 626 | } |
624 | 627 | ||
625 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { | 628 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { |
626 | * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = | 629 | * ((__be64 *)(&descriptor->b_data[offset])) = |
627 | cpu_to_be64(record->blocknr); | 630 | cpu_to_be64(record->blocknr); |
628 | offset += 8; | 631 | offset += 8; |
629 | 632 | ||
630 | } else { | 633 | } else { |
631 | * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = | 634 | * ((__be32 *)(&descriptor->b_data[offset])) = |
632 | cpu_to_be32(record->blocknr); | 635 | cpu_to_be32(record->blocknr); |
633 | offset += 4; | 636 | offset += 4; |
634 | } | 637 | } |
@@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
636 | *offsetp = offset; | 639 | *offsetp = offset; |
637 | } | 640 | } |
638 | 641 | ||
639 | static void jbd2_revoke_csum_set(journal_t *j, | 642 | static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) |
640 | struct journal_head *descriptor) | ||
641 | { | 643 | { |
642 | struct jbd2_journal_revoke_tail *tail; | 644 | struct jbd2_journal_revoke_tail *tail; |
643 | __u32 csum; | 645 | __u32 csum; |
@@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
645 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 647 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
646 | return; | 648 | return; |
647 | 649 | ||
648 | tail = (struct jbd2_journal_revoke_tail *) | 650 | tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - |
649 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
650 | sizeof(struct jbd2_journal_revoke_tail)); | 651 | sizeof(struct jbd2_journal_revoke_tail)); |
651 | tail->r_checksum = 0; | 652 | tail->r_checksum = 0; |
652 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 653 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
653 | j->j_blocksize); | ||
654 | tail->r_checksum = cpu_to_be32(csum); | 654 | tail->r_checksum = cpu_to_be32(csum); |
655 | } | 655 | } |
656 | 656 | ||
@@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
662 | */ | 662 | */ |
663 | 663 | ||
664 | static void flush_descriptor(journal_t *journal, | 664 | static void flush_descriptor(journal_t *journal, |
665 | struct journal_head *descriptor, | 665 | struct buffer_head *descriptor, |
666 | int offset, int write_op) | 666 | int offset, int write_op) |
667 | { | 667 | { |
668 | jbd2_journal_revoke_header_t *header; | 668 | jbd2_journal_revoke_header_t *header; |
669 | struct buffer_head *bh = jh2bh(descriptor); | ||
670 | 669 | ||
671 | if (is_journal_aborted(journal)) { | 670 | if (is_journal_aborted(journal)) { |
672 | put_bh(bh); | 671 | put_bh(descriptor); |
673 | return; | 672 | return; |
674 | } | 673 | } |
675 | 674 | ||
676 | header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; | 675 | header = (jbd2_journal_revoke_header_t *)descriptor->b_data; |
677 | header->r_count = cpu_to_be32(offset); | 676 | header->r_count = cpu_to_be32(offset); |
678 | jbd2_revoke_csum_set(journal, descriptor); | 677 | jbd2_revoke_csum_set(journal, descriptor); |
679 | 678 | ||
680 | set_buffer_jwrite(bh); | 679 | set_buffer_jwrite(descriptor); |
681 | BUFFER_TRACE(bh, "write"); | 680 | BUFFER_TRACE(descriptor, "write"); |
682 | set_buffer_dirty(bh); | 681 | set_buffer_dirty(descriptor); |
683 | write_dirty_buffer(bh, write_op); | 682 | write_dirty_buffer(descriptor, write_op); |
684 | } | 683 | } |
685 | #endif | 684 | #endif |
686 | 685 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 10f524c59ea8..7aa9a32573bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
89 | transaction->t_expires = jiffies + journal->j_commit_interval; | 89 | transaction->t_expires = jiffies + journal->j_commit_interval; |
90 | spin_lock_init(&transaction->t_handle_lock); | 90 | spin_lock_init(&transaction->t_handle_lock); |
91 | atomic_set(&transaction->t_updates, 0); | 91 | atomic_set(&transaction->t_updates, 0); |
92 | atomic_set(&transaction->t_outstanding_credits, 0); | 92 | atomic_set(&transaction->t_outstanding_credits, |
93 | atomic_read(&journal->j_reserved_credits)); | ||
93 | atomic_set(&transaction->t_handle_count, 0); | 94 | atomic_set(&transaction->t_handle_count, 0); |
94 | INIT_LIST_HEAD(&transaction->t_inode_list); | 95 | INIT_LIST_HEAD(&transaction->t_inode_list); |
95 | INIT_LIST_HEAD(&transaction->t_private_list); | 96 | INIT_LIST_HEAD(&transaction->t_private_list); |
@@ -141,6 +142,112 @@ static inline void update_t_max_wait(transaction_t *transaction, | |||
141 | } | 142 | } |
142 | 143 | ||
143 | /* | 144 | /* |
145 | * Wait until running transaction passes T_LOCKED state. Also starts the commit | ||
146 | * if needed. The function expects running transaction to exist and releases | ||
147 | * j_state_lock. | ||
148 | */ | ||
149 | static void wait_transaction_locked(journal_t *journal) | ||
150 | __releases(journal->j_state_lock) | ||
151 | { | ||
152 | DEFINE_WAIT(wait); | ||
153 | int need_to_start; | ||
154 | tid_t tid = journal->j_running_transaction->t_tid; | ||
155 | |||
156 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
157 | TASK_UNINTERRUPTIBLE); | ||
158 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
159 | read_unlock(&journal->j_state_lock); | ||
160 | if (need_to_start) | ||
161 | jbd2_log_start_commit(journal, tid); | ||
162 | schedule(); | ||
163 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
164 | } | ||
165 | |||
166 | static void sub_reserved_credits(journal_t *journal, int blocks) | ||
167 | { | ||
168 | atomic_sub(blocks, &journal->j_reserved_credits); | ||
169 | wake_up(&journal->j_wait_reserved); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Wait until we can add credits for handle to the running transaction. Called | ||
174 | * with j_state_lock held for reading. Returns 0 if handle joined the running | ||
175 | * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and | ||
176 | * caller must retry. | ||
177 | */ | ||
178 | static int add_transaction_credits(journal_t *journal, int blocks, | ||
179 | int rsv_blocks) | ||
180 | { | ||
181 | transaction_t *t = journal->j_running_transaction; | ||
182 | int needed; | ||
183 | int total = blocks + rsv_blocks; | ||
184 | |||
185 | /* | ||
186 | * If the current transaction is locked down for commit, wait | ||
187 | * for the lock to be released. | ||
188 | */ | ||
189 | if (t->t_state == T_LOCKED) { | ||
190 | wait_transaction_locked(journal); | ||
191 | return 1; | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * If there is not enough space left in the log to write all | ||
196 | * potential buffers requested by this operation, we need to | ||
197 | * stall pending a log checkpoint to free some more log space. | ||
198 | */ | ||
199 | needed = atomic_add_return(total, &t->t_outstanding_credits); | ||
200 | if (needed > journal->j_max_transaction_buffers) { | ||
201 | /* | ||
202 | * If the current transaction is already too large, | ||
203 | * then start to commit it: we can then go back and | ||
204 | * attach this handle to a new transaction. | ||
205 | */ | ||
206 | atomic_sub(total, &t->t_outstanding_credits); | ||
207 | wait_transaction_locked(journal); | ||
208 | return 1; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * The commit code assumes that it can get enough log space | ||
213 | * without forcing a checkpoint. This is *critical* for | ||
214 | * correctness: a checkpoint of a buffer which is also | ||
215 | * associated with a committing transaction creates a deadlock, | ||
216 | * so commit simply cannot force through checkpoints. | ||
217 | * | ||
218 | * We must therefore ensure the necessary space in the journal | ||
219 | * *before* starting to dirty potentially checkpointed buffers | ||
220 | * in the new transaction. | ||
221 | */ | ||
222 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { | ||
223 | atomic_sub(total, &t->t_outstanding_credits); | ||
224 | read_unlock(&journal->j_state_lock); | ||
225 | write_lock(&journal->j_state_lock); | ||
226 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) | ||
227 | __jbd2_log_wait_for_space(journal); | ||
228 | write_unlock(&journal->j_state_lock); | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | /* No reservation? We are done... */ | ||
233 | if (!rsv_blocks) | ||
234 | return 0; | ||
235 | |||
236 | needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); | ||
237 | /* We allow at most half of a transaction to be reserved */ | ||
238 | if (needed > journal->j_max_transaction_buffers / 2) { | ||
239 | sub_reserved_credits(journal, rsv_blocks); | ||
240 | atomic_sub(total, &t->t_outstanding_credits); | ||
241 | read_unlock(&journal->j_state_lock); | ||
242 | wait_event(journal->j_wait_reserved, | ||
243 | atomic_read(&journal->j_reserved_credits) + rsv_blocks | ||
244 | <= journal->j_max_transaction_buffers / 2); | ||
245 | return 1; | ||
246 | } | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | /* | ||
144 | * start_this_handle: Given a handle, deal with any locking or stalling | 251 | * start_this_handle: Given a handle, deal with any locking or stalling |
145 | * needed to make sure that there is enough journal space for the handle | 252 | * needed to make sure that there is enough journal space for the handle |
146 | * to begin. Attach the handle to a transaction and set up the | 253 | * to begin. Attach the handle to a transaction and set up the |
@@ -151,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
151 | gfp_t gfp_mask) | 258 | gfp_t gfp_mask) |
152 | { | 259 | { |
153 | transaction_t *transaction, *new_transaction = NULL; | 260 | transaction_t *transaction, *new_transaction = NULL; |
154 | tid_t tid; | 261 | int blocks = handle->h_buffer_credits; |
155 | int needed, need_to_start; | 262 | int rsv_blocks = 0; |
156 | int nblocks = handle->h_buffer_credits; | ||
157 | unsigned long ts = jiffies; | 263 | unsigned long ts = jiffies; |
158 | 264 | ||
159 | if (nblocks > journal->j_max_transaction_buffers) { | 265 | /* |
266 | * 1/2 of transaction can be reserved so we can practically handle | ||
267 | * only 1/2 of maximum transaction size per operation | ||
268 | */ | ||
269 | if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { | ||
160 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", | 270 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", |
161 | current->comm, nblocks, | 271 | current->comm, blocks, |
162 | journal->j_max_transaction_buffers); | 272 | journal->j_max_transaction_buffers / 2); |
163 | return -ENOSPC; | 273 | return -ENOSPC; |
164 | } | 274 | } |
165 | 275 | ||
276 | if (handle->h_rsv_handle) | ||
277 | rsv_blocks = handle->h_rsv_handle->h_buffer_credits; | ||
278 | |||
166 | alloc_transaction: | 279 | alloc_transaction: |
167 | if (!journal->j_running_transaction) { | 280 | if (!journal->j_running_transaction) { |
168 | new_transaction = kmem_cache_zalloc(transaction_cache, | 281 | new_transaction = kmem_cache_zalloc(transaction_cache, |
@@ -199,8 +312,12 @@ repeat: | |||
199 | return -EROFS; | 312 | return -EROFS; |
200 | } | 313 | } |
201 | 314 | ||
202 | /* Wait on the journal's transaction barrier if necessary */ | 315 | /* |
203 | if (journal->j_barrier_count) { | 316 | * Wait on the journal's transaction barrier if necessary. Specifically |
317 | * we allow reserved handles to proceed because otherwise commit could | ||
318 | * deadlock on page writeback not being able to complete. | ||
319 | */ | ||
320 | if (!handle->h_reserved && journal->j_barrier_count) { | ||
204 | read_unlock(&journal->j_state_lock); | 321 | read_unlock(&journal->j_state_lock); |
205 | wait_event(journal->j_wait_transaction_locked, | 322 | wait_event(journal->j_wait_transaction_locked, |
206 | journal->j_barrier_count == 0); | 323 | journal->j_barrier_count == 0); |
@@ -213,7 +330,7 @@ repeat: | |||
213 | goto alloc_transaction; | 330 | goto alloc_transaction; |
214 | write_lock(&journal->j_state_lock); | 331 | write_lock(&journal->j_state_lock); |
215 | if (!journal->j_running_transaction && | 332 | if (!journal->j_running_transaction && |
216 | !journal->j_barrier_count) { | 333 | (handle->h_reserved || !journal->j_barrier_count)) { |
217 | jbd2_get_transaction(journal, new_transaction); | 334 | jbd2_get_transaction(journal, new_transaction); |
218 | new_transaction = NULL; | 335 | new_transaction = NULL; |
219 | } | 336 | } |
@@ -223,85 +340,18 @@ repeat: | |||
223 | 340 | ||
224 | transaction = journal->j_running_transaction; | 341 | transaction = journal->j_running_transaction; |
225 | 342 | ||
226 | /* | 343 | if (!handle->h_reserved) { |
227 | * If the current transaction is locked down for commit, wait for the | 344 | /* We may have dropped j_state_lock - restart in that case */ |
228 | * lock to be released. | 345 | if (add_transaction_credits(journal, blocks, rsv_blocks)) |
229 | */ | 346 | goto repeat; |
230 | if (transaction->t_state == T_LOCKED) { | 347 | } else { |
231 | DEFINE_WAIT(wait); | ||
232 | |||
233 | prepare_to_wait(&journal->j_wait_transaction_locked, | ||
234 | &wait, TASK_UNINTERRUPTIBLE); | ||
235 | read_unlock(&journal->j_state_lock); | ||
236 | schedule(); | ||
237 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
238 | goto repeat; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * If there is not enough space left in the log to write all potential | ||
243 | * buffers requested by this operation, we need to stall pending a log | ||
244 | * checkpoint to free some more log space. | ||
245 | */ | ||
246 | needed = atomic_add_return(nblocks, | ||
247 | &transaction->t_outstanding_credits); | ||
248 | |||
249 | if (needed > journal->j_max_transaction_buffers) { | ||
250 | /* | 348 | /* |
251 | * If the current transaction is already too large, then start | 349 | * We have handle reserved so we are allowed to join T_LOCKED |
252 | * to commit it: we can then go back and attach this handle to | 350 | * transaction and we don't have to check for transaction size |
253 | * a new transaction. | 351 | * and journal space. |
254 | */ | 352 | */ |
255 | DEFINE_WAIT(wait); | 353 | sub_reserved_credits(journal, blocks); |
256 | 354 | handle->h_reserved = 0; | |
257 | jbd_debug(2, "Handle %p starting new commit...\n", handle); | ||
258 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
259 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
260 | TASK_UNINTERRUPTIBLE); | ||
261 | tid = transaction->t_tid; | ||
262 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
263 | read_unlock(&journal->j_state_lock); | ||
264 | if (need_to_start) | ||
265 | jbd2_log_start_commit(journal, tid); | ||
266 | schedule(); | ||
267 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
268 | goto repeat; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * The commit code assumes that it can get enough log space | ||
273 | * without forcing a checkpoint. This is *critical* for | ||
274 | * correctness: a checkpoint of a buffer which is also | ||
275 | * associated with a committing transaction creates a deadlock, | ||
276 | * so commit simply cannot force through checkpoints. | ||
277 | * | ||
278 | * We must therefore ensure the necessary space in the journal | ||
279 | * *before* starting to dirty potentially checkpointed buffers | ||
280 | * in the new transaction. | ||
281 | * | ||
282 | * The worst part is, any transaction currently committing can | ||
283 | * reduce the free space arbitrarily. Be careful to account for | ||
284 | * those buffers when checkpointing. | ||
285 | */ | ||
286 | |||
287 | /* | ||
288 | * @@@ AKPM: This seems rather over-defensive. We're giving commit | ||
289 | * a _lot_ of headroom: 1/4 of the journal plus the size of | ||
290 | * the committing transaction. Really, we only need to give it | ||
291 | * committing_transaction->t_outstanding_credits plus "enough" for | ||
292 | * the log control blocks. | ||
293 | * Also, this test is inconsistent with the matching one in | ||
294 | * jbd2_journal_extend(). | ||
295 | */ | ||
296 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { | ||
297 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); | ||
298 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
299 | read_unlock(&journal->j_state_lock); | ||
300 | write_lock(&journal->j_state_lock); | ||
301 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) | ||
302 | __jbd2_log_wait_for_space(journal); | ||
303 | write_unlock(&journal->j_state_lock); | ||
304 | goto repeat; | ||
305 | } | 355 | } |
306 | 356 | ||
307 | /* OK, account for the buffers that this operation expects to | 357 | /* OK, account for the buffers that this operation expects to |
@@ -309,15 +359,16 @@ repeat: | |||
309 | */ | 359 | */ |
310 | update_t_max_wait(transaction, ts); | 360 | update_t_max_wait(transaction, ts); |
311 | handle->h_transaction = transaction; | 361 | handle->h_transaction = transaction; |
312 | handle->h_requested_credits = nblocks; | 362 | handle->h_requested_credits = blocks; |
313 | handle->h_start_jiffies = jiffies; | 363 | handle->h_start_jiffies = jiffies; |
314 | atomic_inc(&transaction->t_updates); | 364 | atomic_inc(&transaction->t_updates); |
315 | atomic_inc(&transaction->t_handle_count); | 365 | atomic_inc(&transaction->t_handle_count); |
316 | jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", | 366 | jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", |
317 | handle, nblocks, | 367 | handle, blocks, |
318 | atomic_read(&transaction->t_outstanding_credits), | 368 | atomic_read(&transaction->t_outstanding_credits), |
319 | __jbd2_log_space_left(journal)); | 369 | jbd2_log_space_left(journal)); |
320 | read_unlock(&journal->j_state_lock); | 370 | read_unlock(&journal->j_state_lock); |
371 | current->journal_info = handle; | ||
321 | 372 | ||
322 | lock_map_acquire(&handle->h_lockdep_map); | 373 | lock_map_acquire(&handle->h_lockdep_map); |
323 | jbd2_journal_free_transaction(new_transaction); | 374 | jbd2_journal_free_transaction(new_transaction); |
@@ -348,16 +399,21 @@ static handle_t *new_handle(int nblocks) | |||
348 | * | 399 | * |
349 | * We make sure that the transaction can guarantee at least nblocks of | 400 | * We make sure that the transaction can guarantee at least nblocks of |
350 | * modified buffers in the log. We block until the log can guarantee | 401 | * modified buffers in the log. We block until the log can guarantee |
351 | * that much space. | 402 | * that much space. Additionally, if rsv_blocks > 0, we also create another |
352 | * | 403 | * handle with rsv_blocks reserved blocks in the journal. This handle is |
353 | * This function is visible to journal users (like ext3fs), so is not | 404 | * is stored in h_rsv_handle. It is not attached to any particular transaction |
354 | * called with the journal already locked. | 405 | * and thus doesn't block transaction commit. If the caller uses this reserved |
406 | * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() | ||
407 | * on the parent handle will dispose the reserved one. Reserved handle has to | ||
408 | * be converted to a normal handle using jbd2_journal_start_reserved() before | ||
409 | * it can be used. | ||
355 | * | 410 | * |
356 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value | 411 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
357 | * on failure. | 412 | * on failure. |
358 | */ | 413 | */ |
359 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | 414 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, |
360 | unsigned int type, unsigned int line_no) | 415 | gfp_t gfp_mask, unsigned int type, |
416 | unsigned int line_no) | ||
361 | { | 417 | { |
362 | handle_t *handle = journal_current_handle(); | 418 | handle_t *handle = journal_current_handle(); |
363 | int err; | 419 | int err; |
@@ -374,13 +430,24 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | |||
374 | handle = new_handle(nblocks); | 430 | handle = new_handle(nblocks); |
375 | if (!handle) | 431 | if (!handle) |
376 | return ERR_PTR(-ENOMEM); | 432 | return ERR_PTR(-ENOMEM); |
433 | if (rsv_blocks) { | ||
434 | handle_t *rsv_handle; | ||
377 | 435 | ||
378 | current->journal_info = handle; | 436 | rsv_handle = new_handle(rsv_blocks); |
437 | if (!rsv_handle) { | ||
438 | jbd2_free_handle(handle); | ||
439 | return ERR_PTR(-ENOMEM); | ||
440 | } | ||
441 | rsv_handle->h_reserved = 1; | ||
442 | rsv_handle->h_journal = journal; | ||
443 | handle->h_rsv_handle = rsv_handle; | ||
444 | } | ||
379 | 445 | ||
380 | err = start_this_handle(journal, handle, gfp_mask); | 446 | err = start_this_handle(journal, handle, gfp_mask); |
381 | if (err < 0) { | 447 | if (err < 0) { |
448 | if (handle->h_rsv_handle) | ||
449 | jbd2_free_handle(handle->h_rsv_handle); | ||
382 | jbd2_free_handle(handle); | 450 | jbd2_free_handle(handle); |
383 | current->journal_info = NULL; | ||
384 | return ERR_PTR(err); | 451 | return ERR_PTR(err); |
385 | } | 452 | } |
386 | handle->h_type = type; | 453 | handle->h_type = type; |
@@ -395,10 +462,65 @@ EXPORT_SYMBOL(jbd2__journal_start); | |||
395 | 462 | ||
396 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | 463 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) |
397 | { | 464 | { |
398 | return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0); | 465 | return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); |
399 | } | 466 | } |
400 | EXPORT_SYMBOL(jbd2_journal_start); | 467 | EXPORT_SYMBOL(jbd2_journal_start); |
401 | 468 | ||
469 | void jbd2_journal_free_reserved(handle_t *handle) | ||
470 | { | ||
471 | journal_t *journal = handle->h_journal; | ||
472 | |||
473 | WARN_ON(!handle->h_reserved); | ||
474 | sub_reserved_credits(journal, handle->h_buffer_credits); | ||
475 | jbd2_free_handle(handle); | ||
476 | } | ||
477 | EXPORT_SYMBOL(jbd2_journal_free_reserved); | ||
478 | |||
479 | /** | ||
480 | * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle | ||
481 | * @handle: handle to start | ||
482 | * | ||
483 | * Start handle that has been previously reserved with jbd2_journal_reserve(). | ||
484 | * This attaches @handle to the running transaction (or creates one if there's | ||
485 | * not transaction running). Unlike jbd2_journal_start() this function cannot | ||
486 | * block on journal commit, checkpointing, or similar stuff. It can block on | ||
487 | * memory allocation or frozen journal though. | ||
488 | * | ||
489 | * Return 0 on success, non-zero on error - handle is freed in that case. | ||
490 | */ | ||
491 | int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, | ||
492 | unsigned int line_no) | ||
493 | { | ||
494 | journal_t *journal = handle->h_journal; | ||
495 | int ret = -EIO; | ||
496 | |||
497 | if (WARN_ON(!handle->h_reserved)) { | ||
498 | /* Someone passed in normal handle? Just stop it. */ | ||
499 | jbd2_journal_stop(handle); | ||
500 | return ret; | ||
501 | } | ||
502 | /* | ||
503 | * Usefulness of mixing of reserved and unreserved handles is | ||
504 | * questionable. So far nobody seems to need it so just error out. | ||
505 | */ | ||
506 | if (WARN_ON(current->journal_info)) { | ||
507 | jbd2_journal_free_reserved(handle); | ||
508 | return ret; | ||
509 | } | ||
510 | |||
511 | handle->h_journal = NULL; | ||
512 | /* | ||
513 | * GFP_NOFS is here because callers are likely from writeback or | ||
514 | * similarly constrained call sites | ||
515 | */ | ||
516 | ret = start_this_handle(journal, handle, GFP_NOFS); | ||
517 | if (ret < 0) | ||
518 | jbd2_journal_free_reserved(handle); | ||
519 | handle->h_type = type; | ||
520 | handle->h_line_no = line_no; | ||
521 | return ret; | ||
522 | } | ||
523 | EXPORT_SYMBOL(jbd2_journal_start_reserved); | ||
402 | 524 | ||
403 | /** | 525 | /** |
404 | * int jbd2_journal_extend() - extend buffer credits. | 526 | * int jbd2_journal_extend() - extend buffer credits. |
@@ -423,49 +545,53 @@ EXPORT_SYMBOL(jbd2_journal_start); | |||
423 | int jbd2_journal_extend(handle_t *handle, int nblocks) | 545 | int jbd2_journal_extend(handle_t *handle, int nblocks) |
424 | { | 546 | { |
425 | transaction_t *transaction = handle->h_transaction; | 547 | transaction_t *transaction = handle->h_transaction; |
426 | journal_t *journal = transaction->t_journal; | 548 | journal_t *journal; |
427 | int result; | 549 | int result; |
428 | int wanted; | 550 | int wanted; |
429 | 551 | ||
430 | result = -EIO; | 552 | WARN_ON(!transaction); |
431 | if (is_handle_aborted(handle)) | 553 | if (is_handle_aborted(handle)) |
432 | goto out; | 554 | return -EROFS; |
555 | journal = transaction->t_journal; | ||
433 | 556 | ||
434 | result = 1; | 557 | result = 1; |
435 | 558 | ||
436 | read_lock(&journal->j_state_lock); | 559 | read_lock(&journal->j_state_lock); |
437 | 560 | ||
438 | /* Don't extend a locked-down transaction! */ | 561 | /* Don't extend a locked-down transaction! */ |
439 | if (handle->h_transaction->t_state != T_RUNNING) { | 562 | if (transaction->t_state != T_RUNNING) { |
440 | jbd_debug(3, "denied handle %p %d blocks: " | 563 | jbd_debug(3, "denied handle %p %d blocks: " |
441 | "transaction not running\n", handle, nblocks); | 564 | "transaction not running\n", handle, nblocks); |
442 | goto error_out; | 565 | goto error_out; |
443 | } | 566 | } |
444 | 567 | ||
445 | spin_lock(&transaction->t_handle_lock); | 568 | spin_lock(&transaction->t_handle_lock); |
446 | wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; | 569 | wanted = atomic_add_return(nblocks, |
570 | &transaction->t_outstanding_credits); | ||
447 | 571 | ||
448 | if (wanted > journal->j_max_transaction_buffers) { | 572 | if (wanted > journal->j_max_transaction_buffers) { |
449 | jbd_debug(3, "denied handle %p %d blocks: " | 573 | jbd_debug(3, "denied handle %p %d blocks: " |
450 | "transaction too large\n", handle, nblocks); | 574 | "transaction too large\n", handle, nblocks); |
575 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
451 | goto unlock; | 576 | goto unlock; |
452 | } | 577 | } |
453 | 578 | ||
454 | if (wanted > __jbd2_log_space_left(journal)) { | 579 | if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > |
580 | jbd2_log_space_left(journal)) { | ||
455 | jbd_debug(3, "denied handle %p %d blocks: " | 581 | jbd_debug(3, "denied handle %p %d blocks: " |
456 | "insufficient log space\n", handle, nblocks); | 582 | "insufficient log space\n", handle, nblocks); |
583 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
457 | goto unlock; | 584 | goto unlock; |
458 | } | 585 | } |
459 | 586 | ||
460 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, | 587 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, |
461 | handle->h_transaction->t_tid, | 588 | transaction->t_tid, |
462 | handle->h_type, handle->h_line_no, | 589 | handle->h_type, handle->h_line_no, |
463 | handle->h_buffer_credits, | 590 | handle->h_buffer_credits, |
464 | nblocks); | 591 | nblocks); |
465 | 592 | ||
466 | handle->h_buffer_credits += nblocks; | 593 | handle->h_buffer_credits += nblocks; |
467 | handle->h_requested_credits += nblocks; | 594 | handle->h_requested_credits += nblocks; |
468 | atomic_add(nblocks, &transaction->t_outstanding_credits); | ||
469 | result = 0; | 595 | result = 0; |
470 | 596 | ||
471 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); | 597 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); |
@@ -473,7 +599,6 @@ unlock: | |||
473 | spin_unlock(&transaction->t_handle_lock); | 599 | spin_unlock(&transaction->t_handle_lock); |
474 | error_out: | 600 | error_out: |
475 | read_unlock(&journal->j_state_lock); | 601 | read_unlock(&journal->j_state_lock); |
476 | out: | ||
477 | return result; | 602 | return result; |
478 | } | 603 | } |
479 | 604 | ||
@@ -490,19 +615,22 @@ out: | |||
490 | * to a running handle, a call to jbd2_journal_restart will commit the | 615 | * to a running handle, a call to jbd2_journal_restart will commit the |
491 | * handle's transaction so far and reattach the handle to a new | 616 | * handle's transaction so far and reattach the handle to a new |
492 | * transaction capabable of guaranteeing the requested number of | 617 | * transaction capabable of guaranteeing the requested number of |
493 | * credits. | 618 | * credits. We preserve reserved handle if there's any attached to the |
619 | * passed in handle. | ||
494 | */ | 620 | */ |
495 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | 621 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) |
496 | { | 622 | { |
497 | transaction_t *transaction = handle->h_transaction; | 623 | transaction_t *transaction = handle->h_transaction; |
498 | journal_t *journal = transaction->t_journal; | 624 | journal_t *journal; |
499 | tid_t tid; | 625 | tid_t tid; |
500 | int need_to_start, ret; | 626 | int need_to_start, ret; |
501 | 627 | ||
628 | WARN_ON(!transaction); | ||
502 | /* If we've had an abort of any type, don't even think about | 629 | /* If we've had an abort of any type, don't even think about |
503 | * actually doing the restart! */ | 630 | * actually doing the restart! */ |
504 | if (is_handle_aborted(handle)) | 631 | if (is_handle_aborted(handle)) |
505 | return 0; | 632 | return 0; |
633 | journal = transaction->t_journal; | ||
506 | 634 | ||
507 | /* | 635 | /* |
508 | * First unlink the handle from its current transaction, and start the | 636 | * First unlink the handle from its current transaction, and start the |
@@ -515,12 +643,18 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | |||
515 | spin_lock(&transaction->t_handle_lock); | 643 | spin_lock(&transaction->t_handle_lock); |
516 | atomic_sub(handle->h_buffer_credits, | 644 | atomic_sub(handle->h_buffer_credits, |
517 | &transaction->t_outstanding_credits); | 645 | &transaction->t_outstanding_credits); |
646 | if (handle->h_rsv_handle) { | ||
647 | sub_reserved_credits(journal, | ||
648 | handle->h_rsv_handle->h_buffer_credits); | ||
649 | } | ||
518 | if (atomic_dec_and_test(&transaction->t_updates)) | 650 | if (atomic_dec_and_test(&transaction->t_updates)) |
519 | wake_up(&journal->j_wait_updates); | 651 | wake_up(&journal->j_wait_updates); |
652 | tid = transaction->t_tid; | ||
520 | spin_unlock(&transaction->t_handle_lock); | 653 | spin_unlock(&transaction->t_handle_lock); |
654 | handle->h_transaction = NULL; | ||
655 | current->journal_info = NULL; | ||
521 | 656 | ||
522 | jbd_debug(2, "restarting handle %p\n", handle); | 657 | jbd_debug(2, "restarting handle %p\n", handle); |
523 | tid = transaction->t_tid; | ||
524 | need_to_start = !tid_geq(journal->j_commit_request, tid); | 658 | need_to_start = !tid_geq(journal->j_commit_request, tid); |
525 | read_unlock(&journal->j_state_lock); | 659 | read_unlock(&journal->j_state_lock); |
526 | if (need_to_start) | 660 | if (need_to_start) |
@@ -557,6 +691,14 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
557 | write_lock(&journal->j_state_lock); | 691 | write_lock(&journal->j_state_lock); |
558 | ++journal->j_barrier_count; | 692 | ++journal->j_barrier_count; |
559 | 693 | ||
694 | /* Wait until there are no reserved handles */ | ||
695 | if (atomic_read(&journal->j_reserved_credits)) { | ||
696 | write_unlock(&journal->j_state_lock); | ||
697 | wait_event(journal->j_wait_reserved, | ||
698 | atomic_read(&journal->j_reserved_credits) == 0); | ||
699 | write_lock(&journal->j_state_lock); | ||
700 | } | ||
701 | |||
560 | /* Wait until there are no running updates */ | 702 | /* Wait until there are no running updates */ |
561 | while (1) { | 703 | while (1) { |
562 | transaction_t *transaction = journal->j_running_transaction; | 704 | transaction_t *transaction = journal->j_running_transaction; |
@@ -619,6 +761,12 @@ static void warn_dirty_buffer(struct buffer_head *bh) | |||
619 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | 761 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
620 | } | 762 | } |
621 | 763 | ||
764 | static int sleep_on_shadow_bh(void *word) | ||
765 | { | ||
766 | io_schedule(); | ||
767 | return 0; | ||
768 | } | ||
769 | |||
622 | /* | 770 | /* |
623 | * If the buffer is already part of the current transaction, then there | 771 | * If the buffer is already part of the current transaction, then there |
624 | * is nothing we need to do. If it is already part of a prior | 772 | * is nothing we need to do. If it is already part of a prior |
@@ -634,17 +782,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, | |||
634 | int force_copy) | 782 | int force_copy) |
635 | { | 783 | { |
636 | struct buffer_head *bh; | 784 | struct buffer_head *bh; |
637 | transaction_t *transaction; | 785 | transaction_t *transaction = handle->h_transaction; |
638 | journal_t *journal; | 786 | journal_t *journal; |
639 | int error; | 787 | int error; |
640 | char *frozen_buffer = NULL; | 788 | char *frozen_buffer = NULL; |
641 | int need_copy = 0; | 789 | int need_copy = 0; |
642 | unsigned long start_lock, time_lock; | 790 | unsigned long start_lock, time_lock; |
643 | 791 | ||
792 | WARN_ON(!transaction); | ||
644 | if (is_handle_aborted(handle)) | 793 | if (is_handle_aborted(handle)) |
645 | return -EROFS; | 794 | return -EROFS; |
646 | |||
647 | transaction = handle->h_transaction; | ||
648 | journal = transaction->t_journal; | 795 | journal = transaction->t_journal; |
649 | 796 | ||
650 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); | 797 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); |
@@ -754,41 +901,29 @@ repeat: | |||
754 | * journaled. If the primary copy is already going to | 901 | * journaled. If the primary copy is already going to |
755 | * disk then we cannot do copy-out here. */ | 902 | * disk then we cannot do copy-out here. */ |
756 | 903 | ||
757 | if (jh->b_jlist == BJ_Shadow) { | 904 | if (buffer_shadow(bh)) { |
758 | DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); | ||
759 | wait_queue_head_t *wqh; | ||
760 | |||
761 | wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); | ||
762 | |||
763 | JBUFFER_TRACE(jh, "on shadow: sleep"); | 905 | JBUFFER_TRACE(jh, "on shadow: sleep"); |
764 | jbd_unlock_bh_state(bh); | 906 | jbd_unlock_bh_state(bh); |
765 | /* commit wakes up all shadow buffers after IO */ | 907 | wait_on_bit(&bh->b_state, BH_Shadow, |
766 | for ( ; ; ) { | 908 | sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); |
767 | prepare_to_wait(wqh, &wait.wait, | ||
768 | TASK_UNINTERRUPTIBLE); | ||
769 | if (jh->b_jlist != BJ_Shadow) | ||
770 | break; | ||
771 | schedule(); | ||
772 | } | ||
773 | finish_wait(wqh, &wait.wait); | ||
774 | goto repeat; | 909 | goto repeat; |
775 | } | 910 | } |
776 | 911 | ||
777 | /* Only do the copy if the currently-owning transaction | 912 | /* |
778 | * still needs it. If it is on the Forget list, the | 913 | * Only do the copy if the currently-owning transaction still |
779 | * committing transaction is past that stage. The | 914 | * needs it. If buffer isn't on BJ_Metadata list, the |
780 | * buffer had better remain locked during the kmalloc, | 915 | * committing transaction is past that stage (here we use the |
781 | * but that should be true --- we hold the journal lock | 916 | * fact that BH_Shadow is set under bh_state lock together with |
782 | * still and the buffer is already on the BUF_JOURNAL | 917 | * refiling to BJ_Shadow list and at this point we know the |
783 | * list so won't be flushed. | 918 | * buffer doesn't have BH_Shadow set). |
784 | * | 919 | * |
785 | * Subtle point, though: if this is a get_undo_access, | 920 | * Subtle point, though: if this is a get_undo_access, |
786 | * then we will be relying on the frozen_data to contain | 921 | * then we will be relying on the frozen_data to contain |
787 | * the new value of the committed_data record after the | 922 | * the new value of the committed_data record after the |
788 | * transaction, so we HAVE to force the frozen_data copy | 923 | * transaction, so we HAVE to force the frozen_data copy |
789 | * in that case. */ | 924 | * in that case. |
790 | 925 | */ | |
791 | if (jh->b_jlist != BJ_Forget || force_copy) { | 926 | if (jh->b_jlist == BJ_Metadata || force_copy) { |
792 | JBUFFER_TRACE(jh, "generate frozen data"); | 927 | JBUFFER_TRACE(jh, "generate frozen data"); |
793 | if (!frozen_buffer) { | 928 | if (!frozen_buffer) { |
794 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 929 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
@@ -915,14 +1050,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) | |||
915 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | 1050 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) |
916 | { | 1051 | { |
917 | transaction_t *transaction = handle->h_transaction; | 1052 | transaction_t *transaction = handle->h_transaction; |
918 | journal_t *journal = transaction->t_journal; | 1053 | journal_t *journal; |
919 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 1054 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
920 | int err; | 1055 | int err; |
921 | 1056 | ||
922 | jbd_debug(5, "journal_head %p\n", jh); | 1057 | jbd_debug(5, "journal_head %p\n", jh); |
1058 | WARN_ON(!transaction); | ||
923 | err = -EROFS; | 1059 | err = -EROFS; |
924 | if (is_handle_aborted(handle)) | 1060 | if (is_handle_aborted(handle)) |
925 | goto out; | 1061 | goto out; |
1062 | journal = transaction->t_journal; | ||
926 | err = 0; | 1063 | err = 0; |
927 | 1064 | ||
928 | JBUFFER_TRACE(jh, "entry"); | 1065 | JBUFFER_TRACE(jh, "entry"); |
@@ -1128,12 +1265,14 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, | |||
1128 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | 1265 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) |
1129 | { | 1266 | { |
1130 | transaction_t *transaction = handle->h_transaction; | 1267 | transaction_t *transaction = handle->h_transaction; |
1131 | journal_t *journal = transaction->t_journal; | 1268 | journal_t *journal; |
1132 | struct journal_head *jh; | 1269 | struct journal_head *jh; |
1133 | int ret = 0; | 1270 | int ret = 0; |
1134 | 1271 | ||
1272 | WARN_ON(!transaction); | ||
1135 | if (is_handle_aborted(handle)) | 1273 | if (is_handle_aborted(handle)) |
1136 | goto out; | 1274 | return -EROFS; |
1275 | journal = transaction->t_journal; | ||
1137 | jh = jbd2_journal_grab_journal_head(bh); | 1276 | jh = jbd2_journal_grab_journal_head(bh); |
1138 | if (!jh) { | 1277 | if (!jh) { |
1139 | ret = -EUCLEAN; | 1278 | ret = -EUCLEAN; |
@@ -1227,7 +1366,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1227 | 1366 | ||
1228 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); | 1367 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); |
1229 | spin_lock(&journal->j_list_lock); | 1368 | spin_lock(&journal->j_list_lock); |
1230 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); | 1369 | __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); |
1231 | spin_unlock(&journal->j_list_lock); | 1370 | spin_unlock(&journal->j_list_lock); |
1232 | out_unlock_bh: | 1371 | out_unlock_bh: |
1233 | jbd_unlock_bh_state(bh); | 1372 | jbd_unlock_bh_state(bh); |
@@ -1258,12 +1397,17 @@ out: | |||
1258 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | 1397 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) |
1259 | { | 1398 | { |
1260 | transaction_t *transaction = handle->h_transaction; | 1399 | transaction_t *transaction = handle->h_transaction; |
1261 | journal_t *journal = transaction->t_journal; | 1400 | journal_t *journal; |
1262 | struct journal_head *jh; | 1401 | struct journal_head *jh; |
1263 | int drop_reserve = 0; | 1402 | int drop_reserve = 0; |
1264 | int err = 0; | 1403 | int err = 0; |
1265 | int was_modified = 0; | 1404 | int was_modified = 0; |
1266 | 1405 | ||
1406 | WARN_ON(!transaction); | ||
1407 | if (is_handle_aborted(handle)) | ||
1408 | return -EROFS; | ||
1409 | journal = transaction->t_journal; | ||
1410 | |||
1267 | BUFFER_TRACE(bh, "entry"); | 1411 | BUFFER_TRACE(bh, "entry"); |
1268 | 1412 | ||
1269 | jbd_lock_bh_state(bh); | 1413 | jbd_lock_bh_state(bh); |
@@ -1290,7 +1434,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1290 | */ | 1434 | */ |
1291 | jh->b_modified = 0; | 1435 | jh->b_modified = 0; |
1292 | 1436 | ||
1293 | if (jh->b_transaction == handle->h_transaction) { | 1437 | if (jh->b_transaction == transaction) { |
1294 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1438 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
1295 | 1439 | ||
1296 | /* If we are forgetting a buffer which is already part | 1440 | /* If we are forgetting a buffer which is already part |
@@ -1385,19 +1529,21 @@ drop: | |||
1385 | int jbd2_journal_stop(handle_t *handle) | 1529 | int jbd2_journal_stop(handle_t *handle) |
1386 | { | 1530 | { |
1387 | transaction_t *transaction = handle->h_transaction; | 1531 | transaction_t *transaction = handle->h_transaction; |
1388 | journal_t *journal = transaction->t_journal; | 1532 | journal_t *journal; |
1389 | int err, wait_for_commit = 0; | 1533 | int err = 0, wait_for_commit = 0; |
1390 | tid_t tid; | 1534 | tid_t tid; |
1391 | pid_t pid; | 1535 | pid_t pid; |
1392 | 1536 | ||
1537 | if (!transaction) | ||
1538 | goto free_and_exit; | ||
1539 | journal = transaction->t_journal; | ||
1540 | |||
1393 | J_ASSERT(journal_current_handle() == handle); | 1541 | J_ASSERT(journal_current_handle() == handle); |
1394 | 1542 | ||
1395 | if (is_handle_aborted(handle)) | 1543 | if (is_handle_aborted(handle)) |
1396 | err = -EIO; | 1544 | err = -EIO; |
1397 | else { | 1545 | else |
1398 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); | 1546 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
1399 | err = 0; | ||
1400 | } | ||
1401 | 1547 | ||
1402 | if (--handle->h_ref > 0) { | 1548 | if (--handle->h_ref > 0) { |
1403 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, | 1549 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, |
@@ -1407,7 +1553,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1407 | 1553 | ||
1408 | jbd_debug(4, "Handle %p going down\n", handle); | 1554 | jbd_debug(4, "Handle %p going down\n", handle); |
1409 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, | 1555 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, |
1410 | handle->h_transaction->t_tid, | 1556 | transaction->t_tid, |
1411 | handle->h_type, handle->h_line_no, | 1557 | handle->h_type, handle->h_line_no, |
1412 | jiffies - handle->h_start_jiffies, | 1558 | jiffies - handle->h_start_jiffies, |
1413 | handle->h_sync, handle->h_requested_credits, | 1559 | handle->h_sync, handle->h_requested_credits, |
@@ -1518,33 +1664,13 @@ int jbd2_journal_stop(handle_t *handle) | |||
1518 | 1664 | ||
1519 | lock_map_release(&handle->h_lockdep_map); | 1665 | lock_map_release(&handle->h_lockdep_map); |
1520 | 1666 | ||
1667 | if (handle->h_rsv_handle) | ||
1668 | jbd2_journal_free_reserved(handle->h_rsv_handle); | ||
1669 | free_and_exit: | ||
1521 | jbd2_free_handle(handle); | 1670 | jbd2_free_handle(handle); |
1522 | return err; | 1671 | return err; |
1523 | } | 1672 | } |
1524 | 1673 | ||
1525 | /** | ||
1526 | * int jbd2_journal_force_commit() - force any uncommitted transactions | ||
1527 | * @journal: journal to force | ||
1528 | * | ||
1529 | * For synchronous operations: force any uncommitted transactions | ||
1530 | * to disk. May seem kludgy, but it reuses all the handle batching | ||
1531 | * code in a very simple manner. | ||
1532 | */ | ||
1533 | int jbd2_journal_force_commit(journal_t *journal) | ||
1534 | { | ||
1535 | handle_t *handle; | ||
1536 | int ret; | ||
1537 | |||
1538 | handle = jbd2_journal_start(journal, 1); | ||
1539 | if (IS_ERR(handle)) { | ||
1540 | ret = PTR_ERR(handle); | ||
1541 | } else { | ||
1542 | handle->h_sync = 1; | ||
1543 | ret = jbd2_journal_stop(handle); | ||
1544 | } | ||
1545 | return ret; | ||
1546 | } | ||
1547 | |||
1548 | /* | 1674 | /* |
1549 | * | 1675 | * |
1550 | * List management code snippets: various functions for manipulating the | 1676 | * List management code snippets: various functions for manipulating the |
@@ -1601,10 +1727,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1601 | * Remove a buffer from the appropriate transaction list. | 1727 | * Remove a buffer from the appropriate transaction list. |
1602 | * | 1728 | * |
1603 | * Note that this function can *change* the value of | 1729 | * Note that this function can *change* the value of |
1604 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, | 1730 | * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or |
1605 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one | 1731 | * t_reserved_list. If the caller is holding onto a copy of one of these |
1606 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1732 | * pointers, it could go bad. Generally the caller needs to re-read the |
1607 | * the pointer from the transaction_t. | 1733 | * pointer from the transaction_t. |
1608 | * | 1734 | * |
1609 | * Called under j_list_lock. | 1735 | * Called under j_list_lock. |
1610 | */ | 1736 | */ |
@@ -1634,15 +1760,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1634 | case BJ_Forget: | 1760 | case BJ_Forget: |
1635 | list = &transaction->t_forget; | 1761 | list = &transaction->t_forget; |
1636 | break; | 1762 | break; |
1637 | case BJ_IO: | ||
1638 | list = &transaction->t_iobuf_list; | ||
1639 | break; | ||
1640 | case BJ_Shadow: | 1763 | case BJ_Shadow: |
1641 | list = &transaction->t_shadow_list; | 1764 | list = &transaction->t_shadow_list; |
1642 | break; | 1765 | break; |
1643 | case BJ_LogCtl: | ||
1644 | list = &transaction->t_log_list; | ||
1645 | break; | ||
1646 | case BJ_Reserved: | 1766 | case BJ_Reserved: |
1647 | list = &transaction->t_reserved_list; | 1767 | list = &transaction->t_reserved_list; |
1648 | break; | 1768 | break; |
@@ -2034,18 +2154,23 @@ zap_buffer_unlocked: | |||
2034 | * void jbd2_journal_invalidatepage() | 2154 | * void jbd2_journal_invalidatepage() |
2035 | * @journal: journal to use for flush... | 2155 | * @journal: journal to use for flush... |
2036 | * @page: page to flush | 2156 | * @page: page to flush |
2037 | * @offset: length of page to invalidate. | 2157 | * @offset: start of the range to invalidate |
2158 | * @length: length of the range to invalidate | ||
2038 | * | 2159 | * |
2039 | * Reap page buffers containing data after offset in page. Can return -EBUSY | 2160 | * Reap page buffers containing data after in the specified range in page. |
2040 | * if buffers are part of the committing transaction and the page is straddling | 2161 | * Can return -EBUSY if buffers are part of the committing transaction and |
2041 | * i_size. Caller then has to wait for current commit and try again. | 2162 | * the page is straddling i_size. Caller then has to wait for current commit |
2163 | * and try again. | ||
2042 | */ | 2164 | */ |
2043 | int jbd2_journal_invalidatepage(journal_t *journal, | 2165 | int jbd2_journal_invalidatepage(journal_t *journal, |
2044 | struct page *page, | 2166 | struct page *page, |
2045 | unsigned long offset) | 2167 | unsigned int offset, |
2168 | unsigned int length) | ||
2046 | { | 2169 | { |
2047 | struct buffer_head *head, *bh, *next; | 2170 | struct buffer_head *head, *bh, *next; |
2171 | unsigned int stop = offset + length; | ||
2048 | unsigned int curr_off = 0; | 2172 | unsigned int curr_off = 0; |
2173 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
2049 | int may_free = 1; | 2174 | int may_free = 1; |
2050 | int ret = 0; | 2175 | int ret = 0; |
2051 | 2176 | ||
@@ -2054,6 +2179,8 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2054 | if (!page_has_buffers(page)) | 2179 | if (!page_has_buffers(page)) |
2055 | return 0; | 2180 | return 0; |
2056 | 2181 | ||
2182 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
2183 | |||
2057 | /* We will potentially be playing with lists other than just the | 2184 | /* We will potentially be playing with lists other than just the |
2058 | * data lists (especially for journaled data mode), so be | 2185 | * data lists (especially for journaled data mode), so be |
2059 | * cautious in our locking. */ | 2186 | * cautious in our locking. */ |
@@ -2063,10 +2190,13 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2063 | unsigned int next_off = curr_off + bh->b_size; | 2190 | unsigned int next_off = curr_off + bh->b_size; |
2064 | next = bh->b_this_page; | 2191 | next = bh->b_this_page; |
2065 | 2192 | ||
2193 | if (next_off > stop) | ||
2194 | return 0; | ||
2195 | |||
2066 | if (offset <= curr_off) { | 2196 | if (offset <= curr_off) { |
2067 | /* This block is wholly outside the truncation point */ | 2197 | /* This block is wholly outside the truncation point */ |
2068 | lock_buffer(bh); | 2198 | lock_buffer(bh); |
2069 | ret = journal_unmap_buffer(journal, bh, offset > 0); | 2199 | ret = journal_unmap_buffer(journal, bh, partial_page); |
2070 | unlock_buffer(bh); | 2200 | unlock_buffer(bh); |
2071 | if (ret < 0) | 2201 | if (ret < 0) |
2072 | return ret; | 2202 | return ret; |
@@ -2077,7 +2207,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2077 | 2207 | ||
2078 | } while (bh != head); | 2208 | } while (bh != head); |
2079 | 2209 | ||
2080 | if (!offset) { | 2210 | if (!partial_page) { |
2081 | if (may_free && try_to_free_buffers(page)) | 2211 | if (may_free && try_to_free_buffers(page)) |
2082 | J_ASSERT(!page_has_buffers(page)); | 2212 | J_ASSERT(!page_has_buffers(page)); |
2083 | } | 2213 | } |
@@ -2138,15 +2268,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
2138 | case BJ_Forget: | 2268 | case BJ_Forget: |
2139 | list = &transaction->t_forget; | 2269 | list = &transaction->t_forget; |
2140 | break; | 2270 | break; |
2141 | case BJ_IO: | ||
2142 | list = &transaction->t_iobuf_list; | ||
2143 | break; | ||
2144 | case BJ_Shadow: | 2271 | case BJ_Shadow: |
2145 | list = &transaction->t_shadow_list; | 2272 | list = &transaction->t_shadow_list; |
2146 | break; | 2273 | break; |
2147 | case BJ_LogCtl: | ||
2148 | list = &transaction->t_log_list; | ||
2149 | break; | ||
2150 | case BJ_Reserved: | 2274 | case BJ_Reserved: |
2151 | list = &transaction->t_reserved_list; | 2275 | list = &transaction->t_reserved_list; |
2152 | break; | 2276 | break; |
@@ -2248,10 +2372,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
2248 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | 2372 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) |
2249 | { | 2373 | { |
2250 | transaction_t *transaction = handle->h_transaction; | 2374 | transaction_t *transaction = handle->h_transaction; |
2251 | journal_t *journal = transaction->t_journal; | 2375 | journal_t *journal; |
2252 | 2376 | ||
2377 | WARN_ON(!transaction); | ||
2253 | if (is_handle_aborted(handle)) | 2378 | if (is_handle_aborted(handle)) |
2254 | return -EIO; | 2379 | return -EROFS; |
2380 | journal = transaction->t_journal; | ||
2255 | 2381 | ||
2256 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | 2382 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, |
2257 | transaction->t_tid); | 2383 | transaction->t_tid); |