diff options
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- | fs/jbd2/commit.c | 131 |
1 files changed, 47 insertions, 84 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 75716d3d2be0..f3ad1598b201 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -26,7 +26,9 @@ | |||
26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
27 | #include <linux/bio.h> | 27 | #include <linux/bio.h> |
28 | #include <linux/blkdev.h> | 28 | #include <linux/blkdev.h> |
29 | #include <linux/bitops.h> | ||
29 | #include <trace/events/jbd2.h> | 30 | #include <trace/events/jbd2.h> |
31 | #include <asm/system.h> | ||
30 | 32 | ||
31 | /* | 33 | /* |
32 | * Default IO end handler for temporary BJ_IO buffer_heads. | 34 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -101,7 +103,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
101 | struct commit_header *tmp; | 103 | struct commit_header *tmp; |
102 | struct buffer_head *bh; | 104 | struct buffer_head *bh; |
103 | int ret; | 105 | int ret; |
104 | int barrier_done = 0; | ||
105 | struct timespec now = current_kernel_time(); | 106 | struct timespec now = current_kernel_time(); |
106 | 107 | ||
107 | if (is_journal_aborted(journal)) | 108 | if (is_journal_aborted(journal)) |
@@ -135,33 +136,11 @@ static int journal_submit_commit_record(journal_t *journal, | |||
135 | 136 | ||
136 | if (journal->j_flags & JBD2_BARRIER && | 137 | if (journal->j_flags & JBD2_BARRIER && |
137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 138 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 139 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
139 | set_buffer_ordered(bh); | 140 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); |
140 | barrier_done = 1; | 141 | else |
141 | } | ||
142 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
143 | if (barrier_done) | ||
144 | clear_buffer_ordered(bh); | ||
145 | |||
146 | /* is it possible for another commit to fail at roughly | ||
147 | * the same time as this one? If so, we don't want to | ||
148 | * trust the barrier flag in the super, but instead want | ||
149 | * to remember if we sent a barrier request | ||
150 | */ | ||
151 | if (ret == -EOPNOTSUPP && barrier_done) { | ||
152 | printk(KERN_WARNING | ||
153 | "JBD: barrier-based sync failed on %s - " | ||
154 | "disabling barriers\n", journal->j_devname); | ||
155 | spin_lock(&journal->j_state_lock); | ||
156 | journal->j_flags &= ~JBD2_BARRIER; | ||
157 | spin_unlock(&journal->j_state_lock); | ||
158 | |||
159 | /* And try again, without the barrier */ | ||
160 | lock_buffer(bh); | ||
161 | set_buffer_uptodate(bh); | ||
162 | clear_buffer_dirty(bh); | ||
163 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 142 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
164 | } | 143 | |
165 | *cbh = bh; | 144 | *cbh = bh; |
166 | return ret; | 145 | return ret; |
167 | } | 146 | } |
@@ -175,29 +154,8 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
175 | { | 154 | { |
176 | int ret = 0; | 155 | int ret = 0; |
177 | 156 | ||
178 | retry: | ||
179 | clear_buffer_dirty(bh); | 157 | clear_buffer_dirty(bh); |
180 | wait_on_buffer(bh); | 158 | wait_on_buffer(bh); |
181 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | ||
182 | printk(KERN_WARNING | ||
183 | "JBD2: wait_on_commit_record: sync failed on %s - " | ||
184 | "disabling barriers\n", journal->j_devname); | ||
185 | spin_lock(&journal->j_state_lock); | ||
186 | journal->j_flags &= ~JBD2_BARRIER; | ||
187 | spin_unlock(&journal->j_state_lock); | ||
188 | |||
189 | lock_buffer(bh); | ||
190 | clear_buffer_dirty(bh); | ||
191 | set_buffer_uptodate(bh); | ||
192 | bh->b_end_io = journal_end_buffer_io_sync; | ||
193 | |||
194 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
195 | if (ret) { | ||
196 | unlock_buffer(bh); | ||
197 | return ret; | ||
198 | } | ||
199 | goto retry; | ||
200 | } | ||
201 | 159 | ||
202 | if (unlikely(!buffer_uptodate(bh))) | 160 | if (unlikely(!buffer_uptodate(bh))) |
203 | ret = -EIO; | 161 | ret = -EIO; |
@@ -245,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
245 | spin_lock(&journal->j_list_lock); | 203 | spin_lock(&journal->j_list_lock); |
246 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 204 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
247 | mapping = jinode->i_vfs_inode->i_mapping; | 205 | mapping = jinode->i_vfs_inode->i_mapping; |
248 | jinode->i_flags |= JI_COMMIT_RUNNING; | 206 | set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
249 | spin_unlock(&journal->j_list_lock); | 207 | spin_unlock(&journal->j_list_lock); |
250 | /* | 208 | /* |
251 | * submit the inode data buffers. We use writepage | 209 | * submit the inode data buffers. We use writepage |
@@ -260,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
260 | spin_lock(&journal->j_list_lock); | 218 | spin_lock(&journal->j_list_lock); |
261 | J_ASSERT(jinode->i_transaction == commit_transaction); | 219 | J_ASSERT(jinode->i_transaction == commit_transaction); |
262 | commit_transaction->t_flushed_data_blocks = 1; | 220 | commit_transaction->t_flushed_data_blocks = 1; |
263 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | 221 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
222 | smp_mb__after_clear_bit(); | ||
264 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 223 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
265 | } | 224 | } |
266 | spin_unlock(&journal->j_list_lock); | 225 | spin_unlock(&journal->j_list_lock); |
@@ -281,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
281 | /* For locking, see the comment in journal_submit_data_buffers() */ | 240 | /* For locking, see the comment in journal_submit_data_buffers() */ |
282 | spin_lock(&journal->j_list_lock); | 241 | spin_lock(&journal->j_list_lock); |
283 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 242 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
284 | jinode->i_flags |= JI_COMMIT_RUNNING; | 243 | set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
285 | spin_unlock(&journal->j_list_lock); | 244 | spin_unlock(&journal->j_list_lock); |
286 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | 245 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); |
287 | if (err) { | 246 | if (err) { |
@@ -297,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
297 | ret = err; | 256 | ret = err; |
298 | } | 257 | } |
299 | spin_lock(&journal->j_list_lock); | 258 | spin_lock(&journal->j_list_lock); |
300 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | 259 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
260 | smp_mb__after_clear_bit(); | ||
301 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 261 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
302 | } | 262 | } |
303 | 263 | ||
@@ -369,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
369 | int tag_bytes = journal_tag_bytes(journal); | 329 | int tag_bytes = journal_tag_bytes(journal); |
370 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 330 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
371 | __u32 crc32_sum = ~0; | 331 | __u32 crc32_sum = ~0; |
372 | int write_op = WRITE; | 332 | int write_op = WRITE_SYNC; |
373 | 333 | ||
374 | /* | 334 | /* |
375 | * First job: lock down the current transaction and wait for | 335 | * First job: lock down the current transaction and wait for |
@@ -400,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
400 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 360 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
401 | commit_transaction->t_tid); | 361 | commit_transaction->t_tid); |
402 | 362 | ||
403 | spin_lock(&journal->j_state_lock); | 363 | write_lock(&journal->j_state_lock); |
404 | commit_transaction->t_state = T_LOCKED; | 364 | commit_transaction->t_state = T_LOCKED; |
405 | 365 | ||
406 | /* | 366 | /* |
@@ -417,23 +377,23 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
417 | stats.run.rs_locked); | 377 | stats.run.rs_locked); |
418 | 378 | ||
419 | spin_lock(&commit_transaction->t_handle_lock); | 379 | spin_lock(&commit_transaction->t_handle_lock); |
420 | while (commit_transaction->t_updates) { | 380 | while (atomic_read(&commit_transaction->t_updates)) { |
421 | DEFINE_WAIT(wait); | 381 | DEFINE_WAIT(wait); |
422 | 382 | ||
423 | prepare_to_wait(&journal->j_wait_updates, &wait, | 383 | prepare_to_wait(&journal->j_wait_updates, &wait, |
424 | TASK_UNINTERRUPTIBLE); | 384 | TASK_UNINTERRUPTIBLE); |
425 | if (commit_transaction->t_updates) { | 385 | if (atomic_read(&commit_transaction->t_updates)) { |
426 | spin_unlock(&commit_transaction->t_handle_lock); | 386 | spin_unlock(&commit_transaction->t_handle_lock); |
427 | spin_unlock(&journal->j_state_lock); | 387 | write_unlock(&journal->j_state_lock); |
428 | schedule(); | 388 | schedule(); |
429 | spin_lock(&journal->j_state_lock); | 389 | write_lock(&journal->j_state_lock); |
430 | spin_lock(&commit_transaction->t_handle_lock); | 390 | spin_lock(&commit_transaction->t_handle_lock); |
431 | } | 391 | } |
432 | finish_wait(&journal->j_wait_updates, &wait); | 392 | finish_wait(&journal->j_wait_updates, &wait); |
433 | } | 393 | } |
434 | spin_unlock(&commit_transaction->t_handle_lock); | 394 | spin_unlock(&commit_transaction->t_handle_lock); |
435 | 395 | ||
436 | J_ASSERT (commit_transaction->t_outstanding_credits <= | 396 | J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= |
437 | journal->j_max_transaction_buffers); | 397 | journal->j_max_transaction_buffers); |
438 | 398 | ||
439 | /* | 399 | /* |
@@ -497,7 +457,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
497 | start_time = ktime_get(); | 457 | start_time = ktime_get(); |
498 | commit_transaction->t_log_start = journal->j_head; | 458 | commit_transaction->t_log_start = journal->j_head; |
499 | wake_up(&journal->j_wait_transaction_locked); | 459 | wake_up(&journal->j_wait_transaction_locked); |
500 | spin_unlock(&journal->j_state_lock); | 460 | write_unlock(&journal->j_state_lock); |
501 | 461 | ||
502 | jbd_debug (3, "JBD: commit phase 2\n"); | 462 | jbd_debug (3, "JBD: commit phase 2\n"); |
503 | 463 | ||
@@ -519,19 +479,20 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
519 | * transaction! Now comes the tricky part: we need to write out | 479 | * transaction! Now comes the tricky part: we need to write out |
520 | * metadata. Loop over the transaction's entire buffer list: | 480 | * metadata. Loop over the transaction's entire buffer list: |
521 | */ | 481 | */ |
522 | spin_lock(&journal->j_state_lock); | 482 | write_lock(&journal->j_state_lock); |
523 | commit_transaction->t_state = T_COMMIT; | 483 | commit_transaction->t_state = T_COMMIT; |
524 | spin_unlock(&journal->j_state_lock); | 484 | write_unlock(&journal->j_state_lock); |
525 | 485 | ||
526 | trace_jbd2_commit_logging(journal, commit_transaction); | 486 | trace_jbd2_commit_logging(journal, commit_transaction); |
527 | stats.run.rs_logging = jiffies; | 487 | stats.run.rs_logging = jiffies; |
528 | stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, | 488 | stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, |
529 | stats.run.rs_logging); | 489 | stats.run.rs_logging); |
530 | stats.run.rs_blocks = commit_transaction->t_outstanding_credits; | 490 | stats.run.rs_blocks = |
491 | atomic_read(&commit_transaction->t_outstanding_credits); | ||
531 | stats.run.rs_blocks_logged = 0; | 492 | stats.run.rs_blocks_logged = 0; |
532 | 493 | ||
533 | J_ASSERT(commit_transaction->t_nr_buffers <= | 494 | J_ASSERT(commit_transaction->t_nr_buffers <= |
534 | commit_transaction->t_outstanding_credits); | 495 | atomic_read(&commit_transaction->t_outstanding_credits)); |
535 | 496 | ||
536 | err = 0; | 497 | err = 0; |
537 | descriptor = NULL; | 498 | descriptor = NULL; |
@@ -616,7 +577,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
616 | * the free space in the log, but this counter is changed | 577 | * the free space in the log, but this counter is changed |
617 | * by jbd2_journal_next_log_block() also. | 578 | * by jbd2_journal_next_log_block() also. |
618 | */ | 579 | */ |
619 | commit_transaction->t_outstanding_credits--; | 580 | atomic_dec(&commit_transaction->t_outstanding_credits); |
620 | 581 | ||
621 | /* Bump b_count to prevent truncate from stumbling over | 582 | /* Bump b_count to prevent truncate from stumbling over |
622 | the shadowed buffer! @@@ This can go if we ever get | 583 | the shadowed buffer! @@@ This can go if we ever get |
@@ -709,6 +670,16 @@ start_journal_io: | |||
709 | } | 670 | } |
710 | } | 671 | } |
711 | 672 | ||
673 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
674 | if (err) { | ||
675 | printk(KERN_WARNING | ||
676 | "JBD2: Detected IO errors while flushing file data " | ||
677 | "on %s\n", journal->j_devname); | ||
678 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
679 | jbd2_journal_abort(journal, err); | ||
680 | err = 0; | ||
681 | } | ||
682 | |||
712 | /* | 683 | /* |
713 | * If the journal is not located on the file system device, | 684 | * If the journal is not located on the file system device, |
714 | * then we must flush the file system device before we issue | 685 | * then we must flush the file system device before we issue |
@@ -717,8 +688,7 @@ start_journal_io: | |||
717 | if (commit_transaction->t_flushed_data_blocks && | 688 | if (commit_transaction->t_flushed_data_blocks && |
718 | (journal->j_fs_dev != journal->j_dev) && | 689 | (journal->j_fs_dev != journal->j_dev) && |
719 | (journal->j_flags & JBD2_BARRIER)) | 690 | (journal->j_flags & JBD2_BARRIER)) |
720 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, | 691 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
721 | BLKDEV_IFL_WAIT); | ||
722 | 692 | ||
723 | /* Done it all: now write the commit record asynchronously. */ | 693 | /* Done it all: now write the commit record asynchronously. */ |
724 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 694 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -727,19 +697,6 @@ start_journal_io: | |||
727 | &cbh, crc32_sum); | 697 | &cbh, crc32_sum); |
728 | if (err) | 698 | if (err) |
729 | __jbd2_journal_abort_hard(journal); | 699 | __jbd2_journal_abort_hard(journal); |
730 | if (journal->j_flags & JBD2_BARRIER) | ||
731 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, | ||
732 | BLKDEV_IFL_WAIT); | ||
733 | } | ||
734 | |||
735 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
736 | if (err) { | ||
737 | printk(KERN_WARNING | ||
738 | "JBD2: Detected IO errors while flushing file data " | ||
739 | "on %s\n", journal->j_devname); | ||
740 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
741 | jbd2_journal_abort(journal, err); | ||
742 | err = 0; | ||
743 | } | 700 | } |
744 | 701 | ||
745 | /* Lo and behold: we have just managed to send a transaction to | 702 | /* Lo and behold: we have just managed to send a transaction to |
@@ -853,6 +810,11 @@ wait_for_iobuf: | |||
853 | } | 810 | } |
854 | if (!err && !is_journal_aborted(journal)) | 811 | if (!err && !is_journal_aborted(journal)) |
855 | err = journal_wait_on_commit_record(journal, cbh); | 812 | err = journal_wait_on_commit_record(journal, cbh); |
813 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
814 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && | ||
815 | journal->j_flags & JBD2_BARRIER) { | ||
816 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); | ||
817 | } | ||
856 | 818 | ||
857 | if (err) | 819 | if (err) |
858 | jbd2_journal_abort(journal, err); | 820 | jbd2_journal_abort(journal, err); |
@@ -977,7 +939,7 @@ restart_loop: | |||
977 | * __jbd2_journal_drop_transaction(). Otherwise we could race with | 939 | * __jbd2_journal_drop_transaction(). Otherwise we could race with |
978 | * other checkpointing code processing the transaction... | 940 | * other checkpointing code processing the transaction... |
979 | */ | 941 | */ |
980 | spin_lock(&journal->j_state_lock); | 942 | write_lock(&journal->j_state_lock); |
981 | spin_lock(&journal->j_list_lock); | 943 | spin_lock(&journal->j_list_lock); |
982 | /* | 944 | /* |
983 | * Now recheck if some buffers did not get attached to the transaction | 945 | * Now recheck if some buffers did not get attached to the transaction |
@@ -985,7 +947,7 @@ restart_loop: | |||
985 | */ | 947 | */ |
986 | if (commit_transaction->t_forget) { | 948 | if (commit_transaction->t_forget) { |
987 | spin_unlock(&journal->j_list_lock); | 949 | spin_unlock(&journal->j_list_lock); |
988 | spin_unlock(&journal->j_state_lock); | 950 | write_unlock(&journal->j_state_lock); |
989 | goto restart_loop; | 951 | goto restart_loop; |
990 | } | 952 | } |
991 | 953 | ||
@@ -1003,7 +965,8 @@ restart_loop: | |||
1003 | * File the transaction statistics | 965 | * File the transaction statistics |
1004 | */ | 966 | */ |
1005 | stats.ts_tid = commit_transaction->t_tid; | 967 | stats.ts_tid = commit_transaction->t_tid; |
1006 | stats.run.rs_handle_count = commit_transaction->t_handle_count; | 968 | stats.run.rs_handle_count = |
969 | atomic_read(&commit_transaction->t_handle_count); | ||
1007 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, | 970 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, |
1008 | commit_transaction->t_tid, &stats.run); | 971 | commit_transaction->t_tid, &stats.run); |
1009 | 972 | ||
@@ -1037,7 +1000,7 @@ restart_loop: | |||
1037 | journal->j_average_commit_time*3) / 4; | 1000 | journal->j_average_commit_time*3) / 4; |
1038 | else | 1001 | else |
1039 | journal->j_average_commit_time = commit_time; | 1002 | journal->j_average_commit_time = commit_time; |
1040 | spin_unlock(&journal->j_state_lock); | 1003 | write_unlock(&journal->j_state_lock); |
1041 | 1004 | ||
1042 | if (commit_transaction->t_checkpoint_list == NULL && | 1005 | if (commit_transaction->t_checkpoint_list == NULL && |
1043 | commit_transaction->t_checkpoint_io_list == NULL) { | 1006 | commit_transaction->t_checkpoint_io_list == NULL) { |