diff options
author | Andrea Bastoni <bastoni@cs.unc.edu> | 2011-08-27 09:43:54 -0400 |
---|---|---|
committer | Andrea Bastoni <bastoni@cs.unc.edu> | 2011-08-27 10:06:11 -0400 |
commit | 7b1bb388bc879ffcc6c69b567816d5c354afe42b (patch) | |
tree | 5a217fdfb0b5e5a327bdcd624506337c1ae1fe32 /fs/jbd/commit.c | |
parent | 7d754596756240fa918b94cd0c3011c77a638987 (diff) | |
parent | 02f8c6aee8df3cdc935e9bdd4f2d020306035dbe (diff) |
Merge 'Linux v3.0' into Litmus
Some notes:
* Litmus^RT scheduling class is the topmost scheduling class
(above stop_sched_class).
* scheduler_ipi() function (e.g., in smp_reschedule_interrupt())
may increase IPI latencies.
* Added a path into schedule() to quickly re-evaluate the scheduling
  decision without becoming preemptible again. This used to be
  a standard path before the removal of the BKL.
Conflicts:
Makefile
arch/arm/kernel/calls.S
arch/arm/kernel/smp.c
arch/x86/include/asm/unistd_32.h
arch/x86/kernel/smp.c
arch/x86/kernel/syscall_table_32.S
include/linux/hrtimer.h
kernel/printk.c
kernel/sched.c
kernel/sched_fair.c
Diffstat (limited to 'fs/jbd/commit.c')
-rw-r--r-- | fs/jbd/commit.c | 77 |
1 files changed, 26 insertions, 51 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 95d8c11c929e..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/blkdev.h> | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Default IO end handler for temporary BJ_IO buffer_heads. | 26 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -137,34 +138,10 @@ static int journal_write_commit_record(journal_t *journal, | |||
137 | JBUFFER_TRACE(descriptor, "write commit block"); | 138 | JBUFFER_TRACE(descriptor, "write commit block"); |
138 | set_buffer_dirty(bh); | 139 | set_buffer_dirty(bh); |
139 | 140 | ||
140 | if (journal->j_flags & JFS_BARRIER) { | 141 | if (journal->j_flags & JFS_BARRIER) |
141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); | 142 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA); |
142 | 143 | else | |
143 | /* | ||
144 | * Is it possible for another commit to fail at roughly | ||
145 | * the same time as this one? If so, we don't want to | ||
146 | * trust the barrier flag in the super, but instead want | ||
147 | * to remember if we sent a barrier request | ||
148 | */ | ||
149 | if (ret == -EOPNOTSUPP) { | ||
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | ||
153 | "JBD: barrier-based sync failed on %s - " | ||
154 | "disabling barriers\n", | ||
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | ||
157 | journal->j_flags &= ~JFS_BARRIER; | ||
158 | spin_unlock(&journal->j_state_lock); | ||
159 | |||
160 | /* And try again, without the barrier */ | ||
161 | set_buffer_uptodate(bh); | ||
162 | set_buffer_dirty(bh); | ||
163 | ret = sync_dirty_buffer(bh); | ||
164 | } | ||
165 | } else { | ||
166 | ret = sync_dirty_buffer(bh); | 144 | ret = sync_dirty_buffer(bh); |
167 | } | ||
168 | 145 | ||
169 | put_bh(bh); /* One for getblk() */ | 146 | put_bh(bh); /* One for getblk() */ |
170 | journal_put_journal_head(descriptor); | 147 | journal_put_journal_head(descriptor); |
@@ -318,19 +295,13 @@ void journal_commit_transaction(journal_t *journal) | |||
318 | int first_tag = 0; | 295 | int first_tag = 0; |
319 | int tag_flag; | 296 | int tag_flag; |
320 | int i; | 297 | int i; |
321 | int write_op = WRITE; | 298 | struct blk_plug plug; |
322 | 299 | ||
323 | /* | 300 | /* |
324 | * First job: lock down the current transaction and wait for | 301 | * First job: lock down the current transaction and wait for |
325 | * all outstanding updates to complete. | 302 | * all outstanding updates to complete. |
326 | */ | 303 | */ |
327 | 304 | ||
328 | #ifdef COMMIT_STATS | ||
329 | spin_lock(&journal->j_list_lock); | ||
330 | summarise_journal_usage(journal); | ||
331 | spin_unlock(&journal->j_list_lock); | ||
332 | #endif | ||
333 | |||
334 | /* Do we need to erase the effects of a prior journal_flush? */ | 305 | /* Do we need to erase the effects of a prior journal_flush? */ |
335 | if (journal->j_flags & JFS_FLUSHED) { | 306 | if (journal->j_flags & JFS_FLUSHED) { |
336 | jbd_debug(3, "super block updated\n"); | 307 | jbd_debug(3, "super block updated\n"); |
@@ -351,13 +322,6 @@ void journal_commit_transaction(journal_t *journal) | |||
351 | spin_lock(&journal->j_state_lock); | 322 | spin_lock(&journal->j_state_lock); |
352 | commit_transaction->t_state = T_LOCKED; | 323 | commit_transaction->t_state = T_LOCKED; |
353 | 324 | ||
354 | /* | ||
355 | * Use plugged writes here, since we want to submit several before | ||
356 | * we unplug the device. We don't do explicit unplugging in here, | ||
357 | * instead we rely on sync_buffer() doing the unplug for us. | ||
358 | */ | ||
359 | if (commit_transaction->t_synchronous_commit) | ||
360 | write_op = WRITE_SYNC_PLUG; | ||
361 | spin_lock(&commit_transaction->t_handle_lock); | 325 | spin_lock(&commit_transaction->t_handle_lock); |
362 | while (commit_transaction->t_updates) { | 326 | while (commit_transaction->t_updates) { |
363 | DEFINE_WAIT(wait); | 327 | DEFINE_WAIT(wait); |
@@ -392,7 +356,7 @@ void journal_commit_transaction(journal_t *journal) | |||
392 | * we do not require it to remember exactly which old buffers it | 356 | * we do not require it to remember exactly which old buffers it |
393 | * has reserved. This is consistent with the existing behaviour | 357 | * has reserved. This is consistent with the existing behaviour |
394 | * that multiple journal_get_write_access() calls to the same | 358 | * that multiple journal_get_write_access() calls to the same |
395 | * buffer are perfectly permissable. | 359 | * buffer are perfectly permissible. |
396 | */ | 360 | */ |
397 | while (commit_transaction->t_reserved_list) { | 361 | while (commit_transaction->t_reserved_list) { |
398 | jh = commit_transaction->t_reserved_list; | 362 | jh = commit_transaction->t_reserved_list; |
@@ -442,8 +406,10 @@ void journal_commit_transaction(journal_t *journal) | |||
442 | * Now start flushing things to disk, in the order they appear | 406 | * Now start flushing things to disk, in the order they appear |
443 | * on the transaction lists. Data blocks go first. | 407 | * on the transaction lists. Data blocks go first. |
444 | */ | 408 | */ |
409 | blk_start_plug(&plug); | ||
445 | err = journal_submit_data_buffers(journal, commit_transaction, | 410 | err = journal_submit_data_buffers(journal, commit_transaction, |
446 | write_op); | 411 | WRITE_SYNC); |
412 | blk_finish_plug(&plug); | ||
447 | 413 | ||
448 | /* | 414 | /* |
449 | * Wait for all previously submitted IO to complete. | 415 | * Wait for all previously submitted IO to complete. |
@@ -504,7 +470,9 @@ void journal_commit_transaction(journal_t *journal) | |||
504 | err = 0; | 470 | err = 0; |
505 | } | 471 | } |
506 | 472 | ||
507 | journal_write_revoke_records(journal, commit_transaction, write_op); | 473 | blk_start_plug(&plug); |
474 | |||
475 | journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC); | ||
508 | 476 | ||
509 | /* | 477 | /* |
510 | * If we found any dirty or locked buffers, then we should have | 478 | * If we found any dirty or locked buffers, then we should have |
@@ -611,13 +579,13 @@ void journal_commit_transaction(journal_t *journal) | |||
611 | /* Bump b_count to prevent truncate from stumbling over | 579 | /* Bump b_count to prevent truncate from stumbling over |
612 | the shadowed buffer! @@@ This can go if we ever get | 580 | the shadowed buffer! @@@ This can go if we ever get |
613 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 581 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ |
614 | atomic_inc(&jh2bh(jh)->b_count); | 582 | get_bh(jh2bh(jh)); |
615 | 583 | ||
616 | /* Make a temporary IO buffer with which to write it out | 584 | /* Make a temporary IO buffer with which to write it out |
617 | (this will requeue both the metadata buffer and the | 585 | (this will requeue both the metadata buffer and the |
618 | temporary IO buffer). new_bh goes on BJ_IO*/ | 586 | temporary IO buffer). new_bh goes on BJ_IO*/ |
619 | 587 | ||
620 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | 588 | set_buffer_jwrite(jh2bh(jh)); |
621 | /* | 589 | /* |
622 | * akpm: journal_write_metadata_buffer() sets | 590 | * akpm: journal_write_metadata_buffer() sets |
623 | * new_bh->b_transaction to commit_transaction. | 591 | * new_bh->b_transaction to commit_transaction. |
@@ -627,7 +595,7 @@ void journal_commit_transaction(journal_t *journal) | |||
627 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 595 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
628 | flags = journal_write_metadata_buffer(commit_transaction, | 596 | flags = journal_write_metadata_buffer(commit_transaction, |
629 | jh, &new_jh, blocknr); | 597 | jh, &new_jh, blocknr); |
630 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 598 | set_buffer_jwrite(jh2bh(new_jh)); |
631 | wbuf[bufs++] = jh2bh(new_jh); | 599 | wbuf[bufs++] = jh2bh(new_jh); |
632 | 600 | ||
633 | /* Record the new block's tag in the current descriptor | 601 | /* Record the new block's tag in the current descriptor |
@@ -674,7 +642,7 @@ start_journal_io: | |||
674 | clear_buffer_dirty(bh); | 642 | clear_buffer_dirty(bh); |
675 | set_buffer_uptodate(bh); | 643 | set_buffer_uptodate(bh); |
676 | bh->b_end_io = journal_end_buffer_io_sync; | 644 | bh->b_end_io = journal_end_buffer_io_sync; |
677 | submit_bh(write_op, bh); | 645 | submit_bh(WRITE_SYNC, bh); |
678 | } | 646 | } |
679 | cond_resched(); | 647 | cond_resched(); |
680 | 648 | ||
@@ -685,6 +653,8 @@ start_journal_io: | |||
685 | } | 653 | } |
686 | } | 654 | } |
687 | 655 | ||
656 | blk_finish_plug(&plug); | ||
657 | |||
688 | /* Lo and behold: we have just managed to send a transaction to | 658 | /* Lo and behold: we have just managed to send a transaction to |
689 | the log. Before we can commit it, wait for the IO so far to | 659 | the log. Before we can commit it, wait for the IO so far to |
690 | complete. Control buffers being written are on the | 660 | complete. Control buffers being written are on the |
@@ -737,7 +707,7 @@ wait_for_iobuf: | |||
737 | shadowed buffer */ | 707 | shadowed buffer */ |
738 | jh = commit_transaction->t_shadow_list->b_tprev; | 708 | jh = commit_transaction->t_shadow_list->b_tprev; |
739 | bh = jh2bh(jh); | 709 | bh = jh2bh(jh); |
740 | clear_bit(BH_JWrite, &bh->b_state); | 710 | clear_buffer_jwrite(bh); |
741 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 711 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
742 | 712 | ||
743 | /* The metadata is now released for reuse, but we need | 713 | /* The metadata is now released for reuse, but we need |
@@ -746,8 +716,13 @@ wait_for_iobuf: | |||
746 | required. */ | 716 | required. */ |
747 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 717 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
748 | journal_file_buffer(jh, commit_transaction, BJ_Forget); | 718 | journal_file_buffer(jh, commit_transaction, BJ_Forget); |
749 | /* Wake up any transactions which were waiting for this | 719 | /* |
750 | IO to complete */ | 720 | * Wake up any transactions which were waiting for this |
721 | * IO to complete. The barrier must be here so that changes | ||
722 | * by journal_file_buffer() take effect before wake_up_bit() | ||
723 | * does the waitqueue check. | ||
724 | */ | ||
725 | smp_mb(); | ||
751 | wake_up_bit(&bh->b_state, BH_Unshadow); | 726 | wake_up_bit(&bh->b_state, BH_Unshadow); |
752 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 727 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
753 | __brelse(bh); | 728 | __brelse(bh); |