author    Linus Torvalds <torvalds@linux-foundation.org>  2010-07-08 11:06:40 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2010-07-08 11:06:40 -0400
commit    c77e9e68269bf50573ce130b5d95ff6e539dbcf8
tree      99edc1b070717e18f5958f934bc3921cfed2ce1f
parent    31016e9941be85e8c487b1f3c45d253eda356715
parent    83ba7b071f30f7c01f72518ad72d5cd203c27502
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  writeback: simplify the write back thread queue
  writeback: split writeback_inodes_wb
  writeback: remove writeback_inodes_wbc
  fs-writeback: fix kernel-doc warnings
  splice: check f_mode for seekable file
  splice: direct_splice_actor() should not use pos in sd
 fs/afs/write.c              |   1
 fs/btrfs/extent_io.c        |   2
 fs/fs-writeback.c           | 335
 fs/splice.c                 |   9
 include/linux/backing-dev.h |   2
 include/linux/writeback.h   |   7
 mm/backing-dev.c            |  17
 mm/page-writeback.c         |   3
 8 files changed, 122 insertions, 254 deletions
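The bulk of this merge is the work-queue rewrite in fs/fs-writeback.c: the RCU-managed bdi_work, with its per-thread "seen" bitmask and atomic "pending" refcount, becomes a plain wb_writeback_work on a spinlock-protected list, and callers that must wait simply block on a completion. With exactly one flusher thread per bdi (the removed wb_mask/wb_cnt were hardcoded to 1), the broadcast bookkeeping bought nothing. The following is a minimal userspace analogue of the new handshake, for illustration only; pthreads stand in for the kernel's spinlock and struct completion, and the names merely mirror the patch below.

/*
 * Userspace sketch of the simplified flusher work queue (assumption:
 * POSIX threads; this is not kernel code).
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool done;
};

struct wb_work {
	long nr_pages;
	struct completion *done;	/* set if the caller waits */
	struct wb_work *next;		/* pending work list */
};

static pthread_mutex_t wb_lock = PTHREAD_MUTEX_INITIALIZER;
static struct wb_work *work_head, *work_tail;

/* bdi_queue_work(): take the lock, link the item, done - no RCU,
 * no ->seen bitmask, no atomic ->pending refcount. */
static void queue_work(struct wb_work *work)
{
	pthread_mutex_lock(&wb_lock);
	work->next = NULL;
	if (work_tail)
		work_tail->next = work;
	else
		work_head = work;
	work_tail = work;
	pthread_mutex_unlock(&wb_lock);
}

/* get_next_work_item(): pop the head under the same lock. */
static struct wb_work *get_next_work_item(void)
{
	pthread_mutex_lock(&wb_lock);
	struct wb_work *work = work_head;
	if (work && !(work_head = work->next))
		work_tail = NULL;
	pthread_mutex_unlock(&wb_lock);
	return work;
}

/* wb_do_writeback(): complete() synchronous items, free the rest. */
static void *flusher(void *unused)
{
	struct wb_work *work;

	(void)unused;
	while ((work = get_next_work_item()) != NULL) {
		printf("writing back %ld pages\n", work->nr_pages);
		if (work->done) {
			pthread_mutex_lock(&work->done->lock);
			work->done->done = true;
			pthread_cond_signal(&work->done->cond);
			pthread_mutex_unlock(&work->done->lock);
		} else {
			free(work);	/* asynchronous item */
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t thread;

	/* Asynchronous caller, as in __bdi_start_writeback(). */
	struct wb_work *async = calloc(1, sizeof(*async));
	if (!async)
		return 1;
	async->nr_pages = 256;
	queue_work(async);

	/* Synchronous caller, as in writeback_inodes_sb(): the work item
	 * lives on the stack and a completion replaces the old on-stack
	 * bdi_work plus wait_on_bit() dance. */
	struct completion done = { PTHREAD_MUTEX_INITIALIZER,
				   PTHREAD_COND_INITIALIZER, false };
	struct wb_work work = { .nr_pages = 1024, .done = &done };

	queue_work(&work);
	pthread_create(&thread, NULL, flusher, NULL);

	pthread_mutex_lock(&done.lock);
	while (!done.done)
		pthread_cond_wait(&done.cond, &done.lock);
	pthread_mutex_unlock(&done.lock);

	pthread_join(thread, NULL);
	return 0;
}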
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 {
 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_cyclic = 1,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi = wbc->bdi,
 		.sync_mode = wbc->sync_mode,
 		.older_than_this = NULL,
 		.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi = inode->i_mapping->backing_dev_info,
 		.sync_mode = mode,
 		.older_than_this = NULL,
 		.nr_to_write = nr_pages * 2,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0609607d3955..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,43 +38,18 @@ int nr_pdflush_threads;
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
-struct wb_writeback_args {
+struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
-};
 
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
 	struct list_head list; /* pending work list */
-	struct rcu_head rcu_head; /* for RCU free/clear of work */
-
-	unsigned long seen; /* threads that have seen this work */
-	atomic_t pending; /* number of threads still to do work */
-
-	struct wb_writeback_args args; /* writeback arguments */
-
-	unsigned long state; /* flag bits, see WS_* */
+	struct completion *done; /* set if the caller waits */
 };
 
-enum {
-	WS_INPROGRESS = 0,
-	WS_ONSTACK,
-};
-
-static inline void bdi_work_init(struct bdi_work *work,
-				 struct wb_writeback_args *args)
-{
-	INIT_RCU_HEAD(&work->rcu_head);
-	work->args = *args;
-	__set_bit(WS_INPROGRESS, &work->state);
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_free(struct rcu_head *head)
-{
-	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
-	clear_bit(WS_INPROGRESS, &work->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&work->state, WS_INPROGRESS);
-
-	if (!test_bit(WS_ONSTACK, &work->state))
-		kfree(work);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+		struct wb_writeback_work *work)
 {
-	/*
-	 * The caller has retrieved the work arguments from this work,
-	 * drop our reference. If this is the last ref, delete and free it
-	 */
-	if (atomic_dec_and_test(&work->pending)) {
-		struct backing_dev_info *bdi = wb->bdi;
-
-		spin_lock(&bdi->wb_lock);
-		list_del_rcu(&work->list);
-		spin_unlock(&bdi->wb_lock);
-
-		call_rcu(&work->rcu_head, bdi_work_free);
-	}
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
-{
-	work->seen = bdi->wb_mask;
-	BUG_ON(!work->seen);
-	atomic_set(&work->pending, bdi->wb_cnt);
-	BUG_ON(!bdi->wb_cnt);
-
-	/*
-	 * list_add_tail_rcu() contains the necessary barriers to
-	 * make sure the above stores are seen before the item is
-	 * noticed on the list
-	 */
 	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&work->list, &bdi->work_list);
+	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
 
 	/*
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
 	}
 }
 
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_done(struct bdi_work *work)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+		bool range_cyclic, bool for_background)
 {
-	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct wb_writeback_args *args)
-{
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 
 	/*
 	 * This is WB_SYNC_NONE writeback, so if allocation fails just
 	 * wakeup the thread for old dirty data writeback
 	 */
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		bdi_work_init(work, args);
-		bdi_queue_work(bdi, work);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		if (bdi->wb.task)
+			wake_up_process(bdi->wb.task);
+		return;
 	}
-}
 
-/**
- * bdi_queue_work_onstack - start and wait for writeback
- * @sb: write inodes from this super_block
- *
- * Description:
- *   This function initiates writeback and waits for the operation to
- *   complete. Callers must hold the sb s_umount semaphore for
- *   reading, to avoid having the super disappear before we are done.
- */
-static void bdi_queue_work_onstack(struct wb_writeback_args *args)
-{
-	struct bdi_work work;
+	work->sync_mode = WB_SYNC_NONE;
+	work->nr_pages = nr_pages;
+	work->range_cyclic = range_cyclic;
+	work->for_background = for_background;
 
-	bdi_work_init(&work, args);
-	__set_bit(WS_ONSTACK, &work.state);
-
-	bdi_queue_work(args->sb->s_bdi, &work);
-	bdi_wait_on_work_done(&work);
+	bdi_queue_work(bdi, work);
 }
 
 /**
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	struct wb_writeback_args args = {
-		.sync_mode = WB_SYNC_NONE,
-		.nr_pages = nr_pages,
-		.range_cyclic = 1,
-	};
-
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, nr_pages, true, false);
 }
 
 /**
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	struct wb_writeback_args args = {
-		.sync_mode = WB_SYNC_NONE,
-		.nr_pages = LONG_MAX,
-		.for_background = 1,
-		.range_cyclic = 1,
-	};
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, LONG_MAX, true, true);
 }
 
 /*
@@ -554,29 +453,41 @@ static bool pin_sb_for_writeback(struct super_block *sb)
 
 /*
  * Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
+ *
  * Return 1, if the caller writeback routine should be
  * interrupted. Otherwise return 0.
  */
-static int writeback_sb_inodes(struct super_block *sb,
-			       struct bdi_writeback *wb,
-			       struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+		struct writeback_control *wbc, bool only_this_sb)
 {
 	while (!list_empty(&wb->b_io)) {
 		long pages_skipped;
 		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
-		if (wbc->sb && sb != inode->i_sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		if (sb != inode->i_sb)
-			/* finish with this superblock */
+
+		if (inode->i_sb != sb) {
+			if (only_this_sb) {
+				/*
+				 * We only want to write back data for this
+				 * superblock, move all inodes not belonging
+				 * to it back onto the dirty list.
+				 */
+				redirty_tail(inode);
+				continue;
+			}
+
+			/*
+			 * The inode belongs to a different superblock.
+			 * Bounce back to the caller to unpin this and
+			 * pin the next superblock.
+			 */
 			return 0;
+		}
+
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
@@ -614,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
 	return 1;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+			 struct writeback_control *wbc)
 {
 	int ret = 0;
 
@@ -629,29 +540,12 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
					struct inode, i_list);
 		struct super_block *sb = inode->i_sb;
 
-		if (wbc->sb) {
-			/*
-			 * We are requested to write out inodes for a specific
-			 * superblock. This means we already have s_umount
-			 * taken by the caller which also waits for us to
-			 * complete the writeout.
-			 */
-			if (sb != wbc->sb) {
-				redirty_tail(inode);
-				continue;
-			}
-
-			WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
-			ret = writeback_sb_inodes(sb, wb, wbc);
-		} else {
-			if (!pin_sb_for_writeback(sb)) {
-				requeue_io(inode);
-				continue;
-			}
-			ret = writeback_sb_inodes(sb, wb, wbc);
-			drop_super(sb);
+		if (!pin_sb_for_writeback(sb)) {
+			requeue_io(inode);
+			continue;
 		}
+		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		drop_super(sb);
 
 		if (ret)
			break;
@@ -660,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 	/* Leave any unwritten inodes on b_io */
 }
 
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+		struct bdi_writeback *wb, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = wbc->bdi;
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	writeback_inodes_wb(&bdi->wb, wbc);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+	writeback_sb_inodes(sb, wb, wbc, true);
+	spin_unlock(&inode_lock);
 }
 
 /*
@@ -702,16 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-			 struct wb_writeback_args *args)
+			 struct wb_writeback_work *work)
 {
 	struct writeback_control wbc = {
-		.bdi = wb->bdi,
-		.sb = args->sb,
-		.sync_mode = args->sync_mode,
+		.sync_mode = work->sync_mode,
 		.older_than_this = NULL,
-		.for_kupdate = args->for_kupdate,
-		.for_background = args->for_background,
-		.range_cyclic = args->range_cyclic,
+		.for_kupdate = work->for_kupdate,
+		.for_background = work->for_background,
+		.range_cyclic = work->range_cyclic,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
@@ -731,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
 		/*
 		 * Stop writeback when nr_pages has been consumed
 		 */
-		if (args->nr_pages <= 0)
+		if (work->nr_pages <= 0)
 			break;
 
 		/*
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
-		if (args->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		writeback_inodes_wb(wb, &wbc);
-		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		if (work->sb)
+			__writeback_inodes_sb(work->sb, wb, &wbc);
+		else
+			writeback_inodes_wb(wb, &wbc);
+		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
@@ -781,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
 }
 
 /*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
-					   struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
 {
-	struct bdi_work *work, *ret = NULL;
-
-	rcu_read_lock();
+	struct wb_writeback_work *work = NULL;
 
-	list_for_each_entry_rcu(work, &bdi->work_list, list) {
-		if (!test_bit(wb->nr, &work->seen))
-			continue;
-		clear_bit(wb->nr, &work->seen);
-
-		ret = work;
-		break;
+	spin_lock(&bdi->wb_lock);
+	if (!list_empty(&bdi->work_list)) {
+		work = list_entry(bdi->work_list.next,
+				  struct wb_writeback_work, list);
+		list_del_init(&work->list);
 	}
-
-	rcu_read_unlock();
-	return ret;
+	spin_unlock(&bdi->wb_lock);
+	return work;
 }
 
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -830,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
		(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
 	if (nr_pages) {
-		struct wb_writeback_args args = {
+		struct wb_writeback_work work = {
 			.nr_pages = nr_pages,
 			.sync_mode = WB_SYNC_NONE,
 			.for_kupdate = 1,
 			.range_cyclic = 1,
 		};
 
-		return wb_writeback(wb, &args);
+		return wb_writeback(wb, &work);
 	}
 
 	return 0;
@@ -849,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 	long wrote = 0;
 
 	while ((work = get_next_work_item(bdi, wb)) != NULL) {
-		struct wb_writeback_args args = work->args;
-
 		/*
 		 * Override sync mode, in case we must wait for completion
+		 * because this thread is exiting now.
 		 */
 		if (force_wait)
-			work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
-		/*
-		 * If this isn't a data integrity operation, just notify
-		 * that we have seen this work and we are now starting it.
-		 */
-		if (!test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
+			work->sync_mode = WB_SYNC_ALL;
 
-		wrote += wb_writeback(wb, &args);
+		wrote += wb_writeback(wb, work);
 
 		/*
-		 * This is a data integrity writeback, so only do the
-		 * notification when we have completed the work.
+		 * Notify the caller of completion if this is a synchronous
+		 * work item, otherwise just free it.
 		 */
-		if (test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
+		if (work->done)
+			complete(work->done);
+		else
+			kfree(work);
 	}
 
 	/*
@@ -938,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 void wakeup_flusher_threads(long nr_pages)
 {
 	struct backing_dev_info *bdi;
-	struct wb_writeback_args args = {
-		.sync_mode = WB_SYNC_NONE,
-	};
 
-	if (nr_pages) {
-		args.nr_pages = nr_pages;
-	} else {
-		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+	if (!nr_pages) {
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
				global_page_state(NR_UNSTABLE_NFS);
 	}
 
@@ -953,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		bdi_alloc_queue_work(bdi, &args);
+		__bdi_start_writeback(bdi, nr_pages, false, false);
 	}
 	rcu_read_unlock();
 }
@@ -1162,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb = sb,
 		.sync_mode = WB_SYNC_NONE,
+		.done = &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	args.nr_pages = nr_dirty + nr_unstable +
+	work.nr_pages = nr_dirty + nr_unstable +
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1204,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb = sb,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_pages = LONG_MAX,
 		.range_cyclic = 0,
+		.done = &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
+
 	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
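Note the wbc->wb_start = jiffies stamp in the new __writeback_inodes_sb() helper, annotated "livelock avoidance": by recording when the pass began, writeback can defer inodes that are redirtied after that point, so a task that keeps dirtying data cannot pin the flusher in a single pass forever. Below is a compressed userspace illustration of the idea with hypothetical stand-in types; in the kernel the equivalent test is inode_dirtied_after() against wbc->wb_start inside writeback_sb_inodes().

/* Userspace sketch of the wb_start livelock guard (illustration only). */
#include <stdio.h>
#include <time.h>

struct fake_inode {
	unsigned long dirtied_when;	/* stand-in for a jiffies stamp */
	struct fake_inode *next;
};

static void writeback_pass(struct fake_inode *dirty_list)
{
	unsigned long wb_start = (unsigned long)time(NULL); /* wbc->wb_start */
	struct fake_inode *inode;

	for (inode = dirty_list; inode; inode = inode->next) {
		/*
		 * Defer anything dirtied after this pass began to the next
		 * pass; otherwise a task dirtying inodes as fast as we clean
		 * them keeps this loop busy forever. (The kernel compares
		 * jiffies with time_after(), so counter wraparound is safe.)
		 */
		if (inode->dirtied_when > wb_start)
			continue;
		printf("writing inode dirtied at %lu\n", inode->dirtied_when);
	}
}

int main(void)
{
	struct fake_inode newer = { (unsigned long)time(NULL) + 60, NULL };
	struct fake_inode older = { (unsigned long)time(NULL) - 60, &newer };

	writeback_pass(&older);	/* writes "older", defers "newer" */
	return 0;
}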
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7a..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
 {
 	struct file *file = sd->u.file;
 
-	return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+	return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+			      sd->flags);
 }
 
 /**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
-			if (!out->f_op || !out->f_op->llseek ||
-			    out->f_op->llseek == no_llseek)
+			if (!(out->f_mode & FMODE_PWRITE))
 				return -EINVAL;
 			if (copy_from_user(&offset, off_out, sizeof(loff_t)))
 				return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
-			if (!in->f_op || !in->f_op->llseek ||
-			    in->f_op->llseek == no_llseek)
+			if (!(in->f_mode & FMODE_PREAD))
 				return -EINVAL;
 			if (copy_from_user(&offset, off_in, sizeof(loff_t)))
 				return -EFAULT;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9ae2889096b6..e9aec0d099df 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -82,8 +82,6 @@ struct backing_dev_info {
 	struct bdi_writeback wb;  /* default writeback info for this bdi */
 	spinlock_t wb_lock;	  /* protects update side of wb_list */
 	struct list_head wb_list; /* the flusher threads hanging off this bdi */
-	unsigned long wb_mask;	  /* bitmask of registered tasks */
-	unsigned int wb_cnt;	  /* number of registered tasks */
 
 	struct list_head work_list;
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d63ef8f9609f..c24eca71e80c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -27,10 +27,6 @@ enum writeback_sync_modes {
  * in a manner such that unspecified fields are set to zero.
  */
 struct writeback_control {
-	struct backing_dev_info *bdi;	/* If !NULL, only write back this
-					   queue */
-	struct super_block *sb;		/* if !NULL, only write inodes from
-					   this super_block */
 	enum writeback_sync_modes sync_mode;
 	unsigned long *older_than_this;	/* If !NULL, only write back inodes
					   older than this */
@@ -66,7 +62,8 @@ int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
 int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
-void writeback_inodes_wbc(struct writeback_control *wbc);
+void writeback_inodes_wb(struct bdi_writeback *wb,
+		struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages);
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a22511..123bcef13e51 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
		   "b_more_io: %8lu\n"
		   "bdi_list: %8u\n"
		   "state: %8lx\n"
-		   "wb_mask: %8lx\n"
-		   "wb_list: %8u\n"
-		   "wb_cnt: %8u\n",
+		   "wb_list: %8u\n",
		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
		   K(bdi_thresh), K(dirty_thresh),
		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
-		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
-		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
+		   !list_empty(&bdi->bdi_list), bdi->state,
+		   !list_empty(&bdi->wb_list));
 #undef K
 
 	return 0;
@@ -340,14 +338,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 static void bdi_flush_io(struct backing_dev_info *bdi)
 {
 	struct writeback_control wbc = {
-		.bdi = bdi,
 		.sync_mode = WB_SYNC_NONE,
 		.older_than_this = NULL,
 		.range_cyclic = 1,
 		.nr_to_write = 1024,
 	};
 
-	writeback_inodes_wbc(&wbc);
+	writeback_inodes_wb(&bdi->wb, &wbc);
 }
 
 /*
@@ -675,12 +672,6 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
-	/*
-	 * Just one thread support for now, hard code mask and count
-	 */
-	bdi->wb_mask = 1;
-	bdi->wb_cnt = 1;
-
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 54f28bd493d3..37498ef61548 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
 
 	for (;;) {
 		struct writeback_control wbc = {
-			.bdi = bdi,
 			.sync_mode = WB_SYNC_NONE,
 			.older_than_this = NULL,
 			.nr_to_write = write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes_wbc(&wbc);
+			writeback_inodes_wb(&bdi->wb, &wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
					 &bdi_thresh, bdi);
