diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-07-08 11:06:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-07-08 11:06:40 -0400 |
commit | c77e9e68269bf50573ce130b5d95ff6e539dbcf8 (patch) | |
tree | 99edc1b070717e18f5958f934bc3921cfed2ce1f | |
parent | 31016e9941be85e8c487b1f3c45d253eda356715 (diff) | |
parent | 83ba7b071f30f7c01f72518ad72d5cd203c27502 (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
writeback: simplify the write back thread queue
writeback: split writeback_inodes_wb
writeback: remove writeback_inodes_wbc
fs-writeback: fix kernel-doc warnings
splice: check f_mode for seekable file
splice: direct_splice_actor() should not use pos in sd
-rw-r--r-- | fs/afs/write.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 2 | ||||
-rw-r--r-- | fs/fs-writeback.c | 335 | ||||
-rw-r--r-- | fs/splice.c | 9 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 2 | ||||
-rw-r--r-- | include/linux/writeback.h | 7 | ||||
-rw-r--r-- | mm/backing-dev.c | 17 | ||||
-rw-r--r-- | mm/page-writeback.c | 3 |
8 files changed, 122 insertions, 254 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9948d0..722743b152d8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) | |||
680 | { | 680 | { |
681 | struct address_space *mapping = vnode->vfs_inode.i_mapping; | 681 | struct address_space *mapping = vnode->vfs_inode.i_mapping; |
682 | struct writeback_control wbc = { | 682 | struct writeback_control wbc = { |
683 | .bdi = mapping->backing_dev_info, | ||
684 | .sync_mode = WB_SYNC_ALL, | 683 | .sync_mode = WB_SYNC_ALL, |
685 | .nr_to_write = LONG_MAX, | 684 | .nr_to_write = LONG_MAX, |
686 | .range_cyclic = 1, | 685 | .range_cyclic = 1, |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c21ec55..d74e6af9b53a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2594 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2594 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2595 | }; | 2595 | }; |
2596 | struct writeback_control wbc_writepages = { | 2596 | struct writeback_control wbc_writepages = { |
2597 | .bdi = wbc->bdi, | ||
2598 | .sync_mode = wbc->sync_mode, | 2597 | .sync_mode = wbc->sync_mode, |
2599 | .older_than_this = NULL, | 2598 | .older_than_this = NULL, |
2600 | .nr_to_write = 64, | 2599 | .nr_to_write = 64, |
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2628 | .sync_io = mode == WB_SYNC_ALL, | 2627 | .sync_io = mode == WB_SYNC_ALL, |
2629 | }; | 2628 | }; |
2630 | struct writeback_control wbc_writepages = { | 2629 | struct writeback_control wbc_writepages = { |
2631 | .bdi = inode->i_mapping->backing_dev_info, | ||
2632 | .sync_mode = mode, | 2630 | .sync_mode = mode, |
2633 | .older_than_this = NULL, | 2631 | .older_than_this = NULL, |
2634 | .nr_to_write = nr_pages * 2, | 2632 | .nr_to_write = nr_pages * 2, |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0609607d3955..d5be1693ac93 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -38,43 +38,18 @@ int nr_pdflush_threads; | |||
38 | /* | 38 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 39 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 40 | */ |
41 | struct wb_writeback_args { | 41 | struct wb_writeback_work { |
42 | long nr_pages; | 42 | long nr_pages; |
43 | struct super_block *sb; | 43 | struct super_block *sb; |
44 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
48 | }; | ||
49 | 48 | ||
50 | /* | ||
51 | * Work items for the bdi_writeback threads | ||
52 | */ | ||
53 | struct bdi_work { | ||
54 | struct list_head list; /* pending work list */ | 49 | struct list_head list; /* pending work list */ |
55 | struct rcu_head rcu_head; /* for RCU free/clear of work */ | 50 | struct completion *done; /* set if the caller waits */ |
56 | |||
57 | unsigned long seen; /* threads that have seen this work */ | ||
58 | atomic_t pending; /* number of threads still to do work */ | ||
59 | |||
60 | struct wb_writeback_args args; /* writeback arguments */ | ||
61 | |||
62 | unsigned long state; /* flag bits, see WS_* */ | ||
63 | }; | 51 | }; |
64 | 52 | ||
65 | enum { | ||
66 | WS_INPROGRESS = 0, | ||
67 | WS_ONSTACK, | ||
68 | }; | ||
69 | |||
70 | static inline void bdi_work_init(struct bdi_work *work, | ||
71 | struct wb_writeback_args *args) | ||
72 | { | ||
73 | INIT_RCU_HEAD(&work->rcu_head); | ||
74 | work->args = *args; | ||
75 | __set_bit(WS_INPROGRESS, &work->state); | ||
76 | } | ||
77 | |||
78 | /** | 53 | /** |
79 | * writeback_in_progress - determine whether there is writeback in progress | 54 | * writeback_in_progress - determine whether there is writeback in progress |
80 | * @bdi: the device's backing_dev_info structure. | 55 | * @bdi: the device's backing_dev_info structure. |
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
87 | return !list_empty(&bdi->work_list); | 62 | return !list_empty(&bdi->work_list); |
88 | } | 63 | } |
89 | 64 | ||
90 | static void bdi_work_free(struct rcu_head *head) | 65 | static void bdi_queue_work(struct backing_dev_info *bdi, |
91 | { | 66 | struct wb_writeback_work *work) |
92 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); | ||
93 | |||
94 | clear_bit(WS_INPROGRESS, &work->state); | ||
95 | smp_mb__after_clear_bit(); | ||
96 | wake_up_bit(&work->state, WS_INPROGRESS); | ||
97 | |||
98 | if (!test_bit(WS_ONSTACK, &work->state)) | ||
99 | kfree(work); | ||
100 | } | ||
101 | |||
102 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
103 | { | 67 | { |
104 | /* | ||
105 | * The caller has retrieved the work arguments from this work, | ||
106 | * drop our reference. If this is the last ref, delete and free it | ||
107 | */ | ||
108 | if (atomic_dec_and_test(&work->pending)) { | ||
109 | struct backing_dev_info *bdi = wb->bdi; | ||
110 | |||
111 | spin_lock(&bdi->wb_lock); | ||
112 | list_del_rcu(&work->list); | ||
113 | spin_unlock(&bdi->wb_lock); | ||
114 | |||
115 | call_rcu(&work->rcu_head, bdi_work_free); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | ||
120 | { | ||
121 | work->seen = bdi->wb_mask; | ||
122 | BUG_ON(!work->seen); | ||
123 | atomic_set(&work->pending, bdi->wb_cnt); | ||
124 | BUG_ON(!bdi->wb_cnt); | ||
125 | |||
126 | /* | ||
127 | * list_add_tail_rcu() contains the necessary barriers to | ||
128 | * make sure the above stores are seen before the item is | ||
129 | * noticed on the list | ||
130 | */ | ||
131 | spin_lock(&bdi->wb_lock); | 68 | spin_lock(&bdi->wb_lock); |
132 | list_add_tail_rcu(&work->list, &bdi->work_list); | 69 | list_add_tail(&work->list, &bdi->work_list); |
133 | spin_unlock(&bdi->wb_lock); | 70 | spin_unlock(&bdi->wb_lock); |
134 | 71 | ||
135 | /* | 72 | /* |
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | |||
146 | } | 83 | } |
147 | } | 84 | } |
148 | 85 | ||
149 | /* | 86 | static void |
150 | * Used for on-stack allocated work items. The caller needs to wait until | 87 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
151 | * the wb threads have acked the work before it's safe to continue. | 88 | bool range_cyclic, bool for_background) |
152 | */ | ||
153 | static void bdi_wait_on_work_done(struct bdi_work *work) | ||
154 | { | 89 | { |
155 | wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait, | 90 | struct wb_writeback_work *work; |
156 | TASK_UNINTERRUPTIBLE); | ||
157 | } | ||
158 | |||
159 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | ||
160 | struct wb_writeback_args *args) | ||
161 | { | ||
162 | struct bdi_work *work; | ||
163 | 91 | ||
164 | /* | 92 | /* |
165 | * This is WB_SYNC_NONE writeback, so if allocation fails just | 93 | * This is WB_SYNC_NONE writeback, so if allocation fails just |
166 | * wakeup the thread for old dirty data writeback | 94 | * wakeup the thread for old dirty data writeback |
167 | */ | 95 | */ |
168 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
169 | if (work) { | 97 | if (!work) { |
170 | bdi_work_init(work, args); | 98 | if (bdi->wb.task) |
171 | bdi_queue_work(bdi, work); | 99 | wake_up_process(bdi->wb.task); |
172 | } else { | 100 | return; |
173 | struct bdi_writeback *wb = &bdi->wb; | ||
174 | |||
175 | if (wb->task) | ||
176 | wake_up_process(wb->task); | ||
177 | } | 101 | } |
178 | } | ||
179 | 102 | ||
180 | /** | 103 | work->sync_mode = WB_SYNC_NONE; |
181 | * bdi_queue_work_onstack - start and wait for writeback | 104 | work->nr_pages = nr_pages; |
182 | * @sb: write inodes from this super_block | 105 | work->range_cyclic = range_cyclic; |
183 | * | 106 | work->for_background = for_background; |
184 | * Description: | ||
185 | * This function initiates writeback and waits for the operation to | ||
186 | * complete. Callers must hold the sb s_umount semaphore for | ||
187 | * reading, to avoid having the super disappear before we are done. | ||
188 | */ | ||
189 | static void bdi_queue_work_onstack(struct wb_writeback_args *args) | ||
190 | { | ||
191 | struct bdi_work work; | ||
192 | |||
193 | bdi_work_init(&work, args); | ||
194 | __set_bit(WS_ONSTACK, &work.state); | ||
195 | 107 | ||
196 | bdi_queue_work(args->sb->s_bdi, &work); | 108 | bdi_queue_work(bdi, work); |
197 | bdi_wait_on_work_done(&work); | ||
198 | } | 109 | } |
199 | 110 | ||
200 | /** | 111 | /** |
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) | |||
210 | */ | 121 | */ |
211 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | 122 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) |
212 | { | 123 | { |
213 | struct wb_writeback_args args = { | 124 | __bdi_start_writeback(bdi, nr_pages, true, false); |
214 | .sync_mode = WB_SYNC_NONE, | ||
215 | .nr_pages = nr_pages, | ||
216 | .range_cyclic = 1, | ||
217 | }; | ||
218 | |||
219 | bdi_alloc_queue_work(bdi, &args); | ||
220 | } | 125 | } |
221 | 126 | ||
222 | /** | 127 | /** |
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | |||
230 | */ | 135 | */ |
231 | void bdi_start_background_writeback(struct backing_dev_info *bdi) | 136 | void bdi_start_background_writeback(struct backing_dev_info *bdi) |
232 | { | 137 | { |
233 | struct wb_writeback_args args = { | 138 | __bdi_start_writeback(bdi, LONG_MAX, true, true); |
234 | .sync_mode = WB_SYNC_NONE, | ||
235 | .nr_pages = LONG_MAX, | ||
236 | .for_background = 1, | ||
237 | .range_cyclic = 1, | ||
238 | }; | ||
239 | bdi_alloc_queue_work(bdi, &args); | ||
240 | } | 139 | } |
241 | 140 | ||
242 | /* | 141 | /* |
@@ -554,29 +453,41 @@ static bool pin_sb_for_writeback(struct super_block *sb) | |||
554 | 453 | ||
555 | /* | 454 | /* |
556 | * Write a portion of b_io inodes which belong to @sb. | 455 | * Write a portion of b_io inodes which belong to @sb. |
557 | * If @wbc->sb != NULL, then find and write all such | 456 | * |
457 | * If @only_this_sb is true, then find and write all such | ||
558 | * inodes. Otherwise write only ones which go sequentially | 458 | * inodes. Otherwise write only ones which go sequentially |
559 | * in reverse order. | 459 | * in reverse order. |
460 | * | ||
560 | * Return 1, if the caller writeback routine should be | 461 | * Return 1, if the caller writeback routine should be |
561 | * interrupted. Otherwise return 0. | 462 | * interrupted. Otherwise return 0. |
562 | */ | 463 | */ |
563 | static int writeback_sb_inodes(struct super_block *sb, | 464 | static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, |
564 | struct bdi_writeback *wb, | 465 | struct writeback_control *wbc, bool only_this_sb) |
565 | struct writeback_control *wbc) | ||
566 | { | 466 | { |
567 | while (!list_empty(&wb->b_io)) { | 467 | while (!list_empty(&wb->b_io)) { |
568 | long pages_skipped; | 468 | long pages_skipped; |
569 | struct inode *inode = list_entry(wb->b_io.prev, | 469 | struct inode *inode = list_entry(wb->b_io.prev, |
570 | struct inode, i_list); | 470 | struct inode, i_list); |
571 | if (wbc->sb && sb != inode->i_sb) { | 471 | |
572 | /* super block given and doesn't | 472 | if (inode->i_sb != sb) { |
573 | match, skip this inode */ | 473 | if (only_this_sb) { |
574 | redirty_tail(inode); | 474 | /* |
575 | continue; | 475 | * We only want to write back data for this |
576 | } | 476 | * superblock, move all inodes not belonging |
577 | if (sb != inode->i_sb) | 477 | * to it back onto the dirty list. |
578 | /* finish with this superblock */ | 478 | */ |
479 | redirty_tail(inode); | ||
480 | continue; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * The inode belongs to a different superblock. | ||
485 | * Bounce back to the caller to unpin this and | ||
486 | * pin the next superblock. | ||
487 | */ | ||
579 | return 0; | 488 | return 0; |
489 | } | ||
490 | |||
580 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 491 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
581 | requeue_io(inode); | 492 | requeue_io(inode); |
582 | continue; | 493 | continue; |
@@ -614,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb, | |||
614 | return 1; | 525 | return 1; |
615 | } | 526 | } |
616 | 527 | ||
617 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 528 | void writeback_inodes_wb(struct bdi_writeback *wb, |
618 | struct writeback_control *wbc) | 529 | struct writeback_control *wbc) |
619 | { | 530 | { |
620 | int ret = 0; | 531 | int ret = 0; |
621 | 532 | ||
@@ -629,29 +540,12 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
629 | struct inode, i_list); | 540 | struct inode, i_list); |
630 | struct super_block *sb = inode->i_sb; | 541 | struct super_block *sb = inode->i_sb; |
631 | 542 | ||
632 | if (wbc->sb) { | 543 | if (!pin_sb_for_writeback(sb)) { |
633 | /* | 544 | requeue_io(inode); |
634 | * We are requested to write out inodes for a specific | 545 | continue; |
635 | * superblock. This means we already have s_umount | ||
636 | * taken by the caller which also waits for us to | ||
637 | * complete the writeout. | ||
638 | */ | ||
639 | if (sb != wbc->sb) { | ||
640 | redirty_tail(inode); | ||
641 | continue; | ||
642 | } | ||
643 | |||
644 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
645 | |||
646 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
647 | } else { | ||
648 | if (!pin_sb_for_writeback(sb)) { | ||
649 | requeue_io(inode); | ||
650 | continue; | ||
651 | } | ||
652 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
653 | drop_super(sb); | ||
654 | } | 546 | } |
547 | ret = writeback_sb_inodes(sb, wb, wbc, false); | ||
548 | drop_super(sb); | ||
655 | 549 | ||
656 | if (ret) | 550 | if (ret) |
657 | break; | 551 | break; |
@@ -660,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
660 | /* Leave any unwritten inodes on b_io */ | 554 | /* Leave any unwritten inodes on b_io */ |
661 | } | 555 | } |
662 | 556 | ||
663 | void writeback_inodes_wbc(struct writeback_control *wbc) | 557 | static void __writeback_inodes_sb(struct super_block *sb, |
558 | struct bdi_writeback *wb, struct writeback_control *wbc) | ||
664 | { | 559 | { |
665 | struct backing_dev_info *bdi = wbc->bdi; | 560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
666 | 561 | ||
667 | writeback_inodes_wb(&bdi->wb, wbc); | 562 | wbc->wb_start = jiffies; /* livelock avoidance */ |
563 | spin_lock(&inode_lock); | ||
564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
565 | queue_io(wb, wbc->older_than_this); | ||
566 | writeback_sb_inodes(sb, wb, wbc, true); | ||
567 | spin_unlock(&inode_lock); | ||
668 | } | 568 | } |
669 | 569 | ||
670 | /* | 570 | /* |
@@ -702,16 +602,14 @@ static inline bool over_bground_thresh(void) | |||
702 | * all dirty pages if they are all attached to "old" mappings. | 602 | * all dirty pages if they are all attached to "old" mappings. |
703 | */ | 603 | */ |
704 | static long wb_writeback(struct bdi_writeback *wb, | 604 | static long wb_writeback(struct bdi_writeback *wb, |
705 | struct wb_writeback_args *args) | 605 | struct wb_writeback_work *work) |
706 | { | 606 | { |
707 | struct writeback_control wbc = { | 607 | struct writeback_control wbc = { |
708 | .bdi = wb->bdi, | 608 | .sync_mode = work->sync_mode, |
709 | .sb = args->sb, | ||
710 | .sync_mode = args->sync_mode, | ||
711 | .older_than_this = NULL, | 609 | .older_than_this = NULL, |
712 | .for_kupdate = args->for_kupdate, | 610 | .for_kupdate = work->for_kupdate, |
713 | .for_background = args->for_background, | 611 | .for_background = work->for_background, |
714 | .range_cyclic = args->range_cyclic, | 612 | .range_cyclic = work->range_cyclic, |
715 | }; | 613 | }; |
716 | unsigned long oldest_jif; | 614 | unsigned long oldest_jif; |
717 | long wrote = 0; | 615 | long wrote = 0; |
@@ -731,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
731 | /* | 629 | /* |
732 | * Stop writeback when nr_pages has been consumed | 630 | * Stop writeback when nr_pages has been consumed |
733 | */ | 631 | */ |
734 | if (args->nr_pages <= 0) | 632 | if (work->nr_pages <= 0) |
735 | break; | 633 | break; |
736 | 634 | ||
737 | /* | 635 | /* |
738 | * For background writeout, stop when we are below the | 636 | * For background writeout, stop when we are below the |
739 | * background dirty threshold | 637 | * background dirty threshold |
740 | */ | 638 | */ |
741 | if (args->for_background && !over_bground_thresh()) | 639 | if (work->for_background && !over_bground_thresh()) |
742 | break; | 640 | break; |
743 | 641 | ||
744 | wbc.more_io = 0; | 642 | wbc.more_io = 0; |
745 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
746 | wbc.pages_skipped = 0; | 644 | wbc.pages_skipped = 0; |
747 | writeback_inodes_wb(wb, &wbc); | 645 | if (work->sb) |
748 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 646 | __writeback_inodes_sb(work->sb, wb, &wbc); |
647 | else | ||
648 | writeback_inodes_wb(wb, &wbc); | ||
649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
749 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
750 | 651 | ||
751 | /* | 652 | /* |
@@ -781,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
781 | } | 682 | } |
782 | 683 | ||
783 | /* | 684 | /* |
784 | * Return the next bdi_work struct that hasn't been processed by this | 685 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
785 | * wb thread yet. ->seen is initially set for each thread that exists | ||
786 | * for this device, when a thread first notices a piece of work it | ||
787 | * clears its bit. Depending on writeback type, the thread will notify | ||
788 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
789 | * it is done (WB_SYNC_ALL). | ||
790 | */ | 686 | */ |
791 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | 687 | static struct wb_writeback_work * |
792 | struct bdi_writeback *wb) | 688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) |
793 | { | 689 | { |
794 | struct bdi_work *work, *ret = NULL; | 690 | struct wb_writeback_work *work = NULL; |
795 | |||
796 | rcu_read_lock(); | ||
797 | 691 | ||
798 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | 692 | spin_lock(&bdi->wb_lock); |
799 | if (!test_bit(wb->nr, &work->seen)) | 693 | if (!list_empty(&bdi->work_list)) { |
800 | continue; | 694 | work = list_entry(bdi->work_list.next, |
801 | clear_bit(wb->nr, &work->seen); | 695 | struct wb_writeback_work, list); |
802 | 696 | list_del_init(&work->list); | |
803 | ret = work; | ||
804 | break; | ||
805 | } | 697 | } |
806 | 698 | spin_unlock(&bdi->wb_lock); | |
807 | rcu_read_unlock(); | 699 | return work; |
808 | return ret; | ||
809 | } | 700 | } |
810 | 701 | ||
811 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | 702 | static long wb_check_old_data_flush(struct bdi_writeback *wb) |
@@ -830,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
830 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 721 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
831 | 722 | ||
832 | if (nr_pages) { | 723 | if (nr_pages) { |
833 | struct wb_writeback_args args = { | 724 | struct wb_writeback_work work = { |
834 | .nr_pages = nr_pages, | 725 | .nr_pages = nr_pages, |
835 | .sync_mode = WB_SYNC_NONE, | 726 | .sync_mode = WB_SYNC_NONE, |
836 | .for_kupdate = 1, | 727 | .for_kupdate = 1, |
837 | .range_cyclic = 1, | 728 | .range_cyclic = 1, |
838 | }; | 729 | }; |
839 | 730 | ||
840 | return wb_writeback(wb, &args); | 731 | return wb_writeback(wb, &work); |
841 | } | 732 | } |
842 | 733 | ||
843 | return 0; | 734 | return 0; |
@@ -849,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
849 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 740 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) |
850 | { | 741 | { |
851 | struct backing_dev_info *bdi = wb->bdi; | 742 | struct backing_dev_info *bdi = wb->bdi; |
852 | struct bdi_work *work; | 743 | struct wb_writeback_work *work; |
853 | long wrote = 0; | 744 | long wrote = 0; |
854 | 745 | ||
855 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
856 | struct wb_writeback_args args = work->args; | ||
857 | |||
858 | /* | 747 | /* |
859 | * Override sync mode, in case we must wait for completion | 748 | * Override sync mode, in case we must wait for completion |
749 | * because this thread is exiting now. | ||
860 | */ | 750 | */ |
861 | if (force_wait) | 751 | if (force_wait) |
862 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 752 | work->sync_mode = WB_SYNC_ALL; |
863 | |||
864 | /* | ||
865 | * If this isn't a data integrity operation, just notify | ||
866 | * that we have seen this work and we are now starting it. | ||
867 | */ | ||
868 | if (!test_bit(WS_ONSTACK, &work->state)) | ||
869 | wb_clear_pending(wb, work); | ||
870 | 753 | ||
871 | wrote += wb_writeback(wb, &args); | 754 | wrote += wb_writeback(wb, work); |
872 | 755 | ||
873 | /* | 756 | /* |
874 | * This is a data integrity writeback, so only do the | 757 | * Notify the caller of completion if this is a synchronous |
875 | * notification when we have completed the work. | 758 | * work item, otherwise just free it. |
876 | */ | 759 | */ |
877 | if (test_bit(WS_ONSTACK, &work->state)) | 760 | if (work->done) |
878 | wb_clear_pending(wb, work); | 761 | complete(work->done); |
762 | else | ||
763 | kfree(work); | ||
879 | } | 764 | } |
880 | 765 | ||
881 | /* | 766 | /* |
@@ -938,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
938 | void wakeup_flusher_threads(long nr_pages) | 823 | void wakeup_flusher_threads(long nr_pages) |
939 | { | 824 | { |
940 | struct backing_dev_info *bdi; | 825 | struct backing_dev_info *bdi; |
941 | struct wb_writeback_args args = { | ||
942 | .sync_mode = WB_SYNC_NONE, | ||
943 | }; | ||
944 | 826 | ||
945 | if (nr_pages) { | 827 | if (!nr_pages) { |
946 | args.nr_pages = nr_pages; | 828 | nr_pages = global_page_state(NR_FILE_DIRTY) + |
947 | } else { | ||
948 | args.nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
949 | global_page_state(NR_UNSTABLE_NFS); | 829 | global_page_state(NR_UNSTABLE_NFS); |
950 | } | 830 | } |
951 | 831 | ||
@@ -953,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages) | |||
953 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 833 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
954 | if (!bdi_has_dirty_io(bdi)) | 834 | if (!bdi_has_dirty_io(bdi)) |
955 | continue; | 835 | continue; |
956 | bdi_alloc_queue_work(bdi, &args); | 836 | __bdi_start_writeback(bdi, nr_pages, false, false); |
957 | } | 837 | } |
958 | rcu_read_unlock(); | 838 | rcu_read_unlock(); |
959 | } | 839 | } |
@@ -1162,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1162 | { | 1042 | { |
1163 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1043 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
1164 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1044 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
1165 | struct wb_writeback_args args = { | 1045 | DECLARE_COMPLETION_ONSTACK(done); |
1046 | struct wb_writeback_work work = { | ||
1166 | .sb = sb, | 1047 | .sb = sb, |
1167 | .sync_mode = WB_SYNC_NONE, | 1048 | .sync_mode = WB_SYNC_NONE, |
1049 | .done = &done, | ||
1168 | }; | 1050 | }; |
1169 | 1051 | ||
1170 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1052 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1171 | 1053 | ||
1172 | args.nr_pages = nr_dirty + nr_unstable + | 1054 | work.nr_pages = nr_dirty + nr_unstable + |
1173 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1055 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
1174 | 1056 | ||
1175 | bdi_queue_work_onstack(&args); | 1057 | bdi_queue_work(sb->s_bdi, &work); |
1058 | wait_for_completion(&done); | ||
1176 | } | 1059 | } |
1177 | EXPORT_SYMBOL(writeback_inodes_sb); | 1060 | EXPORT_SYMBOL(writeback_inodes_sb); |
1178 | 1061 | ||
@@ -1204,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | |||
1204 | */ | 1087 | */ |
1205 | void sync_inodes_sb(struct super_block *sb) | 1088 | void sync_inodes_sb(struct super_block *sb) |
1206 | { | 1089 | { |
1207 | struct wb_writeback_args args = { | 1090 | DECLARE_COMPLETION_ONSTACK(done); |
1091 | struct wb_writeback_work work = { | ||
1208 | .sb = sb, | 1092 | .sb = sb, |
1209 | .sync_mode = WB_SYNC_ALL, | 1093 | .sync_mode = WB_SYNC_ALL, |
1210 | .nr_pages = LONG_MAX, | 1094 | .nr_pages = LONG_MAX, |
1211 | .range_cyclic = 0, | 1095 | .range_cyclic = 0, |
1096 | .done = &done, | ||
1212 | }; | 1097 | }; |
1213 | 1098 | ||
1214 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1099 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1215 | 1100 | ||
1216 | bdi_queue_work_onstack(&args); | 1101 | bdi_queue_work(sb->s_bdi, &work); |
1102 | wait_for_completion(&done); | ||
1103 | |||
1217 | wait_sb_inodes(sb); | 1104 | wait_sb_inodes(sb); |
1218 | } | 1105 | } |
1219 | EXPORT_SYMBOL(sync_inodes_sb); | 1106 | EXPORT_SYMBOL(sync_inodes_sb); |
diff --git a/fs/splice.c b/fs/splice.c index 740e6b9faf7a..efdbfece9932 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, | |||
1282 | { | 1282 | { |
1283 | struct file *file = sd->u.file; | 1283 | struct file *file = sd->u.file; |
1284 | 1284 | ||
1285 | return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); | 1285 | return do_splice_from(pipe, file, &file->f_pos, sd->total_len, |
1286 | sd->flags); | ||
1286 | } | 1287 | } |
1287 | 1288 | ||
1288 | /** | 1289 | /** |
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1371 | if (off_in) | 1372 | if (off_in) |
1372 | return -ESPIPE; | 1373 | return -ESPIPE; |
1373 | if (off_out) { | 1374 | if (off_out) { |
1374 | if (!out->f_op || !out->f_op->llseek || | 1375 | if (!(out->f_mode & FMODE_PWRITE)) |
1375 | out->f_op->llseek == no_llseek) | ||
1376 | return -EINVAL; | 1376 | return -EINVAL; |
1377 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) | 1377 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) |
1378 | return -EFAULT; | 1378 | return -EFAULT; |
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1392 | if (off_out) | 1392 | if (off_out) |
1393 | return -ESPIPE; | 1393 | return -ESPIPE; |
1394 | if (off_in) { | 1394 | if (off_in) { |
1395 | if (!in->f_op || !in->f_op->llseek || | 1395 | if (!(in->f_mode & FMODE_PREAD)) |
1396 | in->f_op->llseek == no_llseek) | ||
1397 | return -EINVAL; | 1396 | return -EINVAL; |
1398 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) | 1397 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) |
1399 | return -EFAULT; | 1398 | return -EFAULT; |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9ae2889096b6..e9aec0d099df 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -82,8 +82,6 @@ struct backing_dev_info { | |||
82 | struct bdi_writeback wb; /* default writeback info for this bdi */ | 82 | struct bdi_writeback wb; /* default writeback info for this bdi */ |
83 | spinlock_t wb_lock; /* protects update side of wb_list */ | 83 | spinlock_t wb_lock; /* protects update side of wb_list */ |
84 | struct list_head wb_list; /* the flusher threads hanging off this bdi */ | 84 | struct list_head wb_list; /* the flusher threads hanging off this bdi */ |
85 | unsigned long wb_mask; /* bitmask of registered tasks */ | ||
86 | unsigned int wb_cnt; /* number of registered tasks */ | ||
87 | 85 | ||
88 | struct list_head work_list; | 86 | struct list_head work_list; |
89 | 87 | ||
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d63ef8f9609f..c24eca71e80c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -27,10 +27,6 @@ enum writeback_sync_modes { | |||
27 | * in a manner such that unspecified fields are set to zero. | 27 | * in a manner such that unspecified fields are set to zero. |
28 | */ | 28 | */ |
29 | struct writeback_control { | 29 | struct writeback_control { |
30 | struct backing_dev_info *bdi; /* If !NULL, only write back this | ||
31 | queue */ | ||
32 | struct super_block *sb; /* if !NULL, only write inodes from | ||
33 | this super_block */ | ||
34 | enum writeback_sync_modes sync_mode; | 30 | enum writeback_sync_modes sync_mode; |
35 | unsigned long *older_than_this; /* If !NULL, only write back inodes | 31 | unsigned long *older_than_this; /* If !NULL, only write back inodes |
36 | older than this */ | 32 | older than this */ |
@@ -66,7 +62,8 @@ int inode_wait(void *); | |||
66 | void writeback_inodes_sb(struct super_block *); | 62 | void writeback_inodes_sb(struct super_block *); |
67 | int writeback_inodes_sb_if_idle(struct super_block *); | 63 | int writeback_inodes_sb_if_idle(struct super_block *); |
68 | void sync_inodes_sb(struct super_block *); | 64 | void sync_inodes_sb(struct super_block *); |
69 | void writeback_inodes_wbc(struct writeback_control *wbc); | 65 | void writeback_inodes_wb(struct bdi_writeback *wb, |
66 | struct writeback_control *wbc); | ||
70 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); | 67 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); |
71 | void wakeup_flusher_threads(long nr_pages); | 68 | void wakeup_flusher_threads(long nr_pages); |
72 | 69 | ||
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 660a87a22511..123bcef13e51 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) | |||
104 | "b_more_io: %8lu\n" | 104 | "b_more_io: %8lu\n" |
105 | "bdi_list: %8u\n" | 105 | "bdi_list: %8u\n" |
106 | "state: %8lx\n" | 106 | "state: %8lx\n" |
107 | "wb_mask: %8lx\n" | 107 | "wb_list: %8u\n", |
108 | "wb_list: %8u\n" | ||
109 | "wb_cnt: %8u\n", | ||
110 | (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), | 108 | (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), |
111 | (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), | 109 | (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), |
112 | K(bdi_thresh), K(dirty_thresh), | 110 | K(bdi_thresh), K(dirty_thresh), |
113 | K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io, | 111 | K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io, |
114 | !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask, | 112 | !list_empty(&bdi->bdi_list), bdi->state, |
115 | !list_empty(&bdi->wb_list), bdi->wb_cnt); | 113 | !list_empty(&bdi->wb_list)); |
116 | #undef K | 114 | #undef K |
117 | 115 | ||
118 | return 0; | 116 | return 0; |
@@ -340,14 +338,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) | |||
340 | static void bdi_flush_io(struct backing_dev_info *bdi) | 338 | static void bdi_flush_io(struct backing_dev_info *bdi) |
341 | { | 339 | { |
342 | struct writeback_control wbc = { | 340 | struct writeback_control wbc = { |
343 | .bdi = bdi, | ||
344 | .sync_mode = WB_SYNC_NONE, | 341 | .sync_mode = WB_SYNC_NONE, |
345 | .older_than_this = NULL, | 342 | .older_than_this = NULL, |
346 | .range_cyclic = 1, | 343 | .range_cyclic = 1, |
347 | .nr_to_write = 1024, | 344 | .nr_to_write = 1024, |
348 | }; | 345 | }; |
349 | 346 | ||
350 | writeback_inodes_wbc(&wbc); | 347 | writeback_inodes_wb(&bdi->wb, &wbc); |
351 | } | 348 | } |
352 | 349 | ||
353 | /* | 350 | /* |
@@ -675,12 +672,6 @@ int bdi_init(struct backing_dev_info *bdi) | |||
675 | 672 | ||
676 | bdi_wb_init(&bdi->wb, bdi); | 673 | bdi_wb_init(&bdi->wb, bdi); |
677 | 674 | ||
678 | /* | ||
679 | * Just one thread support for now, hard code mask and count | ||
680 | */ | ||
681 | bdi->wb_mask = 1; | ||
682 | bdi->wb_cnt = 1; | ||
683 | |||
684 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 675 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
685 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); | 676 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); |
686 | if (err) | 677 | if (err) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 54f28bd493d3..37498ef61548 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
495 | 495 | ||
496 | for (;;) { | 496 | for (;;) { |
497 | struct writeback_control wbc = { | 497 | struct writeback_control wbc = { |
498 | .bdi = bdi, | ||
499 | .sync_mode = WB_SYNC_NONE, | 498 | .sync_mode = WB_SYNC_NONE, |
500 | .older_than_this = NULL, | 499 | .older_than_this = NULL, |
501 | .nr_to_write = write_chunk, | 500 | .nr_to_write = write_chunk, |
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
537 | * up. | 536 | * up. |
538 | */ | 537 | */ |
539 | if (bdi_nr_reclaimable > bdi_thresh) { | 538 | if (bdi_nr_reclaimable > bdi_thresh) { |
540 | writeback_inodes_wbc(&wbc); | 539 | writeback_inodes_wb(&bdi->wb, &wbc); |
541 | pages_written += write_chunk - wbc.nr_to_write; | 540 | pages_written += write_chunk - wbc.nr_to_write; |
542 | get_dirty_limits(&background_thresh, &dirty_thresh, | 541 | get_dirty_limits(&background_thresh, &dirty_thresh, |
543 | &bdi_thresh, bdi); | 542 | &bdi_thresh, bdi); |