author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-08 13:13:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-08 13:13:35 -0400
commit     4de13d7aa8f4d02f4dc99d4609575659f92b3c5a
tree       3bc9729eabe79c6164cd29a5d605000bc82bf837   /fs/fs-writeback.c
parent     5af43c24ca59a448c9312dd4a4a51d27ec3b9a73
parent     b8d4a5bf6a049303a29a3275f463f09a490b50ea
Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
Pull block core updates from Jens Axboe:
- The major bit is Kent's prep work for immutable bio vecs.
- Stable candidate fix for a scheduling-while-atomic bug in the queue
  bypass operation.
- Fix for the hang when merging discard bios overflows the 32-bit
  unsigned rq->datalen.
- Tejun's changes to convert the writeback thread pool to the generic
  workqueue mechanism (a minimal sketch of that pattern follows the
  commit list below).
- Runtime PM framework; the SCSI patches exist on top of these in James'
  tree.
- A few random fixes.
* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
relay: move remove_buf_file inside relay_close_buf
partitions/efi.c: replace useless kzalloc's by kmalloc's
fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
block: fix max discard sectors limit
blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
Documentation: cfq-iosched: update documentation help for cfq tunables
writeback: expose the bdi_wq workqueue
writeback: replace custom worker pool implementation with unbound workqueue
writeback: remove unused bdi_pending_list
aoe: Fix unitialized var usage
bio-integrity: Add explicit field for owner of bip_buf
block: Add an explicit bio flag for bios that own their bvec
block: Add bio_alloc_pages()
block: Convert some code to bio_for_each_segment_all()
block: Add bio_for_each_segment_all()
bounce: Refactor __blk_queue_bounce to not use bi_io_vec
raid1: use bio_copy_data()
pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
pktcdvd: use bio_copy_data()
block: Add bio_copy_data()
...
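As background for the writeback workqueue conversion called out above (and visible in the diff below), here is a minimal sketch of the pattern the series moves to: a single unbound workqueue plus one delayed_work item per backing device, kicked with mod_delayed_work() instead of waking a dedicated flusher kthread. The identifiers example_wq, example_flush and example_flush_fn() are hypothetical stand-ins (the series itself uses bdi_wq and wb->dwork), and the exact workqueue flags are an assumption, not a quote from the patch.

    /* Minimal sketch only; identifiers are illustrative, not the kernel's. */
    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *example_wq;   /* stands in for bdi_wq */
    static struct delayed_work example_flush;     /* stands in for wb->dwork */

    static void example_flush_fn(struct work_struct *work)
    {
            /* Real code would write back dirty pages for one device here. */
            pr_info("example flush ran\n");
    }

    static int __init example_init(void)
    {
            /* One unbound workqueue replaces the pool of per-bdi flusher threads. */
            example_wq = alloc_workqueue("example_flush",
                                         WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
            if (!example_wq)
                    return -ENOMEM;

            INIT_DELAYED_WORK(&example_flush, example_flush_fn);

            /* Kick the work immediately; replaces wake_up_process() on a kthread. */
            mod_delayed_work(example_wq, &example_flush, 0);
            return 0;
    }

    static void __exit example_exit(void)
    {
            cancel_delayed_work_sync(&example_flush);
            destroy_workqueue(example_wq);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");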
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--   fs/fs-writeback.c   102
1 file changed, 32 insertions(+), 70 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 798d4458a4d3..3be57189efd5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,7 +22,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
-static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
-{
-        if (bdi->wb.task) {
-                wake_up_process(bdi->wb.task);
-        } else {
-                /*
-                 * The bdi thread isn't there, wake up the forker thread which
-                 * will create and run it.
-                 */
-                wake_up_process(default_backing_dev_info.wb.task);
-        }
-}
-
 static void bdi_queue_work(struct backing_dev_info *bdi,
                            struct wb_writeback_work *work)
 {
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
         spin_lock_bh(&bdi->wb_lock);
         list_add_tail(&work->list, &bdi->work_list);
-        if (!bdi->wb.task)
-                trace_writeback_nothread(bdi, work);
-        bdi_wakeup_flusher(bdi);
         spin_unlock_bh(&bdi->wb_lock);
+
+        mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 static void
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
          */
         work = kzalloc(sizeof(*work), GFP_ATOMIC);
         if (!work) {
-                if (bdi->wb.task) {
-                        trace_writeback_nowork(bdi);
-                        wake_up_process(bdi->wb.task);
-                }
+                trace_writeback_nowork(bdi);
+                mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
                 return;
         }
 
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
          * writeback as soon as there is no other work to do.
          */
         trace_writeback_wake_background(bdi);
-        spin_lock_bh(&bdi->wb_lock);
-        bdi_wakeup_flusher(bdi);
-        spin_unlock_bh(&bdi->wb_lock);
+        mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 /*
@@ -1020,67 +1000,49 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 
 /*
  * Handle writeback of dirty data for the device backed by this bdi. Also
- * wakes up periodically and does kupdated style flushing.
+ * reschedules periodically and does kupdated style flushing.
  */
-int bdi_writeback_thread(void *data)
+void bdi_writeback_workfn(struct work_struct *work)
 {
-        struct bdi_writeback *wb = data;
+        struct bdi_writeback *wb = container_of(to_delayed_work(work),
                                                 struct bdi_writeback, dwork);
         struct backing_dev_info *bdi = wb->bdi;
         long pages_written;
 
         set_worker_desc("flush-%s", dev_name(bdi->dev));
         current->flags |= PF_SWAPWRITE;
-        set_freezable();
-        wb->last_active = jiffies;
-
-        /*
-         * Our parent may run at a different priority, just set us to normal
-         */
-        set_user_nice(current, 0);
-
-        trace_writeback_thread_start(bdi);
 
-        while (!kthread_freezable_should_stop(NULL)) {
+        if (likely(!current_is_workqueue_rescuer() ||
+                   list_empty(&bdi->bdi_list))) {
                 /*
-                 * Remove own delayed wake-up timer, since we are already awake
-                 * and we'll take care of the periodic write-back.
+                 * The normal path. Keep writing back @bdi until its
+                 * work_list is empty. Note that this path is also taken
+                 * if @bdi is shutting down even when we're running off the
+                 * rescuer as work_list needs to be drained.
                  */
-                del_timer(&wb->wakeup_timer);
-
-                pages_written = wb_do_writeback(wb, 0);
-
+                do {
+                        pages_written = wb_do_writeback(wb, 0);
+                        trace_writeback_pages_written(pages_written);
+                } while (!list_empty(&bdi->work_list));
+        } else {
+                /*
+                 * bdi_wq can't get enough workers and we're running off
+                 * the emergency worker. Don't hog it. Hopefully, 1024 is
+                 * enough for efficient IO.
+                 */
+                pages_written = writeback_inodes_wb(&bdi->wb, 1024,
                                                     WB_REASON_FORKER_THREAD);
                 trace_writeback_pages_written(pages_written);
-
-                if (pages_written)
-                        wb->last_active = jiffies;
-
-                set_current_state(TASK_INTERRUPTIBLE);
-                if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
-                        __set_current_state(TASK_RUNNING);
-                        continue;
-                }
-
-                if (wb_has_dirty_io(wb) && dirty_writeback_interval)
-                        schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-                else {
-                        /*
-                         * We have nothing to do, so can go sleep without any
-                         * timeout and save power. When a work is queued or
-                         * something is made dirty - we will be woken up.
-                         */
-                        schedule();
-                }
         }
 
-        /* Flush any work that raced with us exiting */
-        if (!list_empty(&bdi->work_list))
-                wb_do_writeback(wb, 1);
+        if (!list_empty(&bdi->work_list) ||
+            (wb_has_dirty_io(wb) && dirty_writeback_interval))
+                queue_delayed_work(bdi_wq, &wb->dwork,
                         msecs_to_jiffies(dirty_writeback_interval * 10));
 
-        trace_writeback_thread_stop(bdi);
-        return 0;
+        current->flags &= ~PF_SWAPWRITE;
 }
 
-
 /*
  * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
  * the whole world.
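The hunk above replaces the bdi_writeback_thread() kthread loop with bdi_writeback_workfn(). The sketch below, using hypothetical example_* names rather than the patch's own, shows the shape of that work-function pattern: recover the per-device state via container_of() on the delayed_work, perform the writeback, then re-arm the work with queue_delayed_work() instead of sleeping in a loop. The 5-second period is an arbitrary illustrative value, not the interval the kernel uses.

    /* Sketch of the delayed-work handler pattern; names and period are assumptions. */
    #include <linux/kernel.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    struct example_writeback {
            struct delayed_work dwork;      /* corresponds to wb->dwork in the patch */
            /* ... per-device writeback state would live here ... */
    };

    static struct workqueue_struct *example_wq;     /* assumed created elsewhere */

    static void example_writeback_workfn(struct work_struct *work)
    {
            /* The workqueue hands us a work_struct; climb back to the container. */
            struct example_writeback *ew = container_of(to_delayed_work(work),
                                                        struct example_writeback, dwork);

            /* ... flush dirty data belonging to ew's device here ... */

            /*
             * No kthread loop and no schedule_timeout(): the handler simply
             * re-queues itself for the next periodic pass.
             */
            queue_delayed_work(example_wq, &ew->dwork, msecs_to_jiffies(5000));
    }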