author	Ingo Molnar <mingo@elte.hu>	2010-07-01 02:49:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-07-01 03:31:25 -0400
commit	0a54cec0c25cc49e3b68b14c205f1f6cff13f5e1 (patch)
tree	eb4e63ee9ae1fcaf9aa53a1668e55c09516052d9 /fs/fs-writeback.c
parent	ec8c27e04f89a7575ca2c4facb99152e03d6a99c (diff)
parent	980019d74e4b2428362b36a0506519d6d9460800 (diff)
Merge branch 'linus' into core/rcu
Conflicts:
	fs/fs-writeback.c

Merge reason: Resolve the conflict

Note, i picked the version from Linus's tree, which effectively
reverts the fs-writeback.c bits of:

  b97181f: fs: remove all rcu head initializations, except on_stack initializations

As the upstream changes to this file changed this code heavily and
the first attempt to resolve the conflict resulted in a non-booting
kernel. It's safer to re-try this portion of the commit cleanly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
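For context, the b97181f change named above concerns how rcu_head structures
embedded in on-stack bdi_work items are initialized. A minimal sketch of the
two styles, assuming the 2.6.35-era RCU API (this sketch is not part of the
commit itself):

	/*
	 * Style added by b97181f: announce on-stack rcu_head structures to
	 * the debugobjects machinery (CONFIG_DEBUG_OBJECTS_RCU_HEAD), paired
	 * with a destroy call before the stack frame goes away.
	 */
	struct bdi_work work;			/* lives on the caller's stack */

	init_rcu_head_on_stack(&work.rcu_head);
	/* ... queue the work and wait for the flusher thread ... */
	destroy_rcu_head_on_stack(&work.rcu_head);

	/*
	 * Style this merge reverts to (taken from Linus's tree): one plain
	 * initializer for every bdi_work, on-stack or heap-allocated.
	 */
	INIT_RCU_HEAD(&work.rcu_head);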
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--	fs/fs-writeback.c	279
1 file changed, 109 insertions, 170 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index af92100a7411..0609607d3955 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,45 +63,16 @@ struct bdi_work {
 };
 
 enum {
-	WS_USED_B = 0,
-	WS_ONSTACK_B,
+	WS_INPROGRESS = 0,
+	WS_ONSTACK,
 };
 
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-	return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void __bdi_work_init(struct bdi_work *work,
-				   struct wb_writeback_args *args,
-				   int on_stack)
-{
-	work->args = *args;
-	work->state = WS_USED;
-	if (on_stack) {
-		work->state |= WS_ONSTACK;
-		init_rcu_head_on_stack(&work->rcu_head);
-	}
-}
-
 static inline void bdi_work_init(struct bdi_work *work,
 				 struct wb_writeback_args *args)
 {
-	__bdi_work_init(work, args, false);
-}
-
-static inline void bdi_work_init_on_stack(struct bdi_work *work,
-					  struct wb_writeback_args *args)
-{
-	__bdi_work_init(work, args, true);
-}
-
-static inline void bdi_destroy_work_on_stack(struct bdi_work *work)
-{
-	destroy_rcu_head_on_stack(&work->rcu_head);
+	INIT_RCU_HEAD(&work->rcu_head);
+	work->args = *args;
+	__set_bit(WS_INPROGRESS, &work->state);
 }
 
 /**
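A note on this hunk: the old code kept separate bit-number constants
(WS_USED_B) and shifted mask macros (WS_USED); the new flags are used directly
as bit numbers, which is what the atomic bitop helpers expect. A hedged sketch
of the idiom (illustrative, not from the patch):

	unsigned long state = 0;
	bool onstack;

	__set_bit(WS_INPROGRESS, &state);	/* bitops take a bit number, */
	onstack = test_bit(WS_ONSTACK, &state);	/* not a (1 << n) mask	     */
	clear_bit(WS_INPROGRESS, &state);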
@@ -116,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-	clear_bit(WS_USED_B, &work->state);
-	smp_mb__after_clear_bit();
-	/*
-	 * work can have disappeared at this point. bit waitq functions
-	 * should be able to tolerate this, provided bdi_sched_wait does
-	 * not dereference it's pointer argument.
-	 */
-	wake_up_bit(&work->state, WS_USED_B);
-}
-
 static void bdi_work_free(struct rcu_head *head)
 {
 	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
 
-	if (!bdi_work_on_stack(work))
-		kfree(work);
-	else
-		bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-	int onstack = bdi_work_on_stack(work);
+	clear_bit(WS_INPROGRESS, &work->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&work->state, WS_INPROGRESS);
 
-	/*
-	 * For allocated work, we can clear the done/seen bit right here.
-	 * For on-stack work, we need to postpone both the clear and free
-	 * to after the RCU grace period, since the stack could be invalidated
-	 * as soon as bdi_work_clear() has done the wakeup.
-	 */
-	if (!onstack)
-		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || onstack)
-		call_rcu(&work->rcu_head, bdi_work_free);
+	if (!test_bit(WS_ONSTACK, &work->state))
+		kfree(work);
 }
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
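The freeing path above is one half of a standard bit-waitqueue handshake; the
other half is bdi_wait_on_work_done(), two hunks below. Put side by side
(restating the patched code, for illustration):

	/* Completion side, run from the call_rcu() callback: */
	clear_bit(WS_INPROGRESS, &work->state);
	smp_mb__after_clear_bit();	/* order the clear before the wakeup */
	wake_up_bit(&work->state, WS_INPROGRESS);

	/* Waiting side, the submitter of an on-stack work item: */
	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
		    TASK_UNINTERRUPTIBLE);	/* sleeps until the bit clears */

Because the wakeup now always happens from within the RCU callback, the
stack-lifetime dance that wb_work_complete() used to perform is no longer
needed.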
@@ -168,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
 		list_del_rcu(&work->list);
 		spin_unlock(&bdi->wb_lock);
 
-		wb_work_complete(work);
+		call_rcu(&work->rcu_head, bdi_work_free);
 	}
 }
 
@@ -206,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
  * Used for on-stack allocated work items. The caller needs to wait until
  * the wb threads have acked the work before it's safe to continue.
  */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
+static void bdi_wait_on_work_done(struct bdi_work *work)
 {
-	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
 		    TASK_UNINTERRUPTIBLE);
 }
 
@@ -234,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 }
 
 /**
- * bdi_sync_writeback - start and wait for writeback
- * @bdi: the backing device to write from
+ * bdi_queue_work_onstack - start and wait for writeback
  * @sb: write inodes from this super_block
  *
  * Description:
- *   This does WB_SYNC_ALL data integrity writeback and waits for the
- *   IO to complete. Callers must hold the sb s_umount semaphore for
+ *   This function initiates writeback and waits for the operation to
+ *   complete. Callers must hold the sb s_umount semaphore for
  *   reading, to avoid having the super disappear before we are done.
  */
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
-			       struct super_block *sb)
+static void bdi_queue_work_onstack(struct wb_writeback_args *args)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_pages	= LONG_MAX,
-		.range_cyclic	= 0,
-	};
 	struct bdi_work work;
 
-	bdi_work_init_on_stack(&work, &args);
+	bdi_work_init(&work, args);
+	__set_bit(WS_ONSTACK, &work.state);
 
-	bdi_queue_work(bdi, &work);
-	bdi_wait_on_work_clear(&work);
-	bdi_destroy_work_on_stack(&work);
+	bdi_queue_work(args->sb->s_bdi, &work);
+	bdi_wait_on_work_done(&work);
 }
 
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -273,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
 	struct wb_writeback_args args = {
-		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
 
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
+	bdi_alloc_queue_work(bdi, &args);
+}
 
+/**
+ * bdi_start_background_writeback - start background writeback
+ * @bdi: the backing device to write from
+ *
+ * Description:
+ *   This does WB_SYNC_NONE background writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ */
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= LONG_MAX,
+		.for_background = 1,
+		.range_cyclic	= 1,
+	};
 	bdi_alloc_queue_work(bdi, &args);
 }
 
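Callers change accordingly: the old interface overloaded nr_pages == 0 to mean
background writeback, while the new pair of functions makes the intent
explicit. A hedged before/after sketch (the actual call sites, e.g. in
balance_dirty_pages(), are outside this diff):

	/* before: magic nr_pages == 0 requested background writeback */
	bdi_start_writeback(bdi, NULL, 0);

	/* after: two explicit entry points, and no super_block argument */
	bdi_start_background_writeback(bdi);	/* run until below bg threshold */
	bdi_start_writeback(bdi, nr_pages);	/* write out nr_pages, async    */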
@@ -582,48 +526,30 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-	up_read(&sb->s_umount);
-	put_super(sb);
-}
-
-enum sb_pin_state {
-	SB_PINNED,
-	SB_NOT_PINNED,
-	SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-					      struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-	/*
-	 * Caller must already hold the ref for this
-	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return SB_NOT_PINNED;
-	}
 	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
 	sb->s_count++;
+	spin_unlock(&sb_lock);
+
 	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root) {
-			spin_unlock(&sb_lock);
-			return SB_PINNED;
-		}
-		/*
-		 * umounted, drop rwsem again and fall through to failure
-		 */
+		if (sb->s_root)
+			return true;
 		up_read(&sb->s_umount);
 	}
-	sb->s_count--;
-	spin_unlock(&sb_lock);
-	return SB_PIN_FAILED;
+
+	put_super(sb);
+	return false;
 }
 
 /*
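With the WB_SYNC_ALL case moved out to the caller (next hunk), the tristate
sb_pin_state collapses to a bool, and the dedicated unpin helper is replaced
by the generic drop_super(). Usage now looks like this (restating the next
hunk, for illustration):

	if (!pin_sb_for_writeback(sb)) {
		requeue_io(inode);	/* sb is dead or mid-umount: skip it */
		continue;
	}
	ret = writeback_sb_inodes(sb, wb, wbc);
	drop_super(sb);			/* up_read(&sb->s_umount) + put_super() */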
@@ -702,24 +628,31 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		struct inode *inode = list_entry(wb->b_io.prev,
 						 struct inode, i_list);
 		struct super_block *sb = inode->i_sb;
-		enum sb_pin_state state;
 
-		if (wbc->sb && sb != wbc->sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		state = pin_sb_for_writeback(wbc, sb);
+		if (wbc->sb) {
+			/*
+			 * We are requested to write out inodes for a specific
+			 * superblock. This means we already have s_umount
+			 * taken by the caller which also waits for us to
+			 * complete the writeout.
+			 */
+			if (sb != wbc->sb) {
+				redirty_tail(inode);
+				continue;
+			}
 
-		if (state == SB_PIN_FAILED) {
-			requeue_io(inode);
-			continue;
+			WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+			ret = writeback_sb_inodes(sb, wb, wbc);
+		} else {
+			if (!pin_sb_for_writeback(sb)) {
+				requeue_io(inode);
+				continue;
+			}
+			ret = writeback_sb_inodes(sb, wb, wbc);
+			drop_super(sb);
 		}
-		ret = writeback_sb_inodes(sb, wb, wbc);
 
-		if (state == SB_PINNED)
-			unpin_sb_for_writeback(sb);
 		if (ret)
 			break;
 	}
@@ -932,7 +865,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * If this isn't a data integrity operation, just notify
 		 * that we have seen this work and we are now starting it.
 		 */
-		if (args.sync_mode == WB_SYNC_NONE)
+		if (!test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 
 		wrote += wb_writeback(wb, &args);
@@ -941,7 +874,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * This is a data integrity writeback, so only do the
 		 * notification when we have completed the work.
 		 */
-		if (args.sync_mode == WB_SYNC_ALL)
+		if (test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 	}
 
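These two hunks change the condition, not the ordering: work with no waiter is
acknowledged before the writeback runs, work with an on-stack waiter only
afterwards. Keying off WS_ONSTACK instead of the sync mode is equivalent here
because only on-stack (data integrity) work has a waiter. The resulting shape,
as an illustrative sketch:

	if (!test_bit(WS_ONSTACK, &work->state))
		wb_clear_pending(wb, work);	/* no waiter: ack early */

	wrote += wb_writeback(wb, &args);

	if (test_bit(WS_ONSTACK, &work->state))
		wb_clear_pending(wb, work);	/* waiter's stack: ack when done */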
@@ -999,42 +932,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
  */
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
 {
+	struct backing_dev_info *bdi;
 	struct wb_writeback_args args = {
-		.sb		= sb,
-		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 	};
-	struct backing_dev_info *bdi;
 
-	rcu_read_lock();
+	if (nr_pages) {
+		args.nr_pages = nr_pages;
+	} else {
+		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS);
+	}
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-
 		bdi_alloc_queue_work(bdi, &args);
 	}
-
 	rcu_read_unlock();
 }
 
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	bdi_writeback_all(NULL, nr_pages);
-}
-
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
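wakeup_flusher_threads() absorbs the old bdi_writeback_all() body, since the
super_block argument was only ever NULL on this path. Illustrative calls, per
the nr_pages convention in the comment:

	wakeup_flusher_threads(1024);	/* queue ~1024 pages of writeback on
					   every bdi that has dirty IO */
	wakeup_flusher_threads(0);	/* "the whole world": all dirty plus
					   unstable-NFS pages */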
@@ -1239,12 +1162,17 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	long nr_to_write;
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_NONE,
+	};
 
-	nr_to_write = nr_dirty + nr_unstable +
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	args.nr_pages = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+	bdi_queue_work_onstack(&args);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1258,7 +1186,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
 int writeback_inodes_sb_if_idle(struct super_block *sb)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
+		down_read(&sb->s_umount);
 		writeback_inodes_sb(sb);
+		up_read(&sb->s_umount);
 		return 1;
 	} else
 		return 0;
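Since writeback_inodes_sb() now asserts that s_umount is held (previous hunk),
the two entry points split the locking responsibility. A hedged sketch of the
resulting contracts:

	/* direct call: the caller must hold s_umount for reading */
	down_read(&sb->s_umount);
	writeback_inodes_sb(sb);
	up_read(&sb->s_umount);

	/* convenience wrapper: takes and releases s_umount itself */
	writeback_inodes_sb_if_idle(sb);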
@@ -1274,7 +1204,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	bdi_sync_writeback(sb->s_bdi, sb);
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_ALL,
+		.nr_pages	= LONG_MAX,
+		.range_cyclic	= 0,
+	};
+
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	bdi_queue_work_onstack(&args);
 	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
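sync_inodes_sb() inlines the WB_SYNC_ALL argument block that
bdi_sync_writeback() used to build, and the old kernel-doc requirement
("Callers must hold the sb s_umount semaphore") becomes an explicit WARN_ON.
A sketch of the caller contract (illustrative; the sync(2) path, for example,
takes s_umount before calling in):

	down_read(&sb->s_umount);
	sync_inodes_sb(sb);	/* WB_SYNC_ALL: queue on-stack work, wait for
				   the flusher, then wait on each inode */
	up_read(&sb->s_umount);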