author      Christoph Hellwig <hch@lst.de>       2010-06-08 12:14:34 -0400
committer   Jens Axboe <jaxboe@fusionio.com>     2010-06-11 06:58:07 -0400
commit      7f0e7bed936a0c422641a046551829a01341dd80 (patch)
tree        6b5fd4a8e11253819c883e409b612fc26c57235e /fs
parent      7908a9e5fc3f9a679b1777ed231a03636c068446 (diff)
writeback: fix writeback completion notifications
The code dealing with bdi_work->state and the completion of a bdi_work is
currently a major mess. This patch makes sure we use a single set of
flags to deal with it, and that we use it consistently, which means:
- always notify about completion from the rcu callback. We only ever
  wait for it from on-stack callers, so this simplification does not
  cause even a theoretical slowdown currently. It also makes sure we
  don't miss the notification if we ever add other callers that wait
  for it.
- make the earlier completion notification depend on the on-stack
  allocation, not the sync mode. This will be necessary if we
  introduce new callers that want to do WB_SYNC_NONE writeback from
  an on-stack work item.
Also rename bdi_wait_on_work_clear to bdi_wait_on_work_done, and inline
a few small functions into their only callers to make the code easier
to follow.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
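
[Editorial note] For readers less familiar with the bit-waitqueue idiom this
patch converges on, a minimal user-space analogue of the completion handshake
may help: the completer clears an "in progress" flag and wakes all waiters,
while a synchronous caller blocks until the flag is clear. The sketch below
uses pthreads in place of the kernel's wait_on_bit()/wake_up_bit() machinery;
all names are illustrative, not kernel API.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct work {
            pthread_mutex_t lock;
            pthread_cond_t done;
            bool in_progress;       /* analogue of WS_INPROGRESS */
    };

    static void *complete_work(void *arg)
    {
            struct work *w = arg;

            /* ... perform the writeback ... */
            pthread_mutex_lock(&w->lock);
            w->in_progress = false;           /* analogue of clear_bit() */
            pthread_cond_broadcast(&w->done); /* analogue of wake_up_bit() */
            pthread_mutex_unlock(&w->lock);
            return NULL;
    }

    static void wait_on_work_done(struct work *w)
    {
            /* analogue of wait_on_bit(&state, WS_INPROGRESS, ...) */
            pthread_mutex_lock(&w->lock);
            while (w->in_progress)
                    pthread_cond_wait(&w->done, &w->lock);
            pthread_mutex_unlock(&w->lock);
    }

    int main(void)
    {
            struct work w = {
                    .lock = PTHREAD_MUTEX_INITIALIZER,
                    .done = PTHREAD_COND_INITIALIZER,
                    .in_progress = true,
            };
            pthread_t t;

            pthread_create(&t, NULL, complete_work, &w);
            wait_on_work_done(&w);  /* blocks until the flag is cleared */
            pthread_join(t, NULL);
            printf("work done\n");
            return 0;
    }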
Diffstat (limited to 'fs')
 fs/fs-writeback.c | 65 +++++++++++++++--------------------------------------------------------
 1 file changed, 15 insertions(+), 50 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..dbf6f108e868 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,24 +63,16 @@ struct bdi_work {
 };
 
 enum {
-        WS_USED_B = 0,
-        WS_ONSTACK_B,
+        WS_INPROGRESS = 0,
+        WS_ONSTACK,
 };
 
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-        return test_bit(WS_ONSTACK_B, &work->state);
-}
-
 static inline void bdi_work_init(struct bdi_work *work,
                                  struct wb_writeback_args *args)
 {
         INIT_RCU_HEAD(&work->rcu_head);
         work->args = *args;
-        work->state = WS_USED;
+        __set_bit(WS_INPROGRESS, &work->state);
 }
 
 /**
@@ -95,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
         return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-        clear_bit(WS_USED_B, &work->state);
-        smp_mb__after_clear_bit();
-        /*
-         * work can have disappeared at this point. bit waitq functions
-         * should be able to tolerate this, provided bdi_sched_wait does
-         * not dereference it's pointer argument.
-         */
-        wake_up_bit(&work->state, WS_USED_B);
-}
-
 static void bdi_work_free(struct rcu_head *head)
 {
         struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
 
-        if (!bdi_work_on_stack(work))
-                kfree(work);
-        else
-                bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-        const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-        int onstack = bdi_work_on_stack(work);
-
-        /*
-         * For allocated work, we can clear the done/seen bit right here.
-         * For on-stack work, we need to postpone both the clear and free
-         * to after the RCU grace period, since the stack could be invalidated
-         * as soon as bdi_work_clear() has done the wakeup.
-         */
-        if (!onstack)
-                bdi_work_clear(work);
-        if (sync_mode == WB_SYNC_NONE || onstack)
-                call_rcu(&work->rcu_head, bdi_work_free);
+        clear_bit(WS_INPROGRESS, &work->state);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&work->state, WS_INPROGRESS);
+
+        if (!test_bit(WS_ONSTACK, &work->state))
+                kfree(work);
 }
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
@@ -147,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
                 list_del_rcu(&work->list);
                 spin_unlock(&bdi->wb_lock);
 
-                wb_work_complete(work);
+                call_rcu(&work->rcu_head, bdi_work_free);
         }
 }
 
@@ -185,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
  * Used for on-stack allocated work items. The caller needs to wait until
  * the wb threads have acked the work before it's safe to continue.
  */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
+static void bdi_wait_on_work_done(struct bdi_work *work)
 {
-        wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+        wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
                     TASK_UNINTERRUPTIBLE);
 }
 
@@ -234,10 +199,10 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
         struct bdi_work work;
 
         bdi_work_init(&work, &args);
-        work.state |= WS_ONSTACK;
+        __set_bit(WS_ONSTACK, &work.state);
 
         bdi_queue_work(bdi, &work);
-        bdi_wait_on_work_clear(&work);
+        bdi_wait_on_work_done(&work);
 }
 
 /**
@@ -911,7 +876,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                  * If this isn't a data integrity operation, just notify
                  * that we have seen this work and we are now starting it.
                  */
-                if (args.sync_mode == WB_SYNC_NONE)
+                if (!test_bit(WS_ONSTACK, &work->state))
                         wb_clear_pending(wb, work);
 
                 wrote += wb_writeback(wb, &args);
@@ -920,7 +885,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                  * This is a data integrity writeback, so only do the
                  * notification when we have completed the work.
                  */
-                if (args.sync_mode == WB_SYNC_ALL)
+                if (test_bit(WS_ONSTACK, &work->state))
                         wb_clear_pending(wb, work);
         }
 
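
[Editorial note] Taken together, the completion path after this patch reads as
follows. The function body is assembled from the hunks above; the comments are
editorial, summarizing why the notification now always comes from the RCU
callback and why on-stack items must not be freed here.

    static void bdi_work_free(struct rcu_head *head)
    {
            struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);

            /*
             * Runs after the RCU grace period, so no writeback thread can
             * still hold a reference to the work item.  Clear WS_INPROGRESS
             * and wake any on-stack caller sleeping in bdi_wait_on_work_done().
             */
            clear_bit(WS_INPROGRESS, &work->state);
            smp_mb__after_clear_bit();
            wake_up_bit(&work->state, WS_INPROGRESS);

            /* On-stack work items are owned by their caller; only free heap ones. */
            if (!test_bit(WS_ONSTACK, &work->state))
                    kfree(work);
    }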