aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-04-20 15:50:09 -0400
committerChris Mason <chris.mason@oracle.com>2009-04-20 15:53:08 -0400
commitffbd517d5a8c8e93ddd11046434fb029f3df73aa (patch)
tree9ec7b7f2efbb8950ca2654235a899398e82a68b5 /fs/btrfs/volumes.c
parent0882e8dd3aad33eca41696d463bb896e6c8817eb (diff)
Btrfs: use WRITE_SYNC for synchronous writes
Part of reducing fsync/O_SYNC/O_DIRECT latencies is using WRITE_SYNC for writes we plan on waiting on in the near future. This patch mirrors recent changes in other filesystems and the generic code to use WRITE_SYNC when WB_SYNC_ALL is passed and to use WRITE_SYNC for other latency critical writes. Btrfs uses async worker threads for checksumming before the write is done, and then again to actually submit the bios. The bio submission code just runs a per-device list of bios that need to be sent down the pipe. This list is split into low priority and high priority lists so the WRITE_SYNC IO happens first. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c124
1 files changed, 97 insertions, 27 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..e53835b88594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -2497,7 +2562,7 @@ again:
2497 max_errors = 1; 2562 max_errors = 1;
2498 } 2563 }
2499 } 2564 }
2500 if (multi_ret && rw == WRITE && 2565 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2566 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2567 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2568 free_extent_map(em);
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2827 int rw, struct bio *bio)
2763{ 2828{
2764 int should_queue = 1; 2829 int should_queue = 1;
2830 struct btrfs_pending_bios *pending_bios;
2765 2831
2766 /* don't bother with additional async steps for reads, right now */ 2832 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2833 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2849 bio->bi_rw |= rw;
2784 2850
2785 spin_lock(&device->io_lock); 2851 spin_lock(&device->io_lock);
2852 if (bio_sync(bio))
2853 pending_bios = &device->pending_sync_bios;
2854 else
2855 pending_bios = &device->pending_bios;
2786 2856
2787 if (device->pending_bio_tail) 2857 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2858 pending_bios->tail->bi_next = bio;
2789 2859
2790 device->pending_bio_tail = bio; 2860 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2861 if (!pending_bios->head)
2792 device->pending_bios = bio; 2862 pending_bios->head = bio;
2793 if (device->running_pending) 2863 if (device->running_pending)
2794 should_queue = 0; 2864 should_queue = 0;
2795 2865