about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/disk-io.c 4
-rw-r--r-- fs/btrfs/extent_io.c 44
-rw-r--r-- fs/btrfs/ordered-data.c 2
-rw-r--r-- fs/btrfs/volumes.c 124
-rw-r--r-- fs/btrfs/volumes.h 13
5 files changed, 141 insertions, 46 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..fec18b43c2c3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2095,10 +2095,10 @@ static int write_dev_supers(struct btrfs_device *device,
2095 device->barriers = 0; 2095 device->barriers = 0;
2096 get_bh(bh); 2096 get_bh(bh);
2097 lock_buffer(bh); 2097 lock_buffer(bh);
2098 ret = submit_bh(WRITE, bh); 2098 ret = submit_bh(WRITE_SYNC, bh);
2099 } 2099 }
2100 } else { 2100 } else {
2101 ret = submit_bh(WRITE, bh); 2101 ret = submit_bh(WRITE_SYNC, bh);
2102 } 2102 }
2103 2103
2104 if (!ret && wait) { 2104 if (!ret && wait) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..483b6727aaaf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -50,7 +50,10 @@ struct extent_page_data {
50 /* tells writepage not to lock the state bits for this range 50 /* tells writepage not to lock the state bits for this range
51 * it still does the unlocking 51 * it still does the unlocking
52 */ 52 */
53 int extent_locked; 53 unsigned int extent_locked:1;
54
55 /* tells the submit_bio code to use a WRITE_SYNC */
56 unsigned int sync_io:1;
54}; 57};
55 58
56int __init extent_io_init(void) 59int __init extent_io_init(void)
@@ -2136,8 +2139,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2136 u64 delalloc_end; 2139 u64 delalloc_end;
2137 int page_started; 2140 int page_started;
2138 int compressed; 2141 int compressed;
2142 int write_flags;
2139 unsigned long nr_written = 0; 2143 unsigned long nr_written = 0;
2140 2144
2145 if (wbc->sync_mode == WB_SYNC_ALL)
2146 write_flags = WRITE_SYNC_PLUG;
2147 else
2148 write_flags = WRITE;
2149
2141 WARN_ON(!PageLocked(page)); 2150 WARN_ON(!PageLocked(page));
2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2151 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2143 if (page->index > end_index || 2152 if (page->index > end_index ||
@@ -2314,9 +2323,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2314 (unsigned long long)end); 2323 (unsigned long long)end);
2315 } 2324 }
2316 2325
2317 ret = submit_extent_page(WRITE, tree, page, sector, 2326 ret = submit_extent_page(write_flags, tree, page,
2318 iosize, pg_offset, bdev, 2327 sector, iosize, pg_offset,
2319 &epd->bio, max_nr, 2328 bdev, &epd->bio, max_nr,
2320 end_bio_extent_writepage, 2329 end_bio_extent_writepage,
2321 0, 0, 0); 2330 0, 0, 0);
2322 if (ret) 2331 if (ret)
@@ -2460,15 +2469,23 @@ retry:
2460 return ret; 2469 return ret;
2461} 2470}
2462 2471
2463static noinline void flush_write_bio(void *data) 2472static void flush_epd_write_bio(struct extent_page_data *epd)
2464{ 2473{
2465 struct extent_page_data *epd = data;
2466 if (epd->bio) { 2474 if (epd->bio) {
2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2475 if (epd->sync_io)
2476 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2477 else
2478 submit_one_bio(WRITE, epd->bio, 0, 0);
2468 epd->bio = NULL; 2479 epd->bio = NULL;
2469 } 2480 }
2470} 2481}
2471 2482
2483static noinline void flush_write_bio(void *data)
2484{
2485 struct extent_page_data *epd = data;
2486 flush_epd_write_bio(epd);
2487}
2488
2472int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2489int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2473 get_extent_t *get_extent, 2490 get_extent_t *get_extent,
2474 struct writeback_control *wbc) 2491 struct writeback_control *wbc)
@@ -2480,6 +2497,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2480 .tree = tree, 2497 .tree = tree,
2481 .get_extent = get_extent, 2498 .get_extent = get_extent,
2482 .extent_locked = 0, 2499 .extent_locked = 0,
2500 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2483 }; 2501 };
2484 struct writeback_control wbc_writepages = { 2502 struct writeback_control wbc_writepages = {
2485 .bdi = wbc->bdi, 2503 .bdi = wbc->bdi,
@@ -2490,13 +2508,11 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2490 .range_end = (loff_t)-1, 2508 .range_end = (loff_t)-1,
2491 }; 2509 };
2492 2510
2493
2494 ret = __extent_writepage(page, wbc, &epd); 2511 ret = __extent_writepage(page, wbc, &epd);
2495 2512
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2513 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2497 __extent_writepage, &epd, flush_write_bio); 2514 __extent_writepage, &epd, flush_write_bio);
2498 if (epd.bio) 2515 flush_epd_write_bio(&epd);
2499 submit_one_bio(WRITE, epd.bio, 0, 0);
2500 return ret; 2516 return ret;
2501} 2517}
2502 2518
@@ -2515,6 +2531,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2515 .tree = tree, 2531 .tree = tree,
2516 .get_extent = get_extent, 2532 .get_extent = get_extent,
2517 .extent_locked = 1, 2533 .extent_locked = 1,
2534 .sync_io = mode == WB_SYNC_ALL,
2518 }; 2535 };
2519 struct writeback_control wbc_writepages = { 2536 struct writeback_control wbc_writepages = {
2520 .bdi = inode->i_mapping->backing_dev_info, 2537 .bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2557,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2540 start += PAGE_CACHE_SIZE; 2557 start += PAGE_CACHE_SIZE;
2541 } 2558 }
2542 2559
2543 if (epd.bio) 2560 flush_epd_write_bio(&epd);
2544 submit_one_bio(WRITE, epd.bio, 0, 0);
2545 return ret; 2561 return ret;
2546} 2562}
2547 2563
@@ -2556,13 +2572,13 @@ int extent_writepages(struct extent_io_tree *tree,
2556 .tree = tree, 2572 .tree = tree,
2557 .get_extent = get_extent, 2573 .get_extent = get_extent,
2558 .extent_locked = 0, 2574 .extent_locked = 0,
2575 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2559 }; 2576 };
2560 2577
2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2578 ret = extent_write_cache_pages(tree, mapping, wbc,
2562 __extent_writepage, &epd, 2579 __extent_writepage, &epd,
2563 flush_write_bio); 2580 flush_write_bio);
2564 if (epd.bio) 2581 flush_epd_write_bio(&epd);
2565 submit_one_bio(WRITE, epd.bio, 0, 0);
2566 return ret; 2582 return ret;
2567} 2583}
2568 2584
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
489 /* start IO across the range first to instantiate any delalloc 489 /* start IO across the range first to instantiate any delalloc
490 * extents 490 * extents
491 */ 491 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); 492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
493 493
494 /* The compression code will leave pages locked but return from 494 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 495 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..e53835b88594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -2497,7 +2562,7 @@ again:
2497 max_errors = 1; 2562 max_errors = 1;
2498 } 2563 }
2499 } 2564 }
2500 if (multi_ret && rw == WRITE && 2565 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2566 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2567 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2568 free_extent_map(em);
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2827 int rw, struct bio *bio)
2763{ 2828{
2764 int should_queue = 1; 2829 int should_queue = 1;
2830 struct btrfs_pending_bios *pending_bios;
2765 2831
2766 /* don't bother with additional async steps for reads, right now */ 2832 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2833 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2849 bio->bi_rw |= rw;
2784 2850
2785 spin_lock(&device->io_lock); 2851 spin_lock(&device->io_lock);
2852 if (bio_sync(bio))
2853 pending_bios = &device->pending_sync_bios;
2854 else
2855 pending_bios = &device->pending_bios;
2786 2856
2787 if (device->pending_bio_tail) 2857 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2858 pending_bios->tail->bi_next = bio;
2789 2859
2790 device->pending_bio_tail = bio; 2860 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2861 if (!pending_bios->head)
2792 device->pending_bios = bio; 2862 pending_bios->head = bio;
2793 if (device->running_pending) 2863 if (device->running_pending)
2794 should_queue = 0; 2864 should_queue = 0;
2795 2865
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5836327ba5dd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
23#include "async-thread.h" 23#include "async-thread.h"
24 24
25struct buffer_head; 25struct buffer_head;
26struct btrfs_pending_bios {
27 struct bio *head;
28 struct bio *tail;
29};
30
26struct btrfs_device { 31struct btrfs_device {
27 struct list_head dev_list; 32 struct list_head dev_list;
28 struct list_head dev_alloc_list; 33 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices; 34 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root; 35 struct btrfs_root *dev_root;
31 struct bio *pending_bios; 36
32 struct bio *pending_bio_tail; 37 /* regular prio bios */
38 struct btrfs_pending_bios pending_bios;
39 /* WRITE_SYNC bios */
40 struct btrfs_pending_bios pending_sync_bios;
41
33 int running_pending; 42 int running_pending;
34 u64 generation; 43 u64 generation;
35 44