 fs/btrfs/disk-io.c      |   4 +-
 fs/btrfs/extent_io.c    |  44 +++++++----
 fs/btrfs/ordered-data.c |   2 +-
 fs/btrfs/volumes.c      | 124 ++++++++++++++++++++--------
 fs/btrfs/volumes.h      |  13 +++-
 5 files changed, 141 insertions(+), 46 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..fec18b43c2c3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2095,10 +2095,10 @@ static int write_dev_supers(struct btrfs_device *device,
 				device->barriers = 0;
 				get_bh(bh);
 				lock_buffer(bh);
-				ret = submit_bh(WRITE, bh);
+				ret = submit_bh(WRITE_SYNC, bh);
 			}
 		} else {
-			ret = submit_bh(WRITE, bh);
+			ret = submit_bh(WRITE_SYNC, bh);
 		}
 
 		if (!ret && wait) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..483b6727aaaf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -50,7 +50,10 @@ struct extent_page_data {
 	/* tells writepage not to lock the state bits for this range
 	 * it still does the unlocking
 	 */
-	int extent_locked;
+	unsigned int extent_locked:1;
+
+	/* tells the submit_bio code to use a WRITE_SYNC */
+	unsigned int sync_io:1;
 };
 
 int __init extent_io_init(void)
@@ -2136,8 +2139,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 delalloc_end;
 	int page_started;
 	int compressed;
+	int write_flags;
 	unsigned long nr_written = 0;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC_PLUG;
+	else
+		write_flags = WRITE;
+
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2314,9 +2323,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			       (unsigned long long)end);
 		}
 
-		ret = submit_extent_page(WRITE, tree, page, sector,
-					 iosize, pg_offset, bdev,
-					 &epd->bio, max_nr,
+		ret = submit_extent_page(write_flags, tree, page,
+					 sector, iosize, pg_offset,
+					 bdev, &epd->bio, max_nr,
 					 end_bio_extent_writepage,
 					 0, 0, 0);
 		if (ret)
@@ -2460,15 +2469,23 @@ retry:
 	return ret;
 }
 
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
 {
-	struct extent_page_data *epd = data;
 	if (epd->bio) {
-		submit_one_bio(WRITE, epd->bio, 0, 0);
+		if (epd->sync_io)
+			submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+		else
+			submit_one_bio(WRITE, epd->bio, 0, 0);
 		epd->bio = NULL;
 	}
 }
 
+static noinline void flush_write_bio(void *data)
+{
+	struct extent_page_data *epd = data;
+	flush_epd_write_bio(epd);
+}
+
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 			   get_extent_t *get_extent,
 			   struct writeback_control *wbc)
@@ -2480,6 +2497,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = wbc->bdi,
@@ -2490,13 +2508,11 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.range_end = (loff_t)-1,
 	};
 
-
 	ret = __extent_writepage(page, wbc, &epd);
 
 	extent_write_cache_pages(tree, mapping, &wbc_writepages,
 				 __extent_writepage, &epd, flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2515,6 +2531,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 1,
+		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2557,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		start += PAGE_CACHE_SIZE;
 	}
 
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2556,13 +2572,13 @@ int extent_writepages(struct extent_io_tree *tree,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,
 				       __extent_writepage, &epd,
 				       flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
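The net effect of the extent_io.c hunks is one bit of plumbing: every caller records whether the writeback is WB_SYNC_ALL in epd.sync_io, and flush_epd_write_bio() turns that bit into a WRITE_SYNC submission when the batched bio is finally flushed. A rough userspace sketch of that decision follows; the enum values and the epd_model type here are stand-ins for the kernel definitions, not the real ones:

#include <stdio.h>

/* stand-ins for the kernel's writeback modes and bio write flags */
enum sync_mode  { WB_SYNC_NONE, WB_SYNC_ALL };
enum write_flag { WRITE_PLAIN, WRITE_SYNC_FLAG };

struct epd_model {
        unsigned int extent_locked:1;
        unsigned int sync_io:1;         /* set when sync_mode == WB_SYNC_ALL */
};

/* mirrors flush_epd_write_bio(): the bit picks the submit flag */
static enum write_flag flush_flag(const struct epd_model *epd)
{
        return epd->sync_io ? WRITE_SYNC_FLAG : WRITE_PLAIN;
}

int main(void)
{
        enum sync_mode mode = WB_SYNC_ALL;      /* e.g. an fsync-driven flush */
        struct epd_model epd = { .sync_io = (mode == WB_SYNC_ALL) };

        printf("flush uses %s\n",
               flush_flag(&epd) == WRITE_SYNC_FLAG ? "WRITE_SYNC" : "WRITE");
        return 0;
}

Note that extent_locked is narrowed to a one-bit field in the same hunk, so the new sync_io bit packs into the same word instead of growing the struct.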
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
 	/* start IO across the range first to instantiate any delalloc
 	 * extents
 	 */
-	btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+	btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
 
 	/* The compression code will leave pages locked but return from
 	 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..e53835b88594 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
 	return NULL;
 }
 
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+			 struct bio *head, struct bio *tail)
+{
+
+	struct bio *old_head;
+
+	old_head = pending_bios->head;
+	pending_bios->head = head;
+	if (pending_bios->tail)
+		tail->bi_next = old_head;
+	else
+		pending_bios->tail = tail;
+}
+
 /*
  * we try to collect pending bios for a device so we don't get a large
  * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	struct bio *pending;
 	struct backing_dev_info *bdi;
 	struct btrfs_fs_info *fs_info;
+	struct btrfs_pending_bios *pending_bios;
 	struct bio *tail;
 	struct bio *cur;
 	int again = 0;
-	unsigned long num_run = 0;
+	unsigned long num_run;
+	unsigned long num_sync_run;
 	unsigned long limit;
 	unsigned long last_waited = 0;
 
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
+	/* we want to make sure that every time we switch from the sync
+	 * list to the normal list, we unplug
+	 */
+	num_sync_run = 0;
+
 loop:
 	spin_lock(&device->io_lock);
+	num_run = 0;
 
 loop_lock:
+
 	/* take all the bios off the list at once and process them
 	 * later on (without the lock held). But, remember the
 	 * tail and other pointers so the bios can be properly reinserted
 	 * into the list if we hit congestion
 	 */
-	pending = device->pending_bios;
-	tail = device->pending_bio_tail;
+	if (device->pending_sync_bios.head)
+		pending_bios = &device->pending_sync_bios;
+	else
+		pending_bios = &device->pending_bios;
+
+	pending = pending_bios->head;
+	tail = pending_bios->tail;
 	WARN_ON(pending && !tail);
-	device->pending_bios = NULL;
-	device->pending_bio_tail = NULL;
 
 	/*
 	 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
 	 * device->running_pending is used to synchronize with the
 	 * schedule_bio code.
 	 */
-	if (pending) {
-		again = 1;
-		device->running_pending = 1;
-	} else {
+	if (device->pending_sync_bios.head == NULL &&
+	    device->pending_bios.head == NULL) {
 		again = 0;
 		device->running_pending = 0;
+	} else {
+		again = 1;
+		device->running_pending = 1;
 	}
+
+	pending_bios->head = NULL;
+	pending_bios->tail = NULL;
+
 	spin_unlock(&device->io_lock);
 
+	/*
+	 * if we're doing the regular priority list, make sure we unplug
+	 * for any high prio bios we've sent down
+	 */
+	if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+		num_sync_run = 0;
+		blk_run_backing_dev(bdi, NULL);
+	}
+
 	while (pending) {
+
+		rmb();
+		if (pending_bios != &device->pending_sync_bios &&
+		    device->pending_sync_bios.head &&
+		    num_run > 16) {
+			cond_resched();
+			spin_lock(&device->io_lock);
+			requeue_list(pending_bios, pending, tail);
+			goto loop_lock;
+		}
+
 		cur = pending;
 		pending = pending->bi_next;
 		cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
 		wake_up(&fs_info->async_submit_wait);
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-		bio_get(cur);
 		submit_bio(cur->bi_rw, cur);
-		bio_put(cur);
 		num_run++;
+		if (bio_sync(cur))
+			num_sync_run++;
+
+		if (need_resched()) {
+			if (num_sync_run) {
+				blk_run_backing_dev(bdi, NULL);
+				num_sync_run = 0;
+			}
+			cond_resched();
+		}
 
 		/*
 		 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
 		 */
 		if (pending && bdi_write_congested(bdi) && num_run > 16 &&
 		    fs_info->fs_devices->open_devices > 1) {
-			struct bio *old_head;
 			struct io_context *ioc;
 
 			ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
 			 * against it before looping
 			 */
 			last_waited = ioc->last_waited;
+			if (need_resched()) {
+				if (num_sync_run) {
+					blk_run_backing_dev(bdi, NULL);
+					num_sync_run = 0;
+				}
+				cond_resched();
+			}
 			continue;
 		}
 		spin_lock(&device->io_lock);
-
-		old_head = device->pending_bios;
-		device->pending_bios = pending;
-		if (device->pending_bio_tail)
-			tail->bi_next = old_head;
-		else
-			device->pending_bio_tail = tail;
-
+		requeue_list(pending_bios, pending, tail);
 		device->running_pending = 1;
 
 		spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
 			goto done;
 		}
 	}
+
+	if (num_sync_run) {
+		num_sync_run = 0;
+		blk_run_backing_dev(bdi, NULL);
+	}
+
+	cond_resched();
 	if (again)
 		goto loop;
 
 	spin_lock(&device->io_lock);
-	if (device->pending_bios)
+	if (device->pending_bios.head || device->pending_sync_bios.head)
 		goto loop_lock;
 	spin_unlock(&device->io_lock);
 
@@ -2497,7 +2562,7 @@ again:
 			max_errors = 1;
 		}
 	}
-	if (multi_ret && rw == WRITE &&
+	if (multi_ret && (rw & (1 << BIO_RW)) &&
 	    stripes_allocated < stripes_required) {
 		stripes_allocated = map->num_stripes;
 		free_extent_map(em);
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 			int rw, struct bio *bio)
 {
 	int should_queue = 1;
+	struct btrfs_pending_bios *pending_bios;
 
 	/* don't bother with additional async steps for reads, right now */
 	if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	bio->bi_rw |= rw;
 
 	spin_lock(&device->io_lock);
+	if (bio_sync(bio))
+		pending_bios = &device->pending_sync_bios;
+	else
+		pending_bios = &device->pending_bios;
 
-	if (device->pending_bio_tail)
-		device->pending_bio_tail->bi_next = bio;
+	if (pending_bios->tail)
+		pending_bios->tail->bi_next = bio;
 
-	device->pending_bio_tail = bio;
-	if (!device->pending_bios)
-		device->pending_bios = bio;
+	pending_bios->tail = bio;
+	if (!pending_bios->head)
+		pending_bios->head = bio;
 	if (device->running_pending)
 		should_queue = 0;
 
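Taken together, the volumes.c changes give each device two FIFOs and a strict drain policy: run_scheduled_bios() always empties the WRITE_SYNC list first, and while it works through the regular list it checks, after every 16 bios, whether new sync bios have arrived, requeueing the remainder via requeue_list() if so. Below is a single-threaded userspace model of that policy. It is a sketch only: toy_bio and toy_pending stand in for struct bio and btrfs_pending_bios, and the locking, unplugging, and congestion handling of the real function are omitted.

#include <stdio.h>
#include <stddef.h>

struct toy_bio {
        int id;
        struct toy_bio *bi_next;
};

struct toy_pending {                    /* same shape as btrfs_pending_bios */
        struct toy_bio *head;
        struct toy_bio *tail;
};

/* same logic as the requeue_list() helper added by the patch */
static void requeue_list(struct toy_pending *q, struct toy_bio *head,
                         struct toy_bio *tail)
{
        struct toy_bio *old_head = q->head;

        q->head = head;
        if (q->tail)
                tail->bi_next = old_head;
        else
                q->tail = tail;
}

/* single-threaded model of the drain loop in run_scheduled_bios() */
static void drain(struct toy_pending *sync_q, struct toy_pending *reg_q)
{
        struct toy_pending *q;
        struct toy_bio *pending, *tail, *cur;
        int num_run;

loop:
        q = sync_q->head ? sync_q : reg_q;      /* sync bios always win */
        pending = q->head;
        tail = q->tail;
        q->head = q->tail = NULL;
        num_run = 0;

        while (pending) {
                /* on the regular list, yield to new sync work after 16 bios */
                if (q != sync_q && sync_q->head && num_run > 16) {
                        requeue_list(q, pending, tail);
                        goto loop;
                }
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
                printf("submit %s bio %d\n",
                       q == sync_q ? "sync" : "regular", cur->id);
                num_run++;
        }
        if (sync_q->head || reg_q->head)
                goto loop;
}

int main(void)
{
        struct toy_bio bios[8];
        struct toy_pending sync_q = { NULL, NULL }, reg_q = { NULL, NULL };
        int i;

        for (i = 0; i < 8; i++) {
                bios[i].id = i;
                bios[i].bi_next = NULL;
        }
        /* bios 0-1 on the sync list, 2-7 on the regular list */
        sync_q.head = &bios[0];
        bios[0].bi_next = &bios[1];
        sync_q.tail = &bios[1];
        reg_q.head = &bios[2];
        for (i = 2; i < 7; i++)
                bios[i].bi_next = &bios[i + 1];
        reg_q.tail = &bios[7];

        drain(&sync_q, &reg_q);         /* prints the two sync bios first */
        return 0;
}

In this single-threaded model the mid-drain requeue can never fire, since nothing refills the sync list concurrently; in the kernel that path is exactly what lets an fsync jump ahead of a long run of background writes already handed to the worker.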
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5836327ba5dd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
 #include "async-thread.h"
 
 struct buffer_head;
+struct btrfs_pending_bios {
+	struct bio *head;
+	struct bio *tail;
+};
+
 struct btrfs_device {
 	struct list_head dev_list;
 	struct list_head dev_alloc_list;
 	struct btrfs_fs_devices *fs_devices;
 	struct btrfs_root *dev_root;
-	struct bio *pending_bios;
-	struct bio *pending_bio_tail;
+
+	/* regular prio bios */
+	struct btrfs_pending_bios pending_bios;
+	/* WRITE_SYNC bios */
+	struct btrfs_pending_bios pending_sync_bios;
+
 	int running_pending;
 	u64 generation;
 
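On the submission side, schedule_bio() now just picks one of the two new queues and appends at the tail. A compact sketch of that selection with simplified stand-in types (bio_stub.sync stands in for the kernel's bio_sync() test on bio->bi_rw):

#include <stdbool.h>
#include <stddef.h>

struct bio_stub {
        bool sync;                      /* stand-in for bio_sync(bio) */
        struct bio_stub *bi_next;
};

struct pending_stub {                   /* models btrfs_pending_bios */
        struct bio_stub *head;          /* oldest bio, submitted first */
        struct bio_stub *tail;          /* newest bio, O(1) append point */
};

struct device_stub {
        struct pending_stub pending_bios;       /* regular prio */
        struct pending_stub pending_sync_bios;  /* WRITE_SYNC */
};

/* mirrors the queue selection and tail append in schedule_bio() */
static void queue_bio(struct device_stub *dev, struct bio_stub *bio)
{
        struct pending_stub *q =
                bio->sync ? &dev->pending_sync_bios : &dev->pending_bios;

        bio->bi_next = NULL;
        if (q->tail)
                q->tail->bi_next = bio;
        q->tail = bio;
        if (!q->head)
                q->head = bio;
}

int main(void)
{
        struct device_stub dev = { { NULL, NULL }, { NULL, NULL } };
        struct bio_stub a = { false, NULL };
        struct bio_stub b = { true, NULL };

        queue_bio(&dev, &a);    /* lands on dev.pending_bios */
        queue_bio(&dev, &b);    /* lands on dev.pending_sync_bios */
        return dev.pending_sync_bios.head == &b ? 0 : 1;
}

Because head and tail are tracked per queue, a WRITE_SYNC bio is never stuck behind a backlog of regular-priority writes queued earlier on the same device.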