Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c | 167
 1 file changed, 69 insertions(+), 98 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
 #include "ctree.h"
 #include "btrfs_inode.h"
 
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
-				      unsigned long extra_flags,
-				      void (*ctor)(void *, struct kmem_cache *,
-						   unsigned long));
-
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
@@ -50,20 +44,23 @@ struct extent_page_data {
 	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
-	int extent_locked;
+	unsigned int extent_locked:1;
+
+	/* tells the submit_bio code to use a WRITE_SYNC */
+	unsigned int sync_io:1;
 };
 
 int __init extent_io_init(void)
 {
-	extent_state_cache = btrfs_cache_create("extent_state",
+	extent_state_cache = kmem_cache_create("extent_state",
 					sizeof(struct extent_state), 0,
-					NULL);
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_state_cache)
 		return -ENOMEM;
 
-	extent_buffer_cache = btrfs_cache_create("extent_buffers",
+	extent_buffer_cache = kmem_cache_create("extent_buffers",
 					sizeof(struct extent_buffer), 0,
-					NULL);
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
 		goto free_state_cache;
 	return 0;
@@ -1404,69 +1401,6 @@ out:
 	return total_bytes;
 }
 
-#if 0
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- */
-static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-	int err;
-
-	while (index <= end_index) {
-		page = grab_cache_page(tree->mapping, index);
-		if (!page) {
-			err = -ENOMEM;
-			goto failed;
-		}
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto failed;
-		}
-		index++;
-	}
-	lock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-
-failed:
-	/*
-	 * we failed above in getting the page at 'index', so we undo here
-	 * up to but not including the page at 'index'
-	 */
-	end_index = index;
-	index = start >> PAGE_CACHE_SHIFT;
-	while (index < end_index) {
-		page = find_get_page(tree->mapping, index);
-		unlock_page(page);
-		page_cache_release(page);
-		index++;
-	}
-	return err;
-}
-
-/*
- * helper function to unlock both pages and extents in the tree.
- */
-static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-
-	while (index <= end_index) {
-		page = find_get_page(tree->mapping, index);
-		unlock_page(page);
-		page_cache_release(page);
-		index++;
-	}
-	unlock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-}
-#endif
-
 /*
  * set the private field for a given byte offset in the tree. If there isn't
  * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	return ret;
 }
 
+static noinline void update_nr_written(struct page *page,
+				       struct writeback_control *wbc,
+				       unsigned long nr_written)
+{
+	wbc->nr_to_write -= nr_written;
+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+		page->mapping->writeback_index = page->index + nr_written;
+}
+
 /*
  * the writepage semantics are similar to regular writepage. extent
  * records are inserted to lock ranges in the tree, and as dirty areas
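
The update_nr_written() helper added above advances the mapping's writeback_index only for range_cyclic writeback, or for a whole-file range (range_start == 0, range_end == LLONG_MAX) that still has write budget left; an explicit sub-range write must not disturb the background writeback cursor. A minimal userspace sketch of that decision, assuming simplified stand-in types (wbc_model and should_update_index are illustrative names, not kernel code):

/*
 * Standalone userspace sketch (not kernel code): models the condition
 * update_nr_written() uses to decide whether writeback_index may be
 * advanced. "wbc_model" and "should_update_index" are illustrative
 * stand-ins for struct writeback_control and the helper above.
 */
#include <stdio.h>
#include <limits.h>

struct wbc_model {
	long long nr_to_write;		/* remaining page budget */
	int range_cyclic;		/* cyclic background writeback? */
	long long range_start;
	long long range_end;
};

/* returns nonzero when writeback_index should move to index + nr_written */
static int should_update_index(struct wbc_model *wbc, unsigned long nr_written)
{
	wbc->nr_to_write -= nr_written;
	return wbc->range_cyclic || (wbc->nr_to_write > 0 &&
	       wbc->range_start == 0 && wbc->range_end == LLONG_MAX);
}

int main(void)
{
	/* cyclic writeback always records progress */
	struct wbc_model cyclic = { .nr_to_write = 16, .range_cyclic = 1 };
	/* an explicit sub-range leaves the cyclic cursor alone */
	struct wbc_model ranged = { .nr_to_write = 16, .range_start = 4096,
				    .range_end = 8191 };

	printf("cyclic: %d\n", should_update_index(&cyclic, 1));	/* 1 */
	printf("ranged: %d\n", should_update_index(&ranged, 1));	/* 0 */
	return 0;
}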
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 delalloc_end;
 	int page_started;
 	int compressed;
+	int write_flags;
 	unsigned long nr_written = 0;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC_PLUG;
+	else
+		write_flags = WRITE;
+
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	delalloc_end = 0;
 	page_started = 0;
 	if (!epd->extent_locked) {
+		/*
+		 * make sure the wbc mapping index is at least updated
+		 * to this page.
+		 */
+		update_nr_written(page, wbc, 0);
+
 		while (delalloc_end < page_end) {
 			nr_delalloc = find_lock_delalloc_range(inode, tree,
 							       page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		 */
 		if (page_started) {
 			ret = 0;
-			goto update_nr_written;
+			/*
+			 * we've unlocked the page, so we can't update
+			 * the mapping's writeback index, just update
+			 * nr_to_write.
+			 */
+			wbc->nr_to_write -= nr_written;
+			goto done_unlocked;
 		}
 	}
 	lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (ret == -EAGAIN) {
 			unlock_extent(tree, start, page_end, GFP_NOFS);
 			redirty_page_for_writepage(wbc, page);
+			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
 			ret = 0;
-			goto update_nr_written;
+			goto done_unlocked;
 		}
 	}
 
-	nr_written++;
+	/*
+	 * we don't want to touch the inode after unlocking the page,
+	 * so we update the mapping writeback index now
+	 */
+	update_nr_written(page, wbc, nr_written + 1);
 
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			       (unsigned long long)end);
 		}
 
-		ret = submit_extent_page(WRITE, tree, page, sector,
-					 iosize, pg_offset, bdev,
-					 &epd->bio, max_nr,
+		ret = submit_extent_page(write_flags, tree, page,
+					 sector, iosize, pg_offset,
+					 bdev, &epd->bio, max_nr,
 					 end_bio_extent_writepage,
 					 0, 0, 0);
 		if (ret)
@@ -2336,11 +2303,8 @@ done:
 	unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
 
-update_nr_written:
-	wbc->nr_to_write -= nr_written;
-	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
-	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
-		page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:
+
 	return 0;
 }
 
@@ -2460,15 +2424,23 @@ retry:
 	return ret;
 }
 
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
 {
-	struct extent_page_data *epd = data;
 	if (epd->bio) {
-		submit_one_bio(WRITE, epd->bio, 0, 0);
+		if (epd->sync_io)
+			submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+		else
+			submit_one_bio(WRITE, epd->bio, 0, 0);
 		epd->bio = NULL;
 	}
 }
 
+static noinline void flush_write_bio(void *data)
+{
+	struct extent_page_data *epd = data;
+	flush_epd_write_bio(epd);
+}
+
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 			   get_extent_t *get_extent,
 			   struct writeback_control *wbc)
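
flush_epd_write_bio() above is where the new sync_io bit finally takes effect: a pending bio is submitted with WRITE_SYNC when the caller asked for WB_SYNC_ALL writeback, and with plain WRITE otherwise. A condensed userspace model of that selection, assuming simplified stand-in types (epd_model, submit_flags, and the enum are illustrative, not the kernel API):

/*
 * Standalone userspace sketch (not kernel code): models how the sync_io
 * bit, captured from wbc->sync_mode when the extent_page_data is set up,
 * picks the submit flags at flush time.
 */
#include <stdio.h>

enum sync_mode_model { WB_SYNC_NONE_M, WB_SYNC_ALL_M };

struct epd_model {
	int bio_pending;	/* stands in for epd->bio != NULL */
	unsigned int sync_io:1;	/* .sync_io = sync_mode == WB_SYNC_ALL */
};

static const char *submit_flags(const struct epd_model *epd)
{
	/* mirrors flush_epd_write_bio(): WRITE_SYNC only when sync_io set */
	return epd->sync_io ? "WRITE_SYNC" : "WRITE";
}

int main(void)
{
	enum sync_mode_model mode = WB_SYNC_ALL_M;
	struct epd_model epd = {
		.bio_pending = 1,
		.sync_io = (mode == WB_SYNC_ALL_M),
	};

	if (epd.bio_pending)
		printf("submit_one_bio(%s, bio, 0, 0)\n", submit_flags(&epd));
	return 0;
}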
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = wbc->bdi,
-		.sync_mode = WB_SYNC_NONE,
+		.sync_mode = wbc->sync_mode,
 		.older_than_this = NULL,
 		.nr_to_write = 64,
 		.range_start = page_offset(page) + PAGE_CACHE_SIZE,
 		.range_end = (loff_t)-1,
 	};
 
-
 	ret = __extent_writepage(page, wbc, &epd);
 
 	extent_write_cache_pages(tree, mapping, &wbc_writepages,
 				 __extent_writepage, &epd, flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 1,
+		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		start += PAGE_CACHE_SIZE;
 	}
 
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,
 				       __extent_writepage, &epd,
 				       flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 