path: root/fs/btrfs/extent_io.c
author    Chris Mason <chris.mason@oracle.com>  2008-11-06 22:02:51 -0500
committer Chris Mason <chris.mason@oracle.com>  2008-11-06 22:02:51 -0500
commit    771ed689d2cd53439e28e095bc38fbe40a71429e (patch)
tree      518801f7141928e398d40c2b5955720d4346ce1a /fs/btrfs/extent_io.c
parent    4a69a41009c4ac691f7d9c289f5f37fabeddce46 (diff)
Btrfs: Optimize compressed writeback and reads
When reading compressed extents, try to put pages into the page cache for any
pages covered by the compressed extent that readpages didn't already preload.

Add an async work queue to handle transformations at delayed allocation
processing time. Right now this is just compression. The workflow is:

1) Find offsets in the file marked for delayed allocation
2) Lock the pages
3) Lock the state bits
4) Call the async delalloc code

The async delalloc code clears the state lock bits and delalloc bits. It is
important this happens before the range goes into the work queue because
otherwise it might deadlock with other work queue items that try to lock those
extent bits.

The file pages are compressed, and if the compression doesn't work the pages
are written back directly.

An ordered work queue is used to make sure the inodes are written in the same
order that pdflush or writepages sent them down.

This changes extent_write_cache_pages to let the writepage function update the
wbc nr_written count.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
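The ordering guarantee described above can be illustrated outside the kernel. Below is a minimal, self-contained userspace sketch in plain C with pthreads; it is not the btrfs_workers implementation, and the names work_item, next_to_retire and worker are hypothetical, introduced only for illustration. Work items finish in arbitrary order, but are retired strictly in the order they were submitted, which is the property the ordered work queue provides for compressed delalloc writeback.

/* Illustrative userspace analogue of an "ordered" work queue:
 * items may be processed (here: slept on) in any order, but are
 * retired strictly in submission order. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NR_ITEMS 8

struct work_item {
	int seq;	/* submission order, assigned by the submitter */
	int done;	/* set once the (possibly slow) work has finished */
};

static struct work_item items[NR_ITEMS];
static int next_to_retire;	/* next seq allowed to "write back" */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Pretend to compress a range, then retire finished items in order. */
static void *worker(void *arg)
{
	struct work_item *item = arg;

	usleep((rand() % 50) * 1000);	/* unordered: variable-length work */

	pthread_mutex_lock(&lock);
	item->done = 1;
	/* ordered: only the front of the queue may retire, dragging along
	 * any successors that have already finished */
	while (next_to_retire < NR_ITEMS && items[next_to_retire].done) {
		printf("retiring item %d in submission order\n",
		       items[next_to_retire].seq);
		next_to_retire++;
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t threads[NR_ITEMS];
	int i;

	for (i = 0; i < NR_ITEMS; i++) {
		items[i].seq = i;
		pthread_create(&threads[i], NULL, worker, &items[i]);
	}
	for (i = 0; i < NR_ITEMS; i++)
		pthread_join(threads[i], NULL);
	return 0;
}

Compiled with cc -pthread, the retirement messages always come out in submission order even though the simulated per-item delays are random.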
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  140
1 file changed, 117 insertions(+), 23 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9b37ce6e5168..bbe3bcfcf4ae 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -47,6 +47,11 @@ struct extent_page_data {
 	struct bio *bio;
 	struct extent_io_tree *tree;
 	get_extent_t *get_extent;
+
+	/* tells writepage not to lock the state bits for this range
+	 * it still does the unlocking
+	 */
+	int extent_locked;
 };
 
 int __init extent_io_init(void)
@@ -1198,11 +1203,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 			 * the caller is taking responsibility for
 			 * locked_page
 			 */
-			if (pages[i] != locked_page)
+			if (pages[i] != locked_page) {
 				lock_page(pages[i]);
+				if (pages[i]->mapping != inode->i_mapping) {
+					ret = -EAGAIN;
+					unlock_page(pages[i]);
+					page_cache_release(pages[i]);
+					goto done;
+				}
+			}
 			page_cache_release(pages[i]);
+			pages_locked++;
 		}
-		pages_locked += ret;
 		nrpages -= ret;
 		index += ret;
 		cond_resched();
@@ -1262,8 +1274,7 @@ again:
 		 * if we're looping.
 		 */
 		if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
-			delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
-				~((u64)PAGE_CACHE_SIZE - 1);
+			delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
 		}
 		/* step two, lock all the pages after the page that has start */
 		ret = lock_delalloc_pages(inode, locked_page,
@@ -1306,7 +1317,10 @@ out_failed:
 int extent_clear_unlock_delalloc(struct inode *inode,
 				 struct extent_io_tree *tree,
 				 u64 start, u64 end, struct page *locked_page,
-				 int clear_dirty, int set_writeback,
+				 int unlock_pages,
+				 int clear_unlock,
+				 int clear_delalloc, int clear_dirty,
+				 int set_writeback,
 				 int end_writeback)
 {
 	int ret;
@@ -1315,12 +1329,19 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
-	int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+	int clear_bits = 0;
 
+	if (clear_unlock)
+		clear_bits |= EXTENT_LOCKED;
 	if (clear_dirty)
 		clear_bits |= EXTENT_DIRTY;
 
+	if (clear_delalloc)
+		clear_bits |= EXTENT_DELALLOC;
+
 	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+	if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+		return 0;
 
 	while(nr_pages > 0) {
 		ret = find_get_pages_contig(inode->i_mapping, index,
@@ -1336,7 +1357,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 			set_page_writeback(pages[i]);
 			if (end_writeback)
 				end_page_writeback(pages[i]);
-			unlock_page(pages[i]);
+			if (unlock_pages)
+				unlock_page(pages[i]);
 			page_cache_release(pages[i]);
 		}
 		nr_pages -= ret;
@@ -1741,9 +1763,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 	}
 
-	if (uptodate)
+	if (uptodate) {
 		set_extent_uptodate(tree, start, end,
 				    GFP_ATOMIC);
+	}
 	unlock_extent(tree, start, end, GFP_ATOMIC);
 
 	if (whole_page) {
@@ -1925,6 +1948,7 @@ void set_page_extent_mapped(struct page *page)
 		set_page_private(page, EXTENT_PAGE_PRIVATE);
 	}
 }
+EXPORT_SYMBOL(set_page_extent_mapped);
 
 void set_page_extent_head(struct page *page, unsigned long len)
 {
@@ -2143,12 +2167,17 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 delalloc_end;
 	int page_started;
 	int compressed;
+	unsigned long nr_written = 0;
 
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
 	    (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0);
+		if (epd->extent_locked) {
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, start,
+							 page_end, NULL, 1);
+		}
 		unlock_page(page);
 		return 0;
 	}
@@ -2169,27 +2198,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	delalloc_start = start;
 	delalloc_end = 0;
 	page_started = 0;
-	while(delalloc_end < page_end) {
-		nr_delalloc = find_lock_delalloc_range(inode, tree,
+	if (!epd->extent_locked) {
+		while(delalloc_end < page_end) {
+			nr_delalloc = find_lock_delalloc_range(inode, tree,
 						       page,
 						       &delalloc_start,
 						       &delalloc_end,
 						       128 * 1024 * 1024);
-		if (nr_delalloc == 0) {
+			if (nr_delalloc == 0) {
+				delalloc_start = delalloc_end + 1;
+				continue;
+			}
+			tree->ops->fill_delalloc(inode, page, delalloc_start,
+						 delalloc_end, &page_started,
+						 &nr_written);
 			delalloc_start = delalloc_end + 1;
-			continue;
 		}
-		tree->ops->fill_delalloc(inode, page, delalloc_start,
-					 delalloc_end, &page_started);
-		delalloc_start = delalloc_end + 1;
-	}
 
-	/* did the fill delalloc function already unlock and start the IO? */
-	if (page_started) {
-		return 0;
+		/* did the fill delalloc function already unlock and start
+		 * the IO?
+		 */
+		if (page_started) {
+			ret = 0;
+			goto update_nr_written;
+		}
 	}
-
 	lock_extent(tree, start, page_end, GFP_NOFS);
+
 	unlock_start = start;
 
 	if (tree->ops && tree->ops->writepage_start_hook) {
@@ -2199,10 +2234,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			unlock_extent(tree, start, page_end, GFP_NOFS);
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
-			return 0;
+			ret = 0;
+			goto update_nr_written;
 		}
 	}
 
+	nr_written++;
+
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
 		printk("found delalloc bits after lock_extent\n");
@@ -2333,6 +2371,12 @@ done:
 	if (unlock_start <= page_end)
 		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
+
+update_nr_written:
+	wbc->nr_to_write -= nr_written;
+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+		page->mapping->writeback_index = page->index + nr_written;
 	return 0;
 }
 
@@ -2431,7 +2475,7 @@ retry:
 			unlock_page(page);
 			ret = 0;
 		}
-		if (ret || (--(wbc->nr_to_write) <= 0))
+		if (ret || wbc->nr_to_write <= 0)
 			done = 1;
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
@@ -2452,6 +2496,8 @@ retry:
 	}
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
+	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+		range_whole = 1;
 
 	if (wbc->range_cont)
 		wbc->range_start = index << PAGE_CACHE_SHIFT;
@@ -2469,6 +2515,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.bio = NULL,
 		.tree = tree,
 		.get_extent = get_extent,
+		.extent_locked = 0,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = wbc->bdi,
@@ -2491,6 +2538,52 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 }
 EXPORT_SYMBOL(extent_write_full_page);
 
+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+			      u64 start, u64 end, get_extent_t *get_extent,
+			      int mode)
+{
+	int ret = 0;
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
+		PAGE_CACHE_SHIFT;
+
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+		.extent_locked = 1,
+	};
+	struct writeback_control wbc_writepages = {
+		.bdi = inode->i_mapping->backing_dev_info,
+		.sync_mode = mode,
+		.older_than_this = NULL,
+		.nr_to_write = nr_pages * 2,
+		.range_start = start,
+		.range_end = end + 1,
+	};
+
+	while(start <= end) {
+		page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		if (clear_page_dirty_for_io(page))
+			ret = __extent_writepage(page, &wbc_writepages, &epd);
+		else {
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, start,
+						 start + PAGE_CACHE_SIZE - 1,
+						 NULL, 1);
+			unlock_page(page);
+		}
+		page_cache_release(page);
+		start += PAGE_CACHE_SIZE;
+	}
+
+	if (epd.bio)
+		submit_one_bio(WRITE, epd.bio, 0, 0);
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_locked_range);
+
 
 int extent_writepages(struct extent_io_tree *tree,
 		      struct address_space *mapping,
@@ -2502,6 +2595,7 @@ int extent_writepages(struct extent_io_tree *tree,
 		.bio = NULL,
 		.tree = tree,
 		.get_extent = get_extent,
+		.extent_locked = 0,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,