author	Chris Mason <chris.mason@oracle.com>	2008-11-06 22:02:51 -0500
committer	Chris Mason <chris.mason@oracle.com>	2008-11-06 22:02:51 -0500
commit	771ed689d2cd53439e28e095bc38fbe40a71429e (patch)
tree	518801f7141928e398d40c2b5955720d4346ce1a /fs/btrfs/compression.c
parent	4a69a41009c4ac691f7d9c289f5f37fabeddce46 (diff)
Btrfs: Optimize compressed writeback and reads
When reading compressed extents, try to populate the page cache with any
pages covered by the compressed extent that readpages didn't already
preload.
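The heavy lifting is done by the new add_ra_bio_pages() helper in the diff
below. Condensed to its core pattern (the locking, extent-map validation,
and error handling of the real function are omitted here), the idea is:

	while (last_offset < compressed_end) {
		/* skip offsets the page cache already holds */
		page = radix_tree_lookup(&mapping->page_tree, page_index);
		if (page)
			goto next;

		page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
		if (!page || add_to_page_cache(page, mapping, page_index,
					       GFP_NOFS))
			goto next;

		/*
		 * the page rides along on the bio we had to issue for the
		 * compressed extent anyway, so filling it is nearly free
		 */
		bio_add_page(cb->orig_bio, page, PAGE_CACHE_SIZE, 0);
next:
		last_offset += PAGE_CACHE_SIZE;
	}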
Add an async work queue to handle transformations at delayed allocation processing
time. Right now this is just compression. The workflow is:
1) Find offsets in the file marked for delayed allocation
2) Lock the pages
3) Lock the state bits
4) Call the async delalloc code
The async delalloc code clears the state lock bits and the delalloc bits. It
is important that this happen before the range goes into the work queue,
because otherwise it could deadlock with other work queue items that try to
lock those extent bits.
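The inode.c side of this handoff is outside this diffstat; a minimal sketch
of step 4, assuming helper and field names from the rest of the patch series
(queue_async_delalloc and struct async_cow here are illustrative, not part
of this file's diff):

/* sketch only: the bits are cleared *before* the range is queued */
static void queue_async_delalloc(struct inode *inode, u64 start, u64 end,
				 struct async_cow *async_cow)
{
	/* a worker locking these extent bits can no longer deadlock on us */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
			 EXTENT_LOCKED | EXTENT_DELALLOC, 1, 0, GFP_NOFS);

	async_cow->inode = inode;
	async_cow->start = start;
	async_cow->end = end;
	btrfs_queue_worker(&BTRFS_I(inode)->root->fs_info->delalloc_workers,
			   &async_cow->work);
}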
The file pages are compressed in the worker, and if the compression doesn't
work, the pages are written back uncompressed.
An ordered work queue is used to make sure the inodes are written in the same
order that pdflush or writepages sent them down.
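The ordering comes from the ordered work queues introduced by the parent
commit: each work item carries a second callback that the queue runs
strictly in insertion order, so compression can finish out of order while
the resulting bios are still submitted in the order the inodes arrived. A
sketch of the wiring, which would sit just before the btrfs_queue_worker()
call in the sketch above (callback names assumed from the surrounding
series):

	/* func may run on any worker thread, in any order */
	async_cow->work.func = async_cow_start;
	/*
	 * ordered_func runs strictly in queue order; this is where the
	 * bios are submitted (compressed, or plain if compression failed)
	 */
	async_cow->work.ordered_func = async_cow_submit;
	async_cow->work.ordered_free = async_cow_free;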
This changes extent_write_cache_pages to let the writepage function
update the wbc nr_written count.
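Concretely, the write loop can no longer assume one page written per
writepage call. A simplified sketch of the adjusted accounting in
extent_write_cache_pages() (range handling and cyclic writeback omitted):

static int extent_write_cache_pages_sketch(struct address_space *mapping,
					   struct writeback_control *wbc,
					   writepage_t writepage, void *data)
{
	pgoff_t index = wbc->range_start >> PAGE_CACHE_SHIFT;
	struct pagevec pvec;
	unsigned i, nr;
	int done = 0;

	pagevec_init(&pvec, 0);
	while (!done && (nr = pagevec_lookup_tag(&pvec, mapping, &index,
						 PAGECACHE_TAG_DIRTY,
						 PAGEVEC_SIZE))) {
		for (i = 0; i < nr && !done; i++) {
			/*
			 * the callback may compress and write out a whole
			 * delalloc range here; it charges every page it
			 * handled against the wbc counts itself
			 */
			(*writepage)(pvec.pages[i], wbc, data);
			if (wbc->nr_to_write <= 0)
				done = 1;
		}
		pagevec_release(&pvec);
	}
	return 0;
}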
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--	fs/btrfs/compression.c	150
1 file changed, 145 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 354913177ba6..284f21025bcc 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,6 +33,7 @@
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
 #include <linux/version.h>
+#include <linux/pagevec.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -145,9 +146,9 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	}
 
 	/* do io completion on the original bio */
-	if (cb->errors)
+	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
-	else
+	} else
 		bio_endio(cb->orig_bio, 0);
 
 	/* finally free the cb struct */
@@ -333,6 +334,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		}
 		bytes_left -= PAGE_CACHE_SIZE;
 		first_byte += PAGE_CACHE_SIZE;
+		cond_resched();
 	}
 	bio_get(bio);
 
@@ -346,6 +348,130 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	return 0;
 }
 
+static noinline int add_ra_bio_pages(struct inode *inode,
+				     u64 compressed_end,
+				     struct compressed_bio *cb)
+{
+	unsigned long end_index;
+	unsigned long page_index;
+	u64 last_offset;
+	u64 isize = i_size_read(inode);
+	int ret;
+	struct page *page;
+	unsigned long nr_pages = 0;
+	struct extent_map *em;
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	struct extent_map_tree *em_tree;
+	struct extent_io_tree *tree;
+	u64 end;
+	int misses = 0;
+
+	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
+	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
+	em_tree = &BTRFS_I(inode)->extent_tree;
+	tree = &BTRFS_I(inode)->io_tree;
+
+	if (isize == 0)
+		return 0;
+
+	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+	pagevec_init(&pvec, 0);
+	while (last_offset < compressed_end) {
+		page_index = last_offset >> PAGE_CACHE_SHIFT;
+
+		if (page_index > end_index)
+			break;
+
+		rcu_read_lock();
+		page = radix_tree_lookup(&mapping->page_tree, page_index);
+		rcu_read_unlock();
+		if (page) {
+			misses++;
+			if (misses > 4)
+				break;
+			goto next;
+		}
+
+		page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
+		if (!page)
+			break;
+
+		page->index = page_index;
+		/*
+		 * what we want to do here is call add_to_page_cache_lru,
+		 * but that isn't exported, so we reproduce it here
+		 */
+		if (add_to_page_cache(page, mapping,
+				      page->index, GFP_NOFS)) {
+			page_cache_release(page);
+			goto next;
+		}
+
+		/* open coding of lru_cache_add, also not exported */
+		page_cache_get(page);
+		if (!pagevec_add(&pvec, page))
+			__pagevec_lru_add(&pvec);
+
+		end = last_offset + PAGE_CACHE_SIZE - 1;
+		/*
+		 * at this point, we have a locked page in the page cache
+		 * for these bytes in the file.  But, we have to make
+		 * sure they map to this compressed extent on disk.
+		 */
+		set_page_extent_mapped(page);
+		lock_extent(tree, last_offset, end, GFP_NOFS);
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, last_offset,
+					   PAGE_CACHE_SIZE);
+		spin_unlock(&em_tree->lock);
+
+		if (!em || last_offset < em->start ||
+		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
+		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+			free_extent_map(em);
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
+		free_extent_map(em);
+
+		if (page->index == end_index) {
+			char *userpage;
+			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
+
+			if (zero_offset) {
+				int zeros;
+				zeros = PAGE_CACHE_SIZE - zero_offset;
+				userpage = kmap_atomic(page, KM_USER0);
+				memset(userpage + zero_offset, 0, zeros);
+				flush_dcache_page(page);
+				kunmap_atomic(userpage, KM_USER0);
+			}
+		}
+
+		ret = bio_add_page(cb->orig_bio, page,
+				   PAGE_CACHE_SIZE, 0);
+
+		if (ret == PAGE_CACHE_SIZE) {
+			nr_pages++;
+			page_cache_release(page);
+		} else {
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
+next:
+		last_offset += PAGE_CACHE_SIZE;
+	}
+	if (pagevec_count(&pvec))
+		__pagevec_lru_add(&pvec);
+	return 0;
+}
+
 /*
  * for a compressed read, the bio we get passed has all the inode pages
  * in it. We don't actually do IO on those pages but allocate new ones
@@ -373,6 +499,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	struct block_device *bdev;
 	struct bio *comp_bio;
 	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	u64 em_len;
 	struct extent_map *em;
 	int ret;
 
@@ -393,6 +520,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->start = em->start;
 	compressed_len = em->block_len;
+	em_len = em->len;
 	free_extent_map(em);
 
 	cb->len = uncompressed_len;
@@ -411,6 +539,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	}
 	cb->nr_pages = nr_pages;
 
+	add_ra_bio_pages(inode, cb->start + em_len, cb);
+
+	if (!btrfs_test_opt(root, NODATASUM) &&
+	    !btrfs_test_flag(inode, NODATASUM)) {
+		btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
+	}
+
+	/* include any pages we added in add_ra_bio_pages */
+	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	cb->len = uncompressed_len;
+
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
@@ -442,9 +581,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
 							GFP_NOFS);
 			atomic_inc(&cb->pending_bios);
-			bio->bi_private = cb;
-			bio->bi_end_io = end_compressed_bio_write;
-			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+			comp_bio->bi_private = cb;
+			comp_bio->bi_end_io = end_compressed_bio_read;
+
+			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
 		}
 		cur_disk_byte += PAGE_CACHE_SIZE;
 	}