Diffstat (limited to 'fs/btrfs/compression.c')
 fs/btrfs/compression.c | 369 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 330 insertions(+), 39 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index a11a32058b50..f745287fbf2e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -31,7 +31,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
-#include <linux/pagevec.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -62,6 +62,9 @@ struct compressed_bio {
 	/* number of bytes on disk */
 	unsigned long compressed_len;
 
+	/* the compression algorithm for this bio */
+	int compress_type;
+
 	/* number of compressed pages in the array */
 	unsigned long nr_pages;
 
@@ -91,23 +94,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 				 u64 first_byte, gfp_t gfp_flags)
 {
-	struct bio *bio;
 	int nr_vecs;
 
 	nr_vecs = bio_get_nr_vecs(bdev);
-	bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
-	}
-
-	if (bio) {
-		bio->bi_size = 0;
-		bio->bi_bdev = bdev;
-		bio->bi_sector = first_byte >> 9;
-	}
-	return bio;
+	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,
@@ -163,7 +153,6 @@ fail:
  */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
@@ -187,12 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	tree = &BTRFS_I(inode)->io_tree;
-	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
-					cb->start,
-					cb->orig_bio->bi_io_vec,
-					cb->orig_bio->bi_vcnt,
-					cb->compressed_len);
+	ret = btrfs_decompress_biovec(cb->compress_type,
+				      cb->compressed_pages,
+				      cb->start,
+				      cb->orig_bio->bi_io_vec,
+				      cb->orig_bio->bi_vcnt,
+				      cb->compressed_len);
 csum_failed:
 	if (ret)
 		cb->errors = 1;
@@ -445,7 +434,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	unsigned long nr_pages = 0;
 	struct extent_map *em;
 	struct address_space *mapping = inode->i_mapping;
-	struct pagevec pvec;
 	struct extent_map_tree *em_tree;
 	struct extent_io_tree *tree;
 	u64 end;
@@ -461,7 +449,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 
 	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
-	pagevec_init(&pvec, 0);
 	while (last_offset < compressed_end) {
 		page_index = last_offset >> PAGE_CACHE_SHIFT;
 
@@ -478,26 +465,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			goto next;
 		}
 
-		page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
+		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
+								~__GFP_FS);
 		if (!page)
 			break;
 
-		page->index = page_index;
-		/*
-		 * what we want to do here is call add_to_page_cache_lru,
-		 * but that isn't exported, so we reproduce it here
-		 */
-		if (add_to_page_cache(page, mapping,
-				      page->index, GFP_NOFS)) {
+		if (add_to_page_cache_lru(page, mapping, page_index,
+								GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
 		}
 
-		/* open coding of lru_cache_add, also not exported */
-		page_cache_get(page);
-		if (!pagevec_add(&pvec, page))
-			__pagevec_lru_add_file(&pvec);
-
 		end = last_offset + PAGE_CACHE_SIZE - 1;
 		/*
 		 * at this point, we have a locked page in the page cache
@@ -551,8 +529,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 next:
 		last_offset += PAGE_CACHE_SIZE;
 	}
-	if (pagevec_count(&pvec))
-		__pagevec_lru_add_file(&pvec);
 	return 0;
 }
 
@@ -616,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->len = uncompressed_len;
 	cb->compressed_len = compressed_len;
+	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -705,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_put(comp_bio);
 	return 0;
 }
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+	&btrfs_zlib_compress,
+	&btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		INIT_LIST_HEAD(&comp_idle_workspace[i]);
+		spin_lock_init(&comp_workspace_lock[i]);
+		atomic_set(&comp_alloc_workspace[i], 0);
+		init_waitqueue_head(&comp_workspace_wait[i]);
+	}
+	return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one.
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+	struct list_head *workspace;
+	int cpus = num_online_cpus();
+	int idx = type - 1;
+
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+again:
+	spin_lock(workspace_lock);
+	if (!list_empty(idle_workspace)) {
+		workspace = idle_workspace->next;
+		list_del(workspace);
+		(*num_workspace)--;
+		spin_unlock(workspace_lock);
+		return workspace;
+	}
+	if (atomic_read(alloc_workspace) > cpus) {
+		DEFINE_WAIT(wait);
+
+		spin_unlock(workspace_lock);
+		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+			schedule();
+		finish_wait(workspace_wait, &wait);
+		goto again;
+	}
+	atomic_inc(alloc_workspace);
+	spin_unlock(workspace_lock);
+
+	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (IS_ERR(workspace)) {
+		atomic_dec(alloc_workspace);
+		wake_up(workspace_wait);
+	}
+	return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+	int idx = type - 1;
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+
+	spin_lock(workspace_lock);
+	if (*num_workspace < num_online_cpus()) {
+		list_add_tail(workspace, idle_workspace);
+		(*num_workspace)++;
+		spin_unlock(workspace_lock);
+		goto wake;
+	}
+	spin_unlock(workspace_lock);
+
+	btrfs_compress_op[idx]->free_workspace(workspace);
+	atomic_dec(alloc_workspace);
+wake:
+	if (waitqueue_active(workspace_wait))
+		wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+	struct list_head *workspace;
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		while (!list_empty(&comp_idle_workspace[i])) {
+			workspace = comp_idle_workspace[i].next;
+			list_del(workspace);
+			btrfs_compress_op[i]->free_workspace(workspace);
+			atomic_dec(&comp_alloc_workspace[i]);
+		}
+	}
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated.  There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read.  It
+ * may be smaller than len if we had to exit early because we
+ * ran out of room in the pages array or because we crossed the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -1;
+
+	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+						      start, len, pages,
+						      nr_dest_pages, out_pages,
+						      total_in, total_out,
+						      max_out);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous.  They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+							 disk_start,
+							 bvec, vcnt, srclen);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * a less complex decompression routine.  Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+						  dest_page, start_byte,
+						  srclen, destlen);
+
+	free_workspace(type, workspace);
+	return ret;
+}
+
+void __exit btrfs_exit_compress(void)
+{
+	free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from the working buffer into the pages.
+ *
+ * buf_start is the byte offset into the uncompressed data of the start
+ * of our working buffer.
+ *
+ * total_out is the last byte of the working buffer.
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset)
+{
+	unsigned long buf_offset;
+	unsigned long current_buf_start;
+	unsigned long start_byte;
+	unsigned long working_bytes = total_out - buf_start;
+	unsigned long bytes;
+	char *kaddr;
+	struct page *page_out = bvec[*page_index].bv_page;
+
+	/*
+	 * start byte is the first byte of the page we're currently
+	 * copying into relative to the start of the compressed data.
+	 */
+	start_byte = page_offset(page_out) - disk_start;
+
+	/* we haven't yet hit data corresponding to this page */
+	if (total_out <= start_byte)
+		return 1;
+
+	/*
+	 * the start of the data we care about is offset into
+	 * the middle of our working buffer
+	 */
+	if (total_out > start_byte && buf_start < start_byte) {
+		buf_offset = start_byte - buf_start;
+		working_bytes -= buf_offset;
+	} else {
+		buf_offset = 0;
+	}
+	current_buf_start = buf_start;
+
+	/* copy bytes from the working buffer into the pages */
+	while (working_bytes > 0) {
+		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(bytes, working_bytes);
+		kaddr = kmap_atomic(page_out, KM_USER0);
+		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page_out);
+
+		*pg_offset += bytes;
+		buf_offset += bytes;
+		working_bytes -= bytes;
+		current_buf_start += bytes;
+
+		/* check if we need to pick another page */
+		if (*pg_offset == PAGE_CACHE_SIZE) {
+			(*page_index)++;
+			if (*page_index >= vcnt)
+				return 0;
+
+			page_out = bvec[*page_index].bv_page;
+			*pg_offset = 0;
+			start_byte = page_offset(page_out) - disk_start;
+
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
+
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer.  bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
+		}
+	}
+
+	return 1;
+}
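
The btrfs_compress_op[] table in the patch above turns the compression type stored with each extent into a method lookup: type 1 selects index 0 (zlib), type 2 selects index 1 (lzo), and adding a compressor means adding one table entry, which keeps the generic submit/read paths free of per-codec branches. Below is a self-contained userspace sketch of that dispatch pattern; the identity "compress" callbacks and all names here (fake_zlib, fake_lzo, compress_op) are made up for illustration and are not the kernel API.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

enum { COMPRESS_ZLIB = 1, COMPRESS_LZO = 2, COMPRESS_TYPES = 2 };

/* one struct of function pointers per codec, like btrfs_compress_op */
struct compress_op {
	const char *name;
	size_t (*compress)(const char *in, size_t len, char *out);
};

static size_t fake_zlib(const char *in, size_t len, char *out)
{
	memcpy(out, in, len);		/* identity stand-in for real zlib */
	return len;
}

static size_t fake_lzo(const char *in, size_t len, char *out)
{
	memcpy(out, in, len);		/* identity stand-in for real lzo */
	return len;
}

static const struct compress_op zlib_op = { "zlib", fake_zlib };
static const struct compress_op lzo_op = { "lzo", fake_lzo };

/* type 1 selects index 0, matching the type - 1 indexing in the patch */
static const struct compress_op *compress_op[COMPRESS_TYPES] = {
	&zlib_op,
	&lzo_op,
};

int main(void)
{
	char out[16];
	int type = COMPRESS_LZO;
	size_t n = compress_op[type - 1]->compress("hello", 5, out);

	printf("%zu bytes via %s: %.*s\n", n, compress_op[type - 1]->name,
	       (int)n, out);
	return 0;
}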
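find_workspace()/free_workspace() implement a bounded per-type pool: idle workspaces are reused, only about num_online_cpus() may be allocated at once, and a caller over the cap sleeps until one is returned. Here is a minimal userspace analogue of the same pattern, assuming a mutex/condvar pair in place of the kernel's spinlock and waitqueue; ws_get, ws_put, and MAX_WS are hypothetical names.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_WS 4			/* stand-in for num_online_cpus() */

struct workspace {
	struct workspace *next;
	char *buf;			/* scratch memory a codec would reuse */
};

static struct {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	struct workspace *idle;		/* idle list, like comp_idle_workspace */
	int num_idle;			/* comp_num_workspace analogue */
	int num_alloc;			/* comp_alloc_workspace analogue */
} pool = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
};

/* take an idle workspace, allocate one if under the cap, else sleep */
static struct workspace *ws_get(void)
{
	struct workspace *ws;

	pthread_mutex_lock(&pool.lock);
	for (;;) {
		if (pool.idle) {
			ws = pool.idle;
			pool.idle = ws->next;
			pool.num_idle--;
			pthread_mutex_unlock(&pool.lock);
			return ws;
		}
		if (pool.num_alloc < MAX_WS)
			break;
		/* over the cap with nothing idle: wait for ws_put() */
		pthread_cond_wait(&pool.cond, &pool.lock);
	}
	pool.num_alloc++;
	pthread_mutex_unlock(&pool.lock);

	ws = malloc(sizeof(*ws));
	if (ws)
		ws->buf = malloc(64 * 1024);
	if (!ws || !ws->buf) {
		/* like the IS_ERR path above: give the slot back and wake */
		pthread_mutex_lock(&pool.lock);
		pool.num_alloc--;
		pthread_cond_signal(&pool.cond);
		pthread_mutex_unlock(&pool.lock);
		free(ws);
		return NULL;
	}
	return ws;
}

/* park the workspace on the idle list, or free it if enough are idle */
static void ws_put(struct workspace *ws)
{
	pthread_mutex_lock(&pool.lock);
	if (pool.num_idle < MAX_WS) {
		ws->next = pool.idle;
		pool.idle = ws;
		pool.num_idle++;
	} else {
		pool.num_alloc--;
		free(ws->buf);
		free(ws);
	}
	pthread_cond_signal(&pool.cond);
	pthread_mutex_unlock(&pool.lock);
}

int main(void)
{
	struct workspace *ws = ws_get();

	if (ws) {
		/* ... compress or decompress using ws->buf ... */
		ws_put(ws);
	}
	return 0;
}

The cap matters because each zlib or lzo workspace is large; bounding the pool at roughly the CPU count keeps memory in check while still letting every CPU compress concurrently.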
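btrfs_decompress_buf2page() copies whatever part of the current working buffer (bytes [buf_start, total_out) of the uncompressed stream) overlaps the destination pages, carrying page_index/pg_offset across calls so each successive buffer continues where the last one stopped. The sketch below replays that offset arithmetic in userspace with tiny 8-byte "pages"; PAGE_SZ, buf2page(), and the driver in main() are hypothetical, the sketch assumes the destination pages are contiguous in the stream, and the kernel version additionally handles non-adjacent biovec pages and maps real page-cache pages with kmap_atomic().

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 8	/* tiny "page" so the arithmetic is visible */

/* copy one working buffer, covering [buf_start, total_out) of the
 * uncompressed stream, into an array of fixed-size pages */
static void buf2page(const char *buf, unsigned long buf_start,
		     unsigned long total_out,
		     char pages[][PAGE_SZ], int vcnt,
		     unsigned long *page_index, unsigned long *pg_offset)
{
	unsigned long working = total_out - buf_start;
	unsigned long buf_offset = 0;
	/* stream byte the current page position corresponds to */
	unsigned long start_byte = *page_index * PAGE_SZ + *pg_offset;

	/* buffer ends before the current page position: nothing to copy */
	if (total_out <= start_byte)
		return;
	/* current page position starts in the middle of this buffer */
	if (buf_start < start_byte) {
		buf_offset = start_byte - buf_start;
		working -= buf_offset;
	}

	while (working > 0) {
		unsigned long bytes = PAGE_SZ - *pg_offset;

		if (bytes > working)
			bytes = working;
		memcpy(&pages[*page_index][*pg_offset], buf + buf_offset,
		       bytes);
		*pg_offset += bytes;
		buf_offset += bytes;
		working -= bytes;
		if (*pg_offset == PAGE_SZ) {	/* advance to the next page */
			(*page_index)++;
			*pg_offset = 0;
			if (*page_index >= (unsigned long)vcnt)
				return;
		}
	}
}

int main(void)
{
	char pages[3][PAGE_SZ];
	unsigned long pi = 0, po = 0;

	/* feed two working buffers of uneven size, as a decompressor would */
	buf2page("ABCDEFGHIJ", 0, 10, pages, 3, &pi, &po);
	buf2page("KLMNOPQRSTUVWX", 10, 24, pages, 3, &pi, &po);
	/* prints ABCDEFGH|IJKLMNOP|QRSTUVWX: page boundaries ignore where
	 * one working buffer ends and the next begins */
	printf("%.8s|%.8s|%.8s\n", pages[0], pages[1], pages[2]);
	return 0;
}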