aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/Kconfig 2
-rw-r--r-- fs/btrfs/Makefile 2
-rw-r--r-- fs/btrfs/btrfs_inode.h 2
-rw-r--r-- fs/btrfs/compression.c 329
-rw-r--r-- fs/btrfs/compression.h 72
-rw-r--r-- fs/btrfs/ctree.h 15
-rw-r--r-- fs/btrfs/disk-io.c 8
-rw-r--r-- fs/btrfs/extent_io.c 5
-rw-r--r-- fs/btrfs/extent_io.h 17
-rw-r--r-- fs/btrfs/extent_map.c 2
-rw-r--r-- fs/btrfs/extent_map.h 3
-rw-r--r-- fs/btrfs/file.c 2
-rw-r--r-- fs/btrfs/inode.c 82
-rw-r--r-- fs/btrfs/ioctl.c 21
-rw-r--r-- fs/btrfs/ioctl.h 9
-rw-r--r-- fs/btrfs/lzo.c 420
-rw-r--r-- fs/btrfs/ordered-data.c 18
-rw-r--r-- fs/btrfs/ordered-data.h 8
-rw-r--r-- fs/btrfs/super.c 50
-rw-r--r-- fs/btrfs/zlib.c 369
20 files changed, 1051 insertions, 385 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..ecb9fd3be143 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
4 select LIBCRC32C 4 select LIBCRC32C
5 select ZLIB_INFLATE 5 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 6 select ZLIB_DEFLATE
7 select LZO_COMPRESS
8 select LZO_DECOMPRESS
7 help 9 help
8 Btrfs is a new filesystem with extents, writable snapshotting, 10 Btrfs is a new filesystem with extents, writable snapshotting,
9 support for multiple devices and many more features. 11 support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32fd..31610ea73aec 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o 10 compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca0..ccc991c542df 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,7 +157,7 @@ struct btrfs_inode {
157 /* 157 /*
158 * always compress this one file 158 * always compress this one file
159 */ 159 */
160 unsigned force_compress:1; 160 unsigned force_compress:4;
161 161
162 struct inode vfs_inode; 162 struct inode vfs_inode;
163}; 163};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c56..f745287fbf2e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
62 /* number of bytes on disk */ 62 /* number of bytes on disk */
63 unsigned long compressed_len; 63 unsigned long compressed_len;
64 64
65 /* the compression algorithm for this bio */
66 int compress_type;
67
65 /* number of compressed pages in the array */ 68 /* number of compressed pages in the array */
66 unsigned long nr_pages; 69 unsigned long nr_pages;
67 70
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
173 /* ok, we're the last bio for this extent, lets start 176 /* ok, we're the last bio for this extent, lets start
174 * the decompression. 177 * the decompression.
175 */ 178 */
176 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 179 ret = btrfs_decompress_biovec(cb->compress_type,
177 cb->start, 180 cb->compressed_pages,
178 cb->orig_bio->bi_io_vec, 181 cb->start,
179 cb->orig_bio->bi_vcnt, 182 cb->orig_bio->bi_io_vec,
180 cb->compressed_len); 183 cb->orig_bio->bi_vcnt,
184 cb->compressed_len);
181csum_failed: 185csum_failed:
182 if (ret) 186 if (ret)
183 cb->errors = 1; 187 cb->errors = 1;
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
588 592
589 cb->len = uncompressed_len; 593 cb->len = uncompressed_len;
590 cb->compressed_len = compressed_len; 594 cb->compressed_len = compressed_len;
595 cb->compress_type = extent_compress_type(bio_flags);
591 cb->orig_bio = bio; 596 cb->orig_bio = bio;
592 597
593 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / 598 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
677 bio_put(comp_bio); 682 bio_put(comp_bio);
678 return 0; 683 return 0;
679} 684}
685
686static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; /* per-type list of idle workspaces */
687static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; /* held around idle-list and idle-count updates */
688static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; /* number of workspaces currently on the idle list */
689static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; /* workspaces allocated and not yet freed */
690static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; /* find_workspace() sleeps on this */
691
692struct btrfs_compress_op *btrfs_compress_op[] = { /* looked up as btrfs_compress_op[type - 1] */
693 &btrfs_zlib_compress,
694 &btrfs_lzo_compress,
695};
696
697int __init btrfs_init_compress(void) /* set up the per-type workspace state; always returns 0 */
698{
699 int i;
700
701 for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
702 INIT_LIST_HEAD(&comp_idle_workspace[i]);
703 spin_lock_init(&comp_workspace_lock[i]);
704 atomic_set(&comp_alloc_workspace[i], 0);
705 init_waitqueue_head(&comp_workspace_wait[i]);
706 } /* comp_num_workspace[] is not written here; as a static it starts at zero */
707 return 0;
708}
709
710/*
711 * Return a workspace for compression 'type' (indexed as type - 1): reuse an idle one if there is any, otherwise allocate one through the type's alloc_workspace().
712 * While more than num_online_cpus() workspaces are allocated and none is idle, the caller sleeps in TASK_UNINTERRUPTIBLE. The ERR_PTR from alloc_workspace() is returned unchanged if allocation fails.
713 */
714static struct list_head *find_workspace(int type)
715{
716 struct list_head *workspace;
717 int cpus = num_online_cpus();
718 int idx = type - 1;
719
720 struct list_head *idle_workspace = &comp_idle_workspace[idx];
721 spinlock_t *workspace_lock = &comp_workspace_lock[idx];
722 atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
723 wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
724 int *num_workspace = &comp_num_workspace[idx];
725again:
726 spin_lock(workspace_lock);
727 if (!list_empty(idle_workspace)) { /* fast path: take the first idle workspace */
728 workspace = idle_workspace->next;
729 list_del(workspace);
730 (*num_workspace)--;
731 spin_unlock(workspace_lock);
732 return workspace;
733
734 }
735 if (atomic_read(alloc_workspace) > cpus) { /* over the limit: wait instead of allocating */
736 DEFINE_WAIT(wait);
737
738 spin_unlock(workspace_lock);
739 prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
740 if (atomic_read(alloc_workspace) > cpus && !*num_workspace) /* re-checked after queueing, without the spinlock */
741 schedule();
742 finish_wait(workspace_wait, &wait);
743 goto again;
744 }
745 atomic_inc(alloc_workspace); /* counted before the lock is dropped and before the allocation itself */
746 spin_unlock(workspace_lock);
747
748 workspace = btrfs_compress_op[idx]->alloc_workspace();
749 if (IS_ERR(workspace)) { /* undo the count taken above and let a waiter retry */
750 atomic_dec(alloc_workspace);
751 wake_up(workspace_wait);
752 }
753 return workspace;
754}
755
756/*
757 * Hand back a workspace of compression 'type': park it on the idle list while fewer than num_online_cpus() are idle,
758 * otherwise release it through the type's free_workspace() and drop the allocation count. Waiters are woken in both cases.
759 */
760static void free_workspace(int type, struct list_head *workspace)
761{
762 int idx = type - 1;
763 struct list_head *idle_workspace = &comp_idle_workspace[idx];
764 spinlock_t *workspace_lock = &comp_workspace_lock[idx];
765 atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
766 wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
767 int *num_workspace = &comp_num_workspace[idx];
768
769 spin_lock(workspace_lock);
770 if (*num_workspace < num_online_cpus()) { /* room on the idle list: keep it for reuse */
771 list_add_tail(workspace, idle_workspace);
772 (*num_workspace)++;
773 spin_unlock(workspace_lock);
774 goto wake;
775 }
776 spin_unlock(workspace_lock);
777
778 btrfs_compress_op[idx]->free_workspace(workspace); /* called after the spinlock has been released */
779 atomic_dec(alloc_workspace);
780wake:
781 if (waitqueue_active(workspace_wait))
782 wake_up(workspace_wait);
783}
784
785/*
786 * cleanup function for module exit: frees every workspace still on the idle lists, for all compression types
787 */
788static void free_workspaces(void)
789{
790 struct list_head *workspace;
791 int i;
792
793 for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
794 while (!list_empty(&comp_idle_workspace[i])) { /* the list is walked without taking comp_workspace_lock[i] */
795 workspace = comp_idle_workspace[i].next;
796 list_del(workspace);
797 btrfs_compress_op[i]->free_workspace(workspace);
798 atomic_dec(&comp_alloc_workspace[i]); /* comp_num_workspace[i] is not adjusted here */
799 }
800 }
801}
802
803/*
804 * given an address space and start/len, compress the bytes with the algorithm for 'type' (btrfs_compress_op[type - 1]).
805 *
806 * pages are allocated to hold the compressed result and stored
807 * in 'pages'
808 *
809 * out_pages is used to return the number of pages allocated. There
810 * may be pages allocated even if we return an error
811 *
812 * total_in is used to return the number of bytes actually read. It
813 * may be smaller than len if we had to exit early because we
814 * ran out of room in the pages array or because we cross the
815 * max_out threshold.
816 *
817 * total_out is used to return the total number of compressed bytes
818 *
819 * max_out tells us the max number of bytes that we're allowed to
820 * stuff into pages. Returns -ENOMEM when no workspace can be obtained, otherwise whatever the type's compress_pages() returns.
821 */
822int btrfs_compress_pages(int type, struct address_space *mapping,
823 u64 start, unsigned long len,
824 struct page **pages,
825 unsigned long nr_dest_pages,
826 unsigned long *out_pages,
827 unsigned long *total_in,
828 unsigned long *total_out,
829 unsigned long max_out)
830{
831 struct list_head *workspace;
832 int ret;
833
834 workspace = find_workspace(type);
835 if (IS_ERR(workspace))
836 return -ENOMEM; /* was a bare -1; now the same errno the decompress wrappers return */
837
838 ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
839 start, len, pages,
840 nr_dest_pages, out_pages,
841 total_in, total_out,
842 max_out);
843 free_workspace(type, workspace); /* always handed back, whatever compress_pages() returned */
844 return ret;
845}
846
847/*
848 * type selects the algorithm (btrfs_compress_op[type - 1]); pages_in is an array of pages with compressed data.
849 *
850 * disk_start is the starting logical offset of this array in the file
851 *
852 * bvec is a bio_vec of pages from the file that we want to decompress into
853 *
854 * vcnt is the count of pages in the biovec
855 *
856 * srclen is the number of bytes in pages_in
857 *
858 * The basic idea is that we have a bio that was created by readpages.
859 * The pages in the bio are for the uncompressed data, and they may not
860 * be contiguous. They all correspond to the range of bytes covered by
861 * the compressed extent. Returns -ENOMEM when no workspace can be obtained, otherwise the result of the type's decompress_biovec().
862 */
863int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
864 struct bio_vec *bvec, int vcnt, size_t srclen)
865{
866 struct list_head *workspace;
867 int ret;
868
869 workspace = find_workspace(type);
870 if (IS_ERR(workspace))
871 return -ENOMEM;
872
873 ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
874 disk_start,
875 bvec, vcnt, srclen);
876 free_workspace(type, workspace); /* always handed back, whatever decompress_biovec() returned */
877 return ret;
878}
879
880/*
881 * a less complex decompression routine. Our compressed data fits in a
882 * single page, and we want to read a single page out of it. type selects the algorithm (btrfs_compress_op[type - 1]).
883 * start_byte tells us the offset into the compressed data we're interested in. Returns -ENOMEM when no workspace can be obtained, otherwise the result of the type's decompress().
884 */
885int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
886 unsigned long start_byte, size_t srclen, size_t destlen)
887{
888 struct list_head *workspace;
889 int ret;
890
891 workspace = find_workspace(type);
892 if (IS_ERR(workspace))
893 return -ENOMEM;
894
895 ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
896 dest_page, start_byte,
897 srclen, destlen);
898
899 free_workspace(type, workspace); /* always handed back, whatever decompress() returned */
900 return ret;
901}
902
903void __exit btrfs_exit_compress(void) /* module-exit hook: only releases the idle workspaces via free_workspaces() */
904{
905 free_workspaces();
906}
907
908/*
909 * Copy uncompressed data from working buffer to pages. Returns 0 once *page_index reaches vcnt, and 1 in every other case.
910 *
911 * buf_start is the byte offset at which our workspace buffer starts; it is compared and subtracted directly against start_byte, which is relative to disk_start.
912 *
913 * total_out is the offset just past the last byte of the buffer, so total_out - buf_start bytes are available in buf
914 */
915int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
916 unsigned long total_out, u64 disk_start,
917 struct bio_vec *bvec, int vcnt,
918 unsigned long *page_index,
919 unsigned long *pg_offset)
920{
921 unsigned long buf_offset;
922 unsigned long current_buf_start;
923 unsigned long start_byte;
924 unsigned long working_bytes = total_out - buf_start;
925 unsigned long bytes;
926 char *kaddr;
927 struct page *page_out = bvec[*page_index].bv_page;
928
929 /*
930 * start byte is the first byte of the page we're currently
931 * copying into relative to the start of the compressed data.
932 */
933 start_byte = page_offset(page_out) - disk_start;
934
935 /* we haven't yet hit data corresponding to this page */
936 if (total_out <= start_byte)
937 return 1;
938
939 /*
940 * the start of the data we care about is offset into
941 * the middle of our working buffer
942 */
943 if (total_out > start_byte && buf_start < start_byte) {
944 buf_offset = start_byte - buf_start;
945 working_bytes -= buf_offset;
946 } else {
947 buf_offset = 0;
948 }
949 current_buf_start = buf_start;
950
951 /* copy bytes from the working buffer into the pages */
952 while (working_bytes > 0) {
953 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
954 PAGE_CACHE_SIZE - buf_offset);
955 bytes = min(bytes, working_bytes);
956 kaddr = kmap_atomic(page_out, KM_USER0);
957 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
958 kunmap_atomic(kaddr, KM_USER0);
959 flush_dcache_page(page_out);
960
961 *pg_offset += bytes;
962 buf_offset += bytes;
963 working_bytes -= bytes;
964 current_buf_start += bytes;
965
966 /* check if we need to pick another page */
967 if (*pg_offset == PAGE_CACHE_SIZE) {
968 (*page_index)++;
969 if (*page_index >= vcnt) /* every page of the biovec has been filled */
970 return 0;
971
972 page_out = bvec[*page_index].bv_page;
973 *pg_offset = 0;
974 start_byte = page_offset(page_out) - disk_start;
975
976 /*
977 * make sure our new page is covered by this
978 * working buffer
979 */
980 if (total_out <= start_byte)
981 return 1;
982
983 /*
984 * the next page in the biovec might not be adjacent
985 * to the last page, but it might still be found
986 * inside this working buffer. bump our offset pointer
987 */
988 if (total_out > start_byte &&
989 current_buf_start < start_byte) {
990 buf_offset = start_byte - buf_start;
991 working_bytes = total_out - start_byte;
992 current_buf_start = buf_start + buf_offset;
993 }
994 }
995 }
996
997 return 1;
998}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa715..51000174b9d7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
19#ifndef __BTRFS_COMPRESSION_ 19#ifndef __BTRFS_COMPRESSION_
20#define __BTRFS_COMPRESSION_ 20#define __BTRFS_COMPRESSION_
21 21
22int btrfs_zlib_decompress(unsigned char *data_in, 22int btrfs_init_compress(void);
23 struct page *dest_page, 23void btrfs_exit_compress(void);
24 unsigned long start_byte, 24
25 size_t srclen, size_t destlen); 25int btrfs_compress_pages(int type, struct address_space *mapping,
26int btrfs_zlib_compress_pages(struct address_space *mapping, 26 u64 start, unsigned long len,
27 u64 start, unsigned long len, 27 struct page **pages,
28 struct page **pages, 28 unsigned long nr_dest_pages,
29 unsigned long nr_dest_pages, 29 unsigned long *out_pages,
30 unsigned long *out_pages, 30 unsigned long *total_in,
31 unsigned long *total_in, 31 unsigned long *total_out,
32 unsigned long *total_out, 32 unsigned long max_out);
33 unsigned long max_out); 33int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
34int btrfs_zlib_decompress_biovec(struct page **pages_in, 34 struct bio_vec *bvec, int vcnt, size_t srclen);
35 u64 disk_start, 35int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
36 struct bio_vec *bvec, 36 unsigned long start_byte, size_t srclen, size_t destlen);
37 int vcnt, 37int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
38 size_t srclen); 38 unsigned long total_out, u64 disk_start,
39void btrfs_zlib_exit(void); 39 struct bio_vec *bvec, int vcnt,
40 unsigned long *page_index,
41 unsigned long *pg_offset);
42
40int btrfs_submit_compressed_write(struct inode *inode, u64 start, 43int btrfs_submit_compressed_write(struct inode *inode, u64 start,
41 unsigned long len, u64 disk_start, 44 unsigned long len, u64 disk_start,
42 unsigned long compressed_len, 45 unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
44 unsigned long nr_pages); 47 unsigned long nr_pages);
45int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 48int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
46 int mirror_num, unsigned long bio_flags); 49 int mirror_num, unsigned long bio_flags);
50
51struct btrfs_compress_op { /* per-algorithm hooks; compression.c reaches them through btrfs_compress_op[type - 1] */
52 struct list_head *(*alloc_workspace)(void); /* find_workspace() tests the result with IS_ERR() */
53
54 void (*free_workspace)(struct list_head *workspace); /* releases a workspace obtained from alloc_workspace() */
55
56 int (*compress_pages)(struct list_head *workspace, /* same arguments as btrfs_compress_pages(), with the workspace in place of 'type' */
57 struct address_space *mapping,
58 u64 start, unsigned long len,
59 struct page **pages,
60 unsigned long nr_dest_pages,
61 unsigned long *out_pages,
62 unsigned long *total_in,
63 unsigned long *total_out,
64 unsigned long max_out);
65
66 int (*decompress_biovec)(struct list_head *workspace, /* same arguments as btrfs_decompress_biovec(), with the workspace in place of 'type' */
67 struct page **pages_in,
68 u64 disk_start,
69 struct bio_vec *bvec,
70 int vcnt,
71 size_t srclen);
72
73 int (*decompress)(struct list_head *workspace, /* same arguments as btrfs_decompress(), with the workspace in place of 'type' */
74 unsigned char *data_in,
75 struct page *dest_page,
76 unsigned long start_byte,
77 size_t srclen, size_t destlen);
78};
79
80extern struct btrfs_compress_op btrfs_zlib_compress;
81extern struct btrfs_compress_op btrfs_lzo_compress;
82
47#endif 83#endif
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4403e5643d43..4acd4c611efa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -398,13 +398,15 @@ struct btrfs_super_block {
398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
400#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 400#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
401#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
401 402
402#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 403#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
403#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 404#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
404#define BTRFS_FEATURE_INCOMPAT_SUPP \ 405#define BTRFS_FEATURE_INCOMPAT_SUPP \
405 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 406 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
406 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 407 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
407 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 408 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
409 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
408 410
409/* 411/*
410 * A leaf is full of items. offset and size tell us where to find 412 * A leaf is full of items. offset and size tell us where to find
@@ -551,9 +553,11 @@ struct btrfs_timespec {
551} __attribute__ ((__packed__)); 553} __attribute__ ((__packed__));
552 554
553enum btrfs_compression_type { 555enum btrfs_compression_type {
554 BTRFS_COMPRESS_NONE = 0, 556 BTRFS_COMPRESS_NONE = 0,
555 BTRFS_COMPRESS_ZLIB = 1, 557 BTRFS_COMPRESS_ZLIB = 1,
556 BTRFS_COMPRESS_LAST = 2, 558 BTRFS_COMPRESS_LZO = 2,
559 BTRFS_COMPRESS_TYPES = 2,
560 BTRFS_COMPRESS_LAST = 3,
557}; 561};
558 562
559struct btrfs_inode_item { 563struct btrfs_inode_item {
@@ -897,7 +901,8 @@ struct btrfs_fs_info {
897 */ 901 */
898 u64 last_trans_log_full_commit; 902 u64 last_trans_log_full_commit;
899 u64 open_ioctl_trans; 903 u64 open_ioctl_trans;
900 unsigned long mount_opt; 904 unsigned long mount_opt:20;
905 unsigned long compress_type:4;
901 u64 max_inline; 906 u64 max_inline;
902 u64 alloc_start; 907 u64 alloc_start;
903 struct btrfs_transaction *running_transaction; 908 struct btrfs_transaction *running_transaction;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a5d2249e6da5..f88eb2ce7919 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1744,10 +1744,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1744 } 1744 }
1745 1745
1746 features = btrfs_super_incompat_flags(disk_super); 1746 features = btrfs_super_incompat_flags(disk_super);
1747 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { 1747 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1748 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 1748 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
1749 btrfs_set_super_incompat_flags(disk_super, features); 1749 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
1750 } 1750 btrfs_set_super_incompat_flags(disk_super, features);
1751 1751
1752 features = btrfs_super_compat_ro_flags(disk_super) & 1752 features = btrfs_super_compat_ro_flags(disk_super) &
1753 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1753 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5e7a94d7da89..f1d198128959 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2028 BUG_ON(extent_map_end(em) <= cur); 2028 BUG_ON(extent_map_end(em) <= cur);
2029 BUG_ON(end < cur); 2029 BUG_ON(end < cur);
2030 2030
2031 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2031 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2032 this_bio_flag = EXTENT_BIO_COMPRESSED; 2032 this_bio_flag = EXTENT_BIO_COMPRESSED;
2033 extent_set_compress_type(&this_bio_flag,
2034 em->compress_type);
2035 }
2033 2036
2034 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2037 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2035 cur_end = min(extent_map_end(em) - 1, end); 2038 cur_end = min(extent_map_end(em) - 1, end);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4183c8178f01..7083cfafd061 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,8 +20,12 @@
20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
21#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 21#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
22 22
23/* flags for bio submission */ 23/*
24 * flags for bio submission. The high bits indicate the compression
25 * type for this bio
26 */
24#define EXTENT_BIO_COMPRESSED 1 27#define EXTENT_BIO_COMPRESSED 1
28#define EXTENT_BIO_FLAG_SHIFT 16
25 29
26/* these are bit numbers for test/set bit */ 30/* these are bit numbers for test/set bit */
27#define EXTENT_BUFFER_UPTODATE 0 31#define EXTENT_BUFFER_UPTODATE 0
@@ -135,6 +139,17 @@ struct extent_buffer {
135 wait_queue_head_t lock_wq; 139 wait_queue_head_t lock_wq;
136}; 140};
137 141
142static inline void extent_set_compress_type(unsigned long *bio_flags,
143 int compress_type)
144{
145 *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; /* OR-only: bits already set in *bio_flags are never cleared */
146}
147
148static inline int extent_compress_type(unsigned long bio_flags)
149{
150 return bio_flags >> EXTENT_BIO_FLAG_SHIFT; /* everything above the low EXTENT_BIO_FLAG_SHIFT bits, returned as int */
151}
152
138struct extent_map_tree; 153struct extent_map_tree;
139 154
140static inline struct extent_state *extent_state_next(struct extent_state *state) 155static inline struct extent_state *extent_state_next(struct extent_state *state)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 23cb8da3ff66..b0e1fce12530 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -3,6 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/spinlock.h> 4#include <linux/spinlock.h>
5#include <linux/hardirq.h> 5#include <linux/hardirq.h>
6#include "ctree.h"
6#include "extent_map.h" 7#include "extent_map.h"
7 8
8 9
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
54 return em; 55 return em;
55 em->in_tree = 0; 56 em->in_tree = 0;
56 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE;
57 atomic_set(&em->refs, 1); 59 atomic_set(&em->refs, 1);
58 return em; 60 return em;
59} 61}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ab6d74b6e647..28b44dbd1e35 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,7 +26,8 @@ struct extent_map {
26 unsigned long flags; 26 unsigned long flags;
27 struct block_device *bdev; 27 struct block_device *bdev;
28 atomic_t refs; 28 atomic_t refs;
29 int in_tree; 29 unsigned int in_tree:1;
30 unsigned int compress_type:4;
30}; 31};
31 32
32struct extent_map_tree { 33struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66836d85763b..05df688c96f4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -224,6 +224,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
224 224
225 split->bdev = em->bdev; 225 split->bdev = em->bdev;
226 split->flags = flags; 226 split->flags = flags;
227 split->compress_type = em->compress_type;
227 ret = add_extent_mapping(em_tree, split); 228 ret = add_extent_mapping(em_tree, split);
228 BUG_ON(ret); 229 BUG_ON(ret);
229 free_extent_map(split); 230 free_extent_map(split);
@@ -238,6 +239,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
238 split->len = em->start + em->len - (start + len); 239 split->len = em->start + em->len - (start + len);
239 split->bdev = em->bdev; 240 split->bdev = em->bdev;
240 split->flags = flags; 241 split->flags = flags;
242 split->compress_type = em->compress_type;
241 243
242 if (compressed) { 244 if (compressed) {
243 split->block_len = em->block_len; 245 split->block_len = em->block_len;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 956f1eb913b1..1562765c8e6a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
122 size_t cur_size = size; 122 size_t cur_size = size;
123 size_t datasize; 123 size_t datasize;
124 unsigned long offset; 124 unsigned long offset;
125 int use_compress = 0; 125 int compress_type = BTRFS_COMPRESS_NONE;
126 126
127 if (compressed_size && compressed_pages) { 127 if (compressed_size && compressed_pages) {
128 use_compress = 1; 128 compress_type = root->fs_info->compress_type;
129 cur_size = compressed_size; 129 cur_size = compressed_size;
130 } 130 }
131 131
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
159 btrfs_set_file_extent_ram_bytes(leaf, ei, size); 159 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
160 ptr = btrfs_file_extent_inline_start(ei); 160 ptr = btrfs_file_extent_inline_start(ei);
161 161
162 if (use_compress) { 162 if (compress_type != BTRFS_COMPRESS_NONE) {
163 struct page *cpage; 163 struct page *cpage;
164 int i = 0; 164 int i = 0;
165 while (compressed_size > 0) { 165 while (compressed_size > 0) {
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
176 compressed_size -= cur_size; 176 compressed_size -= cur_size;
177 } 177 }
178 btrfs_set_file_extent_compression(leaf, ei, 178 btrfs_set_file_extent_compression(leaf, ei,
179 BTRFS_COMPRESS_ZLIB); 179 compress_type);
180 } else { 180 } else {
181 page = find_get_page(inode->i_mapping, 181 page = find_get_page(inode->i_mapping,
182 start >> PAGE_CACHE_SHIFT); 182 start >> PAGE_CACHE_SHIFT);
@@ -263,6 +263,7 @@ struct async_extent {
263 u64 compressed_size; 263 u64 compressed_size;
264 struct page **pages; 264 struct page **pages;
265 unsigned long nr_pages; 265 unsigned long nr_pages;
266 int compress_type;
266 struct list_head list; 267 struct list_head list;
267}; 268};
268 269
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
280 u64 start, u64 ram_size, 281 u64 start, u64 ram_size,
281 u64 compressed_size, 282 u64 compressed_size,
282 struct page **pages, 283 struct page **pages,
283 unsigned long nr_pages) 284 unsigned long nr_pages,
285 int compress_type)
284{ 286{
285 struct async_extent *async_extent; 287 struct async_extent *async_extent;
286 288
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
290 async_extent->compressed_size = compressed_size; 292 async_extent->compressed_size = compressed_size;
291 async_extent->pages = pages; 293 async_extent->pages = pages;
292 async_extent->nr_pages = nr_pages; 294 async_extent->nr_pages = nr_pages;
295 async_extent->compress_type = compress_type;
293 list_add_tail(&async_extent->list, &cow->extents); 296 list_add_tail(&async_extent->list, &cow->extents);
294 return 0; 297 return 0;
295} 298}
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
332 unsigned long max_uncompressed = 128 * 1024; 335 unsigned long max_uncompressed = 128 * 1024;
333 int i; 336 int i;
334 int will_compress; 337 int will_compress;
338 int compress_type = root->fs_info->compress_type;
335 339
336 actual_end = min_t(u64, isize, end + 1); 340 actual_end = min_t(u64, isize, end + 1);
337again: 341again:
@@ -381,12 +385,16 @@ again:
381 WARN_ON(pages); 385 WARN_ON(pages);
382 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
383 387
384 ret = btrfs_zlib_compress_pages(inode->i_mapping, start, 388 if (BTRFS_I(inode)->force_compress)
385 total_compressed, pages, 389 compress_type = BTRFS_I(inode)->force_compress;
386 nr_pages, &nr_pages_ret, 390
387 &total_in, 391 ret = btrfs_compress_pages(compress_type,
388 &total_compressed, 392 inode->i_mapping, start,
389 max_compressed); 393 total_compressed, pages,
394 nr_pages, &nr_pages_ret,
395 &total_in,
396 &total_compressed,
397 max_compressed);
390 398
391 if (!ret) { 399 if (!ret) {
392 unsigned long offset = total_compressed & 400 unsigned long offset = total_compressed &
@@ -493,7 +501,8 @@ again:
493 * and will submit them to the elevator. 501 * and will submit them to the elevator.
494 */ 502 */
495 add_async_extent(async_cow, start, num_bytes, 503 add_async_extent(async_cow, start, num_bytes,
496 total_compressed, pages, nr_pages_ret); 504 total_compressed, pages, nr_pages_ret,
505 compress_type);
497 506
498 if (start + num_bytes < end) { 507 if (start + num_bytes < end) {
499 start += num_bytes; 508 start += num_bytes;
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed:
515 __set_page_dirty_nobuffers(locked_page); 524 __set_page_dirty_nobuffers(locked_page);
516 /* unlocked later on in the async handlers */ 525 /* unlocked later on in the async handlers */
517 } 526 }
518 add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); 527 add_async_extent(async_cow, start, end - start + 1,
528 0, NULL, 0, BTRFS_COMPRESS_NONE);
519 *num_added += 1; 529 *num_added += 1;
520 } 530 }
521 531
@@ -640,6 +650,7 @@ retry:
640 em->block_start = ins.objectid; 650 em->block_start = ins.objectid;
641 em->block_len = ins.offset; 651 em->block_len = ins.offset;
642 em->bdev = root->fs_info->fs_devices->latest_bdev; 652 em->bdev = root->fs_info->fs_devices->latest_bdev;
653 em->compress_type = async_extent->compress_type;
643 set_bit(EXTENT_FLAG_PINNED, &em->flags); 654 set_bit(EXTENT_FLAG_PINNED, &em->flags);
644 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 655 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
645 656
@@ -656,11 +667,13 @@ retry:
656 async_extent->ram_size - 1, 0); 667 async_extent->ram_size - 1, 0);
657 } 668 }
658 669
659 ret = btrfs_add_ordered_extent(inode, async_extent->start, 670 ret = btrfs_add_ordered_extent_compress(inode,
660 ins.objectid, 671 async_extent->start,
661 async_extent->ram_size, 672 ins.objectid,
662 ins.offset, 673 async_extent->ram_size,
663 BTRFS_ORDERED_COMPRESSED); 674 ins.offset,
675 BTRFS_ORDERED_COMPRESSED,
676 async_extent->compress_type);
664 BUG_ON(ret); 677 BUG_ON(ret);
665 678
666 /* 679 /*
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1670 struct btrfs_ordered_extent *ordered_extent = NULL; 1683 struct btrfs_ordered_extent *ordered_extent = NULL;
1671 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1684 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1672 struct extent_state *cached_state = NULL; 1685 struct extent_state *cached_state = NULL;
1673 int compressed = 0; 1686 int compress_type = 0;
1674 int ret; 1687 int ret;
1675 bool nolock = false; 1688 bool nolock = false;
1676 1689
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1712 1725
1713 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1726 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1714 compressed = 1; 1727 compress_type = ordered_extent->compress_type;
1715 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1728 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1716 BUG_ON(compressed); 1729 BUG_ON(compress_type);
1717 ret = btrfs_mark_extent_written(trans, inode, 1730 ret = btrfs_mark_extent_written(trans, inode,
1718 ordered_extent->file_offset, 1731 ordered_extent->file_offset,
1719 ordered_extent->file_offset + 1732 ordered_extent->file_offset +
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1727 ordered_extent->disk_len, 1740 ordered_extent->disk_len,
1728 ordered_extent->len, 1741 ordered_extent->len,
1729 ordered_extent->len, 1742 ordered_extent->len,
1730 compressed, 0, 0, 1743 compress_type, 0, 0,
1731 BTRFS_FILE_EXTENT_REG); 1744 BTRFS_FILE_EXTENT_REG);
1732 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1745 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1733 ordered_extent->file_offset, 1746 ordered_extent->file_offset,
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1829 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 1842 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
1830 logical = em->block_start; 1843 logical = em->block_start;
1831 failrec->bio_flags = EXTENT_BIO_COMPRESSED; 1844 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
1845 extent_set_compress_type(&failrec->bio_flags,
1846 em->compress_type);
1832 } 1847 }
1833 failrec->logical = logical; 1848 failrec->logical = logical;
1834 free_extent_map(em); 1849 free_extent_map(em);
@@ -4934,8 +4949,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4934 size_t max_size; 4949 size_t max_size;
4935 unsigned long inline_size; 4950 unsigned long inline_size;
4936 unsigned long ptr; 4951 unsigned long ptr;
4952 int compress_type;
4937 4953
4938 WARN_ON(pg_offset != 0); 4954 WARN_ON(pg_offset != 0);
4955 compress_type = btrfs_file_extent_compression(leaf, item);
4939 max_size = btrfs_file_extent_ram_bytes(leaf, item); 4956 max_size = btrfs_file_extent_ram_bytes(leaf, item);
4940 inline_size = btrfs_file_extent_inline_item_len(leaf, 4957 inline_size = btrfs_file_extent_inline_item_len(leaf,
4941 btrfs_item_nr(leaf, path->slots[0])); 4958 btrfs_item_nr(leaf, path->slots[0]));
@@ -4945,8 +4962,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4945 read_extent_buffer(leaf, tmp, ptr, inline_size); 4962 read_extent_buffer(leaf, tmp, ptr, inline_size);
4946 4963
4947 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); 4964 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
4948 ret = btrfs_zlib_decompress(tmp, page, extent_offset, 4965 ret = btrfs_decompress(compress_type, tmp, page,
4949 inline_size, max_size); 4966 extent_offset, inline_size, max_size);
4950 if (ret) { 4967 if (ret) {
4951 char *kaddr = kmap_atomic(page, KM_USER0); 4968 char *kaddr = kmap_atomic(page, KM_USER0);
4952 unsigned long copy_size = min_t(u64, 4969 unsigned long copy_size = min_t(u64,
@@ -4988,7 +5005,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
4988 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5005 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4989 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5006 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4990 struct btrfs_trans_handle *trans = NULL; 5007 struct btrfs_trans_handle *trans = NULL;
4991 int compressed; 5008 int compress_type;
4992 5009
4993again: 5010again:
4994 read_lock(&em_tree->lock); 5011 read_lock(&em_tree->lock);
@@ -5047,7 +5064,7 @@ again:
5047 5064
5048 found_type = btrfs_file_extent_type(leaf, item); 5065 found_type = btrfs_file_extent_type(leaf, item);
5049 extent_start = found_key.offset; 5066 extent_start = found_key.offset;
5050 compressed = btrfs_file_extent_compression(leaf, item); 5067 compress_type = btrfs_file_extent_compression(leaf, item);
5051 if (found_type == BTRFS_FILE_EXTENT_REG || 5068 if (found_type == BTRFS_FILE_EXTENT_REG ||
5052 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5069 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
5053 extent_end = extent_start + 5070 extent_end = extent_start +
@@ -5093,8 +5110,9 @@ again:
5093 em->block_start = EXTENT_MAP_HOLE; 5110 em->block_start = EXTENT_MAP_HOLE;
5094 goto insert; 5111 goto insert;
5095 } 5112 }
5096 if (compressed) { 5113 if (compress_type != BTRFS_COMPRESS_NONE) {
5097 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5114 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
5115 em->compress_type = compress_type;
5098 em->block_start = bytenr; 5116 em->block_start = bytenr;
5099 em->block_len = btrfs_file_extent_disk_num_bytes(leaf, 5117 em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
5100 item); 5118 item);
@@ -5128,12 +5146,14 @@ again:
5128 em->len = (copy_size + root->sectorsize - 1) & 5146 em->len = (copy_size + root->sectorsize - 1) &
5129 ~((u64)root->sectorsize - 1); 5147 ~((u64)root->sectorsize - 1);
5130 em->orig_start = EXTENT_MAP_INLINE; 5148 em->orig_start = EXTENT_MAP_INLINE;
5131 if (compressed) 5149 if (compress_type) {
5132 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5150 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
5151 em->compress_type = compress_type;
5152 }
5133 ptr = btrfs_file_extent_inline_start(item) + extent_offset; 5153 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
5134 if (create == 0 && !PageUptodate(page)) { 5154 if (create == 0 && !PageUptodate(page)) {
5135 if (btrfs_file_extent_compression(leaf, item) == 5155 if (btrfs_file_extent_compression(leaf, item) !=
5136 BTRFS_COMPRESS_ZLIB) { 5156 BTRFS_COMPRESS_NONE) {
5137 ret = uncompress_inline(path, inode, page, 5157 ret = uncompress_inline(path, inode, page,
5138 pg_offset, 5158 pg_offset,
5139 extent_offset, item); 5159 extent_offset, item);
@@ -6483,7 +6503,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6483 ei->ordered_data_close = 0; 6503 ei->ordered_data_close = 0;
6484 ei->orphan_meta_reserved = 0; 6504 ei->orphan_meta_reserved = 0;
6485 ei->dummy_inode = 0; 6505 ei->dummy_inode = 0;
6486 ei->force_compress = 0; 6506 ei->force_compress = BTRFS_COMPRESS_NONE;
6487 6507
6488 inode = &ei->vfs_inode; 6508 inode = &ei->vfs_inode;
6489 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); 6509 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ad1983524f97..a506a22b522a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -643,9 +643,11 @@ static int btrfs_defrag_file(struct file *file,
643 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 643 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
644 struct btrfs_ordered_extent *ordered; 644 struct btrfs_ordered_extent *ordered;
645 struct page *page; 645 struct page *page;
646 struct btrfs_super_block *disk_super;
646 unsigned long last_index; 647 unsigned long last_index;
647 unsigned long ra_pages = root->fs_info->bdi.ra_pages; 648 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
648 unsigned long total_read = 0; 649 unsigned long total_read = 0;
650 u64 features;
649 u64 page_start; 651 u64 page_start;
650 u64 page_end; 652 u64 page_end;
651 u64 last_len = 0; 653 u64 last_len = 0;
@@ -653,6 +655,14 @@ static int btrfs_defrag_file(struct file *file,
653 u64 defrag_end = 0; 655 u64 defrag_end = 0;
654 unsigned long i; 656 unsigned long i;
655 int ret; 657 int ret;
658 int compress_type = BTRFS_COMPRESS_ZLIB;
659
660 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
661 if (range->compress_type > BTRFS_COMPRESS_TYPES)
662 return -EINVAL;
663 if (range->compress_type)
664 compress_type = range->compress_type;
665 }
656 666
657 if (inode->i_size == 0) 667 if (inode->i_size == 0)
658 return 0; 668 return 0;
@@ -688,7 +698,7 @@ static int btrfs_defrag_file(struct file *file,
688 total_read++; 698 total_read++;
689 mutex_lock(&inode->i_mutex); 699 mutex_lock(&inode->i_mutex);
690 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 700 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
691 BTRFS_I(inode)->force_compress = 1; 701 BTRFS_I(inode)->force_compress = compress_type;
692 702
693 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 703 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
694 if (ret) 704 if (ret)
@@ -786,10 +796,17 @@ loop_unlock:
786 atomic_dec(&root->fs_info->async_submit_draining); 796 atomic_dec(&root->fs_info->async_submit_draining);
787 797
788 mutex_lock(&inode->i_mutex); 798 mutex_lock(&inode->i_mutex);
789 BTRFS_I(inode)->force_compress = 0; 799 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
790 mutex_unlock(&inode->i_mutex); 800 mutex_unlock(&inode->i_mutex);
791 } 801 }
792 802
803 disk_super = &root->fs_info->super_copy;
804 features = btrfs_super_incompat_flags(disk_super);
805 if (range->compress_type == BTRFS_COMPRESS_LZO) {
806 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
807 btrfs_set_super_incompat_flags(disk_super, features);
808 }
809
793 return 0; 810 return 0;
794 811
795err_reservations: 812err_reservations:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 1223223351fa..8fb382167b13 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -134,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args {
134 */ 134 */
135 __u32 extent_thresh; 135 __u32 extent_thresh;
136 136
137 /*
138 * which compression method to use if turning on compression
139 * for this defrag operation. If unspecified, zlib will
140 * be used
141 */
142 __u32 compress_type;
143
137 /* spare for later */ 144 /* spare for later */
138 __u32 unused[5]; 145 __u32 unused[4];
139}; 146};
140 147
141struct btrfs_ioctl_space_info { 148struct btrfs_ioctl_space_info {
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
new file mode 100644
index 000000000000..cc9b450399df
--- /dev/null
+++ b/fs/btrfs/lzo.c
@@ -0,0 +1,420 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/slab.h>
21#include <linux/vmalloc.h>
22#include <linux/init.h>
23#include <linux/err.h>
24#include <linux/sched.h>
25#include <linux/pagemap.h>
26#include <linux/bio.h>
27#include <linux/lzo.h>
28#include "compression.h"
29
30#define LZO_LEN 4
31
/*
 * Scratch state used by the LZO callbacks in this file.  Allocated by
 * lzo_alloc_workspace() and released by lzo_free_workspace(); callers hold
 * a pointer to the embedded list member.
 */
32struct workspace {
33 void *mem; /* LZO1X_MEM_COMPRESS bytes of working memory for lzo1x_1_compress() */
34 void *buf; /* where decompressed data goes */
35 void *cbuf; /* where compressed data goes */
36 struct list_head list;
37};
38
/*
 * Tear down one LZO workspace.
 *
 * @ws: the list member embedded in the struct workspace to destroy.
 *
 * Frees the three vmalloc'ed buffers and then the structure itself.  Also
 * used by the error path of lzo_alloc_workspace(), where some of the buffer
 * pointers may still be NULL.
 */
39static void lzo_free_workspace(struct list_head *ws)
40{
41 struct workspace *workspace = list_entry(ws, struct workspace, list);
42
43 vfree(workspace->buf);
44 vfree(workspace->cbuf);
45 vfree(workspace->mem);
46 kfree(workspace);
47}
48
/*
 * Allocate a new LZO workspace.
 *
 * The structure is zero-allocated with GFP_NOFS; the compressor's working
 * memory and the two data buffers come from vmalloc().  Both data buffers
 * are sized lzo1x_worst_compress(PAGE_CACHE_SIZE).
 *
 * Returns a pointer to the workspace's embedded list member, or
 * ERR_PTR(-ENOMEM) if any allocation fails.
 */
49static struct list_head *lzo_alloc_workspace(void)
50{
51 struct workspace *workspace;
52
53 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
54 if (!workspace)
55 return ERR_PTR(-ENOMEM);
56
 /*
  * All three allocations are attempted before any is checked; because
  * the struct was zeroed, a failed one is simply left as NULL for the
  * common cleanup below.
  */
57 workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
58 workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
59 workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
60 if (!workspace->mem || !workspace->buf || !workspace->cbuf)
61 goto fail;
62
63 INIT_LIST_HEAD(&workspace->list);
64
65 return &workspace->list;
66fail:
67 lzo_free_workspace(&workspace->list);
68 return ERR_PTR(-ENOMEM);
69}
70
/*
 * Store @len at @buf as an LZO_LEN-byte little-endian value.
 *
 * The value is converted with cpu_to_le32(), so only 32 bits of @len are
 * kept.  memcpy() is used for the store; presumably so that @buf need not
 * be aligned -- the callers pass arbitrary offsets into a page.
 */
71static inline void write_compress_length(char *buf, size_t len)
72{
73 __le32 dlen;
74
75 dlen = cpu_to_le32(len);
76 memcpy(buf, &dlen, LZO_LEN);
77}
78
/*
 * Load the LZO_LEN-byte little-endian length stored at @buf and return it
 * in CPU byte order.  Counterpart of write_compress_length().
 */
79static inline size_t read_compress_length(char *buf)
80{
81 __le32 dlen;
82
83 memcpy(&dlen, buf, LZO_LEN);
84 return le32_to_cpu(dlen);
85}
86
/*
 * Compress up to @len bytes of page-cache data, starting at file offset
 * @start in @mapping, into freshly allocated pages stored in @pages.
 *
 * Output layout produced by this function:
 *   - LZO_LEN bytes: total size of the output (written last, into pages[0])
 *   - then, per input chunk (at most PAGE_CACHE_SIZE bytes of input each):
 *       LZO_LEN bytes holding the compressed chunk length, followed by the
 *       compressed bytes, which may continue onto the next output page.
 *   A chunk-length field is never split across pages: when a chunk ends
 *   with fewer than LZO_LEN bytes left in the output page, the remainder is
 *   zero-filled and counted in the total.
 *
 * @ws:            list member of the struct workspace to use
 * @mapping:       address space the input pages are looked up in
 * @start:         file offset of the first input byte
 * @len:           number of input bytes to compress
 * @pages:         array that receives the allocated output pages
 * @nr_dest_pages: maximum number of output pages that may be used
 * @out_pages:     set to the number of pages stored in @pages, on success
 *                 and on failure alike
 * @total_in:      set to the number of input bytes consumed; left at 0
 *                 unless the function runs to completion
 * @total_out:     set to the number of output bytes produced; left at 0
 *                 unless the function runs to completion
 * @max_out:       no further input is taken once the output has grown past
 *                 this many bytes
 *
 * Returns 0 on success, -ENOMEM if an output page cannot be allocated, and
 * -1 if the compressor fails or @nr_dest_pages is exhausted.
 *
 * NOTE(review): the two "output is larger than input" exits jump to out
 * without touching ret, so the function returns the success value with
 * *total_in and *total_out left at 0 -- confirm callers treat that as
 * "not compressed".
 */
87static int lzo_compress_pages(struct list_head *ws,
88 struct address_space *mapping,
89 u64 start, unsigned long len,
90 struct page **pages,
91 unsigned long nr_dest_pages,
92 unsigned long *out_pages,
93 unsigned long *total_in,
94 unsigned long *total_out,
95 unsigned long max_out)
96{
97 struct workspace *workspace = list_entry(ws, struct workspace, list);
98 int ret = 0;
99 char *data_in;
100 char *cpage_out;
101 int nr_pages = 0;
102 struct page *in_page = NULL;
103 struct page *out_page = NULL;
104 unsigned long bytes_left;
105
106 size_t in_len;
107 size_t out_len;
108 char *buf;
109 unsigned long tot_in = 0;
110 unsigned long tot_out = 0;
111 unsigned long pg_bytes_left;
112 unsigned long out_offset;
113 unsigned long bytes;
114
115 *out_pages = 0;
116 *total_out = 0;
117 *total_in = 0;
118
 /* NOTE(review): the looked-up page is mapped without a NULL check */
119 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
120 data_in = kmap(in_page);
121
122 /*
123 * store the size of all chunks of compressed data in
124 * the first 4 bytes
125 */
126 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
127 if (out_page == NULL) {
128 ret = -ENOMEM;
129 goto out;
130 }
131 cpage_out = kmap(out_page);
 /* skip the total-size field; it is filled in after the loop */
132 out_offset = LZO_LEN;
133 tot_out = LZO_LEN;
134 pages[0] = out_page;
135 nr_pages = 1;
136 pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
137
138 /* compress at most one page of data each time */
139 in_len = min(len, PAGE_CACHE_SIZE);
140 while (tot_in < len) {
141 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
142 &out_len, workspace->mem);
143 if (ret != LZO_E_OK) {
 /* NOTE(review): message says "deflate" although this is the LZO path */
144 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
145 ret);
146 ret = -1;
147 goto out;
148 }
149
150 /* store the size of this chunk of compressed data */
151 write_compress_length(cpage_out + out_offset, out_len);
152 tot_out += LZO_LEN;
153 out_offset += LZO_LEN;
154 pg_bytes_left -= LZO_LEN;
155
156 tot_in += in_len;
157 tot_out += out_len;
158
159 /* copy bytes from the working buffer into the pages */
160 buf = workspace->cbuf;
161 while (out_len) {
162 bytes = min_t(unsigned long, pg_bytes_left, out_len);
163
164 memcpy(cpage_out + out_offset, buf, bytes);
165
166 out_len -= bytes;
167 pg_bytes_left -= bytes;
168 buf += bytes;
169 out_offset += bytes;
170
171 /*
172 * we need another page for writing out.
173 *
174 * Note if there's less than 4 bytes left, we just
175 * skip to a new page.
176 */
177 if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
178 pg_bytes_left == 0) {
 /* zero the unusable tail of the page and count it as output */
179 if (pg_bytes_left) {
180 memset(cpage_out + out_offset, 0,
181 pg_bytes_left);
182 tot_out += pg_bytes_left;
183 }
184
185 /* we're done, don't allocate new page */
186 if (out_len == 0 && tot_in >= len)
187 break;
188
189 kunmap(out_page);
190 if (nr_pages == nr_dest_pages) {
 /* already unmapped above; keep the out path from unmapping it again */
191 out_page = NULL;
192 ret = -1;
193 goto out;
194 }
195
196 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
197 if (out_page == NULL) {
198 ret = -ENOMEM;
199 goto out;
200 }
201 cpage_out = kmap(out_page);
202 pages[nr_pages++] = out_page;
203
204 pg_bytes_left = PAGE_CACHE_SIZE;
205 out_offset = 0;
206 }
207 }
208
209 /* we're making it bigger, give up */
210 if (tot_in > 8192 && tot_in < tot_out)
211 goto out;
212
213 /* we're all done */
214 if (tot_in >= len)
215 break;
216
 /* output budget exceeded: stop here and report what was consumed so far */
217 if (tot_out > max_out)
218 break;
219
220 bytes_left = len - tot_in;
221 kunmap(in_page);
222 page_cache_release(in_page);
223
224 start += PAGE_CACHE_SIZE;
 /* NOTE(review): the looked-up page is mapped without a NULL check */
225 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
226 data_in = kmap(in_page);
227 in_len = min(bytes_left, PAGE_CACHE_SIZE);
228 }
229
230 if (tot_out > tot_in)
231 goto out;
232
233 /* store the size of all chunks of compressed data */
234 cpage_out = kmap(pages[0]);
235 write_compress_length(cpage_out, tot_out);
236
237 kunmap(pages[0]);
238
239 ret = 0;
240 *total_out = tot_out;
241 *total_in = tot_in;
242out:
 /* reported on every exit path, including the error paths */
243 *out_pages = nr_pages;
244 if (out_page)
245 kunmap(out_page);
246
247 if (in_page) {
248 kunmap(in_page);
249 page_cache_release(in_page);
250 }
251
252 return ret;
253}
254
/*
 * Decompress an LZO stream, laid out as lzo_compress_pages() writes it,
 * from @pages_in and hand the decompressed data to
 * btrfs_decompress_buf2page() chunk by chunk.
 *
 * @ws:         list member of the struct workspace to use
 * @pages_in:   pages holding the compressed stream
 * @disk_start: passed through to btrfs_decompress_buf2page()
 * @bvec:       destination bio vectors, passed through to
 *              btrfs_decompress_buf2page()
 * @vcnt:       passed through with @bvec (presumably its entry count)
 * @srclen:     size of the compressed stream in bytes; bounds both the
 *              number of input pages used and the total length taken from
 *              the stream header
 *
 * Returns 0 on success and -1 if the input pages run out or a chunk fails
 * to decompress.
 *
 * NOTE(review): the per-chunk length read from the stream is not checked
 * against the size of workspace->cbuf before the slow path copies that many
 * bytes into it -- a corrupted length looks able to overrun the buffer;
 * confirm and bound it.
 */
255static int lzo_decompress_biovec(struct list_head *ws,
256 struct page **pages_in,
257 u64 disk_start,
258 struct bio_vec *bvec,
259 int vcnt,
260 size_t srclen)
261{
262 struct workspace *workspace = list_entry(ws, struct workspace, list);
263 int ret = 0, ret2;
264 char *data_in;
265 unsigned long page_in_index = 0;
266 unsigned long page_out_index = 0;
267 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
268 PAGE_CACHE_SIZE;
269 unsigned long buf_start;
270 unsigned long buf_offset = 0;
271 unsigned long bytes;
272 unsigned long working_bytes;
273 unsigned long pg_offset;
274
275 size_t in_len;
276 size_t out_len;
277 unsigned long in_offset;
278 unsigned long in_page_bytes_left;
279 unsigned long tot_in;
280 unsigned long tot_out;
281 unsigned long tot_len;
282 char *buf;
283
 /* the stream starts with its total length */
284 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in);
286
287 tot_in = LZO_LEN;
288 in_offset = LZO_LEN;
 /* never trust the header beyond what the caller says is present */
289 tot_len = min_t(size_t, srclen, tot_len);
290 in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
291
292 tot_out = 0;
293 pg_offset = 0;
294
295 while (tot_in < tot_len) {
 /* each chunk is prefixed with its compressed length */
296 in_len = read_compress_length(data_in + in_offset);
297 in_page_bytes_left -= LZO_LEN;
298 in_offset += LZO_LEN;
299 tot_in += LZO_LEN;
300
301 tot_in += in_len;
302 working_bytes = in_len;
303
304 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset;
307 bytes = in_len;
 /* jumps into the loop body below to reuse its page-advance logic */
308 goto cont;
309 }
310
311 /* copy bytes from the pages into the working buffer */
312 buf = workspace->cbuf;
313 buf_offset = 0;
314 while (working_bytes) {
315 bytes = min(working_bytes, in_page_bytes_left);
316
317 memcpy(buf + buf_offset, data_in + in_offset, bytes);
318 buf_offset += bytes;
319cont:
320 working_bytes -= bytes;
321 in_page_bytes_left -= bytes;
322 in_offset += bytes;
323
324 /* check if we need to pick another page */
325 if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
326 || in_page_bytes_left == 0) {
 /* account for the zero padding the compressor left at the page tail */
327 tot_in += in_page_bytes_left;
328
329 if (working_bytes == 0 && tot_in >= tot_len)
330 break;
331
332 kunmap(pages_in[page_in_index]);
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1;
 /* nothing is mapped any more; tells the done path not to kunmap */
336 data_in = NULL;
337 goto done;
338 }
339 data_in = kmap(pages_in[page_in_index]);
340
341 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0;
343 }
344 }
345
 /* capacity of workspace->buf, as sized in lzo_alloc_workspace() */
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len);
349 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1;
352 break;
353 }
354
355 buf_start = tot_out;
356 tot_out += out_len;
357
358 ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
359 tot_out, disk_start,
360 bvec, vcnt,
361 &page_out_index, &pg_offset);
 /* presumably 0 means the destination needs no more data -- TODO confirm */
362 if (ret2 == 0)
363 break;
364 }
365done:
366 if (data_in)
367 kunmap(pages_in[page_in_index]);
368 return ret;
369}
370
/*
 * Decompress the first chunk of an in-memory LZO stream and copy part of
 * the result to the start of @dest_page.
 *
 * @ws:         list member of the struct workspace to use
 * @data_in:    compressed stream: total-length field, chunk-length field,
 *              then the chunk's compressed bytes
 * @dest_page:  page that receives the decompressed bytes
 * @start_byte: offset into the decompressed data at which copying starts
 * @srclen:     length of @data_in in bytes
 * @destlen:    maximum number of bytes to copy into @dest_page
 *
 * Returns 0 on success, -1 if decompression fails or yields fewer than
 * @start_byte bytes.
 *
 * NOTE(review): only srclen >= LZO_LEN is asserted although two LZO_LEN
 * fields are read, and the chunk length is not checked against @srclen
 * before being handed to the decompressor -- confirm callers guarantee
 * both.  tot_len is read but not used afterwards.
 */
371static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
372 struct page *dest_page,
373 unsigned long start_byte,
374 size_t srclen, size_t destlen)
375{
376 struct workspace *workspace = list_entry(ws, struct workspace, list);
377 size_t in_len;
378 size_t out_len;
379 size_t tot_len;
380 int ret = 0;
381 char *kaddr;
382 unsigned long bytes;
383
384 BUG_ON(srclen < LZO_LEN);
385
386 tot_len = read_compress_length(data_in);
387 data_in += LZO_LEN;
388
389 in_len = read_compress_length(data_in);
390 data_in += LZO_LEN;
391
 /* cap the decompressed size at one page */
392 out_len = PAGE_CACHE_SIZE;
393 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
394 if (ret != LZO_E_OK) {
395 printk(KERN_WARNING "btrfs decompress failed!\n");
396 ret = -1;
397 goto out;
398 }
399
 /* the requested offset lies beyond what was decompressed */
400 if (out_len < start_byte) {
401 ret = -1;
402 goto out;
403 }
404
405 bytes = min_t(unsigned long, destlen, out_len - start_byte);
406
407 kaddr = kmap_atomic(dest_page, KM_USER0);
408 memcpy(kaddr, workspace->buf + start_byte, bytes);
409 kunmap_atomic(kaddr, KM_USER0);
410out:
411 return ret;
412}
413
/*
 * Callback table exposing this file's LZO routines through
 * struct btrfs_compress_op.  It is the only non-static symbol defined in
 * this file.
 */
414struct btrfs_compress_op btrfs_lzo_compress = {
415 .alloc_workspace = lzo_alloc_workspace,
416 .free_workspace = lzo_free_workspace,
417 .compress_pages = lzo_compress_pages,
418 .decompress_biovec = lzo_decompress_biovec,
419 .decompress = lzo_decompress,
420};
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ae7737e352c9..2b61e1ddcd99 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
172 */ 172 */
173static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 173static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174 u64 start, u64 len, u64 disk_len, 174 u64 start, u64 len, u64 disk_len,
175 int type, int dio) 175 int type, int dio, int compress_type)
176{ 176{
177 struct btrfs_ordered_inode_tree *tree; 177 struct btrfs_ordered_inode_tree *tree;
178 struct rb_node *node; 178 struct rb_node *node;
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
189 entry->disk_len = disk_len; 189 entry->disk_len = disk_len;
190 entry->bytes_left = len; 190 entry->bytes_left = len;
191 entry->inode = inode; 191 entry->inode = inode;
192 entry->compress_type = compress_type;
192 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 193 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
193 set_bit(type, &entry->flags); 194 set_bit(type, &entry->flags);
194 195
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
220 u64 start, u64 len, u64 disk_len, int type) 221 u64 start, u64 len, u64 disk_len, int type)
221{ 222{
222 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 223 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
223 disk_len, type, 0); 224 disk_len, type, 0,
225 BTRFS_COMPRESS_NONE);
224} 226}
225 227
226int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 228int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
227 u64 start, u64 len, u64 disk_len, int type) 229 u64 start, u64 len, u64 disk_len, int type)
228{ 230{
229 return __btrfs_add_ordered_extent(inode, file_offset, start, len, 231 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
230 disk_len, type, 1); 232 disk_len, type, 1,
233 BTRFS_COMPRESS_NONE);
234}
235
236int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
237 u64 start, u64 len, u64 disk_len,
238 int type, int compress_type)
239{
240 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
241 disk_len, type, 0,
242 compress_type);
231} 243}
232 244
233/* 245/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 61dca83119dd..ff1f69aa1883 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum {
68 68
69#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ 69#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
70 70
71#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ 71#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
72 72
73#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ 73#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
74 74
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent {
93 /* flags (described above) */ 93 /* flags (described above) */
94 unsigned long flags; 94 unsigned long flags;
95 95
96 /* compression algorithm */
97 int compress_type;
98
96 /* reference count */ 99 /* reference count */
97 atomic_t refs; 100 atomic_t refs;
98 101
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
148 u64 start, u64 len, u64 disk_len, int type); 151 u64 start, u64 len, u64 disk_len, int type);
149int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 152int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
150 u64 start, u64 len, u64 disk_len, int type); 153 u64 start, u64 len, u64 disk_len, int type);
154int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
155 u64 start, u64 len, u64 disk_len,
156 int type, int compress_type);
151int btrfs_add_ordered_sum(struct inode *inode, 157int btrfs_add_ordered_sum(struct inode *inode,
152 struct btrfs_ordered_extent *entry, 158 struct btrfs_ordered_extent *entry,
153 struct btrfs_ordered_sum *sum); 159 struct btrfs_ordered_sum *sum);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 61bd79abb805..a1a76b2a61f9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -69,9 +69,9 @@ enum {
69 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, 69 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
72 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 72 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
73 Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, 73 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
74 Opt_user_subvol_rm_allowed, 74 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
75}; 75};
76 76
77static match_table_t tokens = { 77static match_table_t tokens = {
@@ -86,7 +86,9 @@ static match_table_t tokens = {
86 {Opt_alloc_start, "alloc_start=%s"}, 86 {Opt_alloc_start, "alloc_start=%s"},
87 {Opt_thread_pool, "thread_pool=%d"}, 87 {Opt_thread_pool, "thread_pool=%d"},
88 {Opt_compress, "compress"}, 88 {Opt_compress, "compress"},
89 {Opt_compress_type, "compress=%s"},
89 {Opt_compress_force, "compress-force"}, 90 {Opt_compress_force, "compress-force"},
91 {Opt_compress_force_type, "compress-force=%s"},
90 {Opt_ssd, "ssd"}, 92 {Opt_ssd, "ssd"},
91 {Opt_ssd_spread, "ssd_spread"}, 93 {Opt_ssd_spread, "ssd_spread"},
92 {Opt_nossd, "nossd"}, 94 {Opt_nossd, "nossd"},
@@ -112,6 +114,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
112 char *p, *num, *orig; 114 char *p, *num, *orig;
113 int intarg; 115 int intarg;
114 int ret = 0; 116 int ret = 0;
117 char *compress_type;
118 bool compress_force = false;
115 119
116 if (!options) 120 if (!options)
117 return 0; 121 return 0;
@@ -154,14 +158,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
154 btrfs_set_opt(info->mount_opt, NODATACOW); 158 btrfs_set_opt(info->mount_opt, NODATACOW);
155 btrfs_set_opt(info->mount_opt, NODATASUM); 159 btrfs_set_opt(info->mount_opt, NODATASUM);
156 break; 160 break;
157 case Opt_compress:
158 printk(KERN_INFO "btrfs: use compression\n");
159 btrfs_set_opt(info->mount_opt, COMPRESS);
160 break;
161 case Opt_compress_force: 161 case Opt_compress_force:
162 printk(KERN_INFO "btrfs: forcing compression\n"); 162 case Opt_compress_force_type:
163 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 163 compress_force = true;
164 case Opt_compress:
165 case Opt_compress_type:
166 if (token == Opt_compress ||
167 token == Opt_compress_force ||
168 strcmp(args[0].from, "zlib") == 0) {
169 compress_type = "zlib";
170 info->compress_type = BTRFS_COMPRESS_ZLIB;
171 } else if (strcmp(args[0].from, "lzo") == 0) {
172 compress_type = "lzo";
173 info->compress_type = BTRFS_COMPRESS_LZO;
174 } else {
175 ret = -EINVAL;
176 goto out;
177 }
178
164 btrfs_set_opt(info->mount_opt, COMPRESS); 179 btrfs_set_opt(info->mount_opt, COMPRESS);
180 if (compress_force) {
181 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
182 pr_info("btrfs: force %s compression\n",
183 compress_type);
184 } else
185 pr_info("btrfs: use %s compression\n",
186 compress_type);
165 break; 187 break;
166 case Opt_ssd: 188 case Opt_ssd:
167 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 189 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -898,10 +920,14 @@ static int __init init_btrfs_fs(void)
898 if (err) 920 if (err)
899 return err; 921 return err;
900 922
901 err = btrfs_init_cachep(); 923 err = btrfs_init_compress();
902 if (err) 924 if (err)
903 goto free_sysfs; 925 goto free_sysfs;
904 926
927 err = btrfs_init_cachep();
928 if (err)
929 goto free_compress;
930
905 err = extent_io_init(); 931 err = extent_io_init();
906 if (err) 932 if (err)
907 goto free_cachep; 933 goto free_cachep;
@@ -929,6 +955,8 @@ free_extent_io:
929 extent_io_exit(); 955 extent_io_exit();
930free_cachep: 956free_cachep:
931 btrfs_destroy_cachep(); 957 btrfs_destroy_cachep();
958free_compress:
959 btrfs_exit_compress();
932free_sysfs: 960free_sysfs:
933 btrfs_exit_sysfs(); 961 btrfs_exit_sysfs();
934 return err; 962 return err;
@@ -943,7 +971,7 @@ static void __exit exit_btrfs_fs(void)
943 unregister_filesystem(&btrfs_fs_type); 971 unregister_filesystem(&btrfs_fs_type);
944 btrfs_exit_sysfs(); 972 btrfs_exit_sysfs();
945 btrfs_cleanup_fs_uuids(); 973 btrfs_cleanup_fs_uuids();
946 btrfs_zlib_exit(); 974 btrfs_exit_compress();
947} 975}
948 976
949module_init(init_btrfs_fs) 977module_init(init_btrfs_fs)
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index b9cd5445f71c..f5ec2d44150d 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -32,15 +32,6 @@
32#include <linux/bio.h> 32#include <linux/bio.h>
33#include "compression.h" 33#include "compression.h"
34 34
35/* Plan: call deflate() with avail_in == *sourcelen,
36 avail_out = *dstlen - 12 and flush == Z_FINISH.
37 If it doesn't manage to finish, call it again with
38 avail_in == 0 and avail_out set to the remaining 12
39 bytes for it to clean up.
40 Q: Is 12 bytes sufficient?
41*/
42#define STREAM_END_SPACE 12
43
44struct workspace { 35struct workspace {
45 z_stream inf_strm; 36 z_stream inf_strm;
46 z_stream def_strm; 37 z_stream def_strm;
@@ -48,152 +39,51 @@ struct workspace {
48 struct list_head list; 39 struct list_head list;
49}; 40};
50 41
51static LIST_HEAD(idle_workspace); 42static void zlib_free_workspace(struct list_head *ws)
52static DEFINE_SPINLOCK(workspace_lock); 43{
53static unsigned long num_workspace; 44 struct workspace *workspace = list_entry(ws, struct workspace, list);
54static atomic_t alloc_workspace = ATOMIC_INIT(0);
55static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56 45
57/* 46 vfree(workspace->def_strm.workspace);
58 * this finds an available zlib workspace or allocates a new one 47 vfree(workspace->inf_strm.workspace);
59 * NULL or an ERR_PTR is returned if things go bad. 48 kfree(workspace->buf);
60 */ 49 kfree(workspace);
61static struct workspace *find_zlib_workspace(void) 50}
51
52static struct list_head *zlib_alloc_workspace(void)
62{ 53{
63 struct workspace *workspace; 54 struct workspace *workspace;
64 int ret;
65 int cpus = num_online_cpus();
66
67again:
68 spin_lock(&workspace_lock);
69 if (!list_empty(&idle_workspace)) {
70 workspace = list_entry(idle_workspace.next, struct workspace,
71 list);
72 list_del(&workspace->list);
73 num_workspace--;
74 spin_unlock(&workspace_lock);
75 return workspace;
76 55
77 }
78 spin_unlock(&workspace_lock);
79 if (atomic_read(&alloc_workspace) > cpus) {
80 DEFINE_WAIT(wait);
81 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82 if (atomic_read(&alloc_workspace) > cpus)
83 schedule();
84 finish_wait(&workspace_wait, &wait);
85 goto again;
86 }
87 atomic_inc(&alloc_workspace);
88 workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 56 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
89 if (!workspace) { 57 if (!workspace)
90 ret = -ENOMEM; 58 return ERR_PTR(-ENOMEM);
91 goto fail;
92 }
93 59
94 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95 if (!workspace->def_strm.workspace) {
96 ret = -ENOMEM;
97 goto fail;
98 }
99 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 61 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100 if (!workspace->inf_strm.workspace) {
101 ret = -ENOMEM;
102 goto fail_inflate;
103 }
104 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 62 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105 if (!workspace->buf) { 63 if (!workspace->def_strm.workspace ||
106 ret = -ENOMEM; 64 !workspace->inf_strm.workspace || !workspace->buf)
107 goto fail_kmalloc; 65 goto fail;
108 }
109 return workspace;
110
111fail_kmalloc:
112 vfree(workspace->inf_strm.workspace);
113fail_inflate:
114 vfree(workspace->def_strm.workspace);
115fail:
116 kfree(workspace);
117 atomic_dec(&alloc_workspace);
118 wake_up(&workspace_wait);
119 return ERR_PTR(ret);
120}
121
122/*
123 * put a workspace struct back on the list or free it if we have enough
124 * idle ones sitting around
125 */
126static int free_workspace(struct workspace *workspace)
127{
128 spin_lock(&workspace_lock);
129 if (num_workspace < num_online_cpus()) {
130 list_add_tail(&workspace->list, &idle_workspace);
131 num_workspace++;
132 spin_unlock(&workspace_lock);
133 if (waitqueue_active(&workspace_wait))
134 wake_up(&workspace_wait);
135 return 0;
136 }
137 spin_unlock(&workspace_lock);
138 vfree(workspace->def_strm.workspace);
139 vfree(workspace->inf_strm.workspace);
140 kfree(workspace->buf);
141 kfree(workspace);
142 66
143 atomic_dec(&alloc_workspace); 67 INIT_LIST_HEAD(&workspace->list);
144 if (waitqueue_active(&workspace_wait))
145 wake_up(&workspace_wait);
146 return 0;
147}
148 68
149/* 69 return &workspace->list;
150 * cleanup function for module exit 70fail:
151 */ 71 zlib_free_workspace(&workspace->list);
152static void free_workspaces(void) 72 return ERR_PTR(-ENOMEM);
153{
154 struct workspace *workspace;
155 while (!list_empty(&idle_workspace)) {
156 workspace = list_entry(idle_workspace.next, struct workspace,
157 list);
158 list_del(&workspace->list);
159 vfree(workspace->def_strm.workspace);
160 vfree(workspace->inf_strm.workspace);
161 kfree(workspace->buf);
162 kfree(workspace);
163 atomic_dec(&alloc_workspace);
164 }
165} 73}
166 74
167/* 75static int zlib_compress_pages(struct list_head *ws,
168 * given an address space and start/len, compress the bytes. 76 struct address_space *mapping,
169 * 77 u64 start, unsigned long len,
170 * pages are allocated to hold the compressed result and stored 78 struct page **pages,
171 * in 'pages' 79 unsigned long nr_dest_pages,
172 * 80 unsigned long *out_pages,
173 * out_pages is used to return the number of pages allocated. There 81 unsigned long *total_in,
174 * may be pages allocated even if we return an error 82 unsigned long *total_out,
175 * 83 unsigned long max_out)
176 * total_in is used to return the number of bytes actually read. It
177 * may be smaller then len if we had to exit early because we
178 * ran out of room in the pages array or because we cross the
179 * max_out threshold.
180 *
181 * total_out is used to return the total number of compressed bytes
182 *
183 * max_out tells us the max number of bytes that we're allowed to
184 * stuff into pages
185 */
186int btrfs_zlib_compress_pages(struct address_space *mapping,
187 u64 start, unsigned long len,
188 struct page **pages,
189 unsigned long nr_dest_pages,
190 unsigned long *out_pages,
191 unsigned long *total_in,
192 unsigned long *total_out,
193 unsigned long max_out)
194{ 84{
85 struct workspace *workspace = list_entry(ws, struct workspace, list);
195 int ret; 86 int ret;
196 struct workspace *workspace;
197 char *data_in; 87 char *data_in;
198 char *cpage_out; 88 char *cpage_out;
199 int nr_pages = 0; 89 int nr_pages = 0;
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
205 *total_out = 0; 95 *total_out = 0;
206 *total_in = 0; 96 *total_in = 0;
207 97
208 workspace = find_zlib_workspace();
209 if (IS_ERR(workspace))
210 return -1;
211
212 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 98 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
213 printk(KERN_WARNING "deflateInit failed\n"); 99 printk(KERN_WARNING "deflateInit failed\n");
214 ret = -1; 100 ret = -1;
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
222 data_in = kmap(in_page); 108 data_in = kmap(in_page);
223 109
224 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 110 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
111 if (out_page == NULL) {
112 ret = -1;
113 goto out;
114 }
225 cpage_out = kmap(out_page); 115 cpage_out = kmap(out_page);
226 pages[0] = out_page; 116 pages[0] = out_page;
227 nr_pages = 1; 117 nr_pages = 1;
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
260 goto out; 150 goto out;
261 } 151 }
262 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 152 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
153 if (out_page == NULL) {
154 ret = -1;
155 goto out;
156 }
263 cpage_out = kmap(out_page); 157 cpage_out = kmap(out_page);
264 pages[nr_pages] = out_page; 158 pages[nr_pages] = out_page;
265 nr_pages++; 159 nr_pages++;
@@ -314,55 +208,26 @@ out:
314 kunmap(in_page); 208 kunmap(in_page);
315 page_cache_release(in_page); 209 page_cache_release(in_page);
316 } 210 }
317 free_workspace(workspace);
318 return ret; 211 return ret;
319} 212}
320 213
321/* 214static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
322 * pages_in is an array of pages with compressed data. 215 u64 disk_start,
323 * 216 struct bio_vec *bvec,
324 * disk_start is the starting logical offset of this array in the file 217 int vcnt,
325 * 218 size_t srclen)
326 * bvec is a bio_vec of pages from the file that we want to decompress into
327 *
328 * vcnt is the count of pages in the biovec
329 *
330 * srclen is the number of bytes in pages_in
331 *
332 * The basic idea is that we have a bio that was created by readpages.
333 * The pages in the bio are for the uncompressed data, and they may not
334 * be contiguous. They all correspond to the range of bytes covered by
335 * the compressed extent.
336 */
337int btrfs_zlib_decompress_biovec(struct page **pages_in,
338 u64 disk_start,
339 struct bio_vec *bvec,
340 int vcnt,
341 size_t srclen)
342{ 219{
343 int ret = 0; 220 struct workspace *workspace = list_entry(ws, struct workspace, list);
221 int ret = 0, ret2;
344 int wbits = MAX_WBITS; 222 int wbits = MAX_WBITS;
345 struct workspace *workspace;
346 char *data_in; 223 char *data_in;
347 size_t total_out = 0; 224 size_t total_out = 0;
348 unsigned long page_bytes_left;
349 unsigned long page_in_index = 0; 225 unsigned long page_in_index = 0;
350 unsigned long page_out_index = 0; 226 unsigned long page_out_index = 0;
351 struct page *page_out;
352 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / 227 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
353 PAGE_CACHE_SIZE; 228 PAGE_CACHE_SIZE;
354 unsigned long buf_start; 229 unsigned long buf_start;
355 unsigned long buf_offset;
356 unsigned long bytes;
357 unsigned long working_bytes;
358 unsigned long pg_offset; 230 unsigned long pg_offset;
359 unsigned long start_byte;
360 unsigned long current_buf_start;
361 char *kaddr;
362
363 workspace = find_zlib_workspace();
364 if (IS_ERR(workspace))
365 return -ENOMEM;
366 231
367 data_in = kmap(pages_in[page_in_index]); 232 data_in = kmap(pages_in[page_in_index]);
368 workspace->inf_strm.next_in = data_in; 233 workspace->inf_strm.next_in = data_in;
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
372 workspace->inf_strm.total_out = 0; 237 workspace->inf_strm.total_out = 0;
373 workspace->inf_strm.next_out = workspace->buf; 238 workspace->inf_strm.next_out = workspace->buf;
374 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 239 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
375 page_out = bvec[page_out_index].bv_page;
376 page_bytes_left = PAGE_CACHE_SIZE;
377 pg_offset = 0; 240 pg_offset = 0;
378 241
379 /* If it's deflate, and it's got no preset dictionary, then 242 /* If it's deflate, and it's got no preset dictionary, then
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
389 252
390 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 253 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
391 printk(KERN_WARNING "inflateInit failed\n"); 254 printk(KERN_WARNING "inflateInit failed\n");
392 ret = -1; 255 return -1;
393 goto out;
394 } 256 }
395 while (workspace->inf_strm.total_in < srclen) { 257 while (workspace->inf_strm.total_in < srclen) {
396 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); 258 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
397 if (ret != Z_OK && ret != Z_STREAM_END) 259 if (ret != Z_OK && ret != Z_STREAM_END)
398 break; 260 break;
399 /*
400 * buf start is the byte offset we're of the start of
401 * our workspace buffer
402 */
403 buf_start = total_out;
404 261
405 /* total_out is the last byte of the workspace buffer */ 262 buf_start = total_out;
406 total_out = workspace->inf_strm.total_out; 263 total_out = workspace->inf_strm.total_out;
407 264
408 working_bytes = total_out - buf_start; 265 /* we didn't make progress in this inflate call, we're done */
409 266 if (buf_start == total_out)
410 /*
411 * start byte is the first byte of the page we're currently
412 * copying into relative to the start of the compressed data.
413 */
414 start_byte = page_offset(page_out) - disk_start;
415
416 if (working_bytes == 0) {
417 /* we didn't make progress in this inflate
418 * call, we're done
419 */
420 if (ret != Z_STREAM_END)
421 ret = -1;
422 break; 267 break;
423 }
424 268
425 /* we haven't yet hit data corresponding to this page */ 269 ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
426 if (total_out <= start_byte) 270 total_out, disk_start,
427 goto next; 271 bvec, vcnt,
428 272 &page_out_index, &pg_offset);
429 /* 273 if (ret2 == 0) {
430 * the start of the data we care about is offset into 274 ret = 0;
431 * the middle of our working buffer 275 goto done;
432 */
433 if (total_out > start_byte && buf_start < start_byte) {
434 buf_offset = start_byte - buf_start;
435 working_bytes -= buf_offset;
436 } else {
437 buf_offset = 0;
438 }
439 current_buf_start = buf_start;
440
441 /* copy bytes from the working buffer into the pages */
442 while (working_bytes > 0) {
443 bytes = min(PAGE_CACHE_SIZE - pg_offset,
444 PAGE_CACHE_SIZE - buf_offset);
445 bytes = min(bytes, working_bytes);
446 kaddr = kmap_atomic(page_out, KM_USER0);
447 memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
448 bytes);
449 kunmap_atomic(kaddr, KM_USER0);
450 flush_dcache_page(page_out);
451
452 pg_offset += bytes;
453 page_bytes_left -= bytes;
454 buf_offset += bytes;
455 working_bytes -= bytes;
456 current_buf_start += bytes;
457
458 /* check if we need to pick another page */
459 if (page_bytes_left == 0) {
460 page_out_index++;
461 if (page_out_index >= vcnt) {
462 ret = 0;
463 goto done;
464 }
465
466 page_out = bvec[page_out_index].bv_page;
467 pg_offset = 0;
468 page_bytes_left = PAGE_CACHE_SIZE;
469 start_byte = page_offset(page_out) - disk_start;
470
471 /*
472 * make sure our new page is covered by this
473 * working buffer
474 */
475 if (total_out <= start_byte)
476 goto next;
477
478 /* the next page in the biovec might not
479 * be adjacent to the last page, but it
480 * might still be found inside this working
481 * buffer. bump our offset pointer
482 */
483 if (total_out > start_byte &&
484 current_buf_start < start_byte) {
485 buf_offset = start_byte - buf_start;
486 working_bytes = total_out - start_byte;
487 current_buf_start = buf_start +
488 buf_offset;
489 }
490 }
491 } 276 }
492next: 277
493 workspace->inf_strm.next_out = workspace->buf; 278 workspace->inf_strm.next_out = workspace->buf;
494 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 279 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
495 280
@@ -516,35 +301,21 @@ done:
516 zlib_inflateEnd(&workspace->inf_strm); 301 zlib_inflateEnd(&workspace->inf_strm);
517 if (data_in) 302 if (data_in)
518 kunmap(pages_in[page_in_index]); 303 kunmap(pages_in[page_in_index]);
519out:
520 free_workspace(workspace);
521 return ret; 304 return ret;
522} 305}
523 306
524/* 307static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
525 * a less complex decompression routine. Our compressed data fits in a 308 struct page *dest_page,
526 * single page, and we want to read a single page out of it. 309 unsigned long start_byte,
527 * start_byte tells us the offset into the compressed data we're interested in 310 size_t srclen, size_t destlen)
528 */
529int btrfs_zlib_decompress(unsigned char *data_in,
530 struct page *dest_page,
531 unsigned long start_byte,
532 size_t srclen, size_t destlen)
533{ 311{
312 struct workspace *workspace = list_entry(ws, struct workspace, list);
534 int ret = 0; 313 int ret = 0;
535 int wbits = MAX_WBITS; 314 int wbits = MAX_WBITS;
536 struct workspace *workspace;
537 unsigned long bytes_left = destlen; 315 unsigned long bytes_left = destlen;
538 unsigned long total_out = 0; 316 unsigned long total_out = 0;
539 char *kaddr; 317 char *kaddr;
540 318
541 if (destlen > PAGE_CACHE_SIZE)
542 return -ENOMEM;
543
544 workspace = find_zlib_workspace();
545 if (IS_ERR(workspace))
546 return -ENOMEM;
547
548 workspace->inf_strm.next_in = data_in; 319 workspace->inf_strm.next_in = data_in;
549 workspace->inf_strm.avail_in = srclen; 320 workspace->inf_strm.avail_in = srclen;
550 workspace->inf_strm.total_in = 0; 321 workspace->inf_strm.total_in = 0;
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
565 336
566 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 337 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
567 printk(KERN_WARNING "inflateInit failed\n"); 338 printk(KERN_WARNING "inflateInit failed\n");
568 ret = -1; 339 return -1;
569 goto out;
570 } 340 }
571 341
572 while (bytes_left > 0) { 342 while (bytes_left > 0) {
@@ -616,12 +386,13 @@ next:
616 ret = 0; 386 ret = 0;
617 387
618 zlib_inflateEnd(&workspace->inf_strm); 388 zlib_inflateEnd(&workspace->inf_strm);
619out:
620 free_workspace(workspace);
621 return ret; 389 return ret;
622} 390}
623 391
624void btrfs_zlib_exit(void) 392struct btrfs_compress_op btrfs_zlib_compress = {
625{ 393 .alloc_workspace = zlib_alloc_workspace,
626 free_workspaces(); 394 .free_workspace = zlib_free_workspace,
627} 395 .compress_pages = zlib_compress_pages,
396 .decompress_biovec = zlib_decompress_biovec,
397 .decompress = zlib_decompress,
398};