diff options
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r-- | fs/btrfs/compression.c | 354 |
1 file changed, 347 insertions, 7 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..4d2110eafe29 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -62,6 +62,9 @@ struct compressed_bio { | |||
62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
64 | 64 | ||
65 | /* the compression algorithm for this bio */ | ||
66 | int compress_type; | ||
67 | |||
65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
67 | 70 | ||
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
173 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
174 | * the decompression. | 177 | * the decompression. |
175 | */ | 178 | */ |
176 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
177 | cb->start, | 180 | cb->compressed_pages, |
178 | cb->orig_bio->bi_io_vec, | 181 | cb->start, |
179 | cb->orig_bio->bi_vcnt, | 182 | cb->orig_bio->bi_io_vec, |
180 | cb->compressed_len); | 183 | cb->orig_bio->bi_vcnt, |
184 | cb->compressed_len); | ||
181 | csum_failed: | 185 | csum_failed: |
182 | if (ret) | 186 | if (ret) |
183 | cb->errors = 1; | 187 | cb->errors = 1; |
@@ -558,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
558 | u64 em_len; | 562 | u64 em_len; |
559 | u64 em_start; | 563 | u64 em_start; |
560 | struct extent_map *em; | 564 | struct extent_map *em; |
561 | int ret; | 565 | int ret = -ENOMEM; |
562 | u32 *sums; | 566 | u32 *sums; |
563 | 567 | ||
564 | tree = &BTRFS_I(inode)->io_tree; | 568 | tree = &BTRFS_I(inode)->io_tree; |
@@ -573,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
573 | 577 | ||
574 | compressed_len = em->block_len; | 578 | compressed_len = em->block_len; |
575 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 579 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
580 | if (!cb) | ||
581 | goto out; | ||
582 | |||
576 | atomic_set(&cb->pending_bios, 0); | 583 | atomic_set(&cb->pending_bios, 0); |
577 | cb->errors = 0; | 584 | cb->errors = 0; |
578 | cb->inode = inode; | 585 | cb->inode = inode; |
@@ -588,17 +595,23 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
588 | 595 | ||
589 | cb->len = uncompressed_len; | 596 | cb->len = uncompressed_len; |
590 | cb->compressed_len = compressed_len; | 597 | cb->compressed_len = compressed_len; |
598 | cb->compress_type = extent_compress_type(bio_flags); | ||
591 | cb->orig_bio = bio; | 599 | cb->orig_bio = bio; |
592 | 600 | ||
593 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 601 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
594 | PAGE_CACHE_SIZE; | 602 | PAGE_CACHE_SIZE; |
595 | cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, | 603 | cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, |
596 | GFP_NOFS); | 604 | GFP_NOFS); |
605 | if (!cb->compressed_pages) | ||
606 | goto fail1; | ||
607 | |||
597 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 608 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
598 | 609 | ||
599 | for (page_index = 0; page_index < nr_pages; page_index++) { | 610 | for (page_index = 0; page_index < nr_pages; page_index++) { |
600 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | | 611 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | |
601 | __GFP_HIGHMEM); | 612 | __GFP_HIGHMEM); |
613 | if (!cb->compressed_pages[page_index]) | ||
614 | goto fail2; | ||
602 | } | 615 | } |
603 | cb->nr_pages = nr_pages; | 616 | cb->nr_pages = nr_pages; |
604 | 617 | ||
@@ -609,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
609 | cb->len = uncompressed_len; | 622 | cb->len = uncompressed_len; |
610 | 623 | ||
611 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); | 624 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); |
625 | if (!comp_bio) | ||
626 | goto fail2; | ||
612 | comp_bio->bi_private = cb; | 627 | comp_bio->bi_private = cb; |
613 | comp_bio->bi_end_io = end_compressed_bio_read; | 628 | comp_bio->bi_end_io = end_compressed_bio_read; |
614 | atomic_inc(&cb->pending_bios); | 629 | atomic_inc(&cb->pending_bios); |
@@ -676,4 +691,329 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
676 | 691 | ||
677 | bio_put(comp_bio); | 692 | bio_put(comp_bio); |
678 | return 0; | 693 | return 0; |
694 | |||
695 | fail2: | ||
696 | for (page_index = 0; page_index < nr_pages; page_index++) | ||
697 | free_page((unsigned long)cb->compressed_pages[page_index]); | ||
698 | |||
699 | kfree(cb->compressed_pages); | ||
700 | fail1: | ||
701 | kfree(cb); | ||
702 | out: | ||
703 | free_extent_map(em); | ||
704 | return ret; | ||
705 | } | ||
706 | |||
707 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
708 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
709 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
710 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
711 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
712 | |||
713 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
714 | &btrfs_zlib_compress, | ||
715 | &btrfs_lzo_compress, | ||
716 | }; | ||
717 | |||
718 | int __init btrfs_init_compress(void) | ||
719 | { | ||
720 | int i; | ||
721 | |||
722 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
723 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
724 | spin_lock_init(&comp_workspace_lock[i]); | ||
725 | atomic_set(&comp_alloc_workspace[i], 0); | ||
726 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
727 | } | ||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * this finds an available workspace or allocates a new one | ||
733 | * ERR_PTR is returned if things go bad. | ||
734 | */ | ||
735 | static struct list_head *find_workspace(int type) | ||
736 | { | ||
737 | struct list_head *workspace; | ||
738 | int cpus = num_online_cpus(); | ||
739 | int idx = type - 1; | ||
740 | |||
741 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
742 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
743 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
744 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
745 | int *num_workspace = &comp_num_workspace[idx]; | ||
746 | again: | ||
747 | spin_lock(workspace_lock); | ||
748 | if (!list_empty(idle_workspace)) { | ||
749 | workspace = idle_workspace->next; | ||
750 | list_del(workspace); | ||
751 | (*num_workspace)--; | ||
752 | spin_unlock(workspace_lock); | ||
753 | return workspace; | ||
754 | |||
755 | } | ||
756 | if (atomic_read(alloc_workspace) > cpus) { | ||
757 | DEFINE_WAIT(wait); | ||
758 | |||
759 | spin_unlock(workspace_lock); | ||
760 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
761 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
762 | schedule(); | ||
763 | finish_wait(workspace_wait, &wait); | ||
764 | goto again; | ||
765 | } | ||
766 | atomic_inc(alloc_workspace); | ||
767 | spin_unlock(workspace_lock); | ||
768 | |||
769 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
770 | if (IS_ERR(workspace)) { | ||
771 | atomic_dec(alloc_workspace); | ||
772 | wake_up(workspace_wait); | ||
773 | } | ||
774 | return workspace; | ||
775 | } | ||
776 | |||
777 | /* | ||
778 | * put a workspace struct back on the list or free it if we have enough | ||
779 | * idle ones sitting around | ||
780 | */ | ||
781 | static void free_workspace(int type, struct list_head *workspace) | ||
782 | { | ||
783 | int idx = type - 1; | ||
784 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
785 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
786 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
787 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
788 | int *num_workspace = &comp_num_workspace[idx]; | ||
789 | |||
790 | spin_lock(workspace_lock); | ||
791 | if (*num_workspace < num_online_cpus()) { | ||
792 | list_add_tail(workspace, idle_workspace); | ||
793 | (*num_workspace)++; | ||
794 | spin_unlock(workspace_lock); | ||
795 | goto wake; | ||
796 | } | ||
797 | spin_unlock(workspace_lock); | ||
798 | |||
799 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
800 | atomic_dec(alloc_workspace); | ||
801 | wake: | ||
802 | if (waitqueue_active(workspace_wait)) | ||
803 | wake_up(workspace_wait); | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * cleanup function for module exit | ||
808 | */ | ||
809 | static void free_workspaces(void) | ||
810 | { | ||
811 | struct list_head *workspace; | ||
812 | int i; | ||
813 | |||
814 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
815 | while (!list_empty(&comp_idle_workspace[i])) { | ||
816 | workspace = comp_idle_workspace[i].next; | ||
817 | list_del(workspace); | ||
818 | btrfs_compress_op[i]->free_workspace(workspace); | ||
819 | atomic_dec(&comp_alloc_workspace[i]); | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * given an address space and start/len, compress the bytes. | ||
826 | * | ||
827 | * pages are allocated to hold the compressed result and stored | ||
828 | * in 'pages' | ||
829 | * | ||
830 | * out_pages is used to return the number of pages allocated. There | ||
831 | * may be pages allocated even if we return an error | ||
832 | * | ||
833 | * total_in is used to return the number of bytes actually read. It | ||
834 | * may be smaller than len if we had to exit early because we | ||
835 | * ran out of room in the pages array or because we cross the | ||
836 | * max_out threshold. | ||
837 | * | ||
838 | * total_out is used to return the total number of compressed bytes | ||
839 | * | ||
840 | * max_out tells us the max number of bytes that we're allowed to | ||
841 | * stuff into pages | ||
842 | */ | ||
843 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
844 | u64 start, unsigned long len, | ||
845 | struct page **pages, | ||
846 | unsigned long nr_dest_pages, | ||
847 | unsigned long *out_pages, | ||
848 | unsigned long *total_in, | ||
849 | unsigned long *total_out, | ||
850 | unsigned long max_out) | ||
851 | { | ||
852 | struct list_head *workspace; | ||
853 | int ret; | ||
854 | |||
855 | workspace = find_workspace(type); | ||
856 | if (IS_ERR(workspace)) | ||
857 | return -1; | ||
858 | |||
859 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
860 | start, len, pages, | ||
861 | nr_dest_pages, out_pages, | ||
862 | total_in, total_out, | ||
863 | max_out); | ||
864 | free_workspace(type, workspace); | ||
865 | return ret; | ||
866 | } | ||
867 | |||
868 | /* | ||
869 | * pages_in is an array of pages with compressed data. | ||
870 | * | ||
871 | * disk_start is the starting logical offset of this array in the file | ||
872 | * | ||
873 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
874 | * | ||
875 | * vcnt is the count of pages in the biovec | ||
876 | * | ||
877 | * srclen is the number of bytes in pages_in | ||
878 | * | ||
879 | * The basic idea is that we have a bio that was created by readpages. | ||
880 | * The pages in the bio are for the uncompressed data, and they may not | ||
881 | * be contiguous. They all correspond to the range of bytes covered by | ||
882 | * the compressed extent. | ||
883 | */ | ||
884 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
885 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
886 | { | ||
887 | struct list_head *workspace; | ||
888 | int ret; | ||
889 | |||
890 | workspace = find_workspace(type); | ||
891 | if (IS_ERR(workspace)) | ||
892 | return -ENOMEM; | ||
893 | |||
894 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
895 | disk_start, | ||
896 | bvec, vcnt, srclen); | ||
897 | free_workspace(type, workspace); | ||
898 | return ret; | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * a less complex decompression routine. Our compressed data fits in a | ||
903 | * single page, and we want to read a single page out of it. | ||
904 | * start_byte tells us the offset into the compressed data we're interested in | ||
905 | */ | ||
906 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
907 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
908 | { | ||
909 | struct list_head *workspace; | ||
910 | int ret; | ||
911 | |||
912 | workspace = find_workspace(type); | ||
913 | if (IS_ERR(workspace)) | ||
914 | return -ENOMEM; | ||
915 | |||
916 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
917 | dest_page, start_byte, | ||
918 | srclen, destlen); | ||
919 | |||
920 | free_workspace(type, workspace); | ||
921 | return ret; | ||
922 | } | ||
923 | |||
924 | void btrfs_exit_compress(void) | ||
925 | { | ||
926 | free_workspaces(); | ||
927 | } | ||
928 | |||
929 | /* | ||
930 | * Copy uncompressed data from working buffer to pages. | ||
931 | * | ||
932 | * buf_start is the byte offset, in the uncompressed data, of the start of our workspace buffer. | ||
933 | * | ||
934 | * total_out is the byte offset, in the uncompressed data, of the end of the working buffer. | ||
935 | */ | ||
936 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
937 | unsigned long total_out, u64 disk_start, | ||
938 | struct bio_vec *bvec, int vcnt, | ||
939 | unsigned long *page_index, | ||
940 | unsigned long *pg_offset) | ||
941 | { | ||
942 | unsigned long buf_offset; | ||
943 | unsigned long current_buf_start; | ||
944 | unsigned long start_byte; | ||
945 | unsigned long working_bytes = total_out - buf_start; | ||
946 | unsigned long bytes; | ||
947 | char *kaddr; | ||
948 | struct page *page_out = bvec[*page_index].bv_page; | ||
949 | |||
950 | /* | ||
951 | * start byte is the first byte of the page we're currently | ||
952 | * copying into relative to the start of the compressed data. | ||
953 | */ | ||
954 | start_byte = page_offset(page_out) - disk_start; | ||
955 | |||
956 | /* we haven't yet hit data corresponding to this page */ | ||
957 | if (total_out <= start_byte) | ||
958 | return 1; | ||
959 | |||
960 | /* | ||
961 | * the start of the data we care about is offset into | ||
962 | * the middle of our working buffer | ||
963 | */ | ||
964 | if (total_out > start_byte && buf_start < start_byte) { | ||
965 | buf_offset = start_byte - buf_start; | ||
966 | working_bytes -= buf_offset; | ||
967 | } else { | ||
968 | buf_offset = 0; | ||
969 | } | ||
970 | current_buf_start = buf_start; | ||
971 | |||
972 | /* copy bytes from the working buffer into the pages */ | ||
973 | while (working_bytes > 0) { | ||
974 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
975 | PAGE_CACHE_SIZE - buf_offset); | ||
976 | bytes = min(bytes, working_bytes); | ||
977 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
978 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
979 | kunmap_atomic(kaddr, KM_USER0); | ||
980 | flush_dcache_page(page_out); | ||
981 | |||
982 | *pg_offset += bytes; | ||
983 | buf_offset += bytes; | ||
984 | working_bytes -= bytes; | ||
985 | current_buf_start += bytes; | ||
986 | |||
987 | /* check if we need to pick another page */ | ||
988 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
989 | (*page_index)++; | ||
990 | if (*page_index >= vcnt) | ||
991 | return 0; | ||
992 | |||
993 | page_out = bvec[*page_index].bv_page; | ||
994 | *pg_offset = 0; | ||
995 | start_byte = page_offset(page_out) - disk_start; | ||
996 | |||
997 | /* | ||
998 | * make sure our new page is covered by this | ||
999 | * working buffer | ||
1000 | */ | ||
1001 | if (total_out <= start_byte) | ||
1002 | return 1; | ||
1003 | |||
1004 | /* | ||
1005 | * the next page in the biovec might not be adjacent | ||
1006 | * to the last page, but it might still be found | ||
1007 | * inside this working buffer. bump our offset pointer | ||
1008 | */ | ||
1009 | if (total_out > start_byte && | ||
1010 | current_buf_start < start_byte) { | ||
1011 | buf_offset = start_byte - buf_start; | ||
1012 | working_bytes = total_out - start_byte; | ||
1013 | current_buf_start = buf_start + buf_offset; | ||
1014 | } | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | return 1; | ||
679 | } | 1019 | } |