Diffstat (limited to 'fs/btrfs/compression.c')

-rw-r--r--  fs/btrfs/compression.c | 371

1 file changed, 360 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c56..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
 	/* number of bytes on disk */
 	unsigned long compressed_len;
 
+	/* the compression algorithm for this bio */
+	int compress_type;
+
 	/* number of compressed pages in the array */
 	unsigned long nr_pages;
 
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, let's start
 	 * the decompression.
 	 */
-	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
-					   cb->start,
-					   cb->orig_bio->bi_io_vec,
-					   cb->orig_bio->bi_vcnt,
-					   cb->compressed_len);
+	ret = btrfs_decompress_biovec(cb->compress_type,
+				      cb->compressed_pages,
+				      cb->start,
+				      cb->orig_bio->bi_io_vec,
+				      cb->orig_bio->bi_vcnt,
+				      cb->compressed_len);
 csum_failed:
 	if (ret)
 		cb->errors = 1;
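
With compress_type recorded in the compressed_bio, the read-completion path no
longer hard-codes zlib: it dispatches through the per-algorithm ops table
(btrfs_compress_op[], added further down in this diff), using type - 1 as the
index since type 0 means "no compression". A sketch of the ops interface the
table entries implement, reconstructed from the calls in this patch (the
authoritative definition lives in compression.h, which is not part of this
diff):

    /* sketch only -- reconstructed from the call sites below,
     * not the verbatim header */
    struct btrfs_compress_op {
            struct list_head *(*alloc_workspace)(void);
            void (*free_workspace)(struct list_head *workspace);
            int (*compress_pages)(struct list_head *workspace,
                                  struct address_space *mapping,
                                  u64 start, unsigned long len,
                                  struct page **pages,
                                  unsigned long nr_dest_pages,
                                  unsigned long *out_pages,
                                  unsigned long *total_in,
                                  unsigned long *total_out,
                                  unsigned long max_out);
            int (*decompress_biovec)(struct list_head *workspace,
                                     struct page **pages_in, u64 disk_start,
                                     struct bio_vec *bvec, int vcnt,
                                     size_t srclen);
            int (*decompress)(struct list_head *workspace,
                              unsigned char *data_in, struct page *dest_page,
                              unsigned long start_byte, size_t srclen,
                              size_t destlen);
    };
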
@@ -336,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		return -ENOMEM;
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -350,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	if (!bio) {
+		kfree(cb);
+		return -ENOMEM;
+	}
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 	atomic_inc(&cb->pending_bios);
@@ -558,7 +568,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret;
+	int ret = -ENOMEM;
 	u32 *sums;
 
 	tree = &BTRFS_I(inode)->io_tree;
@@ -573,6 +583,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		goto out;
+
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -588,17 +601,23 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->len = uncompressed_len;
 	cb->compressed_len = compressed_len;
+	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
 				 PAGE_CACHE_SIZE;
-	cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
 				       GFP_NOFS);
+	if (!cb->compressed_pages)
+		goto fail1;
+
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	for (page_index = 0; page_index < nr_pages; page_index++) {
 		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
+		if (!cb->compressed_pages[page_index])
+			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
 
@@ -609,6 +628,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->len = uncompressed_len;
 
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+	if (!comp_bio)
+		goto fail2;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
@@ -642,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			atomic_inc(&cb->pending_bios);
 
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-				btrfs_lookup_bio_sums(root, inode, comp_bio,
-						      sums);
+				ret = btrfs_lookup_bio_sums(root, inode,
+							comp_bio, sums);
+				BUG_ON(ret);
 			}
 			sums += (comp_bio->bi_size + root->sectorsize - 1) /
 				root->sectorsize;
@@ -668,12 +690,339 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
 	BUG_ON(ret);
 
 	bio_put(comp_bio);
 	return 0;
+
+fail2:
+	for (page_index = 0; page_index < nr_pages; page_index++)
+		free_page((unsigned long)cb->compressed_pages[page_index]);
+
+	kfree(cb->compressed_pages);
+fail1:
+	kfree(cb);
+out:
+	free_extent_map(em);
+	return ret;
+}
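
The new fail1/fail2/out labels follow the usual kernel unwind idiom: each
label releases exactly what had been allocated before the jump, in reverse
order. The kmalloc-to-kzalloc switch for compressed_pages matters here, since
fail2 walks the whole array and relies on the slots past the failure point
being NULL. A condensed sketch of the shape (hypothetical sizes, kernel-style
C):

    static int alloc_two(void **a, void **b)
    {
            *a = kmalloc(128, GFP_NOFS);    /* hypothetical sizes */
            if (!*a)
                    goto out;
            *b = kmalloc(256, GFP_NOFS);
            if (!*b)
                    goto fail;
            return 0;
    fail:
            kfree(*a);      /* undo only what succeeded */
    out:
            return -ENOMEM;
    }
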
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+	&btrfs_zlib_compress,
+	&btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		INIT_LIST_HEAD(&comp_idle_workspace[i]);
+		spin_lock_init(&comp_workspace_lock[i]);
+		atomic_set(&comp_alloc_workspace[i], 0);
+		init_waitqueue_head(&comp_workspace_wait[i]);
+	}
+	return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one.
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+	struct list_head *workspace;
+	int cpus = num_online_cpus();
+	int idx = type - 1;
+
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+again:
+	spin_lock(workspace_lock);
+	if (!list_empty(idle_workspace)) {
+		workspace = idle_workspace->next;
+		list_del(workspace);
+		(*num_workspace)--;
+		spin_unlock(workspace_lock);
+		return workspace;
+
+	}
+	if (atomic_read(alloc_workspace) > cpus) {
+		DEFINE_WAIT(wait);
+
+		spin_unlock(workspace_lock);
+		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+			schedule();
+		finish_wait(workspace_wait, &wait);
+		goto again;
+	}
+	atomic_inc(alloc_workspace);
+	spin_unlock(workspace_lock);
+
+	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (IS_ERR(workspace)) {
+		atomic_dec(alloc_workspace);
+		wake_up(workspace_wait);
+	}
+	return workspace;
+}
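
find_workspace() is a small bounded pool: reuse a cached idle workspace when
one exists, allocate a fresh one until more than num_online_cpus() are
outstanding, and otherwise sleep until free_workspace() wakes the queue. A
hypothetical caller, showing the pairing discipline the pool assumes:

    /* hypothetical: every find_workspace() must be paired with
     * free_workspace() on all paths, or the pool leaks a slot */
    static int with_workspace(int type)
    {
            struct list_head *workspace;

            workspace = find_workspace(type);
            if (IS_ERR(workspace))
                    return PTR_ERR(workspace);

            /* ... use the per-algorithm state embedded around
             * this list head ... */

            free_workspace(type, workspace);
            return 0;
    }
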
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+	int idx = type - 1;
+	struct list_head *idle_workspace = &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+	int *num_workspace = &comp_num_workspace[idx];
+
+	spin_lock(workspace_lock);
+	if (*num_workspace < num_online_cpus()) {
+		list_add_tail(workspace, idle_workspace);
+		(*num_workspace)++;
+		spin_unlock(workspace_lock);
+		goto wake;
+	}
+	spin_unlock(workspace_lock);
+
+	btrfs_compress_op[idx]->free_workspace(workspace);
+	atomic_dec(alloc_workspace);
+wake:
+	if (waitqueue_active(workspace_wait))
+		wake_up(workspace_wait);
+}
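
The wake_up() here pairs with the prepare_to_wait()/schedule() sequence in
find_workspace(): the waiter queues itself first and only then re-checks the
condition, which closes the lost-wakeup window. Condensed from the two
functions above:

    /* sleeper, in find_workspace() */
    DEFINE_WAIT(wait);
    prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
    if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
            schedule();     /* a wake_up() issued after prepare_to_wait()
                             * is not lost; schedule() returns promptly */
    finish_wait(workspace_wait, &wait);

    /* waker, in free_workspace() */
    if (waitqueue_active(workspace_wait))
            wake_up(workspace_wait);
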
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+	struct list_head *workspace;
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		while (!list_empty(&comp_idle_workspace[i])) {
+			workspace = comp_idle_workspace[i].next;
+			list_del(workspace);
+			btrfs_compress_op[i]->free_workspace(workspace);
+			atomic_dec(&comp_alloc_workspace[i]);
+		}
+	}
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated. There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read. It
+ * may be smaller than len if we had to exit early because we
+ * ran out of room in the pages array or because we crossed the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -1;
+
+	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+						      start, len, pages,
+						      nr_dest_pages, out_pages,
+						      total_in, total_out,
+						      max_out);
+	free_workspace(type, workspace);
+	return ret;
+}
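
A hedged usage sketch of the new entry point (the buffer bound and variable
names are illustrative, not from this patch):

    unsigned long nr_pages_out = 0, total_in = 0, total_out = 0;
    struct page *pages[128];            /* illustrative bound */
    int ret;

    ret = btrfs_compress_pages(BTRFS_COMPRESS_ZLIB, inode->i_mapping,
                               start, len, pages, 128, &nr_pages_out,
                               &total_in, &total_out, max_compressed);

Note the asymmetry: this wrapper returns -1 when no workspace could be
obtained, while the decompress wrappers below return -ENOMEM; and, per the
comment above, *out_pages may be nonzero even on error, so the returned pages
must be freed either way.
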
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous.  They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+							 disk_start,
+							 bvec, vcnt, srclen);
+	free_workspace(type, workspace);
+	return ret;
+}
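
The type - 1 indexing in these wrappers assumes the compression types are
numbered from 1, with 0 reserved for "none", in the same order as
btrfs_compress_op[]. As far as this diff shows, that numbering is an
assumption; it would look like:

    enum {
            BTRFS_COMPRESS_NONE = 0,    /* never reaches these wrappers */
            BTRFS_COMPRESS_ZLIB = 1,    /* btrfs_compress_op[0] */
            BTRFS_COMPRESS_LZO  = 2,    /* btrfs_compress_op[1] */
    };
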
+
+/*
+ * a less complex decompression routine.  Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+						  dest_page, start_byte,
+						  srclen, destlen);
+
+	free_workspace(type, workspace);
+	return ret;
+}
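
A sketch of the single-page path, roughly as an inline-extent reader might use
it (names hypothetical):

    unsigned char *data_in;     /* mapped compressed bytes, <= one page */
    struct page *dest_page;     /* destination for the uncompressed page */
    size_t srclen;              /* number of compressed bytes */
    int ret;

    ret = btrfs_decompress(BTRFS_COMPRESS_LZO, data_in, dest_page,
                           0 /* start_byte */, srclen, PAGE_CACHE_SIZE);
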
+
+void btrfs_exit_compress(void)
+{
+	free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset of the start of our working buffer,
+ * relative to the start of the uncompressed data.
+ *
+ * total_out is the offset of the end of the working buffer, so
+ * total_out - buf_start is the number of bytes it holds.
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset)
+{
+	unsigned long buf_offset;
+	unsigned long current_buf_start;
+	unsigned long start_byte;
+	unsigned long working_bytes = total_out - buf_start;
+	unsigned long bytes;
+	char *kaddr;
+	struct page *page_out = bvec[*page_index].bv_page;
+
+	/*
+	 * start_byte is the first byte of the page we're currently
+	 * copying into, relative to the start of the compressed data.
+	 */
+	start_byte = page_offset(page_out) - disk_start;
+
+	/* we haven't yet hit data corresponding to this page */
+	if (total_out <= start_byte)
+		return 1;
+
+	/*
+	 * the start of the data we care about is offset into
+	 * the middle of our working buffer
+	 */
+	if (total_out > start_byte && buf_start < start_byte) {
+		buf_offset = start_byte - buf_start;
+		working_bytes -= buf_offset;
+	} else {
+		buf_offset = 0;
+	}
+	current_buf_start = buf_start;
+
+	/* copy bytes from the working buffer into the pages */
+	while (working_bytes > 0) {
+		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(bytes, working_bytes);
+		kaddr = kmap_atomic(page_out, KM_USER0);
+		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page_out);
+
+		*pg_offset += bytes;
+		buf_offset += bytes;
+		working_bytes -= bytes;
+		current_buf_start += bytes;
+
+		/* check if we need to pick another page */
+		if (*pg_offset == PAGE_CACHE_SIZE) {
+			(*page_index)++;
+			if (*page_index >= vcnt)
+				return 0;
+
+			page_out = bvec[*page_index].bv_page;
+			*pg_offset = 0;
+			start_byte = page_offset(page_out) - disk_start;
+
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
+
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer.  bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
+		}
+	}
+
+	return 1;
 }
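
The return convention of btrfs_decompress_buf2page(): 0 means every page in
the biovec has been filled and the caller can stop decompressing; 1 means the
working buffer is exhausted, or does not yet reach the current page, so the
caller should decompress more input and call again with a larger total_out.
A worked example, assuming 4K pages:

    /*
     * Suppose disk_start = 1048576 and the current page sits at file
     * offset 1048576 + 8192, so start_byte = 8192.
     *
     * Pass 1: buf_start = 0, total_out = 4096.  total_out <= start_byte,
     *         so the routine returns 1: inflate more input first.
     * Pass 2: buf_start = 4096, total_out = 12288.  Now buf_offset =
     *         8192 - 4096, and bytes 8192..12287 of the uncompressed
     *         stream land in the page at pg_offset 0..4095.
     */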