diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-17 17:43:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-17 17:43:43 -0500 |
commit | eee2a817df7c5a6e569f353f8be78cc1b3604bb6 (patch) | |
tree | f721beb4712c732702d3383d3c6a52da8b5bbb20 /fs/btrfs | |
parent | 83896fb5e51594281720d145164f866ba769abd5 (diff) | |
parent | acce952b0263825da32cf10489413dec78053347 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits)
Btrfs: forced readonly mounts on errors
btrfs: Require CAP_SYS_ADMIN for filesystem rebalance
Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check
btrfs: Fix memory leak in btrfs_read_fs_root_no_radix()
btrfs: check NULL or not
btrfs: Don't pass NULL ptr to func that may deref it.
btrfs: mount failure return value fix
btrfs: Mem leak in btrfs_get_acl()
btrfs: fix wrong free space information of btrfs
btrfs: make the chunk allocator utilize the devices better
btrfs: restructure find_free_dev_extent()
btrfs: fix wrong calculation of stripe size
btrfs: try to reclaim some space when chunk allocation fails
btrfs: fix wrong data space statistics
fs/btrfs: Fix build of ctree
Btrfs: fix off by one while setting block groups readonly
Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls
Btrfs: Add readonly snapshots support
Btrfs: Refactor btrfs_ioctl_snap_create()
btrfs: Extract duplicate decompress code
...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Kconfig | 2 | ||||
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/acl.c | 4 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 329 | ||||
-rw-r--r-- | fs/btrfs/compression.h | 72 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 8 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 48 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 412 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 90 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 17 | ||||
-rw-r--r-- | fs/btrfs/extent_map.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent_map.h | 3 | ||||
-rw-r--r-- | fs/btrfs/file.c | 13 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 90 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 220 | ||||
-rw-r--r-- | fs/btrfs/ioctl.h | 12 | ||||
-rw-r--r-- | fs/btrfs/lzo.c | 420 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 18 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 8 | ||||
-rw-r--r-- | fs/btrfs/super.c | 281 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 11 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 1 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 626 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 27 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 18 | ||||
-rw-r--r-- | fs/btrfs/zlib.c | 369 |
29 files changed, 2490 insertions, 623 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -4,6 +4,8 @@ config BTRFS_FS | |||
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select ZLIB_INFLATE | 5 | select ZLIB_INFLATE |
6 | select ZLIB_DEFLATE | 6 | select ZLIB_DEFLATE |
7 | select LZO_COMPRESS | ||
8 | select LZO_DECOMPRESS | ||
7 | help | 9 | help |
8 | Btrfs is a new filesystem with extents, writable snapshotting, | 10 | Btrfs is a new filesystem with extents, writable snapshotting, |
9 | support for multiple devices and many more features. | 11 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o | 10 | compression.o delayed-ref.o relocation.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6ae2c8cac9d5..15b5ca2a2606 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
60 | size = __btrfs_getxattr(inode, name, value, size); | 60 | size = __btrfs_getxattr(inode, name, value, size); |
61 | if (size > 0) { | 61 | if (size > 0) { |
62 | acl = posix_acl_from_xattr(value, size); | 62 | acl = posix_acl_from_xattr(value, size); |
63 | if (IS_ERR(acl)) | 63 | if (IS_ERR(acl)) { |
64 | kfree(value); | ||
64 | return acl; | 65 | return acl; |
66 | } | ||
65 | set_cached_acl(inode, type, acl); | 67 | set_cached_acl(inode, type, acl); |
66 | } | 68 | } |
67 | kfree(value); | 69 | kfree(value); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -157,7 +157,7 @@ struct btrfs_inode { | |||
157 | /* | 157 | /* |
158 | * always compress this one file | 158 | * always compress this one file |
159 | */ | 159 | */ |
160 | unsigned force_compress:1; | 160 | unsigned force_compress:4; |
161 | 161 | ||
162 | struct inode vfs_inode; | 162 | struct inode vfs_inode; |
163 | }; | 163 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -62,6 +62,9 @@ struct compressed_bio { | |||
62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
64 | 64 | ||
65 | /* the compression algorithm for this bio */ | ||
66 | int compress_type; | ||
67 | |||
65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
67 | 70 | ||
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
173 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
174 | * the decompression. | 177 | * the decompression. |
175 | */ | 178 | */ |
176 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
177 | cb->start, | 180 | cb->compressed_pages, |
178 | cb->orig_bio->bi_io_vec, | 181 | cb->start, |
179 | cb->orig_bio->bi_vcnt, | 182 | cb->orig_bio->bi_io_vec, |
180 | cb->compressed_len); | 183 | cb->orig_bio->bi_vcnt, |
184 | cb->compressed_len); | ||
181 | csum_failed: | 185 | csum_failed: |
182 | if (ret) | 186 | if (ret) |
183 | cb->errors = 1; | 187 | cb->errors = 1; |
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
588 | 592 | ||
589 | cb->len = uncompressed_len; | 593 | cb->len = uncompressed_len; |
590 | cb->compressed_len = compressed_len; | 594 | cb->compressed_len = compressed_len; |
595 | cb->compress_type = extent_compress_type(bio_flags); | ||
591 | cb->orig_bio = bio; | 596 | cb->orig_bio = bio; |
592 | 597 | ||
593 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 598 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
677 | bio_put(comp_bio); | 682 | bio_put(comp_bio); |
678 | return 0; | 683 | return 0; |
679 | } | 684 | } |
685 | |||
686 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
687 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
688 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
689 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
690 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
691 | |||
692 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
693 | &btrfs_zlib_compress, | ||
694 | &btrfs_lzo_compress, | ||
695 | }; | ||
696 | |||
697 | int __init btrfs_init_compress(void) | ||
698 | { | ||
699 | int i; | ||
700 | |||
701 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
702 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
703 | spin_lock_init(&comp_workspace_lock[i]); | ||
704 | atomic_set(&comp_alloc_workspace[i], 0); | ||
705 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
706 | } | ||
707 | return 0; | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * this finds an available workspace or allocates a new one | ||
712 | * ERR_PTR is returned if things go bad. | ||
713 | */ | ||
714 | static struct list_head *find_workspace(int type) | ||
715 | { | ||
716 | struct list_head *workspace; | ||
717 | int cpus = num_online_cpus(); | ||
718 | int idx = type - 1; | ||
719 | |||
720 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
721 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
722 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
723 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
724 | int *num_workspace = &comp_num_workspace[idx]; | ||
725 | again: | ||
726 | spin_lock(workspace_lock); | ||
727 | if (!list_empty(idle_workspace)) { | ||
728 | workspace = idle_workspace->next; | ||
729 | list_del(workspace); | ||
730 | (*num_workspace)--; | ||
731 | spin_unlock(workspace_lock); | ||
732 | return workspace; | ||
733 | |||
734 | } | ||
735 | if (atomic_read(alloc_workspace) > cpus) { | ||
736 | DEFINE_WAIT(wait); | ||
737 | |||
738 | spin_unlock(workspace_lock); | ||
739 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
740 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
741 | schedule(); | ||
742 | finish_wait(workspace_wait, &wait); | ||
743 | goto again; | ||
744 | } | ||
745 | atomic_inc(alloc_workspace); | ||
746 | spin_unlock(workspace_lock); | ||
747 | |||
748 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
749 | if (IS_ERR(workspace)) { | ||
750 | atomic_dec(alloc_workspace); | ||
751 | wake_up(workspace_wait); | ||
752 | } | ||
753 | return workspace; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * put a workspace struct back on the list or free it if we have enough | ||
758 | * idle ones sitting around | ||
759 | */ | ||
760 | static void free_workspace(int type, struct list_head *workspace) | ||
761 | { | ||
762 | int idx = type - 1; | ||
763 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
764 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
765 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
766 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
767 | int *num_workspace = &comp_num_workspace[idx]; | ||
768 | |||
769 | spin_lock(workspace_lock); | ||
770 | if (*num_workspace < num_online_cpus()) { | ||
771 | list_add_tail(workspace, idle_workspace); | ||
772 | (*num_workspace)++; | ||
773 | spin_unlock(workspace_lock); | ||
774 | goto wake; | ||
775 | } | ||
776 | spin_unlock(workspace_lock); | ||
777 | |||
778 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
779 | atomic_dec(alloc_workspace); | ||
780 | wake: | ||
781 | if (waitqueue_active(workspace_wait)) | ||
782 | wake_up(workspace_wait); | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * cleanup function for module exit | ||
787 | */ | ||
788 | static void free_workspaces(void) | ||
789 | { | ||
790 | struct list_head *workspace; | ||
791 | int i; | ||
792 | |||
793 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
794 | while (!list_empty(&comp_idle_workspace[i])) { | ||
795 | workspace = comp_idle_workspace[i].next; | ||
796 | list_del(workspace); | ||
797 | btrfs_compress_op[i]->free_workspace(workspace); | ||
798 | atomic_dec(&comp_alloc_workspace[i]); | ||
799 | } | ||
800 | } | ||
801 | } | ||
802 | |||
803 | /* | ||
804 | * given an address space and start/len, compress the bytes. | ||
805 | * | ||
806 | * pages are allocated to hold the compressed result and stored | ||
807 | * in 'pages' | ||
808 | * | ||
809 | * out_pages is used to return the number of pages allocated. There | ||
810 | * may be pages allocated even if we return an error | ||
811 | * | ||
812 | * total_in is used to return the number of bytes actually read. It | ||
813 | * may be smaller then len if we had to exit early because we | ||
814 | * ran out of room in the pages array or because we cross the | ||
815 | * max_out threshold. | ||
816 | * | ||
817 | * total_out is used to return the total number of compressed bytes | ||
818 | * | ||
819 | * max_out tells us the max number of bytes that we're allowed to | ||
820 | * stuff into pages | ||
821 | */ | ||
822 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
823 | u64 start, unsigned long len, | ||
824 | struct page **pages, | ||
825 | unsigned long nr_dest_pages, | ||
826 | unsigned long *out_pages, | ||
827 | unsigned long *total_in, | ||
828 | unsigned long *total_out, | ||
829 | unsigned long max_out) | ||
830 | { | ||
831 | struct list_head *workspace; | ||
832 | int ret; | ||
833 | |||
834 | workspace = find_workspace(type); | ||
835 | if (IS_ERR(workspace)) | ||
836 | return -1; | ||
837 | |||
838 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
839 | start, len, pages, | ||
840 | nr_dest_pages, out_pages, | ||
841 | total_in, total_out, | ||
842 | max_out); | ||
843 | free_workspace(type, workspace); | ||
844 | return ret; | ||
845 | } | ||
846 | |||
847 | /* | ||
848 | * pages_in is an array of pages with compressed data. | ||
849 | * | ||
850 | * disk_start is the starting logical offset of this array in the file | ||
851 | * | ||
852 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
853 | * | ||
854 | * vcnt is the count of pages in the biovec | ||
855 | * | ||
856 | * srclen is the number of bytes in pages_in | ||
857 | * | ||
858 | * The basic idea is that we have a bio that was created by readpages. | ||
859 | * The pages in the bio are for the uncompressed data, and they may not | ||
860 | * be contiguous. They all correspond to the range of bytes covered by | ||
861 | * the compressed extent. | ||
862 | */ | ||
863 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
864 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
865 | { | ||
866 | struct list_head *workspace; | ||
867 | int ret; | ||
868 | |||
869 | workspace = find_workspace(type); | ||
870 | if (IS_ERR(workspace)) | ||
871 | return -ENOMEM; | ||
872 | |||
873 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
874 | disk_start, | ||
875 | bvec, vcnt, srclen); | ||
876 | free_workspace(type, workspace); | ||
877 | return ret; | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * a less complex decompression routine. Our compressed data fits in a | ||
882 | * single page, and we want to read a single page out of it. | ||
883 | * start_byte tells us the offset into the compressed data we're interested in | ||
884 | */ | ||
885 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
886 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
887 | { | ||
888 | struct list_head *workspace; | ||
889 | int ret; | ||
890 | |||
891 | workspace = find_workspace(type); | ||
892 | if (IS_ERR(workspace)) | ||
893 | return -ENOMEM; | ||
894 | |||
895 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
896 | dest_page, start_byte, | ||
897 | srclen, destlen); | ||
898 | |||
899 | free_workspace(type, workspace); | ||
900 | return ret; | ||
901 | } | ||
902 | |||
903 | void __exit btrfs_exit_compress(void) | ||
904 | { | ||
905 | free_workspaces(); | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Copy uncompressed data from working buffer to pages. | ||
910 | * | ||
911 | * buf_start is the byte offset we're of the start of our workspace buffer. | ||
912 | * | ||
913 | * total_out is the last byte of the buffer | ||
914 | */ | ||
915 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
916 | unsigned long total_out, u64 disk_start, | ||
917 | struct bio_vec *bvec, int vcnt, | ||
918 | unsigned long *page_index, | ||
919 | unsigned long *pg_offset) | ||
920 | { | ||
921 | unsigned long buf_offset; | ||
922 | unsigned long current_buf_start; | ||
923 | unsigned long start_byte; | ||
924 | unsigned long working_bytes = total_out - buf_start; | ||
925 | unsigned long bytes; | ||
926 | char *kaddr; | ||
927 | struct page *page_out = bvec[*page_index].bv_page; | ||
928 | |||
929 | /* | ||
930 | * start byte is the first byte of the page we're currently | ||
931 | * copying into relative to the start of the compressed data. | ||
932 | */ | ||
933 | start_byte = page_offset(page_out) - disk_start; | ||
934 | |||
935 | /* we haven't yet hit data corresponding to this page */ | ||
936 | if (total_out <= start_byte) | ||
937 | return 1; | ||
938 | |||
939 | /* | ||
940 | * the start of the data we care about is offset into | ||
941 | * the middle of our working buffer | ||
942 | */ | ||
943 | if (total_out > start_byte && buf_start < start_byte) { | ||
944 | buf_offset = start_byte - buf_start; | ||
945 | working_bytes -= buf_offset; | ||
946 | } else { | ||
947 | buf_offset = 0; | ||
948 | } | ||
949 | current_buf_start = buf_start; | ||
950 | |||
951 | /* copy bytes from the working buffer into the pages */ | ||
952 | while (working_bytes > 0) { | ||
953 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
954 | PAGE_CACHE_SIZE - buf_offset); | ||
955 | bytes = min(bytes, working_bytes); | ||
956 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
957 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
958 | kunmap_atomic(kaddr, KM_USER0); | ||
959 | flush_dcache_page(page_out); | ||
960 | |||
961 | *pg_offset += bytes; | ||
962 | buf_offset += bytes; | ||
963 | working_bytes -= bytes; | ||
964 | current_buf_start += bytes; | ||
965 | |||
966 | /* check if we need to pick another page */ | ||
967 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
968 | (*page_index)++; | ||
969 | if (*page_index >= vcnt) | ||
970 | return 0; | ||
971 | |||
972 | page_out = bvec[*page_index].bv_page; | ||
973 | *pg_offset = 0; | ||
974 | start_byte = page_offset(page_out) - disk_start; | ||
975 | |||
976 | /* | ||
977 | * make sure our new page is covered by this | ||
978 | * working buffer | ||
979 | */ | ||
980 | if (total_out <= start_byte) | ||
981 | return 1; | ||
982 | |||
983 | /* | ||
984 | * the next page in the biovec might not be adjacent | ||
985 | * to the last page, but it might still be found | ||
986 | * inside this working buffer. bump our offset pointer | ||
987 | */ | ||
988 | if (total_out > start_byte && | ||
989 | current_buf_start < start_byte) { | ||
990 | buf_offset = start_byte - buf_start; | ||
991 | working_bytes = total_out - start_byte; | ||
992 | current_buf_start = buf_start + buf_offset; | ||
993 | } | ||
994 | } | ||
995 | } | ||
996 | |||
997 | return 1; | ||
998 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -19,24 +19,27 @@ | |||
19 | #ifndef __BTRFS_COMPRESSION_ | 19 | #ifndef __BTRFS_COMPRESSION_ |
20 | #define __BTRFS_COMPRESSION_ | 20 | #define __BTRFS_COMPRESSION_ |
21 | 21 | ||
22 | int btrfs_zlib_decompress(unsigned char *data_in, | 22 | int btrfs_init_compress(void); |
23 | struct page *dest_page, | 23 | void btrfs_exit_compress(void); |
24 | unsigned long start_byte, | 24 | |
25 | size_t srclen, size_t destlen); | 25 | int btrfs_compress_pages(int type, struct address_space *mapping, |
26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | 26 | u64 start, unsigned long len, |
27 | u64 start, unsigned long len, | 27 | struct page **pages, |
28 | struct page **pages, | 28 | unsigned long nr_dest_pages, |
29 | unsigned long nr_dest_pages, | 29 | unsigned long *out_pages, |
30 | unsigned long *out_pages, | 30 | unsigned long *total_in, |
31 | unsigned long *total_in, | 31 | unsigned long *total_out, |
32 | unsigned long *total_out, | 32 | unsigned long max_out); |
33 | unsigned long max_out); | 33 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | 34 | struct bio_vec *bvec, int vcnt, size_t srclen); |
35 | u64 disk_start, | 35 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
36 | struct bio_vec *bvec, | 36 | unsigned long start_byte, size_t srclen, size_t destlen); |
37 | int vcnt, | 37 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, |
38 | size_t srclen); | 38 | unsigned long total_out, u64 disk_start, |
39 | void btrfs_zlib_exit(void); | 39 | struct bio_vec *bvec, int vcnt, |
40 | unsigned long *page_index, | ||
41 | unsigned long *pg_offset); | ||
42 | |||
40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | 43 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
41 | unsigned long len, u64 disk_start, | 44 | unsigned long len, u64 disk_start, |
42 | unsigned long compressed_len, | 45 | unsigned long compressed_len, |
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
44 | unsigned long nr_pages); | 47 | unsigned long nr_pages); |
45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 48 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
46 | int mirror_num, unsigned long bio_flags); | 49 | int mirror_num, unsigned long bio_flags); |
50 | |||
51 | struct btrfs_compress_op { | ||
52 | struct list_head *(*alloc_workspace)(void); | ||
53 | |||
54 | void (*free_workspace)(struct list_head *workspace); | ||
55 | |||
56 | int (*compress_pages)(struct list_head *workspace, | ||
57 | struct address_space *mapping, | ||
58 | u64 start, unsigned long len, | ||
59 | struct page **pages, | ||
60 | unsigned long nr_dest_pages, | ||
61 | unsigned long *out_pages, | ||
62 | unsigned long *total_in, | ||
63 | unsigned long *total_out, | ||
64 | unsigned long max_out); | ||
65 | |||
66 | int (*decompress_biovec)(struct list_head *workspace, | ||
67 | struct page **pages_in, | ||
68 | u64 disk_start, | ||
69 | struct bio_vec *bvec, | ||
70 | int vcnt, | ||
71 | size_t srclen); | ||
72 | |||
73 | int (*decompress)(struct list_head *workspace, | ||
74 | unsigned char *data_in, | ||
75 | struct page *dest_page, | ||
76 | unsigned long start_byte, | ||
77 | size_t srclen, size_t destlen); | ||
78 | }; | ||
79 | |||
80 | extern struct btrfs_compress_op btrfs_zlib_compress; | ||
81 | extern struct btrfs_compress_op btrfs_lzo_compress; | ||
82 | |||
47 | #endif | 83 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
105 | /* this also releases the path */ | 105 | /* this also releases the path */ |
106 | void btrfs_free_path(struct btrfs_path *p) | 106 | void btrfs_free_path(struct btrfs_path *p) |
107 | { | 107 | { |
108 | if (!p) | ||
109 | return; | ||
108 | btrfs_release_path(NULL, p); | 110 | btrfs_release_path(NULL, p); |
109 | kmem_cache_free(btrfs_path_cachep, p); | 111 | kmem_cache_free(btrfs_path_cachep, p); |
110 | } | 112 | } |
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2514 | btrfs_assert_tree_locked(path->nodes[1]); | 2516 | btrfs_assert_tree_locked(path->nodes[1]); |
2515 | 2517 | ||
2516 | right = read_node_slot(root, upper, slot + 1); | 2518 | right = read_node_slot(root, upper, slot + 1); |
2519 | if (right == NULL) | ||
2520 | return 1; | ||
2521 | |||
2517 | btrfs_tree_lock(right); | 2522 | btrfs_tree_lock(right); |
2518 | btrfs_set_lock_blocking(right); | 2523 | btrfs_set_lock_blocking(right); |
2519 | 2524 | ||
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2764 | btrfs_assert_tree_locked(path->nodes[1]); | 2769 | btrfs_assert_tree_locked(path->nodes[1]); |
2765 | 2770 | ||
2766 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2771 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2772 | if (left == NULL) | ||
2773 | return 1; | ||
2774 | |||
2767 | btrfs_tree_lock(left); | 2775 | btrfs_tree_lock(left); |
2768 | btrfs_set_lock_blocking(left); | 2776 | btrfs_set_lock_blocking(left); |
2769 | 2777 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b875d445ea81..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
295 | #define BTRFS_FSID_SIZE 16 | 295 | #define BTRFS_FSID_SIZE 16 |
296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
298 | |||
299 | /* | ||
300 | * File system states | ||
301 | */ | ||
302 | |||
303 | /* Errors detected */ | ||
304 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
305 | |||
298 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 306 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
299 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 307 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
300 | 308 | ||
@@ -399,13 +407,15 @@ struct btrfs_super_block { | |||
399 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 407 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
400 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | 408 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
401 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | 409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
410 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
402 | 411 | ||
403 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 412 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
404 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 413 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
405 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 414 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
406 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 415 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
407 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 416 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
408 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | 417 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
418 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
409 | 419 | ||
410 | /* | 420 | /* |
411 | * A leaf is full of items. offset and size tell us where to find | 421 | * A leaf is full of items. offset and size tell us where to find |
@@ -552,9 +562,11 @@ struct btrfs_timespec { | |||
552 | } __attribute__ ((__packed__)); | 562 | } __attribute__ ((__packed__)); |
553 | 563 | ||
554 | enum btrfs_compression_type { | 564 | enum btrfs_compression_type { |
555 | BTRFS_COMPRESS_NONE = 0, | 565 | BTRFS_COMPRESS_NONE = 0, |
556 | BTRFS_COMPRESS_ZLIB = 1, | 566 | BTRFS_COMPRESS_ZLIB = 1, |
557 | BTRFS_COMPRESS_LAST = 2, | 567 | BTRFS_COMPRESS_LZO = 2, |
568 | BTRFS_COMPRESS_TYPES = 2, | ||
569 | BTRFS_COMPRESS_LAST = 3, | ||
558 | }; | 570 | }; |
559 | 571 | ||
560 | struct btrfs_inode_item { | 572 | struct btrfs_inode_item { |
@@ -598,6 +610,8 @@ struct btrfs_dir_item { | |||
598 | u8 type; | 610 | u8 type; |
599 | } __attribute__ ((__packed__)); | 611 | } __attribute__ ((__packed__)); |
600 | 612 | ||
613 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
614 | |||
601 | struct btrfs_root_item { | 615 | struct btrfs_root_item { |
602 | struct btrfs_inode_item inode; | 616 | struct btrfs_inode_item inode; |
603 | __le64 generation; | 617 | __le64 generation; |
@@ -896,7 +910,8 @@ struct btrfs_fs_info { | |||
896 | */ | 910 | */ |
897 | u64 last_trans_log_full_commit; | 911 | u64 last_trans_log_full_commit; |
898 | u64 open_ioctl_trans; | 912 | u64 open_ioctl_trans; |
899 | unsigned long mount_opt; | 913 | unsigned long mount_opt:20; |
914 | unsigned long compress_type:4; | ||
900 | u64 max_inline; | 915 | u64 max_inline; |
901 | u64 alloc_start; | 916 | u64 alloc_start; |
902 | struct btrfs_transaction *running_transaction; | 917 | struct btrfs_transaction *running_transaction; |
@@ -1051,6 +1066,9 @@ struct btrfs_fs_info { | |||
1051 | unsigned metadata_ratio; | 1066 | unsigned metadata_ratio; |
1052 | 1067 | ||
1053 | void *bdev_holder; | 1068 | void *bdev_holder; |
1069 | |||
1070 | /* filesystem state */ | ||
1071 | u64 fs_state; | ||
1054 | }; | 1072 | }; |
1055 | 1073 | ||
1056 | /* | 1074 | /* |
@@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1894 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1912 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1895 | last_snapshot, 64); | 1913 | last_snapshot, 64); |
1896 | 1914 | ||
1915 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1916 | { | ||
1917 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1918 | } | ||
1919 | |||
1897 | /* struct btrfs_super_block */ | 1920 | /* struct btrfs_super_block */ |
1898 | 1921 | ||
1899 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1922 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2146 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2169 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2147 | struct btrfs_root *root, u64 group_start); | 2170 | struct btrfs_root *root, u64 group_start); |
2148 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2171 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2172 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2149 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2173 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2150 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2174 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2151 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2175 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
@@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
2189 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2213 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
2190 | struct btrfs_block_group_cache *cache); | 2214 | struct btrfs_block_group_cache *cache); |
2191 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | 2215 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); |
2216 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2217 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2218 | u64 start, u64 end); | ||
2219 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2220 | u64 num_bytes); | ||
2221 | |||
2192 | /* ctree.c */ | 2222 | /* ctree.c */ |
2193 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2223 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2194 | int level, int *slot); | 2224 | int level, int *slot); |
@@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
2542 | /* super.c */ | 2572 | /* super.c */ |
2543 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2573 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2544 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2574 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2575 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2576 | unsigned int line, int errno); | ||
2577 | |||
2578 | #define btrfs_std_error(fs_info, errno) \ | ||
2579 | do { \ | ||
2580 | if ((errno)) \ | ||
2581 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2582 | } while (0) | ||
2545 | 2583 | ||
2546 | /* acl.c */ | 2584 | /* acl.c */ |
2547 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2585 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..b531c36455d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -44,6 +44,20 @@ | |||
44 | static struct extent_io_ops btree_extent_io_ops; | 44 | static struct extent_io_ops btree_extent_io_ops; |
45 | static void end_workqueue_fn(struct btrfs_work *work); | 45 | static void end_workqueue_fn(struct btrfs_work *work); |
46 | static void free_fs_root(struct btrfs_root *root); | 46 | static void free_fs_root(struct btrfs_root *root); |
47 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
48 | int read_only); | ||
49 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
50 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
52 | struct btrfs_root *root); | ||
53 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
54 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
55 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
56 | struct extent_io_tree *dirty_pages, | ||
57 | int mark); | ||
58 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
59 | struct extent_io_tree *pinned_extents); | ||
60 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
47 | 61 | ||
48 | /* | 62 | /* |
49 | * end_io_wq structs are used to do processing in task context when an IO is | 63 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
353 | WARN_ON(len == 0); | 367 | WARN_ON(len == 0); |
354 | 368 | ||
355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 369 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
370 | if (eb == NULL) { | ||
371 | WARN_ON(1); | ||
372 | goto out; | ||
373 | } | ||
356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 374 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
357 | btrfs_header_generation(eb)); | 375 | btrfs_header_generation(eb)); |
358 | BUG_ON(ret); | 376 | BUG_ON(ret); |
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
427 | WARN_ON(len == 0); | 445 | WARN_ON(len == 0); |
428 | 446 | ||
429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 447 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
448 | if (eb == NULL) { | ||
449 | ret = -EIO; | ||
450 | goto out; | ||
451 | } | ||
430 | 452 | ||
431 | found_start = btrfs_header_bytenr(eb); | 453 | found_start = btrfs_header_bytenr(eb); |
432 | if (found_start != start) { | 454 | if (found_start != start) { |
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1145 | } | 1167 | } |
1146 | btrfs_free_path(path); | 1168 | btrfs_free_path(path); |
1147 | if (ret) { | 1169 | if (ret) { |
1170 | kfree(root); | ||
1148 | if (ret > 0) | 1171 | if (ret > 0) |
1149 | ret = -ENOENT; | 1172 | ret = -ENOENT; |
1150 | return ERR_PTR(ret); | 1173 | return ERR_PTR(ret); |
@@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1713 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1736 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1714 | 1737 | ||
1715 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1738 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1716 | if (!bh) | 1739 | if (!bh) { |
1740 | err = -EINVAL; | ||
1717 | goto fail_iput; | 1741 | goto fail_iput; |
1742 | } | ||
1718 | 1743 | ||
1719 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1744 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1720 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1745 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1727 | if (!btrfs_super_root(disk_super)) | 1752 | if (!btrfs_super_root(disk_super)) |
1728 | goto fail_iput; | 1753 | goto fail_iput; |
1729 | 1754 | ||
1755 | /* check FS state, whether FS is broken. */ | ||
1756 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1757 | |||
1758 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1759 | |||
1730 | ret = btrfs_parse_options(tree_root, options); | 1760 | ret = btrfs_parse_options(tree_root, options); |
1731 | if (ret) { | 1761 | if (ret) { |
1732 | err = ret; | 1762 | err = ret; |
@@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1744 | } | 1774 | } |
1745 | 1775 | ||
1746 | features = btrfs_super_incompat_flags(disk_super); | 1776 | features = btrfs_super_incompat_flags(disk_super); |
1747 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1777 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1748 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1778 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1749 | btrfs_set_super_incompat_flags(disk_super, features); | 1779 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1750 | } | 1780 | btrfs_set_super_incompat_flags(disk_super, features); |
1751 | 1781 | ||
1752 | features = btrfs_super_compat_ro_flags(disk_super) & | 1782 | features = btrfs_super_compat_ro_flags(disk_super) & |
1753 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1783 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1957 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1987 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1958 | } | 1988 | } |
1959 | 1989 | ||
1960 | if (btrfs_super_log_root(disk_super) != 0) { | 1990 | /* do not make disk changes in broken FS */ |
1991 | if (btrfs_super_log_root(disk_super) != 0 && | ||
1992 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1961 | u64 bytenr = btrfs_super_log_root(disk_super); | 1993 | u64 bytenr = btrfs_super_log_root(disk_super); |
1962 | 1994 | ||
1963 | if (fs_devices->rw_devices == 0) { | 1995 | if (fs_devices->rw_devices == 0) { |
@@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root) | |||
2442 | smp_mb(); | 2474 | smp_mb(); |
2443 | 2475 | ||
2444 | btrfs_put_block_group_cache(fs_info); | 2476 | btrfs_put_block_group_cache(fs_info); |
2477 | |||
2478 | /* | ||
2479 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
2480 | * | ||
2481 | * 1. when btrfs flips readonly somewhere else before | ||
2482 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
2483 | * and btrfs will skip to write sb directly to keep | ||
2484 | * ERROR state on disk. | ||
2485 | * | ||
2486 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
2487 | * and in such case, btrfs cannnot write sb via btrfs_commit_super, | ||
2488 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
2489 | * btrfs will cleanup all FS resources first and write sb then. | ||
2490 | */ | ||
2445 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2491 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2446 | ret = btrfs_commit_super(root); | 2492 | ret = btrfs_commit_super(root); |
2493 | if (ret) | ||
2494 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2495 | } | ||
2496 | |||
2497 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2498 | ret = btrfs_error_commit_super(root); | ||
2447 | if (ret) | 2499 | if (ret) |
2448 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2500 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2449 | } | 2501 | } |
@@ -2619,6 +2671,352 @@ out: | |||
2619 | return 0; | 2671 | return 0; |
2620 | } | 2672 | } |
2621 | 2673 | ||
2674 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2675 | int read_only) | ||
2676 | { | ||
2677 | if (read_only) | ||
2678 | return; | ||
2679 | |||
2680 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2681 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2682 | "running btrfsck is recommended\n"); | ||
2683 | } | ||
2684 | |||
2685 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2686 | { | ||
2687 | int ret; | ||
2688 | |||
2689 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2690 | btrfs_run_delayed_iputs(root); | ||
2691 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2692 | |||
2693 | down_write(&root->fs_info->cleanup_work_sem); | ||
2694 | up_write(&root->fs_info->cleanup_work_sem); | ||
2695 | |||
2696 | /* cleanup FS via transaction */ | ||
2697 | btrfs_cleanup_transaction(root); | ||
2698 | |||
2699 | ret = write_ctree_super(NULL, root, 0); | ||
2700 | |||
2701 | return ret; | ||
2702 | } | ||
2703 | |||
2704 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2705 | { | ||
2706 | struct btrfs_inode *btrfs_inode; | ||
2707 | struct list_head splice; | ||
2708 | |||
2709 | INIT_LIST_HEAD(&splice); | ||
2710 | |||
2711 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2712 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2713 | |||
2714 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2715 | while (!list_empty(&splice)) { | ||
2716 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2717 | ordered_operations); | ||
2718 | |||
2719 | list_del_init(&btrfs_inode->ordered_operations); | ||
2720 | |||
2721 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2722 | } | ||
2723 | |||
2724 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2725 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2726 | |||
2727 | return 0; | ||
2728 | } | ||
2729 | |||
2730 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2731 | { | ||
2732 | struct list_head splice; | ||
2733 | struct btrfs_ordered_extent *ordered; | ||
2734 | struct inode *inode; | ||
2735 | |||
2736 | INIT_LIST_HEAD(&splice); | ||
2737 | |||
2738 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2739 | |||
2740 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2741 | while (!list_empty(&splice)) { | ||
2742 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2743 | root_extent_list); | ||
2744 | |||
2745 | list_del_init(&ordered->root_extent_list); | ||
2746 | atomic_inc(&ordered->refs); | ||
2747 | |||
2748 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2749 | inode = igrab(ordered->inode); | ||
2750 | |||
2751 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2752 | if (inode) | ||
2753 | iput(inode); | ||
2754 | |||
2755 | atomic_set(&ordered->refs, 1); | ||
2756 | btrfs_put_ordered_extent(ordered); | ||
2757 | |||
2758 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2759 | } | ||
2760 | |||
2761 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2762 | |||
2763 | return 0; | ||
2764 | } | ||
2765 | |||
2766 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2767 | struct btrfs_root *root) | ||
2768 | { | ||
2769 | struct rb_node *node; | ||
2770 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2771 | struct btrfs_delayed_ref_node *ref; | ||
2772 | int ret = 0; | ||
2773 | |||
2774 | delayed_refs = &trans->delayed_refs; | ||
2775 | |||
2776 | spin_lock(&delayed_refs->lock); | ||
2777 | if (delayed_refs->num_entries == 0) { | ||
2778 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2779 | return ret; | ||
2780 | } | ||
2781 | |||
2782 | node = rb_first(&delayed_refs->root); | ||
2783 | while (node) { | ||
2784 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2785 | node = rb_next(node); | ||
2786 | |||
2787 | ref->in_tree = 0; | ||
2788 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2789 | delayed_refs->num_entries--; | ||
2790 | |||
2791 | atomic_set(&ref->refs, 1); | ||
2792 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2793 | struct btrfs_delayed_ref_head *head; | ||
2794 | |||
2795 | head = btrfs_delayed_node_to_head(ref); | ||
2796 | mutex_lock(&head->mutex); | ||
2797 | kfree(head->extent_op); | ||
2798 | delayed_refs->num_heads--; | ||
2799 | if (list_empty(&head->cluster)) | ||
2800 | delayed_refs->num_heads_ready--; | ||
2801 | list_del_init(&head->cluster); | ||
2802 | mutex_unlock(&head->mutex); | ||
2803 | } | ||
2804 | |||
2805 | spin_unlock(&delayed_refs->lock); | ||
2806 | btrfs_put_delayed_ref(ref); | ||
2807 | |||
2808 | cond_resched(); | ||
2809 | spin_lock(&delayed_refs->lock); | ||
2810 | } | ||
2811 | |||
2812 | spin_unlock(&delayed_refs->lock); | ||
2813 | |||
2814 | return ret; | ||
2815 | } | ||
2816 | |||
2817 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2818 | { | ||
2819 | struct btrfs_pending_snapshot *snapshot; | ||
2820 | struct list_head splice; | ||
2821 | |||
2822 | INIT_LIST_HEAD(&splice); | ||
2823 | |||
2824 | list_splice_init(&t->pending_snapshots, &splice); | ||
2825 | |||
2826 | while (!list_empty(&splice)) { | ||
2827 | snapshot = list_entry(splice.next, | ||
2828 | struct btrfs_pending_snapshot, | ||
2829 | list); | ||
2830 | |||
2831 | list_del_init(&snapshot->list); | ||
2832 | |||
2833 | kfree(snapshot); | ||
2834 | } | ||
2835 | |||
2836 | return 0; | ||
2837 | } | ||
2838 | |||
2839 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2840 | { | ||
2841 | struct btrfs_inode *btrfs_inode; | ||
2842 | struct list_head splice; | ||
2843 | |||
2844 | INIT_LIST_HEAD(&splice); | ||
2845 | |||
2846 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2847 | |||
2848 | spin_lock(&root->fs_info->delalloc_lock); | ||
2849 | |||
2850 | while (!list_empty(&splice)) { | ||
2851 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2852 | delalloc_inodes); | ||
2853 | |||
2854 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2855 | |||
2856 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2857 | } | ||
2858 | |||
2859 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2860 | |||
2861 | return 0; | ||
2862 | } | ||
2863 | |||
2864 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2865 | struct extent_io_tree *dirty_pages, | ||
2866 | int mark) | ||
2867 | { | ||
2868 | int ret; | ||
2869 | struct page *page; | ||
2870 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2871 | struct extent_buffer *eb; | ||
2872 | u64 start = 0; | ||
2873 | u64 end; | ||
2874 | u64 offset; | ||
2875 | unsigned long index; | ||
2876 | |||
2877 | while (1) { | ||
2878 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2879 | mark); | ||
2880 | if (ret) | ||
2881 | break; | ||
2882 | |||
2883 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2884 | while (start <= end) { | ||
2885 | index = start >> PAGE_CACHE_SHIFT; | ||
2886 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2887 | page = find_get_page(btree_inode->i_mapping, index); | ||
2888 | if (!page) | ||
2889 | continue; | ||
2890 | offset = page_offset(page); | ||
2891 | |||
2892 | spin_lock(&dirty_pages->buffer_lock); | ||
2893 | eb = radix_tree_lookup( | ||
2894 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2895 | offset >> PAGE_CACHE_SHIFT); | ||
2896 | spin_unlock(&dirty_pages->buffer_lock); | ||
2897 | if (eb) { | ||
2898 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2899 | &eb->bflags); | ||
2900 | atomic_set(&eb->refs, 1); | ||
2901 | } | ||
2902 | if (PageWriteback(page)) | ||
2903 | end_page_writeback(page); | ||
2904 | |||
2905 | lock_page(page); | ||
2906 | if (PageDirty(page)) { | ||
2907 | clear_page_dirty_for_io(page); | ||
2908 | spin_lock_irq(&page->mapping->tree_lock); | ||
2909 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2910 | page_index(page), | ||
2911 | PAGECACHE_TAG_DIRTY); | ||
2912 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2913 | } | ||
2914 | |||
2915 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2916 | unlock_page(page); | ||
2917 | } | ||
2918 | } | ||
2919 | |||
2920 | return ret; | ||
2921 | } | ||
2922 | |||
2923 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2924 | struct extent_io_tree *pinned_extents) | ||
2925 | { | ||
2926 | struct extent_io_tree *unpin; | ||
2927 | u64 start; | ||
2928 | u64 end; | ||
2929 | int ret; | ||
2930 | |||
2931 | unpin = pinned_extents; | ||
2932 | while (1) { | ||
2933 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2934 | EXTENT_DIRTY); | ||
2935 | if (ret) | ||
2936 | break; | ||
2937 | |||
2938 | /* opt_discard */ | ||
2939 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | ||
2940 | |||
2941 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2942 | btrfs_error_unpin_extent_range(root, start, end); | ||
2943 | cond_resched(); | ||
2944 | } | ||
2945 | |||
2946 | return 0; | ||
2947 | } | ||
2948 | |||
2949 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
2950 | { | ||
2951 | struct btrfs_transaction *t; | ||
2952 | LIST_HEAD(list); | ||
2953 | |||
2954 | WARN_ON(1); | ||
2955 | |||
2956 | mutex_lock(&root->fs_info->trans_mutex); | ||
2957 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
2958 | |||
2959 | list_splice_init(&root->fs_info->trans_list, &list); | ||
2960 | while (!list_empty(&list)) { | ||
2961 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
2962 | if (!t) | ||
2963 | break; | ||
2964 | |||
2965 | btrfs_destroy_ordered_operations(root); | ||
2966 | |||
2967 | btrfs_destroy_ordered_extents(root); | ||
2968 | |||
2969 | btrfs_destroy_delayed_refs(t, root); | ||
2970 | |||
2971 | btrfs_block_rsv_release(root, | ||
2972 | &root->fs_info->trans_block_rsv, | ||
2973 | t->dirty_pages.dirty_bytes); | ||
2974 | |||
2975 | /* FIXME: cleanup wait for commit */ | ||
2976 | t->in_commit = 1; | ||
2977 | t->blocked = 1; | ||
2978 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
2979 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
2980 | |||
2981 | t->blocked = 0; | ||
2982 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
2983 | wake_up(&root->fs_info->transaction_wait); | ||
2984 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2985 | |||
2986 | mutex_lock(&root->fs_info->trans_mutex); | ||
2987 | t->commit_done = 1; | ||
2988 | if (waitqueue_active(&t->commit_wait)) | ||
2989 | wake_up(&t->commit_wait); | ||
2990 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2991 | |||
2992 | mutex_lock(&root->fs_info->trans_mutex); | ||
2993 | |||
2994 | btrfs_destroy_pending_snapshots(t); | ||
2995 | |||
2996 | btrfs_destroy_delalloc_inodes(root); | ||
2997 | |||
2998 | spin_lock(&root->fs_info->new_trans_lock); | ||
2999 | root->fs_info->running_transaction = NULL; | ||
3000 | spin_unlock(&root->fs_info->new_trans_lock); | ||
3001 | |||
3002 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3003 | EXTENT_DIRTY); | ||
3004 | |||
3005 | btrfs_destroy_pinned_extent(root, | ||
3006 | root->fs_info->pinned_extents); | ||
3007 | |||
3008 | t->use_count = 0; | ||
3009 | list_del_init(&t->list); | ||
3010 | memset(t, 0, sizeof(*t)); | ||
3011 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3012 | } | ||
3013 | |||
3014 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3015 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3016 | |||
3017 | return 0; | ||
3018 | } | ||
3019 | |||
2622 | static struct extent_io_ops btree_extent_io_ops = { | 3020 | static struct extent_io_ops btree_extent_io_ops = { |
2623 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3021 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2624 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3022 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 227e5815d838..b55269340cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3089 | return btrfs_reduce_alloc_profile(root, flags); | 3089 | return btrfs_reduce_alloc_profile(root, flags); |
3090 | } | 3090 | } |
3091 | 3091 | ||
3092 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3092 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3093 | { | 3093 | { |
3094 | u64 flags; | 3094 | u64 flags; |
3095 | 3095 | ||
@@ -3161,8 +3161,12 @@ alloc: | |||
3161 | bytes + 2 * 1024 * 1024, | 3161 | bytes + 2 * 1024 * 1024, |
3162 | alloc_target, 0); | 3162 | alloc_target, 0); |
3163 | btrfs_end_transaction(trans, root); | 3163 | btrfs_end_transaction(trans, root); |
3164 | if (ret < 0) | 3164 | if (ret < 0) { |
3165 | return ret; | 3165 | if (ret != -ENOSPC) |
3166 | return ret; | ||
3167 | else | ||
3168 | goto commit_trans; | ||
3169 | } | ||
3166 | 3170 | ||
3167 | if (!data_sinfo) { | 3171 | if (!data_sinfo) { |
3168 | btrfs_set_inode_space_info(root, inode); | 3172 | btrfs_set_inode_space_info(root, inode); |
@@ -3173,6 +3177,7 @@ alloc: | |||
3173 | spin_unlock(&data_sinfo->lock); | 3177 | spin_unlock(&data_sinfo->lock); |
3174 | 3178 | ||
3175 | /* commit the current transaction and try again */ | 3179 | /* commit the current transaction and try again */ |
3180 | commit_trans: | ||
3176 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3181 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3177 | committed = 1; | 3182 | committed = 1; |
3178 | trans = btrfs_join_transaction(root, 1); | 3183 | trans = btrfs_join_transaction(root, 1); |
@@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3721 | return 0; | 3726 | return 0; |
3722 | } | 3727 | } |
3723 | 3728 | ||
3724 | WARN_ON(1); | ||
3725 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3726 | block_rsv->size, block_rsv->reserved, | ||
3727 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3728 | |||
3729 | return -ENOSPC; | 3729 | return -ENOSPC; |
3730 | } | 3730 | } |
3731 | 3731 | ||
@@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7970 | 7970 | ||
7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7973 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 7973 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7974 | sinfo->bytes_readonly += num_bytes; | 7974 | sinfo->bytes_readonly += num_bytes; |
7975 | sinfo->bytes_reserved += cache->reserved_pinned; | 7975 | sinfo->bytes_reserved += cache->reserved_pinned; |
7976 | cache->reserved_pinned = 0; | 7976 | cache->reserved_pinned = 0; |
7977 | cache->ro = 1; | 7977 | cache->ro = 1; |
7978 | ret = 0; | 7978 | ret = 0; |
7979 | } | 7979 | } |
7980 | |||
7980 | spin_unlock(&cache->lock); | 7981 | spin_unlock(&cache->lock); |
7981 | spin_unlock(&sinfo->lock); | 7982 | spin_unlock(&sinfo->lock); |
7982 | return ret; | 7983 | return ret; |
@@ -8012,6 +8013,62 @@ out: | |||
8012 | return ret; | 8013 | return ret; |
8013 | } | 8014 | } |
8014 | 8015 | ||
8016 | /* | ||
8017 | * helper to account the unused space of all the readonly block group in the | ||
8018 | * list. takes mirrors into account. | ||
8019 | */ | ||
8020 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
8021 | { | ||
8022 | struct btrfs_block_group_cache *block_group; | ||
8023 | u64 free_bytes = 0; | ||
8024 | int factor; | ||
8025 | |||
8026 | list_for_each_entry(block_group, groups_list, list) { | ||
8027 | spin_lock(&block_group->lock); | ||
8028 | |||
8029 | if (!block_group->ro) { | ||
8030 | spin_unlock(&block_group->lock); | ||
8031 | continue; | ||
8032 | } | ||
8033 | |||
8034 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
8035 | BTRFS_BLOCK_GROUP_RAID10 | | ||
8036 | BTRFS_BLOCK_GROUP_DUP)) | ||
8037 | factor = 2; | ||
8038 | else | ||
8039 | factor = 1; | ||
8040 | |||
8041 | free_bytes += (block_group->key.offset - | ||
8042 | btrfs_block_group_used(&block_group->item)) * | ||
8043 | factor; | ||
8044 | |||
8045 | spin_unlock(&block_group->lock); | ||
8046 | } | ||
8047 | |||
8048 | return free_bytes; | ||
8049 | } | ||
8050 | |||
8051 | /* | ||
8052 | * helper to account the unused space of all the readonly block group in the | ||
8053 | * space_info. takes mirrors into account. | ||
8054 | */ | ||
8055 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8056 | { | ||
8057 | int i; | ||
8058 | u64 free_bytes = 0; | ||
8059 | |||
8060 | spin_lock(&sinfo->lock); | ||
8061 | |||
8062 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8063 | if (!list_empty(&sinfo->block_groups[i])) | ||
8064 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8065 | &sinfo->block_groups[i]); | ||
8066 | |||
8067 | spin_unlock(&sinfo->lock); | ||
8068 | |||
8069 | return free_bytes; | ||
8070 | } | ||
8071 | |||
8015 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 8072 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
8016 | struct btrfs_block_group_cache *cache) | 8073 | struct btrfs_block_group_cache *cache) |
8017 | { | 8074 | { |
@@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8092 | mutex_lock(&root->fs_info->chunk_mutex); | 8149 | mutex_lock(&root->fs_info->chunk_mutex); |
8093 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8150 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
8094 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8151 | u64 min_free = btrfs_block_group_used(&block_group->item); |
8095 | u64 dev_offset, max_avail; | 8152 | u64 dev_offset; |
8096 | 8153 | ||
8097 | /* | 8154 | /* |
8098 | * check to make sure we can actually find a chunk with enough | 8155 | * check to make sure we can actually find a chunk with enough |
@@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8100 | */ | 8157 | */ |
8101 | if (device->total_bytes > device->bytes_used + min_free) { | 8158 | if (device->total_bytes > device->bytes_used + min_free) { |
8102 | ret = find_free_dev_extent(NULL, device, min_free, | 8159 | ret = find_free_dev_extent(NULL, device, min_free, |
8103 | &dev_offset, &max_avail); | 8160 | &dev_offset, NULL); |
8104 | if (!ret) | 8161 | if (!ret) |
8105 | break; | 8162 | break; |
8106 | ret = -1; | 8163 | ret = -1; |
@@ -8584,3 +8641,14 @@ out: | |||
8584 | btrfs_free_path(path); | 8641 | btrfs_free_path(path); |
8585 | return ret; | 8642 | return ret; |
8586 | } | 8643 | } |
8644 | |||
8645 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
8646 | { | ||
8647 | return unpin_extent_range(root, start, end); | ||
8648 | } | ||
8649 | |||
8650 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
8651 | u64 num_bytes) | ||
8652 | { | ||
8653 | return btrfs_discard_extent(root, bytenr, num_bytes); | ||
8654 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e86b9f36507..2e993cf1766e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2028 | BUG_ON(extent_map_end(em) <= cur); | 2028 | BUG_ON(extent_map_end(em) <= cur); |
2029 | BUG_ON(end < cur); | 2029 | BUG_ON(end < cur); |
2030 | 2030 | ||
2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2033 | extent_set_compress_type(&this_bio_flag, | ||
2034 | em->compress_type); | ||
2035 | } | ||
2033 | 2036 | ||
2034 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2037 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2035 | cur_end = min(extent_map_end(em) - 1, end); | 2038 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3072 | #endif | 3075 | #endif |
3073 | 3076 | ||
3074 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3077 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3078 | if (eb == NULL) | ||
3079 | return NULL; | ||
3075 | eb->start = start; | 3080 | eb->start = start; |
3076 | eb->len = len; | 3081 | eb->len = len; |
3077 | spin_lock_init(&eb->lock); | 3082 | spin_lock_init(&eb->lock); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,8 +20,12 @@ | |||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
22 | 22 | ||
23 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
25 | 29 | ||
26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
@@ -135,6 +139,17 @@ struct extent_buffer { | |||
135 | wait_queue_head_t lock_wq; | 139 | wait_queue_head_t lock_wq; |
136 | }; | 140 | }; |
137 | 141 | ||
142 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
143 | int compress_type) | ||
144 | { | ||
145 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
146 | } | ||
147 | |||
148 | static inline int extent_compress_type(unsigned long bio_flags) | ||
149 | { | ||
150 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
151 | } | ||
152 | |||
138 | struct extent_map_tree; | 153 | struct extent_map_tree; |
139 | 154 | ||
140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 155 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
7 | 8 | ||
8 | 9 | ||
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
54 | return em; | 55 | return em; |
55 | em->in_tree = 0; | 56 | em->in_tree = 0; |
56 | em->flags = 0; | 57 | em->flags = 0; |
58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
58 | return em; | 60 | return em; |
59 | } | 61 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a9e0a4eaf3d9..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -225,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
225 | 225 | ||
226 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
227 | split->flags = flags; | 227 | split->flags = flags; |
228 | split->compress_type = em->compress_type; | ||
228 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
229 | BUG_ON(ret); | 230 | BUG_ON(ret); |
230 | free_extent_map(split); | 231 | free_extent_map(split); |
@@ -239,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
239 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
240 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
241 | split->flags = flags; | 242 | split->flags = flags; |
243 | split->compress_type = em->compress_type; | ||
242 | 244 | ||
243 | if (compressed) { | 245 | if (compressed) { |
244 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
@@ -891,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
891 | if (err) | 893 | if (err) |
892 | goto out; | 894 | goto out; |
893 | 895 | ||
896 | /* | ||
897 | * If BTRFS flips readonly due to some impossible error | ||
898 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
899 | * although we have opened a file as writable, we have | ||
900 | * to stop this write operation to ensure FS consistency. | ||
901 | */ | ||
902 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
903 | err = -EROFS; | ||
904 | goto out; | ||
905 | } | ||
906 | |||
894 | file_update_time(file); | 907 | file_update_time(file); |
895 | BTRFS_I(inode)->sequence++; | 908 | BTRFS_I(inode)->sequence++; |
896 | 909 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 902afbf50811..160b55b3e132 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
122 | size_t cur_size = size; | 122 | size_t cur_size = size; |
123 | size_t datasize; | 123 | size_t datasize; |
124 | unsigned long offset; | 124 | unsigned long offset; |
125 | int use_compress = 0; | 125 | int compress_type = BTRFS_COMPRESS_NONE; |
126 | 126 | ||
127 | if (compressed_size && compressed_pages) { | 127 | if (compressed_size && compressed_pages) { |
128 | use_compress = 1; | 128 | compress_type = root->fs_info->compress_type; |
129 | cur_size = compressed_size; | 129 | cur_size = compressed_size; |
130 | } | 130 | } |
131 | 131 | ||
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
160 | ptr = btrfs_file_extent_inline_start(ei); | 160 | ptr = btrfs_file_extent_inline_start(ei); |
161 | 161 | ||
162 | if (use_compress) { | 162 | if (compress_type != BTRFS_COMPRESS_NONE) { |
163 | struct page *cpage; | 163 | struct page *cpage; |
164 | int i = 0; | 164 | int i = 0; |
165 | while (compressed_size > 0) { | 165 | while (compressed_size > 0) { |
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
176 | compressed_size -= cur_size; | 176 | compressed_size -= cur_size; |
177 | } | 177 | } |
178 | btrfs_set_file_extent_compression(leaf, ei, | 178 | btrfs_set_file_extent_compression(leaf, ei, |
179 | BTRFS_COMPRESS_ZLIB); | 179 | compress_type); |
180 | } else { | 180 | } else { |
181 | page = find_get_page(inode->i_mapping, | 181 | page = find_get_page(inode->i_mapping, |
182 | start >> PAGE_CACHE_SHIFT); | 182 | start >> PAGE_CACHE_SHIFT); |
@@ -263,6 +263,7 @@ struct async_extent { | |||
263 | u64 compressed_size; | 263 | u64 compressed_size; |
264 | struct page **pages; | 264 | struct page **pages; |
265 | unsigned long nr_pages; | 265 | unsigned long nr_pages; |
266 | int compress_type; | ||
266 | struct list_head list; | 267 | struct list_head list; |
267 | }; | 268 | }; |
268 | 269 | ||
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
280 | u64 start, u64 ram_size, | 281 | u64 start, u64 ram_size, |
281 | u64 compressed_size, | 282 | u64 compressed_size, |
282 | struct page **pages, | 283 | struct page **pages, |
283 | unsigned long nr_pages) | 284 | unsigned long nr_pages, |
285 | int compress_type) | ||
284 | { | 286 | { |
285 | struct async_extent *async_extent; | 287 | struct async_extent *async_extent; |
286 | 288 | ||
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
290 | async_extent->compressed_size = compressed_size; | 292 | async_extent->compressed_size = compressed_size; |
291 | async_extent->pages = pages; | 293 | async_extent->pages = pages; |
292 | async_extent->nr_pages = nr_pages; | 294 | async_extent->nr_pages = nr_pages; |
295 | async_extent->compress_type = compress_type; | ||
293 | list_add_tail(&async_extent->list, &cow->extents); | 296 | list_add_tail(&async_extent->list, &cow->extents); |
294 | return 0; | 297 | return 0; |
295 | } | 298 | } |
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
332 | unsigned long max_uncompressed = 128 * 1024; | 335 | unsigned long max_uncompressed = 128 * 1024; |
333 | int i; | 336 | int i; |
334 | int will_compress; | 337 | int will_compress; |
338 | int compress_type = root->fs_info->compress_type; | ||
335 | 339 | ||
336 | actual_end = min_t(u64, isize, end + 1); | 340 | actual_end = min_t(u64, isize, end + 1); |
337 | again: | 341 | again: |
@@ -381,12 +385,16 @@ again: | |||
381 | WARN_ON(pages); | 385 | WARN_ON(pages); |
382 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 386 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
383 | 387 | ||
384 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 388 | if (BTRFS_I(inode)->force_compress) |
385 | total_compressed, pages, | 389 | compress_type = BTRFS_I(inode)->force_compress; |
386 | nr_pages, &nr_pages_ret, | 390 | |
387 | &total_in, | 391 | ret = btrfs_compress_pages(compress_type, |
388 | &total_compressed, | 392 | inode->i_mapping, start, |
389 | max_compressed); | 393 | total_compressed, pages, |
394 | nr_pages, &nr_pages_ret, | ||
395 | &total_in, | ||
396 | &total_compressed, | ||
397 | max_compressed); | ||
390 | 398 | ||
391 | if (!ret) { | 399 | if (!ret) { |
392 | unsigned long offset = total_compressed & | 400 | unsigned long offset = total_compressed & |
@@ -493,7 +501,8 @@ again: | |||
493 | * and will submit them to the elevator. | 501 | * and will submit them to the elevator. |
494 | */ | 502 | */ |
495 | add_async_extent(async_cow, start, num_bytes, | 503 | add_async_extent(async_cow, start, num_bytes, |
496 | total_compressed, pages, nr_pages_ret); | 504 | total_compressed, pages, nr_pages_ret, |
505 | compress_type); | ||
497 | 506 | ||
498 | if (start + num_bytes < end) { | 507 | if (start + num_bytes < end) { |
499 | start += num_bytes; | 508 | start += num_bytes; |
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: | |||
515 | __set_page_dirty_nobuffers(locked_page); | 524 | __set_page_dirty_nobuffers(locked_page); |
516 | /* unlocked later on in the async handlers */ | 525 | /* unlocked later on in the async handlers */ |
517 | } | 526 | } |
518 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 527 | add_async_extent(async_cow, start, end - start + 1, |
528 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
519 | *num_added += 1; | 529 | *num_added += 1; |
520 | } | 530 | } |
521 | 531 | ||
@@ -640,6 +650,7 @@ retry: | |||
640 | em->block_start = ins.objectid; | 650 | em->block_start = ins.objectid; |
641 | em->block_len = ins.offset; | 651 | em->block_len = ins.offset; |
642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 652 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
653 | em->compress_type = async_extent->compress_type; | ||
643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 654 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
644 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 655 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
645 | 656 | ||
@@ -656,11 +667,13 @@ retry: | |||
656 | async_extent->ram_size - 1, 0); | 667 | async_extent->ram_size - 1, 0); |
657 | } | 668 | } |
658 | 669 | ||
659 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 670 | ret = btrfs_add_ordered_extent_compress(inode, |
660 | ins.objectid, | 671 | async_extent->start, |
661 | async_extent->ram_size, | 672 | ins.objectid, |
662 | ins.offset, | 673 | async_extent->ram_size, |
663 | BTRFS_ORDERED_COMPRESSED); | 674 | ins.offset, |
675 | BTRFS_ORDERED_COMPRESSED, | ||
676 | async_extent->compress_type); | ||
664 | BUG_ON(ret); | 677 | BUG_ON(ret); |
665 | 678 | ||
666 | /* | 679 | /* |
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1670 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1683 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1671 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1684 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1672 | struct extent_state *cached_state = NULL; | 1685 | struct extent_state *cached_state = NULL; |
1673 | int compressed = 0; | 1686 | int compress_type = 0; |
1674 | int ret; | 1687 | int ret; |
1675 | bool nolock = false; | 1688 | bool nolock = false; |
1676 | 1689 | ||
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1711 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1724 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1712 | 1725 | ||
1713 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1726 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1714 | compressed = 1; | 1727 | compress_type = ordered_extent->compress_type; |
1715 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1728 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1716 | BUG_ON(compressed); | 1729 | BUG_ON(compress_type); |
1717 | ret = btrfs_mark_extent_written(trans, inode, | 1730 | ret = btrfs_mark_extent_written(trans, inode, |
1718 | ordered_extent->file_offset, | 1731 | ordered_extent->file_offset, |
1719 | ordered_extent->file_offset + | 1732 | ordered_extent->file_offset + |
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1727 | ordered_extent->disk_len, | 1740 | ordered_extent->disk_len, |
1728 | ordered_extent->len, | 1741 | ordered_extent->len, |
1729 | ordered_extent->len, | 1742 | ordered_extent->len, |
1730 | compressed, 0, 0, | 1743 | compress_type, 0, 0, |
1731 | BTRFS_FILE_EXTENT_REG); | 1744 | BTRFS_FILE_EXTENT_REG); |
1732 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1745 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1733 | ordered_extent->file_offset, | 1746 | ordered_extent->file_offset, |
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1829 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1842 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1830 | logical = em->block_start; | 1843 | logical = em->block_start; |
1831 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1844 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1845 | extent_set_compress_type(&failrec->bio_flags, | ||
1846 | em->compress_type); | ||
1832 | } | 1847 | } |
1833 | failrec->logical = logical; | 1848 | failrec->logical = logical; |
1834 | free_extent_map(em); | 1849 | free_extent_map(em); |
@@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3671 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3686 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3672 | { | 3687 | { |
3673 | struct inode *inode = dentry->d_inode; | 3688 | struct inode *inode = dentry->d_inode; |
3689 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3674 | int err; | 3690 | int err; |
3675 | 3691 | ||
3692 | if (btrfs_root_readonly(root)) | ||
3693 | return -EROFS; | ||
3694 | |||
3676 | err = inode_change_ok(inode, attr); | 3695 | err = inode_change_ok(inode, attr); |
3677 | if (err) | 3696 | if (err) |
3678 | return err; | 3697 | return err; |
@@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4928 | size_t max_size; | 4947 | size_t max_size; |
4929 | unsigned long inline_size; | 4948 | unsigned long inline_size; |
4930 | unsigned long ptr; | 4949 | unsigned long ptr; |
4950 | int compress_type; | ||
4931 | 4951 | ||
4932 | WARN_ON(pg_offset != 0); | 4952 | WARN_ON(pg_offset != 0); |
4953 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4933 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4954 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4934 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4955 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4935 | btrfs_item_nr(leaf, path->slots[0])); | 4956 | btrfs_item_nr(leaf, path->slots[0])); |
@@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4939 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4960 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4940 | 4961 | ||
4941 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4962 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4942 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4963 | ret = btrfs_decompress(compress_type, tmp, page, |
4943 | inline_size, max_size); | 4964 | extent_offset, inline_size, max_size); |
4944 | if (ret) { | 4965 | if (ret) { |
4945 | char *kaddr = kmap_atomic(page, KM_USER0); | 4966 | char *kaddr = kmap_atomic(page, KM_USER0); |
4946 | unsigned long copy_size = min_t(u64, | 4967 | unsigned long copy_size = min_t(u64, |
@@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4982 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5003 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4983 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5004 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4984 | struct btrfs_trans_handle *trans = NULL; | 5005 | struct btrfs_trans_handle *trans = NULL; |
4985 | int compressed; | 5006 | int compress_type; |
4986 | 5007 | ||
4987 | again: | 5008 | again: |
4988 | read_lock(&em_tree->lock); | 5009 | read_lock(&em_tree->lock); |
@@ -5041,7 +5062,7 @@ again: | |||
5041 | 5062 | ||
5042 | found_type = btrfs_file_extent_type(leaf, item); | 5063 | found_type = btrfs_file_extent_type(leaf, item); |
5043 | extent_start = found_key.offset; | 5064 | extent_start = found_key.offset; |
5044 | compressed = btrfs_file_extent_compression(leaf, item); | 5065 | compress_type = btrfs_file_extent_compression(leaf, item); |
5045 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5066 | if (found_type == BTRFS_FILE_EXTENT_REG || |
5046 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5067 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
5047 | extent_end = extent_start + | 5068 | extent_end = extent_start + |
@@ -5087,8 +5108,9 @@ again: | |||
5087 | em->block_start = EXTENT_MAP_HOLE; | 5108 | em->block_start = EXTENT_MAP_HOLE; |
5088 | goto insert; | 5109 | goto insert; |
5089 | } | 5110 | } |
5090 | if (compressed) { | 5111 | if (compress_type != BTRFS_COMPRESS_NONE) { |
5091 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5112 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5113 | em->compress_type = compress_type; | ||
5092 | em->block_start = bytenr; | 5114 | em->block_start = bytenr; |
5093 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5115 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
5094 | item); | 5116 | item); |
@@ -5122,12 +5144,14 @@ again: | |||
5122 | em->len = (copy_size + root->sectorsize - 1) & | 5144 | em->len = (copy_size + root->sectorsize - 1) & |
5123 | ~((u64)root->sectorsize - 1); | 5145 | ~((u64)root->sectorsize - 1); |
5124 | em->orig_start = EXTENT_MAP_INLINE; | 5146 | em->orig_start = EXTENT_MAP_INLINE; |
5125 | if (compressed) | 5147 | if (compress_type) { |
5126 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5148 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5149 | em->compress_type = compress_type; | ||
5150 | } | ||
5127 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5151 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
5128 | if (create == 0 && !PageUptodate(page)) { | 5152 | if (create == 0 && !PageUptodate(page)) { |
5129 | if (btrfs_file_extent_compression(leaf, item) == | 5153 | if (btrfs_file_extent_compression(leaf, item) != |
5130 | BTRFS_COMPRESS_ZLIB) { | 5154 | BTRFS_COMPRESS_NONE) { |
5131 | ret = uncompress_inline(path, inode, page, | 5155 | ret = uncompress_inline(path, inode, page, |
5132 | pg_offset, | 5156 | pg_offset, |
5133 | extent_offset, item); | 5157 | extent_offset, item); |
@@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6477 | ei->ordered_data_close = 0; | 6501 | ei->ordered_data_close = 0; |
6478 | ei->orphan_meta_reserved = 0; | 6502 | ei->orphan_meta_reserved = 0; |
6479 | ei->dummy_inode = 0; | 6503 | ei->dummy_inode = 0; |
6480 | ei->force_compress = 0; | 6504 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6481 | 6505 | ||
6482 | inode = &ei->vfs_inode; | 6506 | inode = &ei->vfs_inode; |
6483 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6507 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); |
@@ -7105,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7105 | 7129 | ||
7106 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7130 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
7107 | { | 7131 | { |
7132 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7133 | |||
7134 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7135 | return -EROFS; | ||
7108 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7136 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
7109 | return -EACCES; | 7137 | return -EACCES; |
7110 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7138 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..a506a22b522a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
147 | unsigned int flags, oldflags; | 147 | unsigned int flags, oldflags; |
148 | int ret; | 148 | int ret; |
149 | 149 | ||
150 | if (btrfs_root_readonly(root)) | ||
151 | return -EROFS; | ||
152 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 153 | if (copy_from_user(&flags, arg, sizeof(flags))) |
151 | return -EFAULT; | 154 | return -EFAULT; |
152 | 155 | ||
@@ -360,7 +363,8 @@ fail: | |||
360 | } | 363 | } |
361 | 364 | ||
362 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 365 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
363 | char *name, int namelen, u64 *async_transid) | 366 | char *name, int namelen, u64 *async_transid, |
367 | bool readonly) | ||
364 | { | 368 | { |
365 | struct inode *inode; | 369 | struct inode *inode; |
366 | struct dentry *parent; | 370 | struct dentry *parent; |
@@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
378 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 382 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | pending_snapshot->dentry = dentry; | 383 | pending_snapshot->dentry = dentry; |
380 | pending_snapshot->root = root; | 384 | pending_snapshot->root = root; |
385 | pending_snapshot->readonly = readonly; | ||
381 | 386 | ||
382 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 387 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
383 | if (IS_ERR(trans)) { | 388 | if (IS_ERR(trans)) { |
@@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
509 | static noinline int btrfs_mksubvol(struct path *parent, | 514 | static noinline int btrfs_mksubvol(struct path *parent, |
510 | char *name, int namelen, | 515 | char *name, int namelen, |
511 | struct btrfs_root *snap_src, | 516 | struct btrfs_root *snap_src, |
512 | u64 *async_transid) | 517 | u64 *async_transid, bool readonly) |
513 | { | 518 | { |
514 | struct inode *dir = parent->dentry->d_inode; | 519 | struct inode *dir = parent->dentry->d_inode; |
515 | struct dentry *dentry; | 520 | struct dentry *dentry; |
@@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
541 | 546 | ||
542 | if (snap_src) { | 547 | if (snap_src) { |
543 | error = create_snapshot(snap_src, dentry, | 548 | error = create_snapshot(snap_src, dentry, |
544 | name, namelen, async_transid); | 549 | name, namelen, async_transid, readonly); |
545 | } else { | 550 | } else { |
546 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 551 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
547 | name, namelen, async_transid); | 552 | name, namelen, async_transid); |
@@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file, | |||
638 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 643 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
639 | struct btrfs_ordered_extent *ordered; | 644 | struct btrfs_ordered_extent *ordered; |
640 | struct page *page; | 645 | struct page *page; |
646 | struct btrfs_super_block *disk_super; | ||
641 | unsigned long last_index; | 647 | unsigned long last_index; |
642 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 648 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
643 | unsigned long total_read = 0; | 649 | unsigned long total_read = 0; |
650 | u64 features; | ||
644 | u64 page_start; | 651 | u64 page_start; |
645 | u64 page_end; | 652 | u64 page_end; |
646 | u64 last_len = 0; | 653 | u64 last_len = 0; |
@@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file, | |||
648 | u64 defrag_end = 0; | 655 | u64 defrag_end = 0; |
649 | unsigned long i; | 656 | unsigned long i; |
650 | int ret; | 657 | int ret; |
658 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
659 | |||
660 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
661 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
662 | return -EINVAL; | ||
663 | if (range->compress_type) | ||
664 | compress_type = range->compress_type; | ||
665 | } | ||
651 | 666 | ||
652 | if (inode->i_size == 0) | 667 | if (inode->i_size == 0) |
653 | return 0; | 668 | return 0; |
@@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file, | |||
683 | total_read++; | 698 | total_read++; |
684 | mutex_lock(&inode->i_mutex); | 699 | mutex_lock(&inode->i_mutex); |
685 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 700 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
686 | BTRFS_I(inode)->force_compress = 1; | 701 | BTRFS_I(inode)->force_compress = compress_type; |
687 | 702 | ||
688 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 703 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
689 | if (ret) | 704 | if (ret) |
@@ -781,10 +796,17 @@ loop_unlock: | |||
781 | atomic_dec(&root->fs_info->async_submit_draining); | 796 | atomic_dec(&root->fs_info->async_submit_draining); |
782 | 797 | ||
783 | mutex_lock(&inode->i_mutex); | 798 | mutex_lock(&inode->i_mutex); |
784 | BTRFS_I(inode)->force_compress = 0; | 799 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
785 | mutex_unlock(&inode->i_mutex); | 800 | mutex_unlock(&inode->i_mutex); |
786 | } | 801 | } |
787 | 802 | ||
803 | disk_super = &root->fs_info->super_copy; | ||
804 | features = btrfs_super_incompat_flags(disk_super); | ||
805 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
806 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
807 | btrfs_set_super_incompat_flags(disk_super, features); | ||
808 | } | ||
809 | |||
788 | return 0; | 810 | return 0; |
789 | 811 | ||
790 | err_reservations: | 812 | err_reservations: |
@@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
901 | char *name, | 923 | char *name, |
902 | unsigned long fd, | 924 | unsigned long fd, |
903 | int subvol, | 925 | int subvol, |
904 | u64 *transid) | 926 | u64 *transid, |
927 | bool readonly) | ||
905 | { | 928 | { |
906 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 929 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
907 | struct file *src_file; | 930 | struct file *src_file; |
@@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
919 | 942 | ||
920 | if (subvol) { | 943 | if (subvol) { |
921 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 944 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
922 | NULL, transid); | 945 | NULL, transid, readonly); |
923 | } else { | 946 | } else { |
924 | struct inode *src_inode; | 947 | struct inode *src_inode; |
925 | src_file = fget(fd); | 948 | src_file = fget(fd); |
@@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
938 | } | 961 | } |
939 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 962 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
940 | BTRFS_I(src_inode)->root, | 963 | BTRFS_I(src_inode)->root, |
941 | transid); | 964 | transid, readonly); |
942 | fput(src_file); | 965 | fput(src_file); |
943 | } | 966 | } |
944 | out: | 967 | out: |
@@ -946,58 +969,139 @@ out: | |||
946 | } | 969 | } |
947 | 970 | ||
948 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 971 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
949 | void __user *arg, int subvol, | 972 | void __user *arg, int subvol) |
950 | int v2) | ||
951 | { | 973 | { |
952 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 974 | struct btrfs_ioctl_vol_args *vol_args; |
953 | struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; | ||
954 | char *name; | ||
955 | u64 fd; | ||
956 | int ret; | 975 | int ret; |
957 | 976 | ||
958 | if (v2) { | 977 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
959 | u64 transid = 0; | 978 | if (IS_ERR(vol_args)) |
960 | u64 *ptr = NULL; | 979 | return PTR_ERR(vol_args); |
980 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
961 | 981 | ||
962 | vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); | 982 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
963 | if (IS_ERR(vol_args_v2)) | 983 | vol_args->fd, subvol, |
964 | return PTR_ERR(vol_args_v2); | 984 | NULL, false); |
965 | 985 | ||
966 | if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { | 986 | kfree(vol_args); |
967 | ret = -EINVAL; | 987 | return ret; |
968 | goto out; | 988 | } |
969 | } | ||
970 | |||
971 | name = vol_args_v2->name; | ||
972 | fd = vol_args_v2->fd; | ||
973 | vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
974 | 989 | ||
975 | if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) | 990 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, |
976 | ptr = &transid; | 991 | void __user *arg, int subvol) |
992 | { | ||
993 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
994 | int ret; | ||
995 | u64 transid = 0; | ||
996 | u64 *ptr = NULL; | ||
997 | bool readonly = false; | ||
977 | 998 | ||
978 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | 999 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
979 | subvol, ptr); | 1000 | if (IS_ERR(vol_args)) |
1001 | return PTR_ERR(vol_args); | ||
1002 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
980 | 1003 | ||
981 | if (ret == 0 && ptr && | 1004 | if (vol_args->flags & |
982 | copy_to_user(arg + | 1005 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { |
983 | offsetof(struct btrfs_ioctl_vol_args_v2, | 1006 | ret = -EOPNOTSUPP; |
984 | transid), ptr, sizeof(*ptr))) | 1007 | goto out; |
985 | ret = -EFAULT; | ||
986 | } else { | ||
987 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
988 | if (IS_ERR(vol_args)) | ||
989 | return PTR_ERR(vol_args); | ||
990 | name = vol_args->name; | ||
991 | fd = vol_args->fd; | ||
992 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
993 | |||
994 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
995 | subvol, NULL); | ||
996 | } | 1008 | } |
1009 | |||
1010 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1011 | ptr = &transid; | ||
1012 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1013 | readonly = true; | ||
1014 | |||
1015 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1016 | vol_args->fd, subvol, | ||
1017 | ptr, readonly); | ||
1018 | |||
1019 | if (ret == 0 && ptr && | ||
1020 | copy_to_user(arg + | ||
1021 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1022 | transid), ptr, sizeof(*ptr))) | ||
1023 | ret = -EFAULT; | ||
997 | out: | 1024 | out: |
998 | kfree(vol_args); | 1025 | kfree(vol_args); |
999 | kfree(vol_args_v2); | 1026 | return ret; |
1027 | } | ||
1000 | 1028 | ||
1029 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1030 | void __user *arg) | ||
1031 | { | ||
1032 | struct inode *inode = fdentry(file)->d_inode; | ||
1033 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1034 | int ret = 0; | ||
1035 | u64 flags = 0; | ||
1036 | |||
1037 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1038 | return -EINVAL; | ||
1039 | |||
1040 | down_read(&root->fs_info->subvol_sem); | ||
1041 | if (btrfs_root_readonly(root)) | ||
1042 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1043 | up_read(&root->fs_info->subvol_sem); | ||
1044 | |||
1045 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1046 | ret = -EFAULT; | ||
1047 | |||
1048 | return ret; | ||
1049 | } | ||
1050 | |||
1051 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1052 | void __user *arg) | ||
1053 | { | ||
1054 | struct inode *inode = fdentry(file)->d_inode; | ||
1055 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1056 | struct btrfs_trans_handle *trans; | ||
1057 | u64 root_flags; | ||
1058 | u64 flags; | ||
1059 | int ret = 0; | ||
1060 | |||
1061 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1062 | return -EROFS; | ||
1063 | |||
1064 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1065 | return -EINVAL; | ||
1066 | |||
1067 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1068 | return -EFAULT; | ||
1069 | |||
1070 | if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) | ||
1071 | return -EINVAL; | ||
1072 | |||
1073 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1074 | return -EOPNOTSUPP; | ||
1075 | |||
1076 | down_write(&root->fs_info->subvol_sem); | ||
1077 | |||
1078 | /* nothing to do */ | ||
1079 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1080 | goto out; | ||
1081 | |||
1082 | root_flags = btrfs_root_flags(&root->root_item); | ||
1083 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1084 | btrfs_set_root_flags(&root->root_item, | ||
1085 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1086 | else | ||
1087 | btrfs_set_root_flags(&root->root_item, | ||
1088 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1089 | |||
1090 | trans = btrfs_start_transaction(root, 1); | ||
1091 | if (IS_ERR(trans)) { | ||
1092 | ret = PTR_ERR(trans); | ||
1093 | goto out_reset; | ||
1094 | } | ||
1095 | |||
1096 | ret = btrfs_update_root(trans, root, | ||
1097 | &root->root_key, &root->root_item); | ||
1098 | |||
1099 | btrfs_commit_transaction(trans, root); | ||
1100 | out_reset: | ||
1101 | if (ret) | ||
1102 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1103 | out: | ||
1104 | up_write(&root->fs_info->subvol_sem); | ||
1001 | return ret; | 1105 | return ret; |
1002 | } | 1106 | } |
1003 | 1107 | ||
@@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1509 | struct btrfs_ioctl_defrag_range_args *range; | 1613 | struct btrfs_ioctl_defrag_range_args *range; |
1510 | int ret; | 1614 | int ret; |
1511 | 1615 | ||
1616 | if (btrfs_root_readonly(root)) | ||
1617 | return -EROFS; | ||
1618 | |||
1512 | ret = mnt_want_write(file->f_path.mnt); | 1619 | ret = mnt_want_write(file->f_path.mnt); |
1513 | if (ret) | 1620 | if (ret) |
1514 | return ret; | 1621 | return ret; |
@@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1637 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 1744 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1638 | return -EINVAL; | 1745 | return -EINVAL; |
1639 | 1746 | ||
1747 | if (btrfs_root_readonly(root)) | ||
1748 | return -EROFS; | ||
1749 | |||
1640 | ret = mnt_want_write(file->f_path.mnt); | 1750 | ret = mnt_want_write(file->f_path.mnt); |
1641 | if (ret) | 1751 | if (ret) |
1642 | return ret; | 1752 | return ret; |
@@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1958 | if (file->private_data) | 2068 | if (file->private_data) |
1959 | goto out; | 2069 | goto out; |
1960 | 2070 | ||
2071 | ret = -EROFS; | ||
2072 | if (btrfs_root_readonly(root)) | ||
2073 | goto out; | ||
2074 | |||
1961 | ret = mnt_want_write(file->f_path.mnt); | 2075 | ret = mnt_want_write(file->f_path.mnt); |
1962 | if (ret) | 2076 | if (ret) |
1963 | goto out; | 2077 | goto out; |
@@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2257 | case FS_IOC_GETVERSION: | 2371 | case FS_IOC_GETVERSION: |
2258 | return btrfs_ioctl_getversion(file, argp); | 2372 | return btrfs_ioctl_getversion(file, argp); |
2259 | case BTRFS_IOC_SNAP_CREATE: | 2373 | case BTRFS_IOC_SNAP_CREATE: |
2260 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2374 | return btrfs_ioctl_snap_create(file, argp, 0); |
2261 | case BTRFS_IOC_SNAP_CREATE_V2: | 2375 | case BTRFS_IOC_SNAP_CREATE_V2: |
2262 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2376 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
2263 | case BTRFS_IOC_SUBVOL_CREATE: | 2377 | case BTRFS_IOC_SUBVOL_CREATE: |
2264 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2378 | return btrfs_ioctl_snap_create(file, argp, 1); |
2265 | case BTRFS_IOC_SNAP_DESTROY: | 2379 | case BTRFS_IOC_SNAP_DESTROY: |
2266 | return btrfs_ioctl_snap_destroy(file, argp); | 2380 | return btrfs_ioctl_snap_destroy(file, argp); |
2381 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2382 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2383 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2384 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2267 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2385 | case BTRFS_IOC_DEFAULT_SUBVOL: |
2268 | return btrfs_ioctl_default_subvol(file, argp); | 2386 | return btrfs_ioctl_default_subvol(file, argp); |
2269 | case BTRFS_IOC_DEFRAG: | 2387 | case BTRFS_IOC_DEFRAG: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { | |||
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
34 | 35 | ||
35 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 36 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
36 | struct btrfs_ioctl_vol_args_v2 { | 37 | struct btrfs_ioctl_vol_args_v2 { |
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { | |||
133 | */ | 134 | */ |
134 | __u32 extent_thresh; | 135 | __u32 extent_thresh; |
135 | 136 | ||
137 | /* | ||
138 | * which compression method to use if turning on compression | ||
139 | * for this defrag operation. If unspecified, zlib will | ||
140 | * be used | ||
141 | */ | ||
142 | __u32 compress_type; | ||
143 | |||
136 | /* spare for later */ | 144 | /* spare for later */ |
137 | __u32 unused[5]; | 145 | __u32 unused[4]; |
138 | }; | 146 | }; |
139 | 147 | ||
140 | struct btrfs_ioctl_space_info { | 148 | struct btrfs_ioctl_space_info { |
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args { | |||
193 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
194 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
195 | struct btrfs_ioctl_vol_args_v2) | 203 | struct btrfs_ioctl_vol_args_v2) |
204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
196 | #endif | 206 | #endif |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
32 | struct workspace { | ||
33 | void *mem; | ||
34 | void *buf; /* where decompressed data goes */ | ||
35 | void *cbuf; /* where compressed data goes */ | ||
36 | struct list_head list; | ||
37 | }; | ||
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
86 | |||
87 | static int lzo_compress_pages(struct list_head *ws, | ||
88 | struct address_space *mapping, | ||
89 | u64 start, unsigned long len, | ||
90 | struct page **pages, | ||
91 | unsigned long nr_dest_pages, | ||
92 | unsigned long *out_pages, | ||
93 | unsigned long *total_in, | ||
94 | unsigned long *total_out, | ||
95 | unsigned long max_out) | ||
96 | { | ||
97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
98 | int ret = 0; | ||
99 | char *data_in; | ||
100 | char *cpage_out; | ||
101 | int nr_pages = 0; | ||
102 | struct page *in_page = NULL; | ||
103 | struct page *out_page = NULL; | ||
104 | unsigned long bytes_left; | ||
105 | |||
106 | size_t in_len; | ||
107 | size_t out_len; | ||
108 | char *buf; | ||
109 | unsigned long tot_in = 0; | ||
110 | unsigned long tot_out = 0; | ||
111 | unsigned long pg_bytes_left; | ||
112 | unsigned long out_offset; | ||
113 | unsigned long bytes; | ||
114 | |||
115 | *out_pages = 0; | ||
116 | *total_out = 0; | ||
117 | *total_in = 0; | ||
118 | |||
119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
120 | data_in = kmap(in_page); | ||
121 | |||
122 | /* | ||
123 | * store the size of all chunks of compressed data in | ||
124 | * the first 4 bytes | ||
125 | */ | ||
126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
127 | if (out_page == NULL) { | ||
128 | ret = -ENOMEM; | ||
129 | goto out; | ||
130 | } | ||
131 | cpage_out = kmap(out_page); | ||
132 | out_offset = LZO_LEN; | ||
133 | tot_out = LZO_LEN; | ||
134 | pages[0] = out_page; | ||
135 | nr_pages = 1; | ||
136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
137 | |||
138 | /* compress at most one page of data each time */ | ||
139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
140 | while (tot_in < len) { | ||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
142 | &out_len, workspace->mem); | ||
143 | if (ret != LZO_E_OK) { | ||
144 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
145 | ret); | ||
146 | ret = -1; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | /* store the size of this chunk of compressed data */ | ||
151 | write_compress_length(cpage_out + out_offset, out_len); | ||
152 | tot_out += LZO_LEN; | ||
153 | out_offset += LZO_LEN; | ||
154 | pg_bytes_left -= LZO_LEN; | ||
155 | |||
156 | tot_in += in_len; | ||
157 | tot_out += out_len; | ||
158 | |||
159 | /* copy bytes from the working buffer into the pages */ | ||
160 | buf = workspace->cbuf; | ||
161 | while (out_len) { | ||
162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
163 | |||
164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
165 | |||
166 | out_len -= bytes; | ||
167 | pg_bytes_left -= bytes; | ||
168 | buf += bytes; | ||
169 | out_offset += bytes; | ||
170 | |||
171 | /* | ||
172 | * we need another page for writing out. | ||
173 | * | ||
174 | * Note if there's less than 4 bytes left, we just | ||
175 | * skip to a new page. | ||
176 | */ | ||
177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
178 | pg_bytes_left == 0) { | ||
179 | if (pg_bytes_left) { | ||
180 | memset(cpage_out + out_offset, 0, | ||
181 | pg_bytes_left); | ||
182 | tot_out += pg_bytes_left; | ||
183 | } | ||
184 | |||
185 | /* we're done, don't allocate new page */ | ||
186 | if (out_len == 0 && tot_in >= len) | ||
187 | break; | ||
188 | |||
189 | kunmap(out_page); | ||
190 | if (nr_pages == nr_dest_pages) { | ||
191 | out_page = NULL; | ||
192 | ret = -1; | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
197 | if (out_page == NULL) { | ||
198 | ret = -ENOMEM; | ||
199 | goto out; | ||
200 | } | ||
201 | cpage_out = kmap(out_page); | ||
202 | pages[nr_pages++] = out_page; | ||
203 | |||
204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
205 | out_offset = 0; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* we're making it bigger, give up */ | ||
210 | if (tot_in > 8192 && tot_in < tot_out) | ||
211 | goto out; | ||
212 | |||
213 | /* we're all done */ | ||
214 | if (tot_in >= len) | ||
215 | break; | ||
216 | |||
217 | if (tot_out > max_out) | ||
218 | break; | ||
219 | |||
220 | bytes_left = len - tot_in; | ||
221 | kunmap(in_page); | ||
222 | page_cache_release(in_page); | ||
223 | |||
224 | start += PAGE_CACHE_SIZE; | ||
225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
226 | data_in = kmap(in_page); | ||
227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
228 | } | ||
229 | |||
230 | if (tot_out > tot_in) | ||
231 | goto out; | ||
232 | |||
233 | /* store the size of all chunks of compressed data */ | ||
234 | cpage_out = kmap(pages[0]); | ||
235 | write_compress_length(cpage_out, tot_out); | ||
236 | |||
237 | kunmap(pages[0]); | ||
238 | |||
239 | ret = 0; | ||
240 | *total_out = tot_out; | ||
241 | *total_in = tot_in; | ||
242 | out: | ||
243 | *out_pages = nr_pages; | ||
244 | if (out_page) | ||
245 | kunmap(out_page); | ||
246 | |||
247 | if (in_page) { | ||
248 | kunmap(in_page); | ||
249 | page_cache_release(in_page); | ||
250 | } | ||
251 | |||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
256 | struct page **pages_in, | ||
257 | u64 disk_start, | ||
258 | struct bio_vec *bvec, | ||
259 | int vcnt, | ||
260 | size_t srclen) | ||
261 | { | ||
262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
263 | int ret = 0, ret2; | ||
264 | char *data_in; | ||
265 | unsigned long page_in_index = 0; | ||
266 | unsigned long page_out_index = 0; | ||
267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
268 | PAGE_CACHE_SIZE; | ||
269 | unsigned long buf_start; | ||
270 | unsigned long buf_offset = 0; | ||
271 | unsigned long bytes; | ||
272 | unsigned long working_bytes; | ||
273 | unsigned long pg_offset; | ||
274 | |||
275 | size_t in_len; | ||
276 | size_t out_len; | ||
277 | unsigned long in_offset; | ||
278 | unsigned long in_page_bytes_left; | ||
279 | unsigned long tot_in; | ||
280 | unsigned long tot_out; | ||
281 | unsigned long tot_len; | ||
282 | char *buf; | ||
283 | |||
284 | data_in = kmap(pages_in[0]); | ||
285 | tot_len = read_compress_length(data_in); | ||
286 | |||
287 | tot_in = LZO_LEN; | ||
288 | in_offset = LZO_LEN; | ||
289 | tot_len = min_t(size_t, srclen, tot_len); | ||
290 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
291 | |||
292 | tot_out = 0; | ||
293 | pg_offset = 0; | ||
294 | |||
295 | while (tot_in < tot_len) { | ||
296 | in_len = read_compress_length(data_in + in_offset); | ||
297 | in_page_bytes_left -= LZO_LEN; | ||
298 | in_offset += LZO_LEN; | ||
299 | tot_in += LZO_LEN; | ||
300 | |||
301 | tot_in += in_len; | ||
302 | working_bytes = in_len; | ||
303 | |||
304 | /* fast path: avoid using the working buffer */ | ||
305 | if (in_page_bytes_left >= in_len) { | ||
306 | buf = data_in + in_offset; | ||
307 | bytes = in_len; | ||
308 | goto cont; | ||
309 | } | ||
310 | |||
311 | /* copy bytes from the pages into the working buffer */ | ||
312 | buf = workspace->cbuf; | ||
313 | buf_offset = 0; | ||
314 | while (working_bytes) { | ||
315 | bytes = min(working_bytes, in_page_bytes_left); | ||
316 | |||
317 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
318 | buf_offset += bytes; | ||
319 | cont: | ||
320 | working_bytes -= bytes; | ||
321 | in_page_bytes_left -= bytes; | ||
322 | in_offset += bytes; | ||
323 | |||
324 | /* check if we need to pick another page */ | ||
325 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
326 | || in_page_bytes_left == 0) { | ||
327 | tot_in += in_page_bytes_left; | ||
328 | |||
329 | if (working_bytes == 0 && tot_in >= tot_len) | ||
330 | break; | ||
331 | |||
332 | kunmap(pages_in[page_in_index]); | ||
333 | page_in_index++; | ||
334 | if (page_in_index >= total_pages_in) { | ||
335 | ret = -1; | ||
336 | data_in = NULL; | ||
337 | goto done; | ||
338 | } | ||
339 | data_in = kmap(pages_in[page_in_index]); | ||
340 | |||
341 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
342 | in_offset = 0; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
347 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
348 | &out_len); | ||
349 | if (ret != LZO_E_OK) { | ||
350 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
351 | ret = -1; | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | buf_start = tot_out; | ||
356 | tot_out += out_len; | ||
357 | |||
358 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
359 | tot_out, disk_start, | ||
360 | bvec, vcnt, | ||
361 | &page_out_index, &pg_offset); | ||
362 | if (ret2 == 0) | ||
363 | break; | ||
364 | } | ||
365 | done: | ||
366 | if (data_in) | ||
367 | kunmap(pages_in[page_in_index]); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
372 | struct page *dest_page, | ||
373 | unsigned long start_byte, | ||
374 | size_t srclen, size_t destlen) | ||
375 | { | ||
376 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
377 | size_t in_len; | ||
378 | size_t out_len; | ||
379 | size_t tot_len; | ||
380 | int ret = 0; | ||
381 | char *kaddr; | ||
382 | unsigned long bytes; | ||
383 | |||
384 | BUG_ON(srclen < LZO_LEN); | ||
385 | |||
386 | tot_len = read_compress_length(data_in); | ||
387 | data_in += LZO_LEN; | ||
388 | |||
389 | in_len = read_compress_length(data_in); | ||
390 | data_in += LZO_LEN; | ||
391 | |||
392 | out_len = PAGE_CACHE_SIZE; | ||
393 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
394 | if (ret != LZO_E_OK) { | ||
395 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
396 | ret = -1; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
400 | if (out_len < start_byte) { | ||
401 | ret = -1; | ||
402 | goto out; | ||
403 | } | ||
404 | |||
405 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
406 | |||
407 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
408 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
409 | kunmap_atomic(kaddr, KM_USER0); | ||
410 | out: | ||
411 | return ret; | ||
412 | } | ||
413 | |||
414 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
415 | .alloc_workspace = lzo_alloc_workspace, | ||
416 | .free_workspace = lzo_free_workspace, | ||
417 | .compress_pages = lzo_compress_pages, | ||
418 | .decompress_biovec = lzo_decompress_biovec, | ||
419 | .decompress = lzo_decompress, | ||
420 | }; | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
172 | */ | 172 | */ |
173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
176 | { | 176 | { |
177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
178 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
191 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
194 | 195 | ||
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | u64 start, u64 len, u64 disk_len, int type) | 221 | u64 start, u64 len, u64 disk_len, int type) |
221 | { | 222 | { |
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 223 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
223 | disk_len, type, 0); | 224 | disk_len, type, 0, |
225 | BTRFS_COMPRESS_NONE); | ||
224 | } | 226 | } |
225 | 227 | ||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 228 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
227 | u64 start, u64 len, u64 disk_len, int type) | 229 | u64 start, u64 len, u64 disk_len, int type) |
228 | { | 230 | { |
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 231 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
230 | disk_len, type, 1); | 232 | disk_len, type, 1, |
233 | BTRFS_COMPRESS_NONE); | ||
234 | } | ||
235 | |||
236 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
237 | u64 start, u64 len, u64 disk_len, | ||
238 | int type, int compress_type) | ||
239 | { | ||
240 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
241 | disk_len, type, 0, | ||
242 | compress_type); | ||
231 | } | 243 | } |
232 | 244 | ||
233 | /* | 245 | /* |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
94 | unsigned long flags; | 94 | unsigned long flags; |
95 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
96 | /* reference count */ | 99 | /* reference count */ |
97 | atomic_t refs; | 100 | atomic_t refs; |
98 | 101 | ||
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
148 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
149 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
150 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
151 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
152 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
153 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -54,6 +54,90 @@ | |||
54 | 54 | ||
55 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
56 | 56 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
58 | char nbuf[16]) | ||
59 | { | ||
60 | char *errstr = NULL; | ||
61 | |||
62 | switch (errno) { | ||
63 | case -EIO: | ||
64 | errstr = "IO failure"; | ||
65 | break; | ||
66 | case -ENOMEM: | ||
67 | errstr = "Out of memory"; | ||
68 | break; | ||
69 | case -EROFS: | ||
70 | errstr = "Readonly filesystem"; | ||
71 | break; | ||
72 | default: | ||
73 | if (nbuf) { | ||
74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
75 | errstr = nbuf; | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | return errstr; | ||
81 | } | ||
82 | |||
83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
84 | { | ||
85 | /* | ||
86 | * today we only save the error info into ram. Long term we'll | ||
87 | * also send it down to the disk | ||
88 | */ | ||
89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
90 | } | ||
91 | |||
92 | /* NOTE: | ||
93 | * We move the write_super work to umount time in order to avoid a | ||
94 | * deadlock, because umount holds all the locks. | ||
95 | */ | ||
96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
97 | { | ||
98 | __save_error_info(fs_info); | ||
99 | } | ||
100 | |||
101 | /* btrfs handle error by forcing the filesystem readonly */ | ||
102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
103 | { | ||
104 | struct super_block *sb = fs_info->sb; | ||
105 | |||
106 | if (sb->s_flags & MS_RDONLY) | ||
107 | return; | ||
108 | |||
109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
110 | sb->s_flags |= MS_RDONLY; | ||
111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * __btrfs_std_error decodes expected errors from the caller and | ||
117 | * invokes the appropriate error response. | ||
118 | */ | ||
119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
120 | unsigned int line, int errno) | ||
121 | { | ||
122 | struct super_block *sb = fs_info->sb; | ||
123 | char nbuf[16]; | ||
124 | const char *errstr; | ||
125 | |||
126 | /* | ||
127 | * Special case: if the error is EROFS, and we're already | ||
128 | * under MS_RDONLY, then it is safe here. | ||
129 | */ | ||
130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
131 | return; | ||
132 | |||
133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
135 | sb->s_id, function, line, errstr); | ||
136 | save_error_info(fs_info); | ||
137 | |||
138 | btrfs_handle_error(fs_info); | ||
139 | } | ||
140 | |||
57 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 142 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -69,9 +153,9 @@ enum { | |||
69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
74 | Opt_user_subvol_rm_allowed, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
75 | }; | 159 | }; |
76 | 160 | ||
77 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
@@ -86,7 +170,9 @@ static match_table_t tokens = { | |||
86 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
87 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
88 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
173 | {Opt_compress_type, "compress=%s"}, | ||
89 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
90 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
91 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
92 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
112 | char *p, *num, *orig; | 198 | char *p, *num, *orig; |
113 | int intarg; | 199 | int intarg; |
114 | int ret = 0; | 200 | int ret = 0; |
201 | char *compress_type; | ||
202 | bool compress_force = false; | ||
115 | 203 | ||
116 | if (!options) | 204 | if (!options) |
117 | return 0; | 205 | return 0; |
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
156 | break; | 244 | break; |
157 | case Opt_compress: | ||
158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
160 | break; | ||
161 | case Opt_compress_force: | 245 | case Opt_compress_force: |
162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
248 | case Opt_compress: | ||
249 | case Opt_compress_type: | ||
250 | if (token == Opt_compress || | ||
251 | token == Opt_compress_force || | ||
252 | strcmp(args[0].from, "zlib") == 0) { | ||
253 | compress_type = "zlib"; | ||
254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
256 | compress_type = "lzo"; | ||
257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
258 | } else { | ||
259 | ret = -EINVAL; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
264 | if (compress_force) { | ||
265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
266 | pr_info("btrfs: force %s compression\n", | ||
267 | compress_type); | ||
268 | } else | ||
269 | pr_info("btrfs: use %s compression\n", | ||
270 | compress_type); | ||
165 | break; | 271 | break; |
166 | case Opt_ssd: | 272 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
753 | return 0; | 859 | return 0; |
754 | } | 860 | } |
755 | 861 | ||
862 | /* | ||
863 | * The helper to calc the free space on the devices that can be used to store | ||
864 | * file data. | ||
865 | */ | ||
866 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
867 | { | ||
868 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
869 | struct btrfs_device_info *devices_info; | ||
870 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
871 | struct btrfs_device *device; | ||
872 | u64 skip_space; | ||
873 | u64 type; | ||
874 | u64 avail_space; | ||
875 | u64 used_space; | ||
876 | u64 min_stripe_size; | ||
877 | int min_stripes = 1; | ||
878 | int i = 0, nr_devices; | ||
879 | int ret; | ||
880 | |||
881 | nr_devices = fs_info->fs_devices->rw_devices; | ||
882 | BUG_ON(!nr_devices); | ||
883 | |||
884 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
885 | GFP_NOFS); | ||
886 | if (!devices_info) | ||
887 | return -ENOMEM; | ||
888 | |||
889 | /* calc min stripe number for data space alloction */ | ||
890 | type = btrfs_get_alloc_profile(root, 1); | ||
891 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
892 | min_stripes = 2; | ||
893 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
894 | min_stripes = 2; | ||
895 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
896 | min_stripes = 4; | ||
897 | |||
898 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
899 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
900 | else | ||
901 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
902 | |||
903 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
904 | if (!device->in_fs_metadata) | ||
905 | continue; | ||
906 | |||
907 | avail_space = device->total_bytes - device->bytes_used; | ||
908 | |||
909 | /* align with stripe_len */ | ||
910 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
911 | avail_space *= BTRFS_STRIPE_LEN; | ||
912 | |||
913 | /* | ||
914 | * In order to avoid overwritting the superblock on the drive, | ||
915 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
916 | * allocation. | ||
917 | */ | ||
918 | skip_space = 1024 * 1024; | ||
919 | |||
920 | /* user can set the offset in fs_info->alloc_start. */ | ||
921 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
922 | device->total_bytes) | ||
923 | skip_space = max(fs_info->alloc_start, skip_space); | ||
924 | |||
925 | /* | ||
926 | * btrfs can not use the free space in [0, skip_space - 1], | ||
927 | * we must subtract it from the total. In order to implement | ||
928 | * it, we account the used space in this range first. | ||
929 | */ | ||
930 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
931 | &used_space); | ||
932 | if (ret) { | ||
933 | kfree(devices_info); | ||
934 | return ret; | ||
935 | } | ||
936 | |||
937 | /* calc the free space in [0, skip_space - 1] */ | ||
938 | skip_space -= used_space; | ||
939 | |||
940 | /* | ||
941 | * we can use the free space in [0, skip_space - 1], subtract | ||
942 | * it from the total. | ||
943 | */ | ||
944 | if (avail_space && avail_space >= skip_space) | ||
945 | avail_space -= skip_space; | ||
946 | else | ||
947 | avail_space = 0; | ||
948 | |||
949 | if (avail_space < min_stripe_size) | ||
950 | continue; | ||
951 | |||
952 | devices_info[i].dev = device; | ||
953 | devices_info[i].max_avail = avail_space; | ||
954 | |||
955 | i++; | ||
956 | } | ||
957 | |||
958 | nr_devices = i; | ||
959 | |||
960 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
961 | |||
962 | i = nr_devices - 1; | ||
963 | avail_space = 0; | ||
964 | while (nr_devices >= min_stripes) { | ||
965 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
966 | int j; | ||
967 | u64 alloc_size; | ||
968 | |||
969 | avail_space += devices_info[i].max_avail * min_stripes; | ||
970 | alloc_size = devices_info[i].max_avail; | ||
971 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
972 | devices_info[j].max_avail -= alloc_size; | ||
973 | } | ||
974 | i--; | ||
975 | nr_devices--; | ||
976 | } | ||
977 | |||
978 | kfree(devices_info); | ||
979 | *free_bytes = avail_space; | ||
980 | return 0; | ||
981 | } | ||
982 | |||
756 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 983 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
757 | { | 984 | { |
758 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 985 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
760 | struct list_head *head = &root->fs_info->space_info; | 987 | struct list_head *head = &root->fs_info->space_info; |
761 | struct btrfs_space_info *found; | 988 | struct btrfs_space_info *found; |
762 | u64 total_used = 0; | 989 | u64 total_used = 0; |
763 | u64 total_used_data = 0; | 990 | u64 total_free_data = 0; |
764 | int bits = dentry->d_sb->s_blocksize_bits; | 991 | int bits = dentry->d_sb->s_blocksize_bits; |
765 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 992 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
993 | int ret; | ||
766 | 994 | ||
995 | /* holding chunk_muext to avoid allocating new chunks */ | ||
996 | mutex_lock(&root->fs_info->chunk_mutex); | ||
767 | rcu_read_lock(); | 997 | rcu_read_lock(); |
768 | list_for_each_entry_rcu(found, head, list) { | 998 | list_for_each_entry_rcu(found, head, list) { |
769 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 999 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
770 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1000 | total_free_data += found->disk_total - found->disk_used; |
771 | total_used_data += found->disk_total; | 1001 | total_free_data -= |
772 | else | 1002 | btrfs_account_ro_block_groups_free_space(found); |
773 | total_used_data += found->disk_used; | 1003 | } |
1004 | |||
774 | total_used += found->disk_used; | 1005 | total_used += found->disk_used; |
775 | } | 1006 | } |
776 | rcu_read_unlock(); | 1007 | rcu_read_unlock(); |
@@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
778 | buf->f_namelen = BTRFS_NAME_LEN; | 1009 | buf->f_namelen = BTRFS_NAME_LEN; |
779 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1010 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
780 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1011 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
781 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
782 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1012 | buf->f_bsize = dentry->d_sb->s_blocksize; |
783 | buf->f_type = BTRFS_SUPER_MAGIC; | 1013 | buf->f_type = BTRFS_SUPER_MAGIC; |
1014 | buf->f_bavail = total_free_data; | ||
1015 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1016 | if (ret) { | ||
1017 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1018 | return ret; | ||
1019 | } | ||
1020 | buf->f_bavail += total_free_data; | ||
1021 | buf->f_bavail = buf->f_bavail >> bits; | ||
1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
784 | 1023 | ||
785 | /* We treat it as constant endianness (it doesn't matter _which_) | 1024 | /* We treat it as constant endianness (it doesn't matter _which_) |
786 | because we want the fsid to come out the same whether mounted | 1025 | because we want the fsid to come out the same whether mounted |
@@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void) | |||
897 | if (err) | 1136 | if (err) |
898 | return err; | 1137 | return err; |
899 | 1138 | ||
900 | err = btrfs_init_cachep(); | 1139 | err = btrfs_init_compress(); |
901 | if (err) | 1140 | if (err) |
902 | goto free_sysfs; | 1141 | goto free_sysfs; |
903 | 1142 | ||
1143 | err = btrfs_init_cachep(); | ||
1144 | if (err) | ||
1145 | goto free_compress; | ||
1146 | |||
904 | err = extent_io_init(); | 1147 | err = extent_io_init(); |
905 | if (err) | 1148 | if (err) |
906 | goto free_cachep; | 1149 | goto free_cachep; |
@@ -928,6 +1171,8 @@ free_extent_io: | |||
928 | extent_io_exit(); | 1171 | extent_io_exit(); |
929 | free_cachep: | 1172 | free_cachep: |
930 | btrfs_destroy_cachep(); | 1173 | btrfs_destroy_cachep(); |
1174 | free_compress: | ||
1175 | btrfs_exit_compress(); | ||
931 | free_sysfs: | 1176 | free_sysfs: |
932 | btrfs_exit_sysfs(); | 1177 | btrfs_exit_sysfs(); |
933 | return err; | 1178 | return err; |
@@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) | |||
942 | unregister_filesystem(&btrfs_fs_type); | 1187 | unregister_filesystem(&btrfs_fs_type); |
943 | btrfs_exit_sysfs(); | 1188 | btrfs_exit_sysfs(); |
944 | btrfs_cleanup_fs_uuids(); | 1189 | btrfs_cleanup_fs_uuids(); |
945 | btrfs_zlib_exit(); | 1190 | btrfs_exit_compress(); |
946 | } | 1191 | } |
947 | 1192 | ||
948 | module_init(init_btrfs_fs) | 1193 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
181 | struct btrfs_trans_handle *h; | 181 | struct btrfs_trans_handle *h; |
182 | struct btrfs_transaction *cur_trans; | 182 | struct btrfs_transaction *cur_trans; |
183 | int ret; | 183 | int ret; |
184 | |||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
186 | return ERR_PTR(-EROFS); | ||
184 | again: | 187 | again: |
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
186 | if (!h) | 189 | if (!h) |
@@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
910 | u64 to_reserve = 0; | 913 | u64 to_reserve = 0; |
911 | u64 index = 0; | 914 | u64 index = 0; |
912 | u64 objectid; | 915 | u64 objectid; |
916 | u64 root_flags; | ||
913 | 917 | ||
914 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 918 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
915 | if (!new_root_item) { | 919 | if (!new_root_item) { |
@@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
967 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
968 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
969 | 973 | ||
974 | root_flags = btrfs_root_flags(new_root_item); | ||
975 | if (pending->readonly) | ||
976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
977 | else | ||
978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
980 | |||
970 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
971 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
972 | btrfs_set_lock_blocking(old); | 983 | btrfs_set_lock_blocking(old); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { | |||
62 | struct btrfs_block_rsv block_rsv; | 62 | struct btrfs_block_rsv block_rsv; |
63 | /* extra metadata reseration for relocation */ | 63 | /* extra metadata reseration for relocation */ |
64 | int error; | 64 | int error; |
65 | bool readonly; | ||
65 | struct list_head list; | 66 | struct list_head list; |
66 | }; | 67 | }; |
67 | 68 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1718e1a5c320..d158530233b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
600 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
601 | 602 | ||
602 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
603 | if (!bh) | 604 | if (!bh) { |
605 | ret = -EINVAL; | ||
604 | goto error_close; | 606 | goto error_close; |
607 | } | ||
605 | 608 | ||
606 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
607 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
703 | goto error_close; | 706 | goto error_close; |
704 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
705 | if (!bh) { | 708 | if (!bh) { |
706 | ret = -EIO; | 709 | ret = -EINVAL; |
707 | goto error_close; | 710 | goto error_close; |
708 | } | 711 | } |
709 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -729,59 +732,167 @@ error: | |||
729 | return ret; | 732 | return ret; |
730 | } | 733 | } |
731 | 734 | ||
735 | /* helper to account the used device space in the range */ | ||
736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
737 | u64 end, u64 *length) | ||
738 | { | ||
739 | struct btrfs_key key; | ||
740 | struct btrfs_root *root = device->dev_root; | ||
741 | struct btrfs_dev_extent *dev_extent; | ||
742 | struct btrfs_path *path; | ||
743 | u64 extent_end; | ||
744 | int ret; | ||
745 | int slot; | ||
746 | struct extent_buffer *l; | ||
747 | |||
748 | *length = 0; | ||
749 | |||
750 | if (start >= device->total_bytes) | ||
751 | return 0; | ||
752 | |||
753 | path = btrfs_alloc_path(); | ||
754 | if (!path) | ||
755 | return -ENOMEM; | ||
756 | path->reada = 2; | ||
757 | |||
758 | key.objectid = device->devid; | ||
759 | key.offset = start; | ||
760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
761 | |||
762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
763 | if (ret < 0) | ||
764 | goto out; | ||
765 | if (ret > 0) { | ||
766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
767 | if (ret < 0) | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | while (1) { | ||
772 | l = path->nodes[0]; | ||
773 | slot = path->slots[0]; | ||
774 | if (slot >= btrfs_header_nritems(l)) { | ||
775 | ret = btrfs_next_leaf(root, path); | ||
776 | if (ret == 0) | ||
777 | continue; | ||
778 | if (ret < 0) | ||
779 | goto out; | ||
780 | |||
781 | break; | ||
782 | } | ||
783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
784 | |||
785 | if (key.objectid < device->devid) | ||
786 | goto next; | ||
787 | |||
788 | if (key.objectid > device->devid) | ||
789 | break; | ||
790 | |||
791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
792 | goto next; | ||
793 | |||
794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
796 | dev_extent); | ||
797 | if (key.offset <= start && extent_end > end) { | ||
798 | *length = end - start + 1; | ||
799 | break; | ||
800 | } else if (key.offset <= start && extent_end > start) | ||
801 | *length += extent_end - start; | ||
802 | else if (key.offset > start && extent_end <= end) | ||
803 | *length += extent_end - key.offset; | ||
804 | else if (key.offset > start && key.offset <= end) { | ||
805 | *length += end - key.offset + 1; | ||
806 | break; | ||
807 | } else if (key.offset > end) | ||
808 | break; | ||
809 | |||
810 | next: | ||
811 | path->slots[0]++; | ||
812 | } | ||
813 | ret = 0; | ||
814 | out: | ||
815 | btrfs_free_path(path); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
732 | /* | 819 | /* |
820 | * find_free_dev_extent - find free space in the specified device | ||
821 | * @trans: transaction handler | ||
822 | * @device: the device which we search the free space in | ||
823 | * @num_bytes: the size of the free space that we need | ||
824 | * @start: store the start of the free space. | ||
825 | * @len: the size of the free space. that we find, or the size of the max | ||
826 | * free space if we don't find suitable free space | ||
827 | * | ||
733 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
734 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
735 | * of extents | 830 | * of extents |
831 | * | ||
832 | * @start is used to store the start of the free space if we find. But if we | ||
833 | * don't find suitable free space, it will be used to store the start position | ||
834 | * of the max free space. | ||
835 | * | ||
836 | * @len is used to store the size of the free space that we find. | ||
837 | * But if we don't find suitable free space, it is used to store the size of | ||
838 | * the max free space. | ||
736 | */ | 839 | */ |
737 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
738 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
739 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
740 | { | 843 | { |
741 | struct btrfs_key key; | 844 | struct btrfs_key key; |
742 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
743 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
744 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
745 | u64 hole_size = 0; | 848 | u64 hole_size; |
746 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
747 | u64 search_start = 0; | 850 | u64 max_hole_size; |
851 | u64 extent_end; | ||
852 | u64 search_start; | ||
748 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
749 | int ret; | 854 | int ret; |
750 | int slot = 0; | 855 | int slot; |
751 | int start_found; | ||
752 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
753 | 857 | ||
754 | path = btrfs_alloc_path(); | ||
755 | if (!path) | ||
756 | return -ENOMEM; | ||
757 | path->reada = 2; | ||
758 | start_found = 0; | ||
759 | |||
760 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
761 | 859 | ||
762 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
763 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
764 | */ | 862 | */ |
765 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
766 | 864 | ||
767 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
768 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
769 | 867 | ||
868 | max_hole_start = search_start; | ||
869 | max_hole_size = 0; | ||
870 | |||
871 | if (search_start >= search_end) { | ||
872 | ret = -ENOSPC; | ||
873 | goto error; | ||
874 | } | ||
875 | |||
876 | path = btrfs_alloc_path(); | ||
877 | if (!path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto error; | ||
880 | } | ||
881 | path->reada = 2; | ||
882 | |||
770 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
771 | key.offset = search_start; | 884 | key.offset = search_start; |
772 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
886 | |||
773 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
774 | if (ret < 0) | 888 | if (ret < 0) |
775 | goto error; | 889 | goto out; |
776 | if (ret > 0) { | 890 | if (ret > 0) { |
777 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
778 | if (ret < 0) | 892 | if (ret < 0) |
779 | goto error; | 893 | goto out; |
780 | if (ret > 0) | ||
781 | start_found = 1; | ||
782 | } | 894 | } |
783 | l = path->nodes[0]; | 895 | |
784 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
785 | while (1) { | 896 | while (1) { |
786 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
787 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
790 | if (ret == 0) | 901 | if (ret == 0) |
791 | continue; | 902 | continue; |
792 | if (ret < 0) | 903 | if (ret < 0) |
793 | goto error; | 904 | goto out; |
794 | no_more_items: | 905 | |
795 | if (!start_found) { | 906 | break; |
796 | if (search_start >= search_end) { | ||
797 | ret = -ENOSPC; | ||
798 | goto error; | ||
799 | } | ||
800 | *start = search_start; | ||
801 | start_found = 1; | ||
802 | goto check_pending; | ||
803 | } | ||
804 | *start = last_byte > search_start ? | ||
805 | last_byte : search_start; | ||
806 | if (search_end <= *start) { | ||
807 | ret = -ENOSPC; | ||
808 | goto error; | ||
809 | } | ||
810 | goto check_pending; | ||
811 | } | 907 | } |
812 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
813 | 909 | ||
@@ -815,48 +911,62 @@ no_more_items: | |||
815 | goto next; | 911 | goto next; |
816 | 912 | ||
817 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
818 | goto no_more_items; | 914 | break; |
819 | 915 | ||
820 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
821 | start_found) { | 917 | goto next; |
822 | if (last_byte < search_start) | ||
823 | last_byte = search_start; | ||
824 | hole_size = key.offset - last_byte; | ||
825 | 918 | ||
826 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
827 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
828 | 921 | ||
829 | if (key.offset > last_byte && | 922 | if (hole_size > max_hole_size) { |
830 | hole_size >= num_bytes) { | 923 | max_hole_start = search_start; |
831 | *start = last_byte; | 924 | max_hole_size = hole_size; |
832 | goto check_pending; | 925 | } |
926 | |||
927 | /* | ||
928 | * If this free space is greater than which we need, | ||
929 | * it must be the max free space that we have found | ||
930 | * until now, so max_hole_start must point to the start | ||
931 | * of this free space and the length of this free space | ||
932 | * is stored in max_hole_size. Thus, we return | ||
933 | * max_hole_start and max_hole_size and go back to the | ||
934 | * caller. | ||
935 | */ | ||
936 | if (hole_size >= num_bytes) { | ||
937 | ret = 0; | ||
938 | goto out; | ||
833 | } | 939 | } |
834 | } | 940 | } |
835 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
836 | goto next; | ||
837 | 941 | ||
838 | start_found = 1; | ||
839 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
840 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
944 | dev_extent); | ||
945 | if (extent_end > search_start) | ||
946 | search_start = extent_end; | ||
841 | next: | 947 | next: |
842 | path->slots[0]++; | 948 | path->slots[0]++; |
843 | cond_resched(); | 949 | cond_resched(); |
844 | } | 950 | } |
845 | check_pending: | ||
846 | /* we have to make sure we didn't find an extent that has already | ||
847 | * been allocated by the map tree or the original allocation | ||
848 | */ | ||
849 | BUG_ON(*start < search_start); | ||
850 | 951 | ||
851 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end- search_start; |
852 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
853 | goto error; | 954 | max_hole_start = search_start; |
955 | max_hole_size = hole_size; | ||
854 | } | 956 | } |
855 | /* check for pending inserts here */ | ||
856 | ret = 0; | ||
857 | 957 | ||
858 | error: | 958 | /* See above. */ |
959 | if (hole_size < num_bytes) | ||
960 | ret = -ENOSPC; | ||
961 | else | ||
962 | ret = 0; | ||
963 | |||
964 | out: | ||
859 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
966 | error: | ||
967 | *start = max_hole_start; | ||
968 | if (len) | ||
969 | *len = max_hole_size; | ||
860 | return ret; | 970 | return ret; |
861 | } | 971 | } |
862 | 972 | ||
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1196 | set_blocksize(bdev, 4096); | 1306 | set_blocksize(bdev, 4096); |
1197 | bh = btrfs_read_dev_super(bdev); | 1307 | bh = btrfs_read_dev_super(bdev); |
1198 | if (!bh) { | 1308 | if (!bh) { |
1199 | ret = -EIO; | 1309 | ret = -EINVAL; |
1200 | goto error_close; | 1310 | goto error_close; |
1201 | } | 1311 | } |
1202 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1312 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1916 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2026 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1917 | return -EROFS; | 2027 | return -EROFS; |
1918 | 2028 | ||
2029 | if (!capable(CAP_SYS_ADMIN)) | ||
2030 | return -EPERM; | ||
2031 | |||
1919 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2032 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1920 | dev_root = dev_root->fs_info->dev_root; | 2033 | dev_root = dev_root->fs_info->dev_root; |
1921 | 2034 | ||
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2154 | return calc_size * num_stripes; | 2267 | return calc_size * num_stripes; |
2155 | } | 2268 | } |
2156 | 2269 | ||
2157 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2270 | /* Used to sort the devices by max_avail(descending sort) */ |
2158 | struct btrfs_root *extent_root, | 2271 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2159 | struct map_lookup **map_ret, | ||
2160 | u64 *num_bytes, u64 *stripe_size, | ||
2161 | u64 start, u64 type) | ||
2162 | { | 2272 | { |
2163 | struct btrfs_fs_info *info = extent_root->fs_info; | 2273 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2164 | struct btrfs_device *device = NULL; | 2274 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2165 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2275 | return -1; |
2166 | struct list_head *cur; | 2276 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2167 | struct map_lookup *map = NULL; | 2277 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2168 | struct extent_map_tree *em_tree; | 2278 | return 1; |
2169 | struct extent_map *em; | 2279 | else |
2170 | struct list_head private_devs; | 2280 | return 0; |
2171 | int min_stripe_size = 1 * 1024 * 1024; | 2281 | } |
2172 | u64 calc_size = 1024 * 1024 * 1024; | ||
2173 | u64 max_chunk_size = calc_size; | ||
2174 | u64 min_free; | ||
2175 | u64 avail; | ||
2176 | u64 max_avail = 0; | ||
2177 | u64 dev_offset; | ||
2178 | int num_stripes = 1; | ||
2179 | int min_stripes = 1; | ||
2180 | int sub_stripes = 0; | ||
2181 | int looped = 0; | ||
2182 | int ret; | ||
2183 | int index; | ||
2184 | int stripe_len = 64 * 1024; | ||
2185 | 2282 | ||
2186 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2283 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2187 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2284 | int *num_stripes, int *min_stripes, |
2188 | WARN_ON(1); | 2285 | int *sub_stripes) |
2189 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2286 | { |
2190 | } | 2287 | *num_stripes = 1; |
2191 | if (list_empty(&fs_devices->alloc_list)) | 2288 | *min_stripes = 1; |
2192 | return -ENOSPC; | 2289 | *sub_stripes = 0; |
2193 | 2290 | ||
2194 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2291 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2195 | num_stripes = fs_devices->rw_devices; | 2292 | *num_stripes = fs_devices->rw_devices; |
2196 | min_stripes = 2; | 2293 | *min_stripes = 2; |
2197 | } | 2294 | } |
2198 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2295 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2199 | num_stripes = 2; | 2296 | *num_stripes = 2; |
2200 | min_stripes = 2; | 2297 | *min_stripes = 2; |
2201 | } | 2298 | } |
2202 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2299 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2203 | if (fs_devices->rw_devices < 2) | 2300 | if (fs_devices->rw_devices < 2) |
2204 | return -ENOSPC; | 2301 | return -ENOSPC; |
2205 | num_stripes = 2; | 2302 | *num_stripes = 2; |
2206 | min_stripes = 2; | 2303 | *min_stripes = 2; |
2207 | } | 2304 | } |
2208 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2305 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2209 | num_stripes = fs_devices->rw_devices; | 2306 | *num_stripes = fs_devices->rw_devices; |
2210 | if (num_stripes < 4) | 2307 | if (*num_stripes < 4) |
2211 | return -ENOSPC; | 2308 | return -ENOSPC; |
2212 | num_stripes &= ~(u32)1; | 2309 | *num_stripes &= ~(u32)1; |
2213 | sub_stripes = 2; | 2310 | *sub_stripes = 2; |
2214 | min_stripes = 4; | 2311 | *min_stripes = 4; |
2215 | } | 2312 | } |
2216 | 2313 | ||
2314 | return 0; | ||
2315 | } | ||
2316 | |||
2317 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2318 | u64 proposed_size, u64 type, | ||
2319 | int num_stripes, int small_stripe) | ||
2320 | { | ||
2321 | int min_stripe_size = 1 * 1024 * 1024; | ||
2322 | u64 calc_size = proposed_size; | ||
2323 | u64 max_chunk_size = calc_size; | ||
2324 | int ncopies = 1; | ||
2325 | |||
2326 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2327 | BTRFS_BLOCK_GROUP_DUP | | ||
2328 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2329 | ncopies = 2; | ||
2330 | |||
2217 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2331 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2218 | max_chunk_size = 10 * calc_size; | 2332 | max_chunk_size = 10 * calc_size; |
2219 | min_stripe_size = 64 * 1024 * 1024; | 2333 | min_stripe_size = 64 * 1024 * 1024; |
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2230 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2344 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2231 | max_chunk_size); | 2345 | max_chunk_size); |
2232 | 2346 | ||
2233 | again: | 2347 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2234 | max_avail = 0; | 2348 | calc_size = max_chunk_size * ncopies; |
2235 | if (!map || map->num_stripes != num_stripes) { | ||
2236 | kfree(map); | ||
2237 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2238 | if (!map) | ||
2239 | return -ENOMEM; | ||
2240 | map->num_stripes = num_stripes; | ||
2241 | } | ||
2242 | |||
2243 | if (calc_size * num_stripes > max_chunk_size) { | ||
2244 | calc_size = max_chunk_size; | ||
2245 | do_div(calc_size, num_stripes); | 2349 | do_div(calc_size, num_stripes); |
2246 | do_div(calc_size, stripe_len); | 2350 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2247 | calc_size *= stripe_len; | 2351 | calc_size *= BTRFS_STRIPE_LEN; |
2248 | } | 2352 | } |
2249 | 2353 | ||
2250 | /* we don't want tiny stripes */ | 2354 | /* we don't want tiny stripes */ |
2251 | if (!looped) | 2355 | if (!small_stripe) |
2252 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2356 | calc_size = max_t(u64, min_stripe_size, calc_size); |
2253 | 2357 | ||
2254 | /* | 2358 | /* |
2255 | * we're about to do_div by the stripe_len so lets make sure | 2359 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure |
2256 | * we end up with something bigger than a stripe | 2360 | * we end up with something bigger than a stripe |
2257 | */ | 2361 | */ |
2258 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2362 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
2363 | |||
2364 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2365 | calc_size *= BTRFS_STRIPE_LEN; | ||
2366 | |||
2367 | return calc_size; | ||
2368 | } | ||
2369 | |||
2370 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
2371 | int num_stripes) | ||
2372 | { | ||
2373 | struct map_lookup *new; | ||
2374 | size_t len = map_lookup_size(num_stripes); | ||
2375 | |||
2376 | BUG_ON(map->num_stripes < num_stripes); | ||
2377 | |||
2378 | if (map->num_stripes == num_stripes) | ||
2379 | return map; | ||
2380 | |||
2381 | new = kmalloc(len, GFP_NOFS); | ||
2382 | if (!new) { | ||
2383 | /* just change map->num_stripes */ | ||
2384 | map->num_stripes = num_stripes; | ||
2385 | return map; | ||
2386 | } | ||
2387 | |||
2388 | memcpy(new, map, len); | ||
2389 | new->num_stripes = num_stripes; | ||
2390 | kfree(map); | ||
2391 | return new; | ||
2392 | } | ||
2393 | |||
2394 | /* | ||
2395 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
2396 | * max free space information of every device. It is used when we can not | ||
2397 | * allocate chunks by default size. | ||
2398 | * | ||
2399 | * By this helper, we can allocate a new chunk as larger as possible. | ||
2400 | */ | ||
2401 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2402 | struct btrfs_fs_devices *fs_devices, | ||
2403 | struct btrfs_device_info *devices, | ||
2404 | int nr_device, u64 type, | ||
2405 | struct map_lookup **map_lookup, | ||
2406 | int min_stripes, u64 *stripe_size) | ||
2407 | { | ||
2408 | int i, index, sort_again = 0; | ||
2409 | int min_devices = min_stripes; | ||
2410 | u64 max_avail, min_free; | ||
2411 | struct map_lookup *map = *map_lookup; | ||
2412 | int ret; | ||
2413 | |||
2414 | if (nr_device < min_stripes) | ||
2415 | return -ENOSPC; | ||
2416 | |||
2417 | btrfs_descending_sort_devices(devices, nr_device); | ||
2418 | |||
2419 | max_avail = devices[0].max_avail; | ||
2420 | if (!max_avail) | ||
2421 | return -ENOSPC; | ||
2422 | |||
2423 | for (i = 0; i < nr_device; i++) { | ||
2424 | /* | ||
2425 | * if dev_offset = 0, it means the free space of this device | ||
2426 | * is less than what we need, and we didn't search max avail | ||
2427 | * extent on this device, so do it now. | ||
2428 | */ | ||
2429 | if (!devices[i].dev_offset) { | ||
2430 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2431 | max_avail, | ||
2432 | &devices[i].dev_offset, | ||
2433 | &devices[i].max_avail); | ||
2434 | if (ret != 0 && ret != -ENOSPC) | ||
2435 | return ret; | ||
2436 | sort_again = 1; | ||
2437 | } | ||
2438 | } | ||
2439 | |||
2440 | /* we update the max avail free extent of each devices, sort again */ | ||
2441 | if (sort_again) | ||
2442 | btrfs_descending_sort_devices(devices, nr_device); | ||
2443 | |||
2444 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2445 | min_devices = 1; | ||
2446 | |||
2447 | if (!devices[min_devices - 1].max_avail) | ||
2448 | return -ENOSPC; | ||
2449 | |||
2450 | max_avail = devices[min_devices - 1].max_avail; | ||
2451 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2452 | do_div(max_avail, 2); | ||
2453 | |||
2454 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2455 | min_stripes, 1); | ||
2456 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2457 | min_free = max_avail * 2; | ||
2458 | else | ||
2459 | min_free = max_avail; | ||
2460 | |||
2461 | if (min_free > devices[min_devices - 1].max_avail) | ||
2462 | return -ENOSPC; | ||
2463 | |||
2464 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2465 | *stripe_size = max_avail; | ||
2466 | |||
2467 | index = 0; | ||
2468 | for (i = 0; i < min_stripes; i++) { | ||
2469 | map->stripes[i].dev = devices[index].dev; | ||
2470 | map->stripes[i].physical = devices[index].dev_offset; | ||
2471 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2472 | i++; | ||
2473 | map->stripes[i].dev = devices[index].dev; | ||
2474 | map->stripes[i].physical = devices[index].dev_offset + | ||
2475 | max_avail; | ||
2476 | } | ||
2477 | index++; | ||
2478 | } | ||
2479 | *map_lookup = map; | ||
2480 | |||
2481 | return 0; | ||
2482 | } | ||
2259 | 2483 | ||
2260 | do_div(calc_size, stripe_len); | 2484 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
2261 | calc_size *= stripe_len; | 2485 | struct btrfs_root *extent_root, |
2486 | struct map_lookup **map_ret, | ||
2487 | u64 *num_bytes, u64 *stripe_size, | ||
2488 | u64 start, u64 type) | ||
2489 | { | ||
2490 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2491 | struct btrfs_device *device = NULL; | ||
2492 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2493 | struct list_head *cur; | ||
2494 | struct map_lookup *map; | ||
2495 | struct extent_map_tree *em_tree; | ||
2496 | struct extent_map *em; | ||
2497 | struct btrfs_device_info *devices_info; | ||
2498 | struct list_head private_devs; | ||
2499 | u64 calc_size = 1024 * 1024 * 1024; | ||
2500 | u64 min_free; | ||
2501 | u64 avail; | ||
2502 | u64 dev_offset; | ||
2503 | int num_stripes; | ||
2504 | int min_stripes; | ||
2505 | int sub_stripes; | ||
2506 | int min_devices; /* the min number of devices we need */ | ||
2507 | int i; | ||
2508 | int ret; | ||
2509 | int index; | ||
2510 | |||
2511 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
2512 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
2513 | WARN_ON(1); | ||
2514 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2515 | } | ||
2516 | if (list_empty(&fs_devices->alloc_list)) | ||
2517 | return -ENOSPC; | ||
2518 | |||
2519 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2520 | &min_stripes, &sub_stripes); | ||
2521 | if (ret) | ||
2522 | return ret; | ||
2523 | |||
2524 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2525 | GFP_NOFS); | ||
2526 | if (!devices_info) | ||
2527 | return -ENOMEM; | ||
2528 | |||
2529 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2530 | if (!map) { | ||
2531 | ret = -ENOMEM; | ||
2532 | goto error; | ||
2533 | } | ||
2534 | map->num_stripes = num_stripes; | ||
2262 | 2535 | ||
2263 | cur = fs_devices->alloc_list.next; | 2536 | cur = fs_devices->alloc_list.next; |
2264 | index = 0; | 2537 | index = 0; |
2538 | i = 0; | ||
2265 | 2539 | ||
2266 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2540 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2541 | num_stripes, 0); | ||
2542 | |||
2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2267 | min_free = calc_size * 2; | 2544 | min_free = calc_size * 2; |
2268 | else | 2545 | min_devices = 1; |
2546 | } else { | ||
2269 | min_free = calc_size; | 2547 | min_free = calc_size; |
2270 | 2548 | min_devices = min_stripes; | |
2271 | /* | 2549 | } |
2272 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2273 | * we've looped, then we are likely allocating the maximum amount of | ||
2274 | * space left already | ||
2275 | */ | ||
2276 | if (!looped) | ||
2277 | min_free += 1024 * 1024; | ||
2278 | 2550 | ||
2279 | INIT_LIST_HEAD(&private_devs); | 2551 | INIT_LIST_HEAD(&private_devs); |
2280 | while (index < num_stripes) { | 2552 | while (index < num_stripes) { |
@@ -2287,27 +2559,39 @@ again: | |||
2287 | cur = cur->next; | 2559 | cur = cur->next; |
2288 | 2560 | ||
2289 | if (device->in_fs_metadata && avail >= min_free) { | 2561 | if (device->in_fs_metadata && avail >= min_free) { |
2290 | ret = find_free_dev_extent(trans, device, | 2562 | ret = find_free_dev_extent(trans, device, min_free, |
2291 | min_free, &dev_offset, | 2563 | &devices_info[i].dev_offset, |
2292 | &max_avail); | 2564 | &devices_info[i].max_avail); |
2293 | if (ret == 0) { | 2565 | if (ret == 0) { |
2294 | list_move_tail(&device->dev_alloc_list, | 2566 | list_move_tail(&device->dev_alloc_list, |
2295 | &private_devs); | 2567 | &private_devs); |
2296 | map->stripes[index].dev = device; | 2568 | map->stripes[index].dev = device; |
2297 | map->stripes[index].physical = dev_offset; | 2569 | map->stripes[index].physical = |
2570 | devices_info[i].dev_offset; | ||
2298 | index++; | 2571 | index++; |
2299 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2572 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2300 | map->stripes[index].dev = device; | 2573 | map->stripes[index].dev = device; |
2301 | map->stripes[index].physical = | 2574 | map->stripes[index].physical = |
2302 | dev_offset + calc_size; | 2575 | devices_info[i].dev_offset + |
2576 | calc_size; | ||
2303 | index++; | 2577 | index++; |
2304 | } | 2578 | } |
2305 | } | 2579 | } else if (ret != -ENOSPC) |
2306 | } else if (device->in_fs_metadata && avail > max_avail) | 2580 | goto error; |
2307 | max_avail = avail; | 2581 | |
2582 | devices_info[i].dev = device; | ||
2583 | i++; | ||
2584 | } else if (device->in_fs_metadata && | ||
2585 | avail >= BTRFS_STRIPE_LEN) { | ||
2586 | devices_info[i].dev = device; | ||
2587 | devices_info[i].max_avail = avail; | ||
2588 | i++; | ||
2589 | } | ||
2590 | |||
2308 | if (cur == &fs_devices->alloc_list) | 2591 | if (cur == &fs_devices->alloc_list) |
2309 | break; | 2592 | break; |
2310 | } | 2593 | } |
2594 | |||
2311 | list_splice(&private_devs, &fs_devices->alloc_list); | 2595 | list_splice(&private_devs, &fs_devices->alloc_list); |
2312 | if (index < num_stripes) { | 2596 | if (index < num_stripes) { |
2313 | if (index >= min_stripes) { | 2597 | if (index >= min_stripes) { |
@@ -2316,34 +2600,36 @@ again: | |||
2316 | num_stripes /= sub_stripes; | 2600 | num_stripes /= sub_stripes; |
2317 | num_stripes *= sub_stripes; | 2601 | num_stripes *= sub_stripes; |
2318 | } | 2602 | } |
2319 | looped = 1; | 2603 | |
2320 | goto again; | 2604 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2321 | } | 2605 | } else if (i >= min_devices) { |
2322 | if (!looped && max_avail > 0) { | 2606 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2323 | looped = 1; | 2607 | devices_info, i, type, |
2324 | calc_size = max_avail; | 2608 | &map, min_stripes, |
2325 | goto again; | 2609 | &calc_size); |
2610 | if (ret) | ||
2611 | goto error; | ||
2612 | } else { | ||
2613 | ret = -ENOSPC; | ||
2614 | goto error; | ||
2326 | } | 2615 | } |
2327 | kfree(map); | ||
2328 | return -ENOSPC; | ||
2329 | } | 2616 | } |
2330 | map->sector_size = extent_root->sectorsize; | 2617 | map->sector_size = extent_root->sectorsize; |
2331 | map->stripe_len = stripe_len; | 2618 | map->stripe_len = BTRFS_STRIPE_LEN; |
2332 | map->io_align = stripe_len; | 2619 | map->io_align = BTRFS_STRIPE_LEN; |
2333 | map->io_width = stripe_len; | 2620 | map->io_width = BTRFS_STRIPE_LEN; |
2334 | map->type = type; | 2621 | map->type = type; |
2335 | map->num_stripes = num_stripes; | ||
2336 | map->sub_stripes = sub_stripes; | 2622 | map->sub_stripes = sub_stripes; |
2337 | 2623 | ||
2338 | *map_ret = map; | 2624 | *map_ret = map; |
2339 | *stripe_size = calc_size; | 2625 | *stripe_size = calc_size; |
2340 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2626 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2341 | num_stripes, sub_stripes); | 2627 | map->num_stripes, sub_stripes); |
2342 | 2628 | ||
2343 | em = alloc_extent_map(GFP_NOFS); | 2629 | em = alloc_extent_map(GFP_NOFS); |
2344 | if (!em) { | 2630 | if (!em) { |
2345 | kfree(map); | 2631 | ret = -ENOMEM; |
2346 | return -ENOMEM; | 2632 | goto error; |
2347 | } | 2633 | } |
2348 | em->bdev = (struct block_device *)map; | 2634 | em->bdev = (struct block_device *)map; |
2349 | em->start = start; | 2635 | em->start = start; |
@@ -2376,7 +2662,13 @@ again: | |||
2376 | index++; | 2662 | index++; |
2377 | } | 2663 | } |
2378 | 2664 | ||
2665 | kfree(devices_info); | ||
2379 | return 0; | 2666 | return 0; |
2667 | |||
2668 | error: | ||
2669 | kfree(map); | ||
2670 | kfree(devices_info); | ||
2671 | return ret; | ||
2380 | } | 2672 | } |
2381 | 2673 | ||
2382 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2674 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1be781079450..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -136,6 +139,30 @@ struct btrfs_multi_bio { | |||
136 | struct btrfs_bio_stripe stripes[]; | 139 | struct btrfs_bio_stripe stripes[]; |
137 | }; | 140 | }; |
138 | 141 | ||
142 | struct btrfs_device_info { | ||
143 | struct btrfs_device *dev; | ||
144 | u64 dev_offset; | ||
145 | u64 max_avail; | ||
146 | }; | ||
147 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | ||
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
150 | |||
151 | /* | ||
152 | * sort the devices by max_avail, in which max free extent size of each device | ||
153 | * is stored.(Descending Sort) | ||
154 | */ | ||
155 | static inline void btrfs_descending_sort_devices( | ||
156 | struct btrfs_device_info *devices, | ||
157 | size_t nr_devices) | ||
158 | { | ||
159 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
160 | btrfs_cmp_device_free_bytes, NULL); | ||
161 | } | ||
162 | |||
163 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
164 | u64 end, u64 *length); | ||
165 | |||
139 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 166 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
140 | (sizeof(struct btrfs_bio_stripe) * (n))) | 167 | (sizeof(struct btrfs_bio_stripe) * (n))) |
141 | 168 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
317 | size_t size, int flags) | 317 | size_t size, int flags) |
318 | { | 318 | { |
319 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
320 | |||
321 | /* | ||
322 | * The permission on security.* and system.* is not checked | ||
323 | * in permission(). | ||
324 | */ | ||
325 | if (btrfs_root_readonly(root)) | ||
326 | return -EROFS; | ||
327 | |||
319 | /* | 328 | /* |
320 | * If this is a request for a synthetic attribute in the system.* | 329 | * If this is a request for a synthetic attribute in the system.* |
321 | * namespace use the generic infrastructure to resolve a handler | 330 | * namespace use the generic infrastructure to resolve a handler |
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
336 | 345 | ||
337 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 346 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
338 | { | 347 | { |
348 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
349 | |||
350 | /* | ||
351 | * The permission on security.* and system.* is not checked | ||
352 | * in permission(). | ||
353 | */ | ||
354 | if (btrfs_root_readonly(root)) | ||
355 | return -EROFS; | ||
356 | |||
339 | /* | 357 | /* |
340 | * If this is a request for a synthetic attribute in the system.* | 358 | * If this is a request for a synthetic attribute in the system.* |
341 | * namespace use the generic infrastructure to resolve a handler | 359 | * namespace use the generic infrastructure to resolve a handler |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,152 +39,51 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | 43 | { |
53 | static unsigned long num_workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | 45 | ||
57 | /* | 46 | vfree(workspace->def_strm.workspace); |
58 | * this finds an available zlib workspace or allocates a new one | 47 | vfree(workspace->inf_strm.workspace); |
59 | * NULL or an ERR_PTR is returned if things go bad. | 48 | kfree(workspace->buf); |
60 | */ | 49 | kfree(workspace); |
61 | static struct workspace *find_zlib_workspace(void) | 50 | } |
51 | |||
52 | static struct list_head *zlib_alloc_workspace(void) | ||
62 | { | 53 | { |
63 | struct workspace *workspace; | 54 | struct workspace *workspace; |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | 55 | ||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
89 | if (!workspace) { | 57 | if (!workspace) |
90 | ret = -ENOMEM; | 58 | return ERR_PTR(-ENOMEM); |
91 | goto fail; | ||
92 | } | ||
93 | 59 | ||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
105 | if (!workspace->buf) { | 63 | if (!workspace->def_strm.workspace || |
106 | ret = -ENOMEM; | 64 | !workspace->inf_strm.workspace || !workspace->buf) |
107 | goto fail_kmalloc; | 65 | goto fail; |
108 | } | ||
109 | return workspace; | ||
110 | |||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | ||
139 | vfree(workspace->inf_strm.workspace); | ||
140 | kfree(workspace->buf); | ||
141 | kfree(workspace); | ||
142 | 66 | ||
143 | atomic_dec(&alloc_workspace); | 67 | INIT_LIST_HEAD(&workspace->list); |
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | ||
148 | 68 | ||
149 | /* | 69 | return &workspace->list; |
150 | * cleanup function for module exit | 70 | fail: |
151 | */ | 71 | zlib_free_workspace(&workspace->list); |
152 | static void free_workspaces(void) | 72 | return ERR_PTR(-ENOMEM); |
153 | { | ||
154 | struct workspace *workspace; | ||
155 | while (!list_empty(&idle_workspace)) { | ||
156 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
157 | list); | ||
158 | list_del(&workspace->list); | ||
159 | vfree(workspace->def_strm.workspace); | ||
160 | vfree(workspace->inf_strm.workspace); | ||
161 | kfree(workspace->buf); | ||
162 | kfree(workspace); | ||
163 | atomic_dec(&alloc_workspace); | ||
164 | } | ||
165 | } | 73 | } |
166 | 74 | ||
167 | /* | 75 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 76 | struct address_space *mapping, |
169 | * | 77 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 78 | struct page **pages, |
171 | * in 'pages' | 79 | unsigned long nr_dest_pages, |
172 | * | 80 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 81 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 82 | unsigned long *total_out, |
175 | * | 83 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller then len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 84 | { |
85 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 86 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 87 | char *data_in; |
198 | char *cpage_out; | 88 | char *cpage_out; |
199 | int nr_pages = 0; | 89 | int nr_pages = 0; |
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
205 | *total_out = 0; | 95 | *total_out = 0; |
206 | *total_in = 0; | 96 | *total_in = 0; |
207 | 97 | ||
208 | workspace = find_zlib_workspace(); | ||
209 | if (IS_ERR(workspace)) | ||
210 | return -1; | ||
211 | |||
212 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 98 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
213 | printk(KERN_WARNING "deflateInit failed\n"); | 99 | printk(KERN_WARNING "deflateInit failed\n"); |
214 | ret = -1; | 100 | ret = -1; |
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
222 | data_in = kmap(in_page); | 108 | data_in = kmap(in_page); |
223 | 109 | ||
224 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 110 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
111 | if (out_page == NULL) { | ||
112 | ret = -1; | ||
113 | goto out; | ||
114 | } | ||
225 | cpage_out = kmap(out_page); | 115 | cpage_out = kmap(out_page); |
226 | pages[0] = out_page; | 116 | pages[0] = out_page; |
227 | nr_pages = 1; | 117 | nr_pages = 1; |
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
260 | goto out; | 150 | goto out; |
261 | } | 151 | } |
262 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 152 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
153 | if (out_page == NULL) { | ||
154 | ret = -1; | ||
155 | goto out; | ||
156 | } | ||
263 | cpage_out = kmap(out_page); | 157 | cpage_out = kmap(out_page); |
264 | pages[nr_pages] = out_page; | 158 | pages[nr_pages] = out_page; |
265 | nr_pages++; | 159 | nr_pages++; |
@@ -314,55 +208,26 @@ out: | |||
314 | kunmap(in_page); | 208 | kunmap(in_page); |
315 | page_cache_release(in_page); | 209 | page_cache_release(in_page); |
316 | } | 210 | } |
317 | free_workspace(workspace); | ||
318 | return ret; | 211 | return ret; |
319 | } | 212 | } |
320 | 213 | ||
321 | /* | 214 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
322 | * pages_in is an array of pages with compressed data. | 215 | u64 disk_start, |
323 | * | 216 | struct bio_vec *bvec, |
324 | * disk_start is the starting logical offset of this array in the file | 217 | int vcnt, |
325 | * | 218 | size_t srclen) |
326 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
327 | * | ||
328 | * vcnt is the count of pages in the biovec | ||
329 | * | ||
330 | * srclen is the number of bytes in pages_in | ||
331 | * | ||
332 | * The basic idea is that we have a bio that was created by readpages. | ||
333 | * The pages in the bio are for the uncompressed data, and they may not | ||
334 | * be contiguous. They all correspond to the range of bytes covered by | ||
335 | * the compressed extent. | ||
336 | */ | ||
337 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
338 | u64 disk_start, | ||
339 | struct bio_vec *bvec, | ||
340 | int vcnt, | ||
341 | size_t srclen) | ||
342 | { | 219 | { |
343 | int ret = 0; | 220 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
221 | int ret = 0, ret2; | ||
344 | int wbits = MAX_WBITS; | 222 | int wbits = MAX_WBITS; |
345 | struct workspace *workspace; | ||
346 | char *data_in; | 223 | char *data_in; |
347 | size_t total_out = 0; | 224 | size_t total_out = 0; |
348 | unsigned long page_bytes_left; | ||
349 | unsigned long page_in_index = 0; | 225 | unsigned long page_in_index = 0; |
350 | unsigned long page_out_index = 0; | 226 | unsigned long page_out_index = 0; |
351 | struct page *page_out; | ||
352 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 227 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
353 | PAGE_CACHE_SIZE; | 228 | PAGE_CACHE_SIZE; |
354 | unsigned long buf_start; | 229 | unsigned long buf_start; |
355 | unsigned long buf_offset; | ||
356 | unsigned long bytes; | ||
357 | unsigned long working_bytes; | ||
358 | unsigned long pg_offset; | 230 | unsigned long pg_offset; |
359 | unsigned long start_byte; | ||
360 | unsigned long current_buf_start; | ||
361 | char *kaddr; | ||
362 | |||
363 | workspace = find_zlib_workspace(); | ||
364 | if (IS_ERR(workspace)) | ||
365 | return -ENOMEM; | ||
366 | 231 | ||
367 | data_in = kmap(pages_in[page_in_index]); | 232 | data_in = kmap(pages_in[page_in_index]); |
368 | workspace->inf_strm.next_in = data_in; | 233 | workspace->inf_strm.next_in = data_in; |
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
372 | workspace->inf_strm.total_out = 0; | 237 | workspace->inf_strm.total_out = 0; |
373 | workspace->inf_strm.next_out = workspace->buf; | 238 | workspace->inf_strm.next_out = workspace->buf; |
374 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 239 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
375 | page_out = bvec[page_out_index].bv_page; | ||
376 | page_bytes_left = PAGE_CACHE_SIZE; | ||
377 | pg_offset = 0; | 240 | pg_offset = 0; |
378 | 241 | ||
379 | /* If it's deflate, and it's got no preset dictionary, then | 242 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
389 | 252 | ||
390 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 253 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
391 | printk(KERN_WARNING "inflateInit failed\n"); | 254 | printk(KERN_WARNING "inflateInit failed\n"); |
392 | ret = -1; | 255 | return -1; |
393 | goto out; | ||
394 | } | 256 | } |
395 | while (workspace->inf_strm.total_in < srclen) { | 257 | while (workspace->inf_strm.total_in < srclen) { |
396 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 258 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
397 | if (ret != Z_OK && ret != Z_STREAM_END) | 259 | if (ret != Z_OK && ret != Z_STREAM_END) |
398 | break; | 260 | break; |
399 | /* | ||
400 | * buf start is the byte offset we're of the start of | ||
401 | * our workspace buffer | ||
402 | */ | ||
403 | buf_start = total_out; | ||
404 | 261 | ||
405 | /* total_out is the last byte of the workspace buffer */ | 262 | buf_start = total_out; |
406 | total_out = workspace->inf_strm.total_out; | 263 | total_out = workspace->inf_strm.total_out; |
407 | 264 | ||
408 | working_bytes = total_out - buf_start; | 265 | /* we didn't make progress in this inflate call, we're done */ |
409 | 266 | if (buf_start == total_out) | |
410 | /* | ||
411 | * start byte is the first byte of the page we're currently | ||
412 | * copying into relative to the start of the compressed data. | ||
413 | */ | ||
414 | start_byte = page_offset(page_out) - disk_start; | ||
415 | |||
416 | if (working_bytes == 0) { | ||
417 | /* we didn't make progress in this inflate | ||
418 | * call, we're done | ||
419 | */ | ||
420 | if (ret != Z_STREAM_END) | ||
421 | ret = -1; | ||
422 | break; | 267 | break; |
423 | } | ||
424 | 268 | ||
425 | /* we haven't yet hit data corresponding to this page */ | 269 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
426 | if (total_out <= start_byte) | 270 | total_out, disk_start, |
427 | goto next; | 271 | bvec, vcnt, |
428 | 272 | &page_out_index, &pg_offset); | |
429 | /* | 273 | if (ret2 == 0) { |
430 | * the start of the data we care about is offset into | 274 | ret = 0; |
431 | * the middle of our working buffer | 275 | goto done; |
432 | */ | ||
433 | if (total_out > start_byte && buf_start < start_byte) { | ||
434 | buf_offset = start_byte - buf_start; | ||
435 | working_bytes -= buf_offset; | ||
436 | } else { | ||
437 | buf_offset = 0; | ||
438 | } | ||
439 | current_buf_start = buf_start; | ||
440 | |||
441 | /* copy bytes from the working buffer into the pages */ | ||
442 | while (working_bytes > 0) { | ||
443 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
444 | PAGE_CACHE_SIZE - buf_offset); | ||
445 | bytes = min(bytes, working_bytes); | ||
446 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
447 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
448 | bytes); | ||
449 | kunmap_atomic(kaddr, KM_USER0); | ||
450 | flush_dcache_page(page_out); | ||
451 | |||
452 | pg_offset += bytes; | ||
453 | page_bytes_left -= bytes; | ||
454 | buf_offset += bytes; | ||
455 | working_bytes -= bytes; | ||
456 | current_buf_start += bytes; | ||
457 | |||
458 | /* check if we need to pick another page */ | ||
459 | if (page_bytes_left == 0) { | ||
460 | page_out_index++; | ||
461 | if (page_out_index >= vcnt) { | ||
462 | ret = 0; | ||
463 | goto done; | ||
464 | } | ||
465 | |||
466 | page_out = bvec[page_out_index].bv_page; | ||
467 | pg_offset = 0; | ||
468 | page_bytes_left = PAGE_CACHE_SIZE; | ||
469 | start_byte = page_offset(page_out) - disk_start; | ||
470 | |||
471 | /* | ||
472 | * make sure our new page is covered by this | ||
473 | * working buffer | ||
474 | */ | ||
475 | if (total_out <= start_byte) | ||
476 | goto next; | ||
477 | |||
478 | /* the next page in the biovec might not | ||
479 | * be adjacent to the last page, but it | ||
480 | * might still be found inside this working | ||
481 | * buffer. bump our offset pointer | ||
482 | */ | ||
483 | if (total_out > start_byte && | ||
484 | current_buf_start < start_byte) { | ||
485 | buf_offset = start_byte - buf_start; | ||
486 | working_bytes = total_out - start_byte; | ||
487 | current_buf_start = buf_start + | ||
488 | buf_offset; | ||
489 | } | ||
490 | } | ||
491 | } | 276 | } |
492 | next: | 277 | |
493 | workspace->inf_strm.next_out = workspace->buf; | 278 | workspace->inf_strm.next_out = workspace->buf; |
494 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 279 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
495 | 280 | ||
@@ -516,35 +301,21 @@ done: | |||
516 | zlib_inflateEnd(&workspace->inf_strm); | 301 | zlib_inflateEnd(&workspace->inf_strm); |
517 | if (data_in) | 302 | if (data_in) |
518 | kunmap(pages_in[page_in_index]); | 303 | kunmap(pages_in[page_in_index]); |
519 | out: | ||
520 | free_workspace(workspace); | ||
521 | return ret; | 304 | return ret; |
522 | } | 305 | } |
523 | 306 | ||
524 | /* | 307 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
525 | * a less complex decompression routine. Our compressed data fits in a | 308 | struct page *dest_page, |
526 | * single page, and we want to read a single page out of it. | 309 | unsigned long start_byte, |
527 | * start_byte tells us the offset into the compressed data we're interested in | 310 | size_t srclen, size_t destlen) |
528 | */ | ||
529 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
530 | struct page *dest_page, | ||
531 | unsigned long start_byte, | ||
532 | size_t srclen, size_t destlen) | ||
533 | { | 311 | { |
312 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
534 | int ret = 0; | 313 | int ret = 0; |
535 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
536 | struct workspace *workspace; | ||
537 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left = destlen; |
538 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
539 | char *kaddr; | 317 | char *kaddr; |
540 | 318 | ||
541 | if (destlen > PAGE_CACHE_SIZE) | ||
542 | return -ENOMEM; | ||
543 | |||
544 | workspace = find_zlib_workspace(); | ||
545 | if (IS_ERR(workspace)) | ||
546 | return -ENOMEM; | ||
547 | |||
548 | workspace->inf_strm.next_in = data_in; | 319 | workspace->inf_strm.next_in = data_in; |
549 | workspace->inf_strm.avail_in = srclen; | 320 | workspace->inf_strm.avail_in = srclen; |
550 | workspace->inf_strm.total_in = 0; | 321 | workspace->inf_strm.total_in = 0; |
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
565 | 336 | ||
566 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 337 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
567 | printk(KERN_WARNING "inflateInit failed\n"); | 338 | printk(KERN_WARNING "inflateInit failed\n"); |
568 | ret = -1; | 339 | return -1; |
569 | goto out; | ||
570 | } | 340 | } |
571 | 341 | ||
572 | while (bytes_left > 0) { | 342 | while (bytes_left > 0) { |
@@ -616,12 +386,13 @@ next: | |||
616 | ret = 0; | 386 | ret = 0; |
617 | 387 | ||
618 | zlib_inflateEnd(&workspace->inf_strm); | 388 | zlib_inflateEnd(&workspace->inf_strm); |
619 | out: | ||
620 | free_workspace(workspace); | ||
621 | return ret; | 389 | return ret; |
622 | } | 390 | } |
623 | 391 | ||
624 | void btrfs_zlib_exit(void) | 392 | struct btrfs_compress_op btrfs_zlib_compress = { |
625 | { | 393 | .alloc_workspace = zlib_alloc_workspace, |
626 | free_workspaces(); | 394 | .free_workspace = zlib_free_workspace, |
627 | } | 395 | .compress_pages = zlib_compress_pages, |
396 | .decompress_biovec = zlib_decompress_biovec, | ||
397 | .decompress = zlib_decompress, | ||
398 | }; | ||