diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2011-03-19 02:38:50 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2011-03-19 02:38:50 -0400 |
commit | 97eb3f24352ec6632c2127b35d8087d2a809a9b9 (patch) | |
tree | 722948059bbd325bbca232269490124231df80d4 /fs/btrfs | |
parent | 439581ec07fa9cf3f519dd461a2cf41cfd3adcb4 (diff) | |
parent | def179c271ac9b5020deca798470521f14d11edd (diff) |
Merge branch 'next' into for-linus
Diffstat (limited to 'fs/btrfs')
33 files changed, 3386 insertions, 934 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -4,6 +4,8 @@ config BTRFS_FS | |||
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select ZLIB_INFLATE | 5 | select ZLIB_INFLATE |
6 | select ZLIB_DEFLATE | 6 | select ZLIB_DEFLATE |
7 | select LZO_COMPRESS | ||
8 | select LZO_DECOMPRESS | ||
7 | help | 9 | help |
8 | Btrfs is a new filesystem with extents, writable snapshotting, | 10 | Btrfs is a new filesystem with extents, writable snapshotting, |
9 | support for multiple devices and many more features. | 11 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o | 10 | compression.o delayed-ref.o relocation.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2222d161c7b6..15b5ca2a2606 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
60 | size = __btrfs_getxattr(inode, name, value, size); | 60 | size = __btrfs_getxattr(inode, name, value, size); |
61 | if (size > 0) { | 61 | if (size > 0) { |
62 | acl = posix_acl_from_xattr(value, size); | 62 | acl = posix_acl_from_xattr(value, size); |
63 | if (IS_ERR(acl)) | 63 | if (IS_ERR(acl)) { |
64 | kfree(value); | ||
64 | return acl; | 65 | return acl; |
66 | } | ||
65 | set_cached_acl(inode, type, acl); | 67 | set_cached_acl(inode, type, acl); |
66 | } | 68 | } |
67 | kfree(value); | 69 | kfree(value); |
@@ -185,18 +187,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
185 | return ret; | 187 | return ret; |
186 | } | 188 | } |
187 | 189 | ||
188 | int btrfs_check_acl(struct inode *inode, int mask) | 190 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) |
189 | { | 191 | { |
190 | struct posix_acl *acl; | ||
191 | int error = -EAGAIN; | 192 | int error = -EAGAIN; |
192 | 193 | ||
193 | acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); | 194 | if (flags & IPERM_FLAG_RCU) { |
195 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) | ||
196 | error = -ECHILD; | ||
194 | 197 | ||
195 | if (IS_ERR(acl)) | 198 | } else { |
196 | return PTR_ERR(acl); | 199 | struct posix_acl *acl; |
197 | if (acl) { | 200 | acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); |
198 | error = posix_acl_permission(inode, acl, mask); | 201 | if (IS_ERR(acl)) |
199 | posix_acl_release(acl); | 202 | return PTR_ERR(acl); |
203 | if (acl) { | ||
204 | error = posix_acl_permission(inode, acl, mask); | ||
205 | posix_acl_release(acl); | ||
206 | } | ||
200 | } | 207 | } |
201 | 208 | ||
202 | return error; | 209 | return error; |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -157,7 +157,7 @@ struct btrfs_inode { | |||
157 | /* | 157 | /* |
158 | * always compress this one file | 158 | * always compress this one file |
159 | */ | 159 | */ |
160 | unsigned force_compress:1; | 160 | unsigned force_compress:4; |
161 | 161 | ||
162 | struct inode vfs_inode; | 162 | struct inode vfs_inode; |
163 | }; | 163 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7845d1f7d1d9..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -62,6 +62,9 @@ struct compressed_bio { | |||
62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
64 | 64 | ||
65 | /* the compression algorithm for this bio */ | ||
66 | int compress_type; | ||
67 | |||
65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
67 | 70 | ||
@@ -91,23 +94,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, | |||
91 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | 94 | static struct bio *compressed_bio_alloc(struct block_device *bdev, |
92 | u64 first_byte, gfp_t gfp_flags) | 95 | u64 first_byte, gfp_t gfp_flags) |
93 | { | 96 | { |
94 | struct bio *bio; | ||
95 | int nr_vecs; | 97 | int nr_vecs; |
96 | 98 | ||
97 | nr_vecs = bio_get_nr_vecs(bdev); | 99 | nr_vecs = bio_get_nr_vecs(bdev); |
98 | bio = bio_alloc(gfp_flags, nr_vecs); | 100 | return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); |
99 | |||
100 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
101 | while (!bio && (nr_vecs /= 2)) | ||
102 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
103 | } | ||
104 | |||
105 | if (bio) { | ||
106 | bio->bi_size = 0; | ||
107 | bio->bi_bdev = bdev; | ||
108 | bio->bi_sector = first_byte >> 9; | ||
109 | } | ||
110 | return bio; | ||
111 | } | 101 | } |
112 | 102 | ||
113 | static int check_compressed_csum(struct inode *inode, | 103 | static int check_compressed_csum(struct inode *inode, |
@@ -186,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
186 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
187 | * the decompression. | 177 | * the decompression. |
188 | */ | 178 | */ |
189 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
190 | cb->start, | 180 | cb->compressed_pages, |
191 | cb->orig_bio->bi_io_vec, | 181 | cb->start, |
192 | cb->orig_bio->bi_vcnt, | 182 | cb->orig_bio->bi_io_vec, |
193 | cb->compressed_len); | 183 | cb->orig_bio->bi_vcnt, |
184 | cb->compressed_len); | ||
194 | csum_failed: | 185 | csum_failed: |
195 | if (ret) | 186 | if (ret) |
196 | cb->errors = 1; | 187 | cb->errors = 1; |
@@ -601,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
601 | 592 | ||
602 | cb->len = uncompressed_len; | 593 | cb->len = uncompressed_len; |
603 | cb->compressed_len = compressed_len; | 594 | cb->compressed_len = compressed_len; |
595 | cb->compress_type = extent_compress_type(bio_flags); | ||
604 | cb->orig_bio = bio; | 596 | cb->orig_bio = bio; |
605 | 597 | ||
606 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 598 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
@@ -690,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
690 | bio_put(comp_bio); | 682 | bio_put(comp_bio); |
691 | return 0; | 683 | return 0; |
692 | } | 684 | } |
685 | |||
686 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
687 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
688 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
689 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
690 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
691 | |||
692 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
693 | &btrfs_zlib_compress, | ||
694 | &btrfs_lzo_compress, | ||
695 | }; | ||
696 | |||
697 | int __init btrfs_init_compress(void) | ||
698 | { | ||
699 | int i; | ||
700 | |||
701 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
702 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
703 | spin_lock_init(&comp_workspace_lock[i]); | ||
704 | atomic_set(&comp_alloc_workspace[i], 0); | ||
705 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
706 | } | ||
707 | return 0; | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * this finds an available workspace or allocates a new one | ||
712 | * ERR_PTR is returned if things go bad. | ||
713 | */ | ||
714 | static struct list_head *find_workspace(int type) | ||
715 | { | ||
716 | struct list_head *workspace; | ||
717 | int cpus = num_online_cpus(); | ||
718 | int idx = type - 1; | ||
719 | |||
720 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
721 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
722 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
723 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
724 | int *num_workspace = &comp_num_workspace[idx]; | ||
725 | again: | ||
726 | spin_lock(workspace_lock); | ||
727 | if (!list_empty(idle_workspace)) { | ||
728 | workspace = idle_workspace->next; | ||
729 | list_del(workspace); | ||
730 | (*num_workspace)--; | ||
731 | spin_unlock(workspace_lock); | ||
732 | return workspace; | ||
733 | |||
734 | } | ||
735 | if (atomic_read(alloc_workspace) > cpus) { | ||
736 | DEFINE_WAIT(wait); | ||
737 | |||
738 | spin_unlock(workspace_lock); | ||
739 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
740 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
741 | schedule(); | ||
742 | finish_wait(workspace_wait, &wait); | ||
743 | goto again; | ||
744 | } | ||
745 | atomic_inc(alloc_workspace); | ||
746 | spin_unlock(workspace_lock); | ||
747 | |||
748 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
749 | if (IS_ERR(workspace)) { | ||
750 | atomic_dec(alloc_workspace); | ||
751 | wake_up(workspace_wait); | ||
752 | } | ||
753 | return workspace; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * put a workspace struct back on the list or free it if we have enough | ||
758 | * idle ones sitting around | ||
759 | */ | ||
760 | static void free_workspace(int type, struct list_head *workspace) | ||
761 | { | ||
762 | int idx = type - 1; | ||
763 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
764 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
765 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
766 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
767 | int *num_workspace = &comp_num_workspace[idx]; | ||
768 | |||
769 | spin_lock(workspace_lock); | ||
770 | if (*num_workspace < num_online_cpus()) { | ||
771 | list_add_tail(workspace, idle_workspace); | ||
772 | (*num_workspace)++; | ||
773 | spin_unlock(workspace_lock); | ||
774 | goto wake; | ||
775 | } | ||
776 | spin_unlock(workspace_lock); | ||
777 | |||
778 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
779 | atomic_dec(alloc_workspace); | ||
780 | wake: | ||
781 | if (waitqueue_active(workspace_wait)) | ||
782 | wake_up(workspace_wait); | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * cleanup function for module exit | ||
787 | */ | ||
788 | static void free_workspaces(void) | ||
789 | { | ||
790 | struct list_head *workspace; | ||
791 | int i; | ||
792 | |||
793 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
794 | while (!list_empty(&comp_idle_workspace[i])) { | ||
795 | workspace = comp_idle_workspace[i].next; | ||
796 | list_del(workspace); | ||
797 | btrfs_compress_op[i]->free_workspace(workspace); | ||
798 | atomic_dec(&comp_alloc_workspace[i]); | ||
799 | } | ||
800 | } | ||
801 | } | ||
802 | |||
803 | /* | ||
804 | * given an address space and start/len, compress the bytes. | ||
805 | * | ||
806 | * pages are allocated to hold the compressed result and stored | ||
807 | * in 'pages' | ||
808 | * | ||
809 | * out_pages is used to return the number of pages allocated. There | ||
810 | * may be pages allocated even if we return an error | ||
811 | * | ||
812 | * total_in is used to return the number of bytes actually read. It | ||
813 | * may be smaller than len if we had to exit early because we | |
814 | * ran out of room in the pages array or because we cross the | ||
815 | * max_out threshold. | ||
816 | * | ||
817 | * total_out is used to return the total number of compressed bytes | ||
818 | * | ||
819 | * max_out tells us the max number of bytes that we're allowed to | ||
820 | * stuff into pages | ||
821 | */ | ||
822 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
823 | u64 start, unsigned long len, | ||
824 | struct page **pages, | ||
825 | unsigned long nr_dest_pages, | ||
826 | unsigned long *out_pages, | ||
827 | unsigned long *total_in, | ||
828 | unsigned long *total_out, | ||
829 | unsigned long max_out) | ||
830 | { | ||
831 | struct list_head *workspace; | ||
832 | int ret; | ||
833 | |||
834 | workspace = find_workspace(type); | ||
835 | if (IS_ERR(workspace)) | ||
836 | return -1; | ||
837 | |||
838 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
839 | start, len, pages, | ||
840 | nr_dest_pages, out_pages, | ||
841 | total_in, total_out, | ||
842 | max_out); | ||
843 | free_workspace(type, workspace); | ||
844 | return ret; | ||
845 | } | ||
846 | |||
847 | /* | ||
848 | * pages_in is an array of pages with compressed data. | ||
849 | * | ||
850 | * disk_start is the starting logical offset of this array in the file | ||
851 | * | ||
852 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
853 | * | ||
854 | * vcnt is the count of pages in the biovec | ||
855 | * | ||
856 | * srclen is the number of bytes in pages_in | ||
857 | * | ||
858 | * The basic idea is that we have a bio that was created by readpages. | ||
859 | * The pages in the bio are for the uncompressed data, and they may not | ||
860 | * be contiguous. They all correspond to the range of bytes covered by | ||
861 | * the compressed extent. | ||
862 | */ | ||
863 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
864 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
865 | { | ||
866 | struct list_head *workspace; | ||
867 | int ret; | ||
868 | |||
869 | workspace = find_workspace(type); | ||
870 | if (IS_ERR(workspace)) | ||
871 | return -ENOMEM; | ||
872 | |||
873 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
874 | disk_start, | ||
875 | bvec, vcnt, srclen); | ||
876 | free_workspace(type, workspace); | ||
877 | return ret; | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * a less complex decompression routine. Our compressed data fits in a | ||
882 | * single page, and we want to read a single page out of it. | ||
883 | * start_byte tells us the offset into the compressed data we're interested in | ||
884 | */ | ||
885 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
886 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
887 | { | ||
888 | struct list_head *workspace; | ||
889 | int ret; | ||
890 | |||
891 | workspace = find_workspace(type); | ||
892 | if (IS_ERR(workspace)) | ||
893 | return -ENOMEM; | ||
894 | |||
895 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
896 | dest_page, start_byte, | ||
897 | srclen, destlen); | ||
898 | |||
899 | free_workspace(type, workspace); | ||
900 | return ret; | ||
901 | } | ||
902 | |||
903 | void __exit btrfs_exit_compress(void) | ||
904 | { | ||
905 | free_workspaces(); | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Copy uncompressed data from working buffer to pages. | ||
910 | * | ||
911 | * buf_start is the byte offset of the start of our workspace buffer. | |
912 | * | |
913 | * total_out is the byte offset of the end of the buffer | |
914 | */ | ||
915 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
916 | unsigned long total_out, u64 disk_start, | ||
917 | struct bio_vec *bvec, int vcnt, | ||
918 | unsigned long *page_index, | ||
919 | unsigned long *pg_offset) | ||
920 | { | ||
921 | unsigned long buf_offset; | ||
922 | unsigned long current_buf_start; | ||
923 | unsigned long start_byte; | ||
924 | unsigned long working_bytes = total_out - buf_start; | ||
925 | unsigned long bytes; | ||
926 | char *kaddr; | ||
927 | struct page *page_out = bvec[*page_index].bv_page; | ||
928 | |||
929 | /* | ||
930 | * start byte is the first byte of the page we're currently | ||
931 | * copying into relative to the start of the compressed data. | ||
932 | */ | ||
933 | start_byte = page_offset(page_out) - disk_start; | ||
934 | |||
935 | /* we haven't yet hit data corresponding to this page */ | ||
936 | if (total_out <= start_byte) | ||
937 | return 1; | ||
938 | |||
939 | /* | ||
940 | * the start of the data we care about is offset into | ||
941 | * the middle of our working buffer | ||
942 | */ | ||
943 | if (total_out > start_byte && buf_start < start_byte) { | ||
944 | buf_offset = start_byte - buf_start; | ||
945 | working_bytes -= buf_offset; | ||
946 | } else { | ||
947 | buf_offset = 0; | ||
948 | } | ||
949 | current_buf_start = buf_start; | ||
950 | |||
951 | /* copy bytes from the working buffer into the pages */ | ||
952 | while (working_bytes > 0) { | ||
953 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
954 | PAGE_CACHE_SIZE - buf_offset); | ||
955 | bytes = min(bytes, working_bytes); | ||
956 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
957 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
958 | kunmap_atomic(kaddr, KM_USER0); | ||
959 | flush_dcache_page(page_out); | ||
960 | |||
961 | *pg_offset += bytes; | ||
962 | buf_offset += bytes; | ||
963 | working_bytes -= bytes; | ||
964 | current_buf_start += bytes; | ||
965 | |||
966 | /* check if we need to pick another page */ | ||
967 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
968 | (*page_index)++; | ||
969 | if (*page_index >= vcnt) | ||
970 | return 0; | ||
971 | |||
972 | page_out = bvec[*page_index].bv_page; | ||
973 | *pg_offset = 0; | ||
974 | start_byte = page_offset(page_out) - disk_start; | ||
975 | |||
976 | /* | ||
977 | * make sure our new page is covered by this | ||
978 | * working buffer | ||
979 | */ | ||
980 | if (total_out <= start_byte) | ||
981 | return 1; | ||
982 | |||
983 | /* | ||
984 | * the next page in the biovec might not be adjacent | ||
985 | * to the last page, but it might still be found | ||
986 | * inside this working buffer. bump our offset pointer | ||
987 | */ | ||
988 | if (total_out > start_byte && | ||
989 | current_buf_start < start_byte) { | ||
990 | buf_offset = start_byte - buf_start; | ||
991 | working_bytes = total_out - start_byte; | ||
992 | current_buf_start = buf_start + buf_offset; | ||
993 | } | ||
994 | } | ||
995 | } | ||
996 | |||
997 | return 1; | ||
998 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -19,24 +19,27 @@ | |||
19 | #ifndef __BTRFS_COMPRESSION_ | 19 | #ifndef __BTRFS_COMPRESSION_ |
20 | #define __BTRFS_COMPRESSION_ | 20 | #define __BTRFS_COMPRESSION_ |
21 | 21 | ||
22 | int btrfs_zlib_decompress(unsigned char *data_in, | 22 | int btrfs_init_compress(void); |
23 | struct page *dest_page, | 23 | void btrfs_exit_compress(void); |
24 | unsigned long start_byte, | 24 | |
25 | size_t srclen, size_t destlen); | 25 | int btrfs_compress_pages(int type, struct address_space *mapping, |
26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | 26 | u64 start, unsigned long len, |
27 | u64 start, unsigned long len, | 27 | struct page **pages, |
28 | struct page **pages, | 28 | unsigned long nr_dest_pages, |
29 | unsigned long nr_dest_pages, | 29 | unsigned long *out_pages, |
30 | unsigned long *out_pages, | 30 | unsigned long *total_in, |
31 | unsigned long *total_in, | 31 | unsigned long *total_out, |
32 | unsigned long *total_out, | 32 | unsigned long max_out); |
33 | unsigned long max_out); | 33 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | 34 | struct bio_vec *bvec, int vcnt, size_t srclen); |
35 | u64 disk_start, | 35 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
36 | struct bio_vec *bvec, | 36 | unsigned long start_byte, size_t srclen, size_t destlen); |
37 | int vcnt, | 37 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, |
38 | size_t srclen); | 38 | unsigned long total_out, u64 disk_start, |
39 | void btrfs_zlib_exit(void); | 39 | struct bio_vec *bvec, int vcnt, |
40 | unsigned long *page_index, | ||
41 | unsigned long *pg_offset); | ||
42 | |||
40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | 43 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
41 | unsigned long len, u64 disk_start, | 44 | unsigned long len, u64 disk_start, |
42 | unsigned long compressed_len, | 45 | unsigned long compressed_len, |
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
44 | unsigned long nr_pages); | 47 | unsigned long nr_pages); |
45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 48 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
46 | int mirror_num, unsigned long bio_flags); | 49 | int mirror_num, unsigned long bio_flags); |
50 | |||
51 | struct btrfs_compress_op { | ||
52 | struct list_head *(*alloc_workspace)(void); | ||
53 | |||
54 | void (*free_workspace)(struct list_head *workspace); | ||
55 | |||
56 | int (*compress_pages)(struct list_head *workspace, | ||
57 | struct address_space *mapping, | ||
58 | u64 start, unsigned long len, | ||
59 | struct page **pages, | ||
60 | unsigned long nr_dest_pages, | ||
61 | unsigned long *out_pages, | ||
62 | unsigned long *total_in, | ||
63 | unsigned long *total_out, | ||
64 | unsigned long max_out); | ||
65 | |||
66 | int (*decompress_biovec)(struct list_head *workspace, | ||
67 | struct page **pages_in, | ||
68 | u64 disk_start, | ||
69 | struct bio_vec *bvec, | ||
70 | int vcnt, | ||
71 | size_t srclen); | ||
72 | |||
73 | int (*decompress)(struct list_head *workspace, | ||
74 | unsigned char *data_in, | ||
75 | struct page *dest_page, | ||
76 | unsigned long start_byte, | ||
77 | size_t srclen, size_t destlen); | ||
78 | }; | ||
79 | |||
80 | extern struct btrfs_compress_op btrfs_zlib_compress; | ||
81 | extern struct btrfs_compress_op btrfs_lzo_compress; | ||
82 | |||
47 | #endif | 83 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
105 | /* this also releases the path */ | 105 | /* this also releases the path */ |
106 | void btrfs_free_path(struct btrfs_path *p) | 106 | void btrfs_free_path(struct btrfs_path *p) |
107 | { | 107 | { |
108 | if (!p) | ||
109 | return; | ||
108 | btrfs_release_path(NULL, p); | 110 | btrfs_release_path(NULL, p); |
109 | kmem_cache_free(btrfs_path_cachep, p); | 111 | kmem_cache_free(btrfs_path_cachep, p); |
110 | } | 112 | } |
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2514 | btrfs_assert_tree_locked(path->nodes[1]); | 2516 | btrfs_assert_tree_locked(path->nodes[1]); |
2515 | 2517 | ||
2516 | right = read_node_slot(root, upper, slot + 1); | 2518 | right = read_node_slot(root, upper, slot + 1); |
2519 | if (right == NULL) | ||
2520 | return 1; | ||
2521 | |||
2517 | btrfs_tree_lock(right); | 2522 | btrfs_tree_lock(right); |
2518 | btrfs_set_lock_blocking(right); | 2523 | btrfs_set_lock_blocking(right); |
2519 | 2524 | ||
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2764 | btrfs_assert_tree_locked(path->nodes[1]); | 2769 | btrfs_assert_tree_locked(path->nodes[1]); |
2765 | 2770 | ||
2766 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2771 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2772 | if (left == NULL) | ||
2773 | return 1; | ||
2774 | |||
2767 | btrfs_tree_lock(left); | 2775 | btrfs_tree_lock(left); |
2768 | btrfs_set_lock_blocking(left); | 2776 | btrfs_set_lock_blocking(left); |
2769 | 2777 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8db9234f6b41..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/kobject.h> | ||
30 | #include <asm/kmap_types.h> | 31 | #include <asm/kmap_types.h> |
31 | #include "extent_io.h" | 32 | #include "extent_io.h" |
32 | #include "extent_map.h" | 33 | #include "extent_map.h" |
@@ -294,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
294 | #define BTRFS_FSID_SIZE 16 | 295 | #define BTRFS_FSID_SIZE 16 |
295 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
296 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
298 | |||
299 | /* | ||
300 | * File system states | ||
301 | */ | ||
302 | |||
303 | /* Errors detected */ | ||
304 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
305 | |||
297 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 306 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
298 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 307 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
299 | 308 | ||
@@ -398,13 +407,15 @@ struct btrfs_super_block { | |||
398 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 407 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
399 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | 408 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
400 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | 409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
410 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
401 | 411 | ||
402 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 412 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
403 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 413 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
404 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 414 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
405 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 415 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
406 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 416 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
407 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | 417 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
418 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
408 | 419 | ||
409 | /* | 420 | /* |
410 | * A leaf is full of items. offset and size tell us where to find | 421 | * A leaf is full of items. offset and size tell us where to find |
@@ -551,9 +562,11 @@ struct btrfs_timespec { | |||
551 | } __attribute__ ((__packed__)); | 562 | } __attribute__ ((__packed__)); |
552 | 563 | ||
553 | enum btrfs_compression_type { | 564 | enum btrfs_compression_type { |
554 | BTRFS_COMPRESS_NONE = 0, | 565 | BTRFS_COMPRESS_NONE = 0, |
555 | BTRFS_COMPRESS_ZLIB = 1, | 566 | BTRFS_COMPRESS_ZLIB = 1, |
556 | BTRFS_COMPRESS_LAST = 2, | 567 | BTRFS_COMPRESS_LZO = 2, |
568 | BTRFS_COMPRESS_TYPES = 2, | ||
569 | BTRFS_COMPRESS_LAST = 3, | ||
557 | }; | 570 | }; |
558 | 571 | ||
559 | struct btrfs_inode_item { | 572 | struct btrfs_inode_item { |
@@ -597,6 +610,8 @@ struct btrfs_dir_item { | |||
597 | u8 type; | 610 | u8 type; |
598 | } __attribute__ ((__packed__)); | 611 | } __attribute__ ((__packed__)); |
599 | 612 | ||
613 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
614 | |||
600 | struct btrfs_root_item { | 615 | struct btrfs_root_item { |
601 | struct btrfs_inode_item inode; | 616 | struct btrfs_inode_item inode; |
602 | __le64 generation; | 617 | __le64 generation; |
@@ -808,9 +823,9 @@ struct btrfs_block_group_cache { | |||
808 | int extents_thresh; | 823 | int extents_thresh; |
809 | int free_extents; | 824 | int free_extents; |
810 | int total_bitmaps; | 825 | int total_bitmaps; |
811 | int ro:1; | 826 | unsigned int ro:1; |
812 | int dirty:1; | 827 | unsigned int dirty:1; |
813 | int iref:1; | 828 | unsigned int iref:1; |
814 | 829 | ||
815 | int disk_cache_state; | 830 | int disk_cache_state; |
816 | 831 | ||
@@ -895,7 +910,8 @@ struct btrfs_fs_info { | |||
895 | */ | 910 | */ |
896 | u64 last_trans_log_full_commit; | 911 | u64 last_trans_log_full_commit; |
897 | u64 open_ioctl_trans; | 912 | u64 open_ioctl_trans; |
898 | unsigned long mount_opt; | 913 | unsigned long mount_opt:20; |
914 | unsigned long compress_type:4; | ||
899 | u64 max_inline; | 915 | u64 max_inline; |
900 | u64 alloc_start; | 916 | u64 alloc_start; |
901 | struct btrfs_transaction *running_transaction; | 917 | struct btrfs_transaction *running_transaction; |
@@ -1050,6 +1066,9 @@ struct btrfs_fs_info { | |||
1050 | unsigned metadata_ratio; | 1066 | unsigned metadata_ratio; |
1051 | 1067 | ||
1052 | void *bdev_holder; | 1068 | void *bdev_holder; |
1069 | |||
1070 | /* filesystem state */ | ||
1071 | u64 fs_state; | ||
1053 | }; | 1072 | }; |
1054 | 1073 | ||
1055 | /* | 1074 | /* |
@@ -1893,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1893 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1912 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1894 | last_snapshot, 64); | 1913 | last_snapshot, 64); |
1895 | 1914 | ||
1915 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1916 | { | ||
1917 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1918 | } | ||
1919 | |||
1896 | /* struct btrfs_super_block */ | 1920 | /* struct btrfs_super_block */ |
1897 | 1921 | ||
1898 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1922 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -2145,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2145 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2169 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2146 | struct btrfs_root *root, u64 group_start); | 2170 | struct btrfs_root *root, u64 group_start); |
2147 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2171 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2172 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2148 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2173 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2149 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2174 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2150 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2175 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
@@ -2188,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
2188 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2213 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
2189 | struct btrfs_block_group_cache *cache); | 2214 | struct btrfs_block_group_cache *cache); |
2190 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | 2215 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); |
2216 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2217 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2218 | u64 start, u64 end); | ||
2219 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2220 | u64 num_bytes); | ||
2221 | |||
2191 | /* ctree.c */ | 2222 | /* ctree.c */ |
2192 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2223 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2193 | int level, int *slot); | 2224 | int level, int *slot); |
@@ -2541,10 +2572,18 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
2541 | /* super.c */ | 2572 | /* super.c */ |
2542 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2573 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2543 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2574 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2575 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2576 | unsigned int line, int errno); | ||
2577 | |||
2578 | #define btrfs_std_error(fs_info, errno) \ | ||
2579 | do { \ | ||
2580 | if ((errno)) \ | ||
2581 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2582 | } while (0) | ||
2544 | 2583 | ||
2545 | /* acl.c */ | 2584 | /* acl.c */ |
2546 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2585 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2547 | int btrfs_check_acl(struct inode *inode, int mask); | 2586 | int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); |
2548 | #else | 2587 | #else |
2549 | #define btrfs_check_acl NULL | 2588 | #define btrfs_check_acl NULL |
2550 | #endif | 2589 | #endif |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fb827d0d7181..b531c36455d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | ||
31 | #include "compat.h" | 32 | #include "compat.h" |
32 | #include "ctree.h" | 33 | #include "ctree.h" |
33 | #include "disk-io.h" | 34 | #include "disk-io.h" |
@@ -43,6 +44,20 @@ | |||
43 | static struct extent_io_ops btree_extent_io_ops; | 44 | static struct extent_io_ops btree_extent_io_ops; |
44 | static void end_workqueue_fn(struct btrfs_work *work); | 45 | static void end_workqueue_fn(struct btrfs_work *work); |
45 | static void free_fs_root(struct btrfs_root *root); | 46 | static void free_fs_root(struct btrfs_root *root); |
47 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
48 | int read_only); | ||
49 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
50 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
52 | struct btrfs_root *root); | ||
53 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
54 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
55 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
56 | struct extent_io_tree *dirty_pages, | ||
57 | int mark); | ||
58 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
59 | struct extent_io_tree *pinned_extents); | ||
60 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
46 | 61 | ||
47 | /* | 62 | /* |
48 | * end_io_wq structs are used to do processing in task context when an IO is | 63 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -352,9 +367,15 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
352 | WARN_ON(len == 0); | 367 | WARN_ON(len == 0); |
353 | 368 | ||
354 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 369 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
370 | if (eb == NULL) { | ||
371 | WARN_ON(1); | ||
372 | goto out; | ||
373 | } | ||
355 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 374 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
356 | btrfs_header_generation(eb)); | 375 | btrfs_header_generation(eb)); |
357 | BUG_ON(ret); | 376 | BUG_ON(ret); |
377 | WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); | ||
378 | |||
358 | found_start = btrfs_header_bytenr(eb); | 379 | found_start = btrfs_header_bytenr(eb); |
359 | if (found_start != start) { | 380 | if (found_start != start) { |
360 | WARN_ON(1); | 381 | WARN_ON(1); |
@@ -424,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
424 | WARN_ON(len == 0); | 445 | WARN_ON(len == 0); |
425 | 446 | ||
426 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 447 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
448 | if (eb == NULL) { | ||
449 | ret = -EIO; | ||
450 | goto out; | ||
451 | } | ||
427 | 452 | ||
428 | found_start = btrfs_header_bytenr(eb); | 453 | found_start = btrfs_header_bytenr(eb); |
429 | if (found_start != start) { | 454 | if (found_start != start) { |
@@ -693,6 +718,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
693 | __btree_submit_bio_done); | 718 | __btree_submit_bio_done); |
694 | } | 719 | } |
695 | 720 | ||
721 | #ifdef CONFIG_MIGRATION | ||
722 | static int btree_migratepage(struct address_space *mapping, | ||
723 | struct page *newpage, struct page *page) | ||
724 | { | ||
725 | /* | ||
726 | * we can't safely write a btree page from here, | ||
727 | * we haven't done the locking hook | ||
728 | */ | ||
729 | if (PageDirty(page)) | ||
730 | return -EAGAIN; | ||
731 | /* | ||
732 | * Buffers may be managed in a filesystem specific way. | ||
733 | * We must have no buffers or drop them. | ||
734 | */ | ||
735 | if (page_has_private(page) && | ||
736 | !try_to_release_page(page, GFP_KERNEL)) | ||
737 | return -EAGAIN; | ||
738 | return migrate_page(mapping, newpage, page); | ||
739 | } | ||
740 | #endif | ||
741 | |||
696 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 742 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
697 | { | 743 | { |
698 | struct extent_io_tree *tree; | 744 | struct extent_io_tree *tree; |
@@ -707,8 +753,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) | |||
707 | } | 753 | } |
708 | 754 | ||
709 | redirty_page_for_writepage(wbc, page); | 755 | redirty_page_for_writepage(wbc, page); |
710 | eb = btrfs_find_tree_block(root, page_offset(page), | 756 | eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); |
711 | PAGE_CACHE_SIZE); | ||
712 | WARN_ON(!eb); | 757 | WARN_ON(!eb); |
713 | 758 | ||
714 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | 759 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); |
@@ -799,6 +844,9 @@ static const struct address_space_operations btree_aops = { | |||
799 | .releasepage = btree_releasepage, | 844 | .releasepage = btree_releasepage, |
800 | .invalidatepage = btree_invalidatepage, | 845 | .invalidatepage = btree_invalidatepage, |
801 | .sync_page = block_sync_page, | 846 | .sync_page = block_sync_page, |
847 | #ifdef CONFIG_MIGRATION | ||
848 | .migratepage = btree_migratepage, | ||
849 | #endif | ||
802 | }; | 850 | }; |
803 | 851 | ||
804 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 852 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -981,7 +1029,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
981 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1029 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
982 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1030 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
983 | blocksize, generation); | 1031 | blocksize, generation); |
984 | BUG_ON(!root->node); | 1032 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
1033 | free_extent_buffer(root->node); | ||
1034 | return -EIO; | ||
1035 | } | ||
985 | root->commit_root = btrfs_root_node(root); | 1036 | root->commit_root = btrfs_root_node(root); |
986 | return 0; | 1037 | return 0; |
987 | } | 1038 | } |
@@ -1116,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1116 | } | 1167 | } |
1117 | btrfs_free_path(path); | 1168 | btrfs_free_path(path); |
1118 | if (ret) { | 1169 | if (ret) { |
1170 | kfree(root); | ||
1119 | if (ret > 0) | 1171 | if (ret > 0) |
1120 | ret = -ENOENT; | 1172 | ret = -ENOENT; |
1121 | return ERR_PTR(ret); | 1173 | return ERR_PTR(ret); |
@@ -1538,10 +1590,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1538 | GFP_NOFS); | 1590 | GFP_NOFS); |
1539 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | 1591 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), |
1540 | GFP_NOFS); | 1592 | GFP_NOFS); |
1541 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | 1593 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1542 | GFP_NOFS); | 1594 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1543 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
1544 | GFP_NOFS); | ||
1545 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1595 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), |
1546 | GFP_NOFS); | 1596 | GFP_NOFS); |
1547 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1597 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), |
@@ -1686,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1686 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1736 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1687 | 1737 | ||
1688 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1738 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1689 | if (!bh) | 1739 | if (!bh) { |
1740 | err = -EINVAL; | ||
1690 | goto fail_iput; | 1741 | goto fail_iput; |
1742 | } | ||
1691 | 1743 | ||
1692 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1744 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1693 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1745 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1700,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1700 | if (!btrfs_super_root(disk_super)) | 1752 | if (!btrfs_super_root(disk_super)) |
1701 | goto fail_iput; | 1753 | goto fail_iput; |
1702 | 1754 | ||
1755 | /* check FS state, whether FS is broken. */ | ||
1756 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1757 | |||
1758 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1759 | |||
1703 | ret = btrfs_parse_options(tree_root, options); | 1760 | ret = btrfs_parse_options(tree_root, options); |
1704 | if (ret) { | 1761 | if (ret) { |
1705 | err = ret; | 1762 | err = ret; |
@@ -1717,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1717 | } | 1774 | } |
1718 | 1775 | ||
1719 | features = btrfs_super_incompat_flags(disk_super); | 1776 | features = btrfs_super_incompat_flags(disk_super); |
1720 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1777 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1721 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1778 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1722 | btrfs_set_super_incompat_flags(disk_super, features); | 1779 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1723 | } | 1780 | btrfs_set_super_incompat_flags(disk_super, features); |
1724 | 1781 | ||
1725 | features = btrfs_super_compat_ro_flags(disk_super) & | 1782 | features = btrfs_super_compat_ro_flags(disk_super) & |
1726 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1783 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1930,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1930 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1987 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1931 | } | 1988 | } |
1932 | 1989 | ||
1933 | if (btrfs_super_log_root(disk_super) != 0) { | 1990 | /* do not make disk changes in broken FS */ |
1991 | if (btrfs_super_log_root(disk_super) != 0 && | ||
1992 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1934 | u64 bytenr = btrfs_super_log_root(disk_super); | 1993 | u64 bytenr = btrfs_super_log_root(disk_super); |
1935 | 1994 | ||
1936 | if (fs_devices->rw_devices == 0) { | 1995 | if (fs_devices->rw_devices == 0) { |
@@ -2415,8 +2474,28 @@ int close_ctree(struct btrfs_root *root) | |||
2415 | smp_mb(); | 2474 | smp_mb(); |
2416 | 2475 | ||
2417 | btrfs_put_block_group_cache(fs_info); | 2476 | btrfs_put_block_group_cache(fs_info); |
2477 | |||
2478 | /* | ||
2479 | * There are 2 situations in which a broken btrfs flips readonly: | ||
2480 | * | ||
2481 | * 1. when btrfs flips readonly somewhere else before | ||
2482 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
2483 | * and btrfs will skip to write sb directly to keep | ||
2484 | * ERROR state on disk. | ||
2485 | * | ||
2486 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
2487 | * and in such case, btrfs cannot write sb via btrfs_commit_super, | ||
2488 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
2489 | * btrfs will cleanup all FS resources first and write sb then. | ||
2490 | */ | ||
2418 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2491 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2419 | ret = btrfs_commit_super(root); | 2492 | ret = btrfs_commit_super(root); |
2493 | if (ret) | ||
2494 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2495 | } | ||
2496 | |||
2497 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2498 | ret = btrfs_error_commit_super(root); | ||
2420 | if (ret) | 2499 | if (ret) |
2421 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2500 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2422 | } | 2501 | } |
@@ -2592,6 +2671,352 @@ out: | |||
2592 | return 0; | 2671 | return 0; |
2593 | } | 2672 | } |
2594 | 2673 | ||
2674 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2675 | int read_only) | ||
2676 | { | ||
2677 | if (read_only) | ||
2678 | return; | ||
2679 | |||
2680 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2681 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2682 | "running btrfsck is recommended\n"); | ||
2683 | } | ||
2684 | |||
2685 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2686 | { | ||
2687 | int ret; | ||
2688 | |||
2689 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2690 | btrfs_run_delayed_iputs(root); | ||
2691 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2692 | |||
2693 | down_write(&root->fs_info->cleanup_work_sem); | ||
2694 | up_write(&root->fs_info->cleanup_work_sem); | ||
2695 | |||
2696 | /* cleanup FS via transaction */ | ||
2697 | btrfs_cleanup_transaction(root); | ||
2698 | |||
2699 | ret = write_ctree_super(NULL, root, 0); | ||
2700 | |||
2701 | return ret; | ||
2702 | } | ||
2703 | |||
2704 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2705 | { | ||
2706 | struct btrfs_inode *btrfs_inode; | ||
2707 | struct list_head splice; | ||
2708 | |||
2709 | INIT_LIST_HEAD(&splice); | ||
2710 | |||
2711 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2712 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2713 | |||
2714 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2715 | while (!list_empty(&splice)) { | ||
2716 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2717 | ordered_operations); | ||
2718 | |||
2719 | list_del_init(&btrfs_inode->ordered_operations); | ||
2720 | |||
2721 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2722 | } | ||
2723 | |||
2724 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2725 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2726 | |||
2727 | return 0; | ||
2728 | } | ||
2729 | |||
2730 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2731 | { | ||
2732 | struct list_head splice; | ||
2733 | struct btrfs_ordered_extent *ordered; | ||
2734 | struct inode *inode; | ||
2735 | |||
2736 | INIT_LIST_HEAD(&splice); | ||
2737 | |||
2738 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2739 | |||
2740 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2741 | while (!list_empty(&splice)) { | ||
2742 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2743 | root_extent_list); | ||
2744 | |||
2745 | list_del_init(&ordered->root_extent_list); | ||
2746 | atomic_inc(&ordered->refs); | ||
2747 | |||
2748 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2749 | inode = igrab(ordered->inode); | ||
2750 | |||
2751 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2752 | if (inode) | ||
2753 | iput(inode); | ||
2754 | |||
2755 | atomic_set(&ordered->refs, 1); | ||
2756 | btrfs_put_ordered_extent(ordered); | ||
2757 | |||
2758 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2759 | } | ||
2760 | |||
2761 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2762 | |||
2763 | return 0; | ||
2764 | } | ||
2765 | |||
2766 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2767 | struct btrfs_root *root) | ||
2768 | { | ||
2769 | struct rb_node *node; | ||
2770 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2771 | struct btrfs_delayed_ref_node *ref; | ||
2772 | int ret = 0; | ||
2773 | |||
2774 | delayed_refs = &trans->delayed_refs; | ||
2775 | |||
2776 | spin_lock(&delayed_refs->lock); | ||
2777 | if (delayed_refs->num_entries == 0) { | ||
2778 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2779 | return ret; | ||
2780 | } | ||
2781 | |||
2782 | node = rb_first(&delayed_refs->root); | ||
2783 | while (node) { | ||
2784 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2785 | node = rb_next(node); | ||
2786 | |||
2787 | ref->in_tree = 0; | ||
2788 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2789 | delayed_refs->num_entries--; | ||
2790 | |||
2791 | atomic_set(&ref->refs, 1); | ||
2792 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2793 | struct btrfs_delayed_ref_head *head; | ||
2794 | |||
2795 | head = btrfs_delayed_node_to_head(ref); | ||
2796 | mutex_lock(&head->mutex); | ||
2797 | kfree(head->extent_op); | ||
2798 | delayed_refs->num_heads--; | ||
2799 | if (list_empty(&head->cluster)) | ||
2800 | delayed_refs->num_heads_ready--; | ||
2801 | list_del_init(&head->cluster); | ||
2802 | mutex_unlock(&head->mutex); | ||
2803 | } | ||
2804 | |||
2805 | spin_unlock(&delayed_refs->lock); | ||
2806 | btrfs_put_delayed_ref(ref); | ||
2807 | |||
2808 | cond_resched(); | ||
2809 | spin_lock(&delayed_refs->lock); | ||
2810 | } | ||
2811 | |||
2812 | spin_unlock(&delayed_refs->lock); | ||
2813 | |||
2814 | return ret; | ||
2815 | } | ||
2816 | |||
2817 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2818 | { | ||
2819 | struct btrfs_pending_snapshot *snapshot; | ||
2820 | struct list_head splice; | ||
2821 | |||
2822 | INIT_LIST_HEAD(&splice); | ||
2823 | |||
2824 | list_splice_init(&t->pending_snapshots, &splice); | ||
2825 | |||
2826 | while (!list_empty(&splice)) { | ||
2827 | snapshot = list_entry(splice.next, | ||
2828 | struct btrfs_pending_snapshot, | ||
2829 | list); | ||
2830 | |||
2831 | list_del_init(&snapshot->list); | ||
2832 | |||
2833 | kfree(snapshot); | ||
2834 | } | ||
2835 | |||
2836 | return 0; | ||
2837 | } | ||
2838 | |||
2839 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2840 | { | ||
2841 | struct btrfs_inode *btrfs_inode; | ||
2842 | struct list_head splice; | ||
2843 | |||
2844 | INIT_LIST_HEAD(&splice); | ||
2845 | |||
2846 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2847 | |||
2848 | spin_lock(&root->fs_info->delalloc_lock); | ||
2849 | |||
2850 | while (!list_empty(&splice)) { | ||
2851 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2852 | delalloc_inodes); | ||
2853 | |||
2854 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2855 | |||
2856 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2857 | } | ||
2858 | |||
2859 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2860 | |||
2861 | return 0; | ||
2862 | } | ||
2863 | |||
2864 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2865 | struct extent_io_tree *dirty_pages, | ||
2866 | int mark) | ||
2867 | { | ||
2868 | int ret; | ||
2869 | struct page *page; | ||
2870 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2871 | struct extent_buffer *eb; | ||
2872 | u64 start = 0; | ||
2873 | u64 end; | ||
2874 | u64 offset; | ||
2875 | unsigned long index; | ||
2876 | |||
2877 | while (1) { | ||
2878 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2879 | mark); | ||
2880 | if (ret) | ||
2881 | break; | ||
2882 | |||
2883 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2884 | while (start <= end) { | ||
2885 | index = start >> PAGE_CACHE_SHIFT; | ||
2886 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2887 | page = find_get_page(btree_inode->i_mapping, index); | ||
2888 | if (!page) | ||
2889 | continue; | ||
2890 | offset = page_offset(page); | ||
2891 | |||
2892 | spin_lock(&dirty_pages->buffer_lock); | ||
2893 | eb = radix_tree_lookup( | ||
2894 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2895 | offset >> PAGE_CACHE_SHIFT); | ||
2896 | spin_unlock(&dirty_pages->buffer_lock); | ||
2897 | if (eb) { | ||
2898 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2899 | &eb->bflags); | ||
2900 | atomic_set(&eb->refs, 1); | ||
2901 | } | ||
2902 | if (PageWriteback(page)) | ||
2903 | end_page_writeback(page); | ||
2904 | |||
2905 | lock_page(page); | ||
2906 | if (PageDirty(page)) { | ||
2907 | clear_page_dirty_for_io(page); | ||
2908 | spin_lock_irq(&page->mapping->tree_lock); | ||
2909 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2910 | page_index(page), | ||
2911 | PAGECACHE_TAG_DIRTY); | ||
2912 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2913 | } | ||
2914 | |||
2915 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2916 | unlock_page(page); | ||
2917 | } | ||
2918 | } | ||
2919 | |||
2920 | return ret; | ||
2921 | } | ||
2922 | |||
2923 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2924 | struct extent_io_tree *pinned_extents) | ||
2925 | { | ||
2926 | struct extent_io_tree *unpin; | ||
2927 | u64 start; | ||
2928 | u64 end; | ||
2929 | int ret; | ||
2930 | |||
2931 | unpin = pinned_extents; | ||
2932 | while (1) { | ||
2933 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2934 | EXTENT_DIRTY); | ||
2935 | if (ret) | ||
2936 | break; | ||
2937 | |||
2938 | /* opt_discard */ | ||
2939 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | ||
2940 | |||
2941 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2942 | btrfs_error_unpin_extent_range(root, start, end); | ||
2943 | cond_resched(); | ||
2944 | } | ||
2945 | |||
2946 | return 0; | ||
2947 | } | ||
2948 | |||
2949 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
2950 | { | ||
2951 | struct btrfs_transaction *t; | ||
2952 | LIST_HEAD(list); | ||
2953 | |||
2954 | WARN_ON(1); | ||
2955 | |||
2956 | mutex_lock(&root->fs_info->trans_mutex); | ||
2957 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
2958 | |||
2959 | list_splice_init(&root->fs_info->trans_list, &list); | ||
2960 | while (!list_empty(&list)) { | ||
2961 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
2962 | if (!t) | ||
2963 | break; | ||
2964 | |||
2965 | btrfs_destroy_ordered_operations(root); | ||
2966 | |||
2967 | btrfs_destroy_ordered_extents(root); | ||
2968 | |||
2969 | btrfs_destroy_delayed_refs(t, root); | ||
2970 | |||
2971 | btrfs_block_rsv_release(root, | ||
2972 | &root->fs_info->trans_block_rsv, | ||
2973 | t->dirty_pages.dirty_bytes); | ||
2974 | |||
2975 | /* FIXME: cleanup wait for commit */ | ||
2976 | t->in_commit = 1; | ||
2977 | t->blocked = 1; | ||
2978 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
2979 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
2980 | |||
2981 | t->blocked = 0; | ||
2982 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
2983 | wake_up(&root->fs_info->transaction_wait); | ||
2984 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2985 | |||
2986 | mutex_lock(&root->fs_info->trans_mutex); | ||
2987 | t->commit_done = 1; | ||
2988 | if (waitqueue_active(&t->commit_wait)) | ||
2989 | wake_up(&t->commit_wait); | ||
2990 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2991 | |||
2992 | mutex_lock(&root->fs_info->trans_mutex); | ||
2993 | |||
2994 | btrfs_destroy_pending_snapshots(t); | ||
2995 | |||
2996 | btrfs_destroy_delalloc_inodes(root); | ||
2997 | |||
2998 | spin_lock(&root->fs_info->new_trans_lock); | ||
2999 | root->fs_info->running_transaction = NULL; | ||
3000 | spin_unlock(&root->fs_info->new_trans_lock); | ||
3001 | |||
3002 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3003 | EXTENT_DIRTY); | ||
3004 | |||
3005 | btrfs_destroy_pinned_extent(root, | ||
3006 | root->fs_info->pinned_extents); | ||
3007 | |||
3008 | t->use_count = 0; | ||
3009 | list_del_init(&t->list); | ||
3010 | memset(t, 0, sizeof(*t)); | ||
3011 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3012 | } | ||
3013 | |||
3014 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3015 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3016 | |||
3017 | return 0; | ||
3018 | } | ||
3019 | |||
2595 | static struct extent_io_ops btree_extent_io_ops = { | 3020 | static struct extent_io_ops btree_extent_io_ops = { |
2596 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3021 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2597 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3022 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 951ef09b82f4..9786963b07e5 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -65,7 +65,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
65 | { | 65 | { |
66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | 66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; |
67 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
68 | struct dentry *dentry; | ||
69 | struct inode *inode; | 68 | struct inode *inode; |
70 | struct btrfs_key key; | 69 | struct btrfs_key key; |
71 | int index; | 70 | int index; |
@@ -108,10 +107,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
108 | return ERR_PTR(-ESTALE); | 107 | return ERR_PTR(-ESTALE); |
109 | } | 108 | } |
110 | 109 | ||
111 | dentry = d_obtain_alias(inode); | 110 | return d_obtain_alias(inode); |
112 | if (!IS_ERR(dentry)) | ||
113 | dentry->d_op = &btrfs_dentry_operations; | ||
114 | return dentry; | ||
115 | fail: | 111 | fail: |
116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 112 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
117 | return ERR_PTR(err); | 113 | return ERR_PTR(err); |
@@ -166,7 +162,6 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
166 | static struct dentry *btrfs_get_parent(struct dentry *child) | 162 | static struct dentry *btrfs_get_parent(struct dentry *child) |
167 | { | 163 | { |
168 | struct inode *dir = child->d_inode; | 164 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | ||
170 | struct btrfs_root *root = BTRFS_I(dir)->root; | 165 | struct btrfs_root *root = BTRFS_I(dir)->root; |
171 | struct btrfs_path *path; | 166 | struct btrfs_path *path; |
172 | struct extent_buffer *leaf; | 167 | struct extent_buffer *leaf; |
@@ -223,18 +218,91 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
223 | 218 | ||
224 | key.type = BTRFS_INODE_ITEM_KEY; | 219 | key.type = BTRFS_INODE_ITEM_KEY; |
225 | key.offset = 0; | 220 | key.offset = 0; |
226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); | 221 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); |
227 | if (!IS_ERR(dentry)) | ||
228 | dentry->d_op = &btrfs_dentry_operations; | ||
229 | return dentry; | ||
230 | fail: | 222 | fail: |
231 | btrfs_free_path(path); | 223 | btrfs_free_path(path); |
232 | return ERR_PTR(ret); | 224 | return ERR_PTR(ret); |
233 | } | 225 | } |
234 | 226 | ||
227 | static int btrfs_get_name(struct dentry *parent, char *name, | ||
228 | struct dentry *child) | ||
229 | { | ||
230 | struct inode *inode = child->d_inode; | ||
231 | struct inode *dir = parent->d_inode; | ||
232 | struct btrfs_path *path; | ||
233 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
234 | struct btrfs_inode_ref *iref; | ||
235 | struct btrfs_root_ref *rref; | ||
236 | struct extent_buffer *leaf; | ||
237 | unsigned long name_ptr; | ||
238 | struct btrfs_key key; | ||
239 | int name_len; | ||
240 | int ret; | ||
241 | |||
242 | if (!dir || !inode) | ||
243 | return -EINVAL; | ||
244 | |||
245 | if (!S_ISDIR(dir->i_mode)) | ||
246 | return -EINVAL; | ||
247 | |||
248 | path = btrfs_alloc_path(); | ||
249 | if (!path) | ||
250 | return -ENOMEM; | ||
251 | path->leave_spinning = 1; | ||
252 | |||
253 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
254 | key.objectid = BTRFS_I(inode)->root->root_key.objectid; | ||
255 | key.type = BTRFS_ROOT_BACKREF_KEY; | ||
256 | key.offset = (u64)-1; | ||
257 | root = root->fs_info->tree_root; | ||
258 | } else { | ||
259 | key.objectid = inode->i_ino; | ||
260 | key.offset = dir->i_ino; | ||
261 | key.type = BTRFS_INODE_REF_KEY; | ||
262 | } | ||
263 | |||
264 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
265 | if (ret < 0) { | ||
266 | btrfs_free_path(path); | ||
267 | return ret; | ||
268 | } else if (ret > 0) { | ||
269 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
270 | path->slots[0]--; | ||
271 | } else { | ||
272 | btrfs_free_path(path); | ||
273 | return -ENOENT; | ||
274 | } | ||
275 | } | ||
276 | leaf = path->nodes[0]; | ||
277 | |||
278 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
279 | rref = btrfs_item_ptr(leaf, path->slots[0], | ||
280 | struct btrfs_root_ref); | ||
281 | name_ptr = (unsigned long)(rref + 1); | ||
282 | name_len = btrfs_root_ref_name_len(leaf, rref); | ||
283 | } else { | ||
284 | iref = btrfs_item_ptr(leaf, path->slots[0], | ||
285 | struct btrfs_inode_ref); | ||
286 | name_ptr = (unsigned long)(iref + 1); | ||
287 | name_len = btrfs_inode_ref_name_len(leaf, iref); | ||
288 | } | ||
289 | |||
290 | read_extent_buffer(leaf, name, name_ptr, name_len); | ||
291 | btrfs_free_path(path); | ||
292 | |||
293 | /* | ||
294 | * have to add the null termination to make sure that reconnect_path | ||
295 | * gets the right len for strlen | ||
296 | */ | ||
297 | name[name_len] = '\0'; | ||
298 | |||
299 | return 0; | ||
300 | } | ||
301 | |||
235 | const struct export_operations btrfs_export_ops = { | 302 | const struct export_operations btrfs_export_ops = { |
236 | .encode_fh = btrfs_encode_fh, | 303 | .encode_fh = btrfs_encode_fh, |
237 | .fh_to_dentry = btrfs_fh_to_dentry, | 304 | .fh_to_dentry = btrfs_fh_to_dentry, |
238 | .fh_to_parent = btrfs_fh_to_parent, | 305 | .fh_to_parent = btrfs_fh_to_parent, |
239 | .get_parent = btrfs_get_parent, | 306 | .get_parent = btrfs_get_parent, |
307 | .get_name = btrfs_get_name, | ||
240 | }; | 308 | }; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0c097f3aec41..b55269340cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -429,6 +429,7 @@ err: | |||
429 | 429 | ||
430 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 430 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
431 | struct btrfs_trans_handle *trans, | 431 | struct btrfs_trans_handle *trans, |
432 | struct btrfs_root *root, | ||
432 | int load_cache_only) | 433 | int load_cache_only) |
433 | { | 434 | { |
434 | struct btrfs_fs_info *fs_info = cache->fs_info; | 435 | struct btrfs_fs_info *fs_info = cache->fs_info; |
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
442 | 443 | ||
443 | /* | 444 | /* |
444 | * We can't do the read from on-disk cache during a commit since we need | 445 | * We can't do the read from on-disk cache during a commit since we need |
445 | * to have the normal tree locking. | 446 | * to have the normal tree locking. Also if we are currently trying to |
447 | * allocate blocks for the tree root we can't do the fast caching since | ||
448 | * we likely hold important locks. | ||
446 | */ | 449 | */ |
447 | if (!trans->transaction->in_commit) { | 450 | if (!trans->transaction->in_commit && |
451 | (root && root != root->fs_info->tree_root)) { | ||
448 | spin_lock(&cache->lock); | 452 | spin_lock(&cache->lock); |
449 | if (cache->cached != BTRFS_CACHE_NO) { | 453 | if (cache->cached != BTRFS_CACHE_NO) { |
450 | spin_unlock(&cache->lock); | 454 | spin_unlock(&cache->lock); |
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, | |||
2741 | struct btrfs_root *root = block_group->fs_info->tree_root; | 2745 | struct btrfs_root *root = block_group->fs_info->tree_root; |
2742 | struct inode *inode = NULL; | 2746 | struct inode *inode = NULL; |
2743 | u64 alloc_hint = 0; | 2747 | u64 alloc_hint = 0; |
2748 | int dcs = BTRFS_DC_ERROR; | ||
2744 | int num_pages = 0; | 2749 | int num_pages = 0; |
2745 | int retries = 0; | 2750 | int retries = 0; |
2746 | int ret = 0; | 2751 | int ret = 0; |
@@ -2795,6 +2800,8 @@ again: | |||
2795 | 2800 | ||
2796 | spin_lock(&block_group->lock); | 2801 | spin_lock(&block_group->lock); |
2797 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | 2802 | if (block_group->cached != BTRFS_CACHE_FINISHED) { |
2803 | /* We're not cached, don't bother trying to write stuff out */ | ||
2804 | dcs = BTRFS_DC_WRITTEN; | ||
2798 | spin_unlock(&block_group->lock); | 2805 | spin_unlock(&block_group->lock); |
2799 | goto out_put; | 2806 | goto out_put; |
2800 | } | 2807 | } |
@@ -2821,6 +2828,8 @@ again: | |||
2821 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | 2828 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, |
2822 | num_pages, num_pages, | 2829 | num_pages, num_pages, |
2823 | &alloc_hint); | 2830 | &alloc_hint); |
2831 | if (!ret) | ||
2832 | dcs = BTRFS_DC_SETUP; | ||
2824 | btrfs_free_reserved_data_space(inode, num_pages); | 2833 | btrfs_free_reserved_data_space(inode, num_pages); |
2825 | out_put: | 2834 | out_put: |
2826 | iput(inode); | 2835 | iput(inode); |
@@ -2828,10 +2837,7 @@ out_free: | |||
2828 | btrfs_release_path(root, path); | 2837 | btrfs_release_path(root, path); |
2829 | out: | 2838 | out: |
2830 | spin_lock(&block_group->lock); | 2839 | spin_lock(&block_group->lock); |
2831 | if (ret) | 2840 | block_group->disk_cache_state = dcs; |
2832 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
2833 | else | ||
2834 | block_group->disk_cache_state = BTRFS_DC_SETUP; | ||
2835 | spin_unlock(&block_group->lock); | 2841 | spin_unlock(&block_group->lock); |
2836 | 2842 | ||
2837 | return ret; | 2843 | return ret; |
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
3037 | 3043 | ||
3038 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 3044 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
3039 | { | 3045 | { |
3040 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 3046 | /* |
3047 | * we add in the count of missing devices because we want | ||
3048 | * to make sure that any RAID levels on a degraded FS | ||
3049 | * continue to be honored. | ||
3050 | */ | ||
3051 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
3052 | root->fs_info->fs_devices->missing_devices; | ||
3041 | 3053 | ||
3042 | if (num_devices == 1) | 3054 | if (num_devices == 1) |
3043 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3055 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
@@ -3077,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3077 | return btrfs_reduce_alloc_profile(root, flags); | 3089 | return btrfs_reduce_alloc_profile(root, flags); |
3078 | } | 3090 | } |
3079 | 3091 | ||
3080 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3092 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3081 | { | 3093 | { |
3082 | u64 flags; | 3094 | u64 flags; |
3083 | 3095 | ||
@@ -3149,8 +3161,12 @@ alloc: | |||
3149 | bytes + 2 * 1024 * 1024, | 3161 | bytes + 2 * 1024 * 1024, |
3150 | alloc_target, 0); | 3162 | alloc_target, 0); |
3151 | btrfs_end_transaction(trans, root); | 3163 | btrfs_end_transaction(trans, root); |
3152 | if (ret < 0) | 3164 | if (ret < 0) { |
3153 | return ret; | 3165 | if (ret != -ENOSPC) |
3166 | return ret; | ||
3167 | else | ||
3168 | goto commit_trans; | ||
3169 | } | ||
3154 | 3170 | ||
3155 | if (!data_sinfo) { | 3171 | if (!data_sinfo) { |
3156 | btrfs_set_inode_space_info(root, inode); | 3172 | btrfs_set_inode_space_info(root, inode); |
@@ -3161,6 +3177,7 @@ alloc: | |||
3161 | spin_unlock(&data_sinfo->lock); | 3177 | spin_unlock(&data_sinfo->lock); |
3162 | 3178 | ||
3163 | /* commit the current transaction and try again */ | 3179 | /* commit the current transaction and try again */ |
3180 | commit_trans: | ||
3164 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3181 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3165 | committed = 1; | 3182 | committed = 1; |
3166 | trans = btrfs_join_transaction(root, 1); | 3183 | trans = btrfs_join_transaction(root, 1); |
@@ -3412,7 +3429,7 @@ again: | |||
3412 | * our reservation. | 3429 | * our reservation. |
3413 | */ | 3430 | */ |
3414 | if (unused <= space_info->total_bytes) { | 3431 | if (unused <= space_info->total_bytes) { |
3415 | unused -= space_info->total_bytes; | 3432 | unused = space_info->total_bytes - unused; |
3416 | if (unused >= num_bytes) { | 3433 | if (unused >= num_bytes) { |
3417 | if (!reserved) | 3434 | if (!reserved) |
3418 | space_info->bytes_reserved += orig_bytes; | 3435 | space_info->bytes_reserved += orig_bytes; |
@@ -3709,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3709 | return 0; | 3726 | return 0; |
3710 | } | 3727 | } |
3711 | 3728 | ||
3712 | WARN_ON(1); | ||
3713 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3714 | block_rsv->size, block_rsv->reserved, | ||
3715 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3716 | |||
3717 | return -ENOSPC; | 3729 | return -ENOSPC; |
3718 | } | 3730 | } |
3719 | 3731 | ||
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4080 | * space back to the block group, otherwise we will leak space. | 4092 | * space back to the block group, otherwise we will leak space. |
4081 | */ | 4093 | */ |
4082 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 4094 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
4083 | cache_block_group(cache, trans, 1); | 4095 | cache_block_group(cache, trans, NULL, 1); |
4084 | 4096 | ||
4085 | byte_in_group = bytenr - cache->key.objectid; | 4097 | byte_in_group = bytenr - cache->key.objectid; |
4086 | WARN_ON(byte_in_group > cache->key.offset); | 4098 | WARN_ON(byte_in_group > cache->key.offset); |
@@ -4930,11 +4942,31 @@ search: | |||
4930 | btrfs_get_block_group(block_group); | 4942 | btrfs_get_block_group(block_group); |
4931 | search_start = block_group->key.objectid; | 4943 | search_start = block_group->key.objectid; |
4932 | 4944 | ||
4945 | /* | ||
4946 | * this can happen if we end up cycling through all the | ||
4947 | * raid types, but we want to make sure we only allocate | ||
4948 | * for the proper type. | ||
4949 | */ | ||
4950 | if (!block_group_bits(block_group, data)) { | ||
4951 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | ||
4952 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4953 | BTRFS_BLOCK_GROUP_RAID10; | ||
4954 | |||
4955 | /* | ||
4956 | * if they asked for extra copies and this block group | ||
4957 | * doesn't provide them, bail. This does allow us to | ||
4958 | * fill raid0 from raid1. | ||
4959 | */ | ||
4960 | if ((data & extra) && !(block_group->flags & extra)) | ||
4961 | goto loop; | ||
4962 | } | ||
4963 | |||
4933 | have_block_group: | 4964 | have_block_group: |
4934 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4965 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4935 | u64 free_percent; | 4966 | u64 free_percent; |
4936 | 4967 | ||
4937 | ret = cache_block_group(block_group, trans, 1); | 4968 | ret = cache_block_group(block_group, trans, |
4969 | orig_root, 1); | ||
4938 | if (block_group->cached == BTRFS_CACHE_FINISHED) | 4970 | if (block_group->cached == BTRFS_CACHE_FINISHED) |
4939 | goto have_block_group; | 4971 | goto have_block_group; |
4940 | 4972 | ||
@@ -4958,7 +4990,8 @@ have_block_group: | |||
4958 | if (loop > LOOP_CACHING_NOWAIT || | 4990 | if (loop > LOOP_CACHING_NOWAIT || |
4959 | (loop > LOOP_FIND_IDEAL && | 4991 | (loop > LOOP_FIND_IDEAL && |
4960 | atomic_read(&space_info->caching_threads) < 2)) { | 4992 | atomic_read(&space_info->caching_threads) < 2)) { |
4961 | ret = cache_block_group(block_group, trans, 0); | 4993 | ret = cache_block_group(block_group, trans, |
4994 | orig_root, 0); | ||
4962 | BUG_ON(ret); | 4995 | BUG_ON(ret); |
4963 | } | 4996 | } |
4964 | found_uncached_bg = true; | 4997 | found_uncached_bg = true; |
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5515 | u64 num_bytes = ins->offset; | 5548 | u64 num_bytes = ins->offset; |
5516 | 5549 | ||
5517 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5550 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
5518 | cache_block_group(block_group, trans, 0); | 5551 | cache_block_group(block_group, trans, NULL, 0); |
5519 | caching_ctl = get_caching_control(block_group); | 5552 | caching_ctl = get_caching_control(block_group); |
5520 | 5553 | ||
5521 | if (!caching_ctl) { | 5554 | if (!caching_ctl) { |
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6300 | NULL, NULL); | 6333 | NULL, NULL); |
6301 | BUG_ON(ret < 0); | 6334 | BUG_ON(ret < 0); |
6302 | if (ret > 0) { | 6335 | if (ret > 0) { |
6303 | ret = btrfs_del_orphan_item(trans, tree_root, | 6336 | /* if we fail to delete the orphan item this time |
6304 | root->root_key.objectid); | 6337 | * around, it'll get picked up the next time. |
6305 | BUG_ON(ret); | 6338 | * |
6339 | * The most common failure here is just -ENOENT. | ||
6340 | */ | ||
6341 | btrfs_del_orphan_item(trans, tree_root, | ||
6342 | root->root_key.objectid); | ||
6306 | } | 6343 | } |
6307 | } | 6344 | } |
6308 | 6345 | ||
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7878 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7915 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7879 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7916 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7880 | 7917 | ||
7881 | num_devices = root->fs_info->fs_devices->rw_devices; | 7918 | /* |
7919 | * we add in the count of missing devices because we want | ||
7920 | * to make sure that any RAID levels on a degraded FS | ||
7921 | * continue to be honored. | ||
7922 | */ | ||
7923 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
7924 | root->fs_info->fs_devices->missing_devices; | ||
7925 | |||
7882 | if (num_devices == 1) { | 7926 | if (num_devices == 1) { |
7883 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 7927 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
7884 | stripped = flags & ~stripped; | 7928 | stripped = flags & ~stripped; |
@@ -7926,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7926 | 7970 | ||
7927 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7928 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7929 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 7973 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7930 | sinfo->bytes_readonly += num_bytes; | 7974 | sinfo->bytes_readonly += num_bytes; |
7931 | sinfo->bytes_reserved += cache->reserved_pinned; | 7975 | sinfo->bytes_reserved += cache->reserved_pinned; |
7932 | cache->reserved_pinned = 0; | 7976 | cache->reserved_pinned = 0; |
7933 | cache->ro = 1; | 7977 | cache->ro = 1; |
7934 | ret = 0; | 7978 | ret = 0; |
7935 | } | 7979 | } |
7980 | |||
7936 | spin_unlock(&cache->lock); | 7981 | spin_unlock(&cache->lock); |
7937 | spin_unlock(&sinfo->lock); | 7982 | spin_unlock(&sinfo->lock); |
7938 | return ret; | 7983 | return ret; |
@@ -7968,6 +8013,62 @@ out: | |||
7968 | return ret; | 8013 | return ret; |
7969 | } | 8014 | } |
7970 | 8015 | ||
8016 | /* | ||
8017 | * helper to account the unused space of all the readonly block group in the | ||
8018 | * list. takes mirrors into account. | ||
8019 | */ | ||
8020 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
8021 | { | ||
8022 | struct btrfs_block_group_cache *block_group; | ||
8023 | u64 free_bytes = 0; | ||
8024 | int factor; | ||
8025 | |||
8026 | list_for_each_entry(block_group, groups_list, list) { | ||
8027 | spin_lock(&block_group->lock); | ||
8028 | |||
8029 | if (!block_group->ro) { | ||
8030 | spin_unlock(&block_group->lock); | ||
8031 | continue; | ||
8032 | } | ||
8033 | |||
8034 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
8035 | BTRFS_BLOCK_GROUP_RAID10 | | ||
8036 | BTRFS_BLOCK_GROUP_DUP)) | ||
8037 | factor = 2; | ||
8038 | else | ||
8039 | factor = 1; | ||
8040 | |||
8041 | free_bytes += (block_group->key.offset - | ||
8042 | btrfs_block_group_used(&block_group->item)) * | ||
8043 | factor; | ||
8044 | |||
8045 | spin_unlock(&block_group->lock); | ||
8046 | } | ||
8047 | |||
8048 | return free_bytes; | ||
8049 | } | ||
8050 | |||
8051 | /* | ||
8052 | * helper to account the unused space of all the readonly block group in the | ||
8053 | * space_info. takes mirrors into account. | ||
8054 | */ | ||
8055 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8056 | { | ||
8057 | int i; | ||
8058 | u64 free_bytes = 0; | ||
8059 | |||
8060 | spin_lock(&sinfo->lock); | ||
8061 | |||
8062 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8063 | if (!list_empty(&sinfo->block_groups[i])) | ||
8064 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8065 | &sinfo->block_groups[i]); | ||
8066 | |||
8067 | spin_unlock(&sinfo->lock); | ||
8068 | |||
8069 | return free_bytes; | ||
8070 | } | ||
8071 | |||
7971 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 8072 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
7972 | struct btrfs_block_group_cache *cache) | 8073 | struct btrfs_block_group_cache *cache) |
7973 | { | 8074 | { |
@@ -8048,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8048 | mutex_lock(&root->fs_info->chunk_mutex); | 8149 | mutex_lock(&root->fs_info->chunk_mutex); |
8049 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8150 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
8050 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8151 | u64 min_free = btrfs_block_group_used(&block_group->item); |
8051 | u64 dev_offset, max_avail; | 8152 | u64 dev_offset; |
8052 | 8153 | ||
8053 | /* | 8154 | /* |
8054 | * check to make sure we can actually find a chunk with enough | 8155 | * check to make sure we can actually find a chunk with enough |
@@ -8056,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8056 | */ | 8157 | */ |
8057 | if (device->total_bytes > device->bytes_used + min_free) { | 8158 | if (device->total_bytes > device->bytes_used + min_free) { |
8058 | ret = find_free_dev_extent(NULL, device, min_free, | 8159 | ret = find_free_dev_extent(NULL, device, min_free, |
8059 | &dev_offset, &max_avail); | 8160 | &dev_offset, NULL); |
8060 | if (!ret) | 8161 | if (!ret) |
8061 | break; | 8162 | break; |
8062 | ret = -1; | 8163 | ret = -1; |
@@ -8247,7 +8348,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8247 | break; | 8348 | break; |
8248 | if (ret != 0) | 8349 | if (ret != 0) |
8249 | goto error; | 8350 | goto error; |
8250 | |||
8251 | leaf = path->nodes[0]; | 8351 | leaf = path->nodes[0]; |
8252 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 8352 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
8253 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 8353 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
@@ -8541,3 +8641,14 @@ out: | |||
8541 | btrfs_free_path(path); | 8641 | btrfs_free_path(path); |
8542 | return ret; | 8642 | return ret; |
8543 | } | 8643 | } |
8644 | |||
8645 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
8646 | { | ||
8647 | return unpin_extent_range(root, start, end); | ||
8648 | } | ||
8649 | |||
8650 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
8651 | u64 num_bytes) | ||
8652 | { | ||
8653 | return btrfs_discard_extent(root, bytenr, num_bytes); | ||
8654 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eac10e3260a9..2e993cf1766e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1828 | bio_put(bio); | 1828 | bio_put(bio); |
1829 | } | 1829 | } |
1830 | 1830 | ||
1831 | static struct bio * | 1831 | struct bio * |
1832 | extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 1832 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
1833 | gfp_t gfp_flags) | 1833 | gfp_t gfp_flags) |
1834 | { | 1834 | { |
1835 | struct bio *bio; | 1835 | struct bio *bio; |
1836 | 1836 | ||
@@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1919 | else | 1919 | else |
1920 | nr = bio_get_nr_vecs(bdev); | 1920 | nr = bio_get_nr_vecs(bdev); |
1921 | 1921 | ||
1922 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1923 | 1923 | ||
1924 | bio_add_page(bio, page, page_size, offset); | 1924 | bio_add_page(bio, page, page_size, offset); |
1925 | bio->bi_end_io = end_io_func; | 1925 | bio->bi_end_io = end_io_func; |
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2028 | BUG_ON(extent_map_end(em) <= cur); | 2028 | BUG_ON(extent_map_end(em) <= cur); |
2029 | BUG_ON(end < cur); | 2029 | BUG_ON(end < cur); |
2030 | 2030 | ||
2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2033 | extent_set_compress_type(&this_bio_flag, | ||
2034 | em->compress_type); | ||
2035 | } | ||
2033 | 2036 | ||
2034 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2037 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2035 | cur_end = min(extent_map_end(em) - 1, end); | 2038 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -2901,21 +2904,53 @@ out: | |||
2901 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2904 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2902 | __u64 start, __u64 len, get_extent_t *get_extent) | 2905 | __u64 start, __u64 len, get_extent_t *get_extent) |
2903 | { | 2906 | { |
2904 | int ret; | 2907 | int ret = 0; |
2905 | u64 off = start; | 2908 | u64 off = start; |
2906 | u64 max = start + len; | 2909 | u64 max = start + len; |
2907 | u32 flags = 0; | 2910 | u32 flags = 0; |
2911 | u32 found_type; | ||
2912 | u64 last; | ||
2908 | u64 disko = 0; | 2913 | u64 disko = 0; |
2914 | struct btrfs_key found_key; | ||
2909 | struct extent_map *em = NULL; | 2915 | struct extent_map *em = NULL; |
2910 | struct extent_state *cached_state = NULL; | 2916 | struct extent_state *cached_state = NULL; |
2917 | struct btrfs_path *path; | ||
2918 | struct btrfs_file_extent_item *item; | ||
2911 | int end = 0; | 2919 | int end = 0; |
2912 | u64 em_start = 0, em_len = 0; | 2920 | u64 em_start = 0, em_len = 0; |
2913 | unsigned long emflags; | 2921 | unsigned long emflags; |
2914 | ret = 0; | 2922 | int hole = 0; |
2915 | 2923 | ||
2916 | if (len == 0) | 2924 | if (len == 0) |
2917 | return -EINVAL; | 2925 | return -EINVAL; |
2918 | 2926 | ||
2927 | path = btrfs_alloc_path(); | ||
2928 | if (!path) | ||
2929 | return -ENOMEM; | ||
2930 | path->leave_spinning = 1; | ||
2931 | |||
2932 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | ||
2933 | path, inode->i_ino, -1, 0); | ||
2934 | if (ret < 0) { | ||
2935 | btrfs_free_path(path); | ||
2936 | return ret; | ||
2937 | } | ||
2938 | WARN_ON(!ret); | ||
2939 | path->slots[0]--; | ||
2940 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2941 | struct btrfs_file_extent_item); | ||
2942 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | ||
2943 | found_type = btrfs_key_type(&found_key); | ||
2944 | |||
2945 | /* No extents, just return */ | ||
2946 | if (found_key.objectid != inode->i_ino || | ||
2947 | found_type != BTRFS_EXTENT_DATA_KEY) { | ||
2948 | btrfs_free_path(path); | ||
2949 | return 0; | ||
2950 | } | ||
2951 | last = found_key.offset; | ||
2952 | btrfs_free_path(path); | ||
2953 | |||
2919 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 2954 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2920 | &cached_state, GFP_NOFS); | 2955 | &cached_state, GFP_NOFS); |
2921 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2956 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
@@ -2925,11 +2960,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2925 | ret = PTR_ERR(em); | 2960 | ret = PTR_ERR(em); |
2926 | goto out; | 2961 | goto out; |
2927 | } | 2962 | } |
2963 | |||
2928 | while (!end) { | 2964 | while (!end) { |
2965 | hole = 0; | ||
2929 | off = em->start + em->len; | 2966 | off = em->start + em->len; |
2930 | if (off >= max) | 2967 | if (off >= max) |
2931 | end = 1; | 2968 | end = 1; |
2932 | 2969 | ||
2970 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
2971 | hole = 1; | ||
2972 | goto next; | ||
2973 | } | ||
2974 | |||
2933 | em_start = em->start; | 2975 | em_start = em->start; |
2934 | em_len = em->len; | 2976 | em_len = em->len; |
2935 | 2977 | ||
@@ -2939,8 +2981,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2939 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 2981 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2940 | end = 1; | 2982 | end = 1; |
2941 | flags |= FIEMAP_EXTENT_LAST; | 2983 | flags |= FIEMAP_EXTENT_LAST; |
2942 | } else if (em->block_start == EXTENT_MAP_HOLE) { | ||
2943 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
2944 | } else if (em->block_start == EXTENT_MAP_INLINE) { | 2984 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2945 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2985 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2946 | FIEMAP_EXTENT_NOT_ALIGNED); | 2986 | FIEMAP_EXTENT_NOT_ALIGNED); |
@@ -2953,10 +2993,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2953 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2993 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2954 | flags |= FIEMAP_EXTENT_ENCODED; | 2994 | flags |= FIEMAP_EXTENT_ENCODED; |
2955 | 2995 | ||
2996 | next: | ||
2956 | emflags = em->flags; | 2997 | emflags = em->flags; |
2957 | free_extent_map(em); | 2998 | free_extent_map(em); |
2958 | em = NULL; | 2999 | em = NULL; |
2959 | |||
2960 | if (!end) { | 3000 | if (!end) { |
2961 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3001 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
2962 | if (!em) | 3002 | if (!em) |
@@ -2967,15 +3007,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2967 | } | 3007 | } |
2968 | emflags = em->flags; | 3008 | emflags = em->flags; |
2969 | } | 3009 | } |
3010 | |||
2970 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | 3011 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { |
2971 | flags |= FIEMAP_EXTENT_LAST; | 3012 | flags |= FIEMAP_EXTENT_LAST; |
2972 | end = 1; | 3013 | end = 1; |
2973 | } | 3014 | } |
2974 | 3015 | ||
2975 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3016 | if (em_start == last) { |
2976 | em_len, flags); | 3017 | flags |= FIEMAP_EXTENT_LAST; |
2977 | if (ret) | 3018 | end = 1; |
2978 | goto out_free; | 3019 | } |
3020 | |||
3021 | if (!hole) { | ||
3022 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | ||
3023 | em_len, flags); | ||
3024 | if (ret) | ||
3025 | goto out_free; | ||
3026 | } | ||
2979 | } | 3027 | } |
2980 | out_free: | 3028 | out_free: |
2981 | free_extent_map(em); | 3029 | free_extent_map(em); |
@@ -3027,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3027 | #endif | 3075 | #endif |
3028 | 3076 | ||
3029 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3077 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3078 | if (eb == NULL) | ||
3079 | return NULL; | ||
3030 | eb->start = start; | 3080 | eb->start = start; |
3031 | eb->len = len; | 3081 | eb->len = len; |
3032 | spin_lock_init(&eb->lock); | 3082 | spin_lock_init(&eb->lock); |
@@ -3836,8 +3886,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3836 | 3886 | ||
3837 | spin_lock(&tree->buffer_lock); | 3887 | spin_lock(&tree->buffer_lock); |
3838 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); | 3888 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3839 | if (!eb) | 3889 | if (!eb) { |
3840 | goto out; | 3890 | spin_unlock(&tree->buffer_lock); |
3891 | return ret; | ||
3892 | } | ||
3841 | 3893 | ||
3842 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3894 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3843 | ret = 0; | 3895 | ret = 0; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1c6d4f342ef7..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,8 +20,12 @@ | |||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
22 | 22 | ||
23 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
25 | 29 | ||
26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
@@ -135,6 +139,17 @@ struct extent_buffer { | |||
135 | wait_queue_head_t lock_wq; | 139 | wait_queue_head_t lock_wq; |
136 | }; | 140 | }; |
137 | 141 | ||
142 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
143 | int compress_type) | ||
144 | { | ||
145 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
146 | } | ||
147 | |||
148 | static inline int extent_compress_type(unsigned long bio_flags) | ||
149 | { | ||
150 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
151 | } | ||
152 | |||
138 | struct extent_map_tree; | 153 | struct extent_map_tree; |
139 | 154 | ||
140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 155 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
@@ -310,4 +325,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
310 | struct extent_io_tree *tree, | 325 | struct extent_io_tree *tree, |
311 | u64 start, u64 end, struct page *locked_page, | 326 | u64 start, u64 end, struct page *locked_page, |
312 | unsigned long op); | 327 | unsigned long op); |
328 | struct bio * | ||
329 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
330 | gfp_t gfp_flags); | ||
313 | #endif | 331 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
7 | 8 | ||
8 | 9 | ||
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
54 | return em; | 55 | return em; |
55 | em->in_tree = 0; | 56 | em->in_tree = 0; |
56 | em->flags = 0; | 57 | em->flags = 0; |
58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
58 | return em; | 60 | return em; |
59 | } | 61 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e354c33df082..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
@@ -48,30 +49,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
48 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
49 | struct iov_iter *i) | 50 | struct iov_iter *i) |
50 | { | 51 | { |
51 | size_t copied; | 52 | size_t copied = 0; |
52 | int pg = 0; | 53 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 54 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
55 | int total_copied = 0; | ||
54 | 56 | ||
55 | while (write_bytes > 0) { | 57 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 59 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[pg]; | 60 | struct page *page = prepared_pages[pg]; |
59 | again: | 61 | /* |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | 62 | * Copy data from userspace to the current page |
61 | return -EFAULT; | 63 | * |
62 | 64 | * Disable pagefault to avoid recursive lock since | |
63 | /* Copy data from userspace to the current page */ | 65 | * the pages are already locked |
64 | copied = iov_iter_copy_from_user(page, i, offset, count); | 66 | */ |
67 | pagefault_disable(); | ||
68 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | ||
69 | pagefault_enable(); | ||
65 | 70 | ||
66 | /* Flush processor's dcache for this page */ | 71 | /* Flush processor's dcache for this page */ |
67 | flush_dcache_page(page); | 72 | flush_dcache_page(page); |
68 | iov_iter_advance(i, copied); | 73 | iov_iter_advance(i, copied); |
69 | write_bytes -= copied; | 74 | write_bytes -= copied; |
75 | total_copied += copied; | ||
70 | 76 | ||
77 | /* Return to btrfs_file_aio_write to fault page */ | ||
71 | if (unlikely(copied == 0)) { | 78 | if (unlikely(copied == 0)) { |
72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, | 79 | break; |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | 80 | } |
76 | 81 | ||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 82 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
@@ -81,7 +86,7 @@ again: | |||
81 | offset = 0; | 86 | offset = 0; |
82 | } | 87 | } |
83 | } | 88 | } |
84 | return 0; | 89 | return total_copied; |
85 | } | 90 | } |
86 | 91 | ||
87 | /* | 92 | /* |
@@ -220,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
220 | 225 | ||
221 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
222 | split->flags = flags; | 227 | split->flags = flags; |
228 | split->compress_type = em->compress_type; | ||
223 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
224 | BUG_ON(ret); | 230 | BUG_ON(ret); |
225 | free_extent_map(split); | 231 | free_extent_map(split); |
@@ -234,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
234 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
235 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
236 | split->flags = flags; | 242 | split->flags = flags; |
243 | split->compress_type = em->compress_type; | ||
237 | 244 | ||
238 | if (compressed) { | 245 | if (compressed) { |
239 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
@@ -854,6 +861,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
854 | unsigned long last_index; | 861 | unsigned long last_index; |
855 | int will_write; | 862 | int will_write; |
856 | int buffered = 0; | 863 | int buffered = 0; |
864 | int copied = 0; | ||
865 | int dirty_pages = 0; | ||
857 | 866 | ||
858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 867 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
859 | (file->f_flags & O_DIRECT)); | 868 | (file->f_flags & O_DIRECT)); |
@@ -884,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
884 | if (err) | 893 | if (err) |
885 | goto out; | 894 | goto out; |
886 | 895 | ||
896 | /* | ||
897 | * If BTRFS flips readonly due to some impossible error | ||
898 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
899 | * although we have opened a file as writable, we have | ||
900 | * to stop this write operation to ensure FS consistency. | ||
901 | */ | ||
902 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
903 | err = -EROFS; | ||
904 | goto out; | ||
905 | } | ||
906 | |||
887 | file_update_time(file); | 907 | file_update_time(file); |
888 | BTRFS_I(inode)->sequence++; | 908 | BTRFS_I(inode)->sequence++; |
889 | 909 | ||
@@ -970,7 +990,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
970 | WARN_ON(num_pages > nrptrs); | 990 | WARN_ON(num_pages > nrptrs); |
971 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 991 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
972 | 992 | ||
973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); | 993 | /* |
994 | * Fault pages before locking them in prepare_pages | ||
995 | * to avoid recursive lock | ||
996 | */ | ||
997 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | ||
998 | ret = -EFAULT; | ||
999 | goto out; | ||
1000 | } | ||
1001 | |||
1002 | ret = btrfs_delalloc_reserve_space(inode, | ||
1003 | num_pages << PAGE_CACHE_SHIFT); | ||
974 | if (ret) | 1004 | if (ret) |
975 | goto out; | 1005 | goto out; |
976 | 1006 | ||
@@ -978,37 +1008,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
978 | pos, first_index, last_index, | 1008 | pos, first_index, last_index, |
979 | write_bytes); | 1009 | write_bytes); |
980 | if (ret) { | 1010 | if (ret) { |
981 | btrfs_delalloc_release_space(inode, write_bytes); | 1011 | btrfs_delalloc_release_space(inode, |
1012 | num_pages << PAGE_CACHE_SHIFT); | ||
982 | goto out; | 1013 | goto out; |
983 | } | 1014 | } |
984 | 1015 | ||
985 | ret = btrfs_copy_from_user(pos, num_pages, | 1016 | copied = btrfs_copy_from_user(pos, num_pages, |
986 | write_bytes, pages, &i); | 1017 | write_bytes, pages, &i); |
987 | if (ret == 0) { | 1018 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> |
1019 | PAGE_CACHE_SHIFT; | ||
1020 | |||
1021 | if (num_pages > dirty_pages) { | ||
1022 | if (copied > 0) | ||
1023 | atomic_inc( | ||
1024 | &BTRFS_I(inode)->outstanding_extents); | ||
1025 | btrfs_delalloc_release_space(inode, | ||
1026 | (num_pages - dirty_pages) << | ||
1027 | PAGE_CACHE_SHIFT); | ||
1028 | } | ||
1029 | |||
1030 | if (copied > 0) { | ||
988 | dirty_and_release_pages(NULL, root, file, pages, | 1031 | dirty_and_release_pages(NULL, root, file, pages, |
989 | num_pages, pos, write_bytes); | 1032 | dirty_pages, pos, copied); |
990 | } | 1033 | } |
991 | 1034 | ||
992 | btrfs_drop_pages(pages, num_pages); | 1035 | btrfs_drop_pages(pages, num_pages); |
993 | if (ret) { | ||
994 | btrfs_delalloc_release_space(inode, write_bytes); | ||
995 | goto out; | ||
996 | } | ||
997 | 1036 | ||
998 | if (will_write) { | 1037 | if (copied > 0) { |
999 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1038 | if (will_write) { |
1000 | pos + write_bytes - 1); | 1039 | filemap_fdatawrite_range(inode->i_mapping, pos, |
1001 | } else { | 1040 | pos + copied - 1); |
1002 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1041 | } else { |
1003 | num_pages); | 1042 | balance_dirty_pages_ratelimited_nr( |
1004 | if (num_pages < | 1043 | inode->i_mapping, |
1005 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1044 | dirty_pages); |
1006 | btrfs_btree_balance_dirty(root, 1); | 1045 | if (dirty_pages < |
1007 | btrfs_throttle(root); | 1046 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1047 | btrfs_btree_balance_dirty(root, 1); | ||
1048 | btrfs_throttle(root); | ||
1049 | } | ||
1008 | } | 1050 | } |
1009 | 1051 | ||
1010 | pos += write_bytes; | 1052 | pos += copied; |
1011 | num_written += write_bytes; | 1053 | num_written += copied; |
1012 | 1054 | ||
1013 | cond_resched(); | 1055 | cond_resched(); |
1014 | } | 1056 | } |
@@ -1047,8 +1089,14 @@ out: | |||
1047 | 1089 | ||
1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1090 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1049 | trans = btrfs_start_transaction(root, 0); | 1091 | trans = btrfs_start_transaction(root, 0); |
1092 | if (IS_ERR(trans)) { | ||
1093 | num_written = PTR_ERR(trans); | ||
1094 | goto done; | ||
1095 | } | ||
1096 | mutex_lock(&inode->i_mutex); | ||
1050 | ret = btrfs_log_dentry_safe(trans, root, | 1097 | ret = btrfs_log_dentry_safe(trans, root, |
1051 | file->f_dentry); | 1098 | file->f_dentry); |
1099 | mutex_unlock(&inode->i_mutex); | ||
1052 | if (ret == 0) { | 1100 | if (ret == 0) { |
1053 | ret = btrfs_sync_log(trans, root); | 1101 | ret = btrfs_sync_log(trans, root); |
1054 | if (ret == 0) | 1102 | if (ret == 0) |
@@ -1067,6 +1115,7 @@ out: | |||
1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1115 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
1068 | } | 1116 | } |
1069 | } | 1117 | } |
1118 | done: | ||
1070 | current->backing_dev_info = NULL; | 1119 | current->backing_dev_info = NULL; |
1071 | return num_written ? num_written : err; | 1120 | return num_written ? num_written : err; |
1072 | } | 1121 | } |
@@ -1202,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1202 | return 0; | 1251 | return 0; |
1203 | } | 1252 | } |
1204 | 1253 | ||
1254 | static long btrfs_fallocate(struct file *file, int mode, | ||
1255 | loff_t offset, loff_t len) | ||
1256 | { | ||
1257 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1258 | struct extent_state *cached_state = NULL; | ||
1259 | u64 cur_offset; | ||
1260 | u64 last_byte; | ||
1261 | u64 alloc_start; | ||
1262 | u64 alloc_end; | ||
1263 | u64 alloc_hint = 0; | ||
1264 | u64 locked_end; | ||
1265 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1266 | struct extent_map *em; | ||
1267 | int ret; | ||
1268 | |||
1269 | alloc_start = offset & ~mask; | ||
1270 | alloc_end = (offset + len + mask) & ~mask; | ||
1271 | |||
1272 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1273 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1274 | return -EOPNOTSUPP; | ||
1275 | |||
1276 | /* | ||
1277 | * wait for ordered IO before we have any locks. We'll loop again | ||
1278 | * below with the locks held. | ||
1279 | */ | ||
1280 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1281 | |||
1282 | mutex_lock(&inode->i_mutex); | ||
1283 | ret = inode_newsize_ok(inode, alloc_end); | ||
1284 | if (ret) | ||
1285 | goto out; | ||
1286 | |||
1287 | if (alloc_start > inode->i_size) { | ||
1288 | ret = btrfs_cont_expand(inode, alloc_start); | ||
1289 | if (ret) | ||
1290 | goto out; | ||
1291 | } | ||
1292 | |||
1293 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1294 | if (ret) | ||
1295 | goto out; | ||
1296 | |||
1297 | locked_end = alloc_end - 1; | ||
1298 | while (1) { | ||
1299 | struct btrfs_ordered_extent *ordered; | ||
1300 | |||
1301 | /* the extent lock is ordered inside the running | ||
1302 | * transaction | ||
1303 | */ | ||
1304 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1305 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1306 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1307 | alloc_end - 1); | ||
1308 | if (ordered && | ||
1309 | ordered->file_offset + ordered->len > alloc_start && | ||
1310 | ordered->file_offset < alloc_end) { | ||
1311 | btrfs_put_ordered_extent(ordered); | ||
1312 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1313 | alloc_start, locked_end, | ||
1314 | &cached_state, GFP_NOFS); | ||
1315 | /* | ||
1316 | * we can't wait on the range with the transaction | ||
1317 | * running or with the extent lock held | ||
1318 | */ | ||
1319 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1320 | alloc_end - alloc_start); | ||
1321 | } else { | ||
1322 | if (ordered) | ||
1323 | btrfs_put_ordered_extent(ordered); | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | |||
1328 | cur_offset = alloc_start; | ||
1329 | while (1) { | ||
1330 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1331 | alloc_end - cur_offset, 0); | ||
1332 | BUG_ON(IS_ERR(em) || !em); | ||
1333 | last_byte = min(extent_map_end(em), alloc_end); | ||
1334 | last_byte = (last_byte + mask) & ~mask; | ||
1335 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1336 | (cur_offset >= inode->i_size && | ||
1337 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1338 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1339 | last_byte - cur_offset, | ||
1340 | 1 << inode->i_blkbits, | ||
1341 | offset + len, | ||
1342 | &alloc_hint); | ||
1343 | if (ret < 0) { | ||
1344 | free_extent_map(em); | ||
1345 | break; | ||
1346 | } | ||
1347 | } | ||
1348 | free_extent_map(em); | ||
1349 | |||
1350 | cur_offset = last_byte; | ||
1351 | if (cur_offset >= alloc_end) { | ||
1352 | ret = 0; | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1357 | &cached_state, GFP_NOFS); | ||
1358 | |||
1359 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1360 | out: | ||
1361 | mutex_unlock(&inode->i_mutex); | ||
1362 | return ret; | ||
1363 | } | ||
1364 | |||
1205 | const struct file_operations btrfs_file_operations = { | 1365 | const struct file_operations btrfs_file_operations = { |
1206 | .llseek = generic_file_llseek, | 1366 | .llseek = generic_file_llseek, |
1207 | .read = do_sync_read, | 1367 | .read = do_sync_read, |
@@ -1213,6 +1373,7 @@ const struct file_operations btrfs_file_operations = { | |||
1213 | .open = generic_file_open, | 1373 | .open = generic_file_open, |
1214 | .release = btrfs_release_file, | 1374 | .release = btrfs_release_file, |
1215 | .fsync = btrfs_sync_file, | 1375 | .fsync = btrfs_sync_file, |
1376 | .fallocate = btrfs_fallocate, | ||
1216 | .unlocked_ioctl = btrfs_ioctl, | 1377 | .unlocked_ioctl = btrfs_ioctl, |
1217 | #ifdef CONFIG_COMPAT | 1378 | #ifdef CONFIG_COMPAT |
1218 | .compat_ioctl = btrfs_ioctl, | 1379 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 22ee0dc2e6b8..60d684266959 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
290 | (unsigned long long)BTRFS_I(inode)->generation, | 290 | (unsigned long long)BTRFS_I(inode)->generation, |
291 | (unsigned long long)generation, | 291 | (unsigned long long)generation, |
292 | (unsigned long long)block_group->key.objectid); | 292 | (unsigned long long)block_group->key.objectid); |
293 | goto out; | 293 | goto free_cache; |
294 | } | 294 | } |
295 | 295 | ||
296 | if (!num_entries) | 296 | if (!num_entries) |
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
524 | return 0; | 524 | return 0; |
525 | } | 525 | } |
526 | 526 | ||
527 | node = rb_first(&block_group->free_space_offset); | ||
528 | if (!node) { | ||
529 | iput(inode); | ||
530 | return 0; | ||
531 | } | ||
532 | |||
527 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 533 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; |
528 | filemap_write_and_wait(inode->i_mapping); | 534 | filemap_write_and_wait(inode->i_mapping); |
529 | btrfs_wait_ordered_range(inode, inode->i_size & | 535 | btrfs_wait_ordered_range(inode, inode->i_size & |
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
543 | */ | 549 | */ |
544 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 550 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); |
545 | 551 | ||
546 | node = rb_first(&block_group->free_space_offset); | ||
547 | if (!node) | ||
548 | goto out_free; | ||
549 | |||
550 | /* | 552 | /* |
551 | * Lock all pages first so we can lock the extent safely. | 553 | * Lock all pages first so we can lock the extent safely. |
552 | * | 554 | * |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 558cac2dfa54..160b55b3e132 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
122 | size_t cur_size = size; | 122 | size_t cur_size = size; |
123 | size_t datasize; | 123 | size_t datasize; |
124 | unsigned long offset; | 124 | unsigned long offset; |
125 | int use_compress = 0; | 125 | int compress_type = BTRFS_COMPRESS_NONE; |
126 | 126 | ||
127 | if (compressed_size && compressed_pages) { | 127 | if (compressed_size && compressed_pages) { |
128 | use_compress = 1; | 128 | compress_type = root->fs_info->compress_type; |
129 | cur_size = compressed_size; | 129 | cur_size = compressed_size; |
130 | } | 130 | } |
131 | 131 | ||
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
160 | ptr = btrfs_file_extent_inline_start(ei); | 160 | ptr = btrfs_file_extent_inline_start(ei); |
161 | 161 | ||
162 | if (use_compress) { | 162 | if (compress_type != BTRFS_COMPRESS_NONE) { |
163 | struct page *cpage; | 163 | struct page *cpage; |
164 | int i = 0; | 164 | int i = 0; |
165 | while (compressed_size > 0) { | 165 | while (compressed_size > 0) { |
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
176 | compressed_size -= cur_size; | 176 | compressed_size -= cur_size; |
177 | } | 177 | } |
178 | btrfs_set_file_extent_compression(leaf, ei, | 178 | btrfs_set_file_extent_compression(leaf, ei, |
179 | BTRFS_COMPRESS_ZLIB); | 179 | compress_type); |
180 | } else { | 180 | } else { |
181 | page = find_get_page(inode->i_mapping, | 181 | page = find_get_page(inode->i_mapping, |
182 | start >> PAGE_CACHE_SHIFT); | 182 | start >> PAGE_CACHE_SHIFT); |
@@ -263,6 +263,7 @@ struct async_extent { | |||
263 | u64 compressed_size; | 263 | u64 compressed_size; |
264 | struct page **pages; | 264 | struct page **pages; |
265 | unsigned long nr_pages; | 265 | unsigned long nr_pages; |
266 | int compress_type; | ||
266 | struct list_head list; | 267 | struct list_head list; |
267 | }; | 268 | }; |
268 | 269 | ||
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
280 | u64 start, u64 ram_size, | 281 | u64 start, u64 ram_size, |
281 | u64 compressed_size, | 282 | u64 compressed_size, |
282 | struct page **pages, | 283 | struct page **pages, |
283 | unsigned long nr_pages) | 284 | unsigned long nr_pages, |
285 | int compress_type) | ||
284 | { | 286 | { |
285 | struct async_extent *async_extent; | 287 | struct async_extent *async_extent; |
286 | 288 | ||
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
290 | async_extent->compressed_size = compressed_size; | 292 | async_extent->compressed_size = compressed_size; |
291 | async_extent->pages = pages; | 293 | async_extent->pages = pages; |
292 | async_extent->nr_pages = nr_pages; | 294 | async_extent->nr_pages = nr_pages; |
295 | async_extent->compress_type = compress_type; | ||
293 | list_add_tail(&async_extent->list, &cow->extents); | 296 | list_add_tail(&async_extent->list, &cow->extents); |
294 | return 0; | 297 | return 0; |
295 | } | 298 | } |
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
332 | unsigned long max_uncompressed = 128 * 1024; | 335 | unsigned long max_uncompressed = 128 * 1024; |
333 | int i; | 336 | int i; |
334 | int will_compress; | 337 | int will_compress; |
338 | int compress_type = root->fs_info->compress_type; | ||
335 | 339 | ||
336 | actual_end = min_t(u64, isize, end + 1); | 340 | actual_end = min_t(u64, isize, end + 1); |
337 | again: | 341 | again: |
@@ -381,12 +385,16 @@ again: | |||
381 | WARN_ON(pages); | 385 | WARN_ON(pages); |
382 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 386 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
383 | 387 | ||
384 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 388 | if (BTRFS_I(inode)->force_compress) |
385 | total_compressed, pages, | 389 | compress_type = BTRFS_I(inode)->force_compress; |
386 | nr_pages, &nr_pages_ret, | 390 | |
387 | &total_in, | 391 | ret = btrfs_compress_pages(compress_type, |
388 | &total_compressed, | 392 | inode->i_mapping, start, |
389 | max_compressed); | 393 | total_compressed, pages, |
394 | nr_pages, &nr_pages_ret, | ||
395 | &total_in, | ||
396 | &total_compressed, | ||
397 | max_compressed); | ||
390 | 398 | ||
391 | if (!ret) { | 399 | if (!ret) { |
392 | unsigned long offset = total_compressed & | 400 | unsigned long offset = total_compressed & |
@@ -493,9 +501,10 @@ again: | |||
493 | * and will submit them to the elevator. | 501 | * and will submit them to the elevator. |
494 | */ | 502 | */ |
495 | add_async_extent(async_cow, start, num_bytes, | 503 | add_async_extent(async_cow, start, num_bytes, |
496 | total_compressed, pages, nr_pages_ret); | 504 | total_compressed, pages, nr_pages_ret, |
505 | compress_type); | ||
497 | 506 | ||
498 | if (start + num_bytes < end && start + num_bytes < actual_end) { | 507 | if (start + num_bytes < end) { |
499 | start += num_bytes; | 508 | start += num_bytes; |
500 | pages = NULL; | 509 | pages = NULL; |
501 | cond_resched(); | 510 | cond_resched(); |
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: | |||
515 | __set_page_dirty_nobuffers(locked_page); | 524 | __set_page_dirty_nobuffers(locked_page); |
516 | /* unlocked later on in the async handlers */ | 525 | /* unlocked later on in the async handlers */ |
517 | } | 526 | } |
518 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 527 | add_async_extent(async_cow, start, end - start + 1, |
528 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
519 | *num_added += 1; | 529 | *num_added += 1; |
520 | } | 530 | } |
521 | 531 | ||
@@ -640,6 +650,7 @@ retry: | |||
640 | em->block_start = ins.objectid; | 650 | em->block_start = ins.objectid; |
641 | em->block_len = ins.offset; | 651 | em->block_len = ins.offset; |
642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 652 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
653 | em->compress_type = async_extent->compress_type; | ||
643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 654 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
644 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 655 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
645 | 656 | ||
@@ -656,11 +667,13 @@ retry: | |||
656 | async_extent->ram_size - 1, 0); | 667 | async_extent->ram_size - 1, 0); |
657 | } | 668 | } |
658 | 669 | ||
659 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 670 | ret = btrfs_add_ordered_extent_compress(inode, |
660 | ins.objectid, | 671 | async_extent->start, |
661 | async_extent->ram_size, | 672 | ins.objectid, |
662 | ins.offset, | 673 | async_extent->ram_size, |
663 | BTRFS_ORDERED_COMPRESSED); | 674 | ins.offset, |
675 | BTRFS_ORDERED_COMPRESSED, | ||
676 | async_extent->compress_type); | ||
664 | BUG_ON(ret); | 677 | BUG_ON(ret); |
665 | 678 | ||
666 | /* | 679 | /* |
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1670 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1683 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1671 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1684 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1672 | struct extent_state *cached_state = NULL; | 1685 | struct extent_state *cached_state = NULL; |
1673 | int compressed = 0; | 1686 | int compress_type = 0; |
1674 | int ret; | 1687 | int ret; |
1675 | bool nolock = false; | 1688 | bool nolock = false; |
1676 | 1689 | ||
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1711 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1724 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1712 | 1725 | ||
1713 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1726 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1714 | compressed = 1; | 1727 | compress_type = ordered_extent->compress_type; |
1715 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1728 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1716 | BUG_ON(compressed); | 1729 | BUG_ON(compress_type); |
1717 | ret = btrfs_mark_extent_written(trans, inode, | 1730 | ret = btrfs_mark_extent_written(trans, inode, |
1718 | ordered_extent->file_offset, | 1731 | ordered_extent->file_offset, |
1719 | ordered_extent->file_offset + | 1732 | ordered_extent->file_offset + |
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1727 | ordered_extent->disk_len, | 1740 | ordered_extent->disk_len, |
1728 | ordered_extent->len, | 1741 | ordered_extent->len, |
1729 | ordered_extent->len, | 1742 | ordered_extent->len, |
1730 | compressed, 0, 0, | 1743 | compress_type, 0, 0, |
1731 | BTRFS_FILE_EXTENT_REG); | 1744 | BTRFS_FILE_EXTENT_REG); |
1732 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1745 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1733 | ordered_extent->file_offset, | 1746 | ordered_extent->file_offset, |
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1829 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1842 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1830 | logical = em->block_start; | 1843 | logical = em->block_start; |
1831 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1844 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1845 | extent_set_compress_type(&failrec->bio_flags, | ||
1846 | em->compress_type); | ||
1832 | } | 1847 | } |
1833 | failrec->logical = logical; | 1848 | failrec->logical = logical; |
1834 | free_extent_map(em); | 1849 | free_extent_map(em); |
@@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3671 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3686 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3672 | { | 3687 | { |
3673 | struct inode *inode = dentry->d_inode; | 3688 | struct inode *inode = dentry->d_inode; |
3689 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3674 | int err; | 3690 | int err; |
3675 | 3691 | ||
3692 | if (btrfs_root_readonly(root)) | ||
3693 | return -EROFS; | ||
3694 | |||
3676 | err = inode_change_ok(inode, attr); | 3695 | err = inode_change_ok(inode, attr); |
3677 | if (err) | 3696 | if (err) |
3678 | return err; | 3697 | return err; |
@@ -4084,8 +4103,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4084 | int index; | 4103 | int index; |
4085 | int ret; | 4104 | int ret; |
4086 | 4105 | ||
4087 | dentry->d_op = &btrfs_dentry_operations; | ||
4088 | |||
4089 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4106 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
4090 | return ERR_PTR(-ENAMETOOLONG); | 4107 | return ERR_PTR(-ENAMETOOLONG); |
4091 | 4108 | ||
@@ -4127,7 +4144,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4127 | return inode; | 4144 | return inode; |
4128 | } | 4145 | } |
4129 | 4146 | ||
4130 | static int btrfs_dentry_delete(struct dentry *dentry) | 4147 | static int btrfs_dentry_delete(const struct dentry *dentry) |
4131 | { | 4148 | { |
4132 | struct btrfs_root *root; | 4149 | struct btrfs_root *root; |
4133 | 4150 | ||
@@ -4501,6 +4518,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4501 | BTRFS_I(inode)->index_cnt = 2; | 4518 | BTRFS_I(inode)->index_cnt = 2; |
4502 | BTRFS_I(inode)->root = root; | 4519 | BTRFS_I(inode)->root = root; |
4503 | BTRFS_I(inode)->generation = trans->transid; | 4520 | BTRFS_I(inode)->generation = trans->transid; |
4521 | inode->i_generation = BTRFS_I(inode)->generation; | ||
4504 | btrfs_set_inode_space_info(root, inode); | 4522 | btrfs_set_inode_space_info(root, inode); |
4505 | 4523 | ||
4506 | if (mode & S_IFDIR) | 4524 | if (mode & S_IFDIR) |
@@ -4622,12 +4640,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4622 | } | 4640 | } |
4623 | 4641 | ||
4624 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, | 4642 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, |
4625 | struct dentry *dentry, struct inode *inode, | 4643 | struct inode *dir, struct dentry *dentry, |
4626 | int backref, u64 index) | 4644 | struct inode *inode, int backref, u64 index) |
4627 | { | 4645 | { |
4628 | int err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4646 | int err = btrfs_add_link(trans, dir, inode, |
4629 | inode, dentry->d_name.name, | 4647 | dentry->d_name.name, dentry->d_name.len, |
4630 | dentry->d_name.len, backref, index); | 4648 | backref, index); |
4631 | if (!err) { | 4649 | if (!err) { |
4632 | d_instantiate(dentry, inode); | 4650 | d_instantiate(dentry, inode); |
4633 | return 0; | 4651 | return 0; |
@@ -4668,8 +4686,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4668 | btrfs_set_trans_block_group(trans, dir); | 4686 | btrfs_set_trans_block_group(trans, dir); |
4669 | 4687 | ||
4670 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4688 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4671 | dentry->d_name.len, | 4689 | dentry->d_name.len, dir->i_ino, objectid, |
4672 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4673 | BTRFS_I(dir)->block_group, mode, &index); | 4690 | BTRFS_I(dir)->block_group, mode, &index); |
4674 | err = PTR_ERR(inode); | 4691 | err = PTR_ERR(inode); |
4675 | if (IS_ERR(inode)) | 4692 | if (IS_ERR(inode)) |
@@ -4682,7 +4699,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4682 | } | 4699 | } |
4683 | 4700 | ||
4684 | btrfs_set_trans_block_group(trans, inode); | 4701 | btrfs_set_trans_block_group(trans, inode); |
4685 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4702 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4686 | if (err) | 4703 | if (err) |
4687 | drop_inode = 1; | 4704 | drop_inode = 1; |
4688 | else { | 4705 | else { |
@@ -4730,10 +4747,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4730 | btrfs_set_trans_block_group(trans, dir); | 4747 | btrfs_set_trans_block_group(trans, dir); |
4731 | 4748 | ||
4732 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4749 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4733 | dentry->d_name.len, | 4750 | dentry->d_name.len, dir->i_ino, objectid, |
4734 | dentry->d_parent->d_inode->i_ino, | 4751 | BTRFS_I(dir)->block_group, mode, &index); |
4735 | objectid, BTRFS_I(dir)->block_group, mode, | ||
4736 | &index); | ||
4737 | err = PTR_ERR(inode); | 4752 | err = PTR_ERR(inode); |
4738 | if (IS_ERR(inode)) | 4753 | if (IS_ERR(inode)) |
4739 | goto out_unlock; | 4754 | goto out_unlock; |
@@ -4745,7 +4760,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4745 | } | 4760 | } |
4746 | 4761 | ||
4747 | btrfs_set_trans_block_group(trans, inode); | 4762 | btrfs_set_trans_block_group(trans, inode); |
4748 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4763 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4749 | if (err) | 4764 | if (err) |
4750 | drop_inode = 1; | 4765 | drop_inode = 1; |
4751 | else { | 4766 | else { |
@@ -4787,6 +4802,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4787 | return -EPERM; | 4802 | return -EPERM; |
4788 | 4803 | ||
4789 | btrfs_inc_nlink(inode); | 4804 | btrfs_inc_nlink(inode); |
4805 | inode->i_ctime = CURRENT_TIME; | ||
4790 | 4806 | ||
4791 | err = btrfs_set_inode_index(dir, &index); | 4807 | err = btrfs_set_inode_index(dir, &index); |
4792 | if (err) | 4808 | if (err) |
@@ -4805,15 +4821,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4805 | btrfs_set_trans_block_group(trans, dir); | 4821 | btrfs_set_trans_block_group(trans, dir); |
4806 | ihold(inode); | 4822 | ihold(inode); |
4807 | 4823 | ||
4808 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4824 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
4809 | 4825 | ||
4810 | if (err) { | 4826 | if (err) { |
4811 | drop_inode = 1; | 4827 | drop_inode = 1; |
4812 | } else { | 4828 | } else { |
4829 | struct dentry *parent = dget_parent(dentry); | ||
4813 | btrfs_update_inode_block_group(trans, dir); | 4830 | btrfs_update_inode_block_group(trans, dir); |
4814 | err = btrfs_update_inode(trans, root, inode); | 4831 | err = btrfs_update_inode(trans, root, inode); |
4815 | BUG_ON(err); | 4832 | BUG_ON(err); |
4816 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | 4833 | btrfs_log_new_name(trans, inode, NULL, parent); |
4834 | dput(parent); | ||
4817 | } | 4835 | } |
4818 | 4836 | ||
4819 | nr = trans->blocks_used; | 4837 | nr = trans->blocks_used; |
@@ -4853,8 +4871,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4853 | btrfs_set_trans_block_group(trans, dir); | 4871 | btrfs_set_trans_block_group(trans, dir); |
4854 | 4872 | ||
4855 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4873 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4856 | dentry->d_name.len, | 4874 | dentry->d_name.len, dir->i_ino, objectid, |
4857 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4858 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | 4875 | BTRFS_I(dir)->block_group, S_IFDIR | mode, |
4859 | &index); | 4876 | &index); |
4860 | if (IS_ERR(inode)) { | 4877 | if (IS_ERR(inode)) { |
@@ -4877,9 +4894,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4877 | if (err) | 4894 | if (err) |
4878 | goto out_fail; | 4895 | goto out_fail; |
4879 | 4896 | ||
4880 | err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4897 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, |
4881 | inode, dentry->d_name.name, | 4898 | dentry->d_name.len, 0, index); |
4882 | dentry->d_name.len, 0, index); | ||
4883 | if (err) | 4899 | if (err) |
4884 | goto out_fail; | 4900 | goto out_fail; |
4885 | 4901 | ||
@@ -4931,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4931 | size_t max_size; | 4947 | size_t max_size; |
4932 | unsigned long inline_size; | 4948 | unsigned long inline_size; |
4933 | unsigned long ptr; | 4949 | unsigned long ptr; |
4950 | int compress_type; | ||
4934 | 4951 | ||
4935 | WARN_ON(pg_offset != 0); | 4952 | WARN_ON(pg_offset != 0); |
4953 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4936 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4954 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4937 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4955 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4938 | btrfs_item_nr(leaf, path->slots[0])); | 4956 | btrfs_item_nr(leaf, path->slots[0])); |
@@ -4942,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4942 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4960 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4943 | 4961 | ||
4944 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4962 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4945 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4963 | ret = btrfs_decompress(compress_type, tmp, page, |
4946 | inline_size, max_size); | 4964 | extent_offset, inline_size, max_size); |
4947 | if (ret) { | 4965 | if (ret) { |
4948 | char *kaddr = kmap_atomic(page, KM_USER0); | 4966 | char *kaddr = kmap_atomic(page, KM_USER0); |
4949 | unsigned long copy_size = min_t(u64, | 4967 | unsigned long copy_size = min_t(u64, |
@@ -4985,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4985 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5003 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4986 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5004 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4987 | struct btrfs_trans_handle *trans = NULL; | 5005 | struct btrfs_trans_handle *trans = NULL; |
4988 | int compressed; | 5006 | int compress_type; |
4989 | 5007 | ||
4990 | again: | 5008 | again: |
4991 | read_lock(&em_tree->lock); | 5009 | read_lock(&em_tree->lock); |
@@ -5044,7 +5062,7 @@ again: | |||
5044 | 5062 | ||
5045 | found_type = btrfs_file_extent_type(leaf, item); | 5063 | found_type = btrfs_file_extent_type(leaf, item); |
5046 | extent_start = found_key.offset; | 5064 | extent_start = found_key.offset; |
5047 | compressed = btrfs_file_extent_compression(leaf, item); | 5065 | compress_type = btrfs_file_extent_compression(leaf, item); |
5048 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5066 | if (found_type == BTRFS_FILE_EXTENT_REG || |
5049 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5067 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
5050 | extent_end = extent_start + | 5068 | extent_end = extent_start + |
@@ -5090,8 +5108,9 @@ again: | |||
5090 | em->block_start = EXTENT_MAP_HOLE; | 5108 | em->block_start = EXTENT_MAP_HOLE; |
5091 | goto insert; | 5109 | goto insert; |
5092 | } | 5110 | } |
5093 | if (compressed) { | 5111 | if (compress_type != BTRFS_COMPRESS_NONE) { |
5094 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5112 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5113 | em->compress_type = compress_type; | ||
5095 | em->block_start = bytenr; | 5114 | em->block_start = bytenr; |
5096 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5115 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
5097 | item); | 5116 | item); |
@@ -5125,12 +5144,14 @@ again: | |||
5125 | em->len = (copy_size + root->sectorsize - 1) & | 5144 | em->len = (copy_size + root->sectorsize - 1) & |
5126 | ~((u64)root->sectorsize - 1); | 5145 | ~((u64)root->sectorsize - 1); |
5127 | em->orig_start = EXTENT_MAP_INLINE; | 5146 | em->orig_start = EXTENT_MAP_INLINE; |
5128 | if (compressed) | 5147 | if (compress_type) { |
5129 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5148 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5149 | em->compress_type = compress_type; | ||
5150 | } | ||
5130 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5151 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
5131 | if (create == 0 && !PageUptodate(page)) { | 5152 | if (create == 0 && !PageUptodate(page)) { |
5132 | if (btrfs_file_extent_compression(leaf, item) == | 5153 | if (btrfs_file_extent_compression(leaf, item) != |
5133 | BTRFS_COMPRESS_ZLIB) { | 5154 | BTRFS_COMPRESS_NONE) { |
5134 | ret = uncompress_inline(path, inode, page, | 5155 | ret = uncompress_inline(path, inode, page, |
5135 | pg_offset, | 5156 | pg_offset, |
5136 | extent_offset, item); | 5157 | extent_offset, item); |
@@ -5535,13 +5556,21 @@ struct btrfs_dio_private { | |||
5535 | u64 bytes; | 5556 | u64 bytes; |
5536 | u32 *csums; | 5557 | u32 *csums; |
5537 | void *private; | 5558 | void *private; |
5559 | |||
5560 | /* number of bios pending for this dio */ | ||
5561 | atomic_t pending_bios; | ||
5562 | |||
5563 | /* IO errors */ | ||
5564 | int errors; | ||
5565 | |||
5566 | struct bio *orig_bio; | ||
5538 | }; | 5567 | }; |
5539 | 5568 | ||
5540 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 5569 | static void btrfs_endio_direct_read(struct bio *bio, int err) |
5541 | { | 5570 | { |
5571 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5542 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | 5572 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; |
5543 | struct bio_vec *bvec = bio->bi_io_vec; | 5573 | struct bio_vec *bvec = bio->bi_io_vec; |
5544 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5545 | struct inode *inode = dip->inode; | 5574 | struct inode *inode = dip->inode; |
5546 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5575 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5547 | u64 start; | 5576 | u64 start; |
@@ -5595,15 +5624,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5595 | struct btrfs_trans_handle *trans; | 5624 | struct btrfs_trans_handle *trans; |
5596 | struct btrfs_ordered_extent *ordered = NULL; | 5625 | struct btrfs_ordered_extent *ordered = NULL; |
5597 | struct extent_state *cached_state = NULL; | 5626 | struct extent_state *cached_state = NULL; |
5627 | u64 ordered_offset = dip->logical_offset; | ||
5628 | u64 ordered_bytes = dip->bytes; | ||
5598 | int ret; | 5629 | int ret; |
5599 | 5630 | ||
5600 | if (err) | 5631 | if (err) |
5601 | goto out_done; | 5632 | goto out_done; |
5602 | 5633 | again: | |
5603 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | 5634 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
5604 | dip->logical_offset, dip->bytes); | 5635 | &ordered_offset, |
5636 | ordered_bytes); | ||
5605 | if (!ret) | 5637 | if (!ret) |
5606 | goto out_done; | 5638 | goto out_test; |
5607 | 5639 | ||
5608 | BUG_ON(!ordered); | 5640 | BUG_ON(!ordered); |
5609 | 5641 | ||
@@ -5663,8 +5695,20 @@ out_unlock: | |||
5663 | out: | 5695 | out: |
5664 | btrfs_delalloc_release_metadata(inode, ordered->len); | 5696 | btrfs_delalloc_release_metadata(inode, ordered->len); |
5665 | btrfs_end_transaction(trans, root); | 5697 | btrfs_end_transaction(trans, root); |
5698 | ordered_offset = ordered->file_offset + ordered->len; | ||
5666 | btrfs_put_ordered_extent(ordered); | 5699 | btrfs_put_ordered_extent(ordered); |
5667 | btrfs_put_ordered_extent(ordered); | 5700 | btrfs_put_ordered_extent(ordered); |
5701 | |||
5702 | out_test: | ||
5703 | /* | ||
5704 | * our bio might span multiple ordered extents. If we haven't | ||
5705 | * completed the accounting for the whole dio, go back and try again | ||
5706 | */ | ||
5707 | if (ordered_offset < dip->logical_offset + dip->bytes) { | ||
5708 | ordered_bytes = dip->logical_offset + dip->bytes - | ||
5709 | ordered_offset; | ||
5710 | goto again; | ||
5711 | } | ||
5668 | out_done: | 5712 | out_done: |
5669 | bio->bi_private = dip->private; | 5713 | bio->bi_private = dip->private; |
5670 | 5714 | ||
@@ -5684,6 +5728,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | |||
5684 | return 0; | 5728 | return 0; |
5685 | } | 5729 | } |
5686 | 5730 | ||
5731 | static void btrfs_end_dio_bio(struct bio *bio, int err) | ||
5732 | { | ||
5733 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5734 | |||
5735 | if (err) { | ||
5736 | printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " | ||
5737 | "sector %#Lx len %u err no %d\n", | ||
5738 | dip->inode->i_ino, bio->bi_rw, | ||
5739 | (unsigned long long)bio->bi_sector, bio->bi_size, err); | ||
5740 | dip->errors = 1; | ||
5741 | |||
5742 | /* | ||
5743 | * before atomic variable goto zero, we must make sure | ||
5744 | * dip->errors is perceived to be set. | ||
5745 | */ | ||
5746 | smp_mb__before_atomic_dec(); | ||
5747 | } | ||
5748 | |||
5749 | /* if there are more bios still pending for this dio, just exit */ | ||
5750 | if (!atomic_dec_and_test(&dip->pending_bios)) | ||
5751 | goto out; | ||
5752 | |||
5753 | if (dip->errors) | ||
5754 | bio_io_error(dip->orig_bio); | ||
5755 | else { | ||
5756 | set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); | ||
5757 | bio_endio(dip->orig_bio, 0); | ||
5758 | } | ||
5759 | out: | ||
5760 | bio_put(bio); | ||
5761 | } | ||
5762 | |||
5763 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
5764 | u64 first_sector, gfp_t gfp_flags) | ||
5765 | { | ||
5766 | int nr_vecs = bio_get_nr_vecs(bdev); | ||
5767 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | ||
5768 | } | ||
5769 | |||
5770 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | ||
5771 | int rw, u64 file_offset, int skip_sum, | ||
5772 | u32 *csums) | ||
5773 | { | ||
5774 | int write = rw & REQ_WRITE; | ||
5775 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5776 | int ret; | ||
5777 | |||
5778 | bio_get(bio); | ||
5779 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5780 | if (ret) | ||
5781 | goto err; | ||
5782 | |||
5783 | if (write && !skip_sum) { | ||
5784 | ret = btrfs_wq_submit_bio(root->fs_info, | ||
5785 | inode, rw, bio, 0, 0, | ||
5786 | file_offset, | ||
5787 | __btrfs_submit_bio_start_direct_io, | ||
5788 | __btrfs_submit_bio_done); | ||
5789 | goto err; | ||
5790 | } else if (!skip_sum) | ||
5791 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5792 | file_offset, csums); | ||
5793 | |||
5794 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5795 | err: | ||
5796 | bio_put(bio); | ||
5797 | return ret; | ||
5798 | } | ||
5799 | |||
5800 | static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | ||
5801 | int skip_sum) | ||
5802 | { | ||
5803 | struct inode *inode = dip->inode; | ||
5804 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5805 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
5806 | struct bio *bio; | ||
5807 | struct bio *orig_bio = dip->orig_bio; | ||
5808 | struct bio_vec *bvec = orig_bio->bi_io_vec; | ||
5809 | u64 start_sector = orig_bio->bi_sector; | ||
5810 | u64 file_offset = dip->logical_offset; | ||
5811 | u64 submit_len = 0; | ||
5812 | u64 map_length; | ||
5813 | int nr_pages = 0; | ||
5814 | u32 *csums = dip->csums; | ||
5815 | int ret = 0; | ||
5816 | |||
5817 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
5818 | if (!bio) | ||
5819 | return -ENOMEM; | ||
5820 | bio->bi_private = dip; | ||
5821 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5822 | atomic_inc(&dip->pending_bios); | ||
5823 | |||
5824 | map_length = orig_bio->bi_size; | ||
5825 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5826 | &map_length, NULL, 0); | ||
5827 | if (ret) { | ||
5828 | bio_put(bio); | ||
5829 | return -EIO; | ||
5830 | } | ||
5831 | |||
5832 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | ||
5833 | if (unlikely(map_length < submit_len + bvec->bv_len || | ||
5834 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
5835 | bvec->bv_offset) < bvec->bv_len)) { | ||
5836 | /* | ||
5837 | * inc the count before we submit the bio so | ||
5838 | * we know the end IO handler won't happen before | ||
5839 | * we inc the count. Otherwise, the dip might get freed | ||
5840 | * before we're done setting it up | ||
5841 | */ | ||
5842 | atomic_inc(&dip->pending_bios); | ||
5843 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | ||
5844 | file_offset, skip_sum, | ||
5845 | csums); | ||
5846 | if (ret) { | ||
5847 | bio_put(bio); | ||
5848 | atomic_dec(&dip->pending_bios); | ||
5849 | goto out_err; | ||
5850 | } | ||
5851 | |||
5852 | if (!skip_sum) | ||
5853 | csums = csums + nr_pages; | ||
5854 | start_sector += submit_len >> 9; | ||
5855 | file_offset += submit_len; | ||
5856 | |||
5857 | submit_len = 0; | ||
5858 | nr_pages = 0; | ||
5859 | |||
5860 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | ||
5861 | start_sector, GFP_NOFS); | ||
5862 | if (!bio) | ||
5863 | goto out_err; | ||
5864 | bio->bi_private = dip; | ||
5865 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5866 | |||
5867 | map_length = orig_bio->bi_size; | ||
5868 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5869 | &map_length, NULL, 0); | ||
5870 | if (ret) { | ||
5871 | bio_put(bio); | ||
5872 | goto out_err; | ||
5873 | } | ||
5874 | } else { | ||
5875 | submit_len += bvec->bv_len; | ||
5876 | nr_pages ++; | ||
5877 | bvec++; | ||
5878 | } | ||
5879 | } | ||
5880 | |||
5881 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | ||
5882 | csums); | ||
5883 | if (!ret) | ||
5884 | return 0; | ||
5885 | |||
5886 | bio_put(bio); | ||
5887 | out_err: | ||
5888 | dip->errors = 1; | ||
5889 | /* | ||
5890 | * before atomic variable goto zero, we must | ||
5891 | * make sure dip->errors is perceived to be set. | ||
5892 | */ | ||
5893 | smp_mb__before_atomic_dec(); | ||
5894 | if (atomic_dec_and_test(&dip->pending_bios)) | ||
5895 | bio_io_error(dip->orig_bio); | ||
5896 | |||
5897 | /* bio_end_io() will handle error, so we needn't return it */ | ||
5898 | return 0; | ||
5899 | } | ||
5900 | |||
5687 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | 5901 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, |
5688 | loff_t file_offset) | 5902 | loff_t file_offset) |
5689 | { | 5903 | { |
@@ -5723,36 +5937,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5723 | 5937 | ||
5724 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | 5938 | dip->disk_bytenr = (u64)bio->bi_sector << 9; |
5725 | bio->bi_private = dip; | 5939 | bio->bi_private = dip; |
5940 | dip->errors = 0; | ||
5941 | dip->orig_bio = bio; | ||
5942 | atomic_set(&dip->pending_bios, 0); | ||
5726 | 5943 | ||
5727 | if (write) | 5944 | if (write) |
5728 | bio->bi_end_io = btrfs_endio_direct_write; | 5945 | bio->bi_end_io = btrfs_endio_direct_write; |
5729 | else | 5946 | else |
5730 | bio->bi_end_io = btrfs_endio_direct_read; | 5947 | bio->bi_end_io = btrfs_endio_direct_read; |
5731 | 5948 | ||
5732 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 5949 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
5733 | if (ret) | 5950 | if (!ret) |
5734 | goto out_err; | ||
5735 | |||
5736 | if (write && !skip_sum) { | ||
5737 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5738 | inode, rw, bio, 0, 0, | ||
5739 | dip->logical_offset, | ||
5740 | __btrfs_submit_bio_start_direct_io, | ||
5741 | __btrfs_submit_bio_done); | ||
5742 | if (ret) | ||
5743 | goto out_err; | ||
5744 | return; | 5951 | return; |
5745 | } else if (!skip_sum) | ||
5746 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5747 | dip->logical_offset, dip->csums); | ||
5748 | |||
5749 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5750 | if (ret) | ||
5751 | goto out_err; | ||
5752 | return; | ||
5753 | out_err: | ||
5754 | kfree(dip->csums); | ||
5755 | kfree(dip); | ||
5756 | free_ordered: | 5952 | free_ordered: |
5757 | /* | 5953 | /* |
5758 | * If this is a write, we need to clean up the reserved space and kill | 5954 | * If this is a write, we need to clean up the reserved space and kill |
@@ -5760,8 +5956,7 @@ free_ordered: | |||
5760 | */ | 5956 | */ |
5761 | if (write) { | 5957 | if (write) { |
5762 | struct btrfs_ordered_extent *ordered; | 5958 | struct btrfs_ordered_extent *ordered; |
5763 | ordered = btrfs_lookup_ordered_extent(inode, | 5959 | ordered = btrfs_lookup_ordered_extent(inode, file_offset); |
5764 | dip->logical_offset); | ||
5765 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | 5960 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && |
5766 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | 5961 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) |
5767 | btrfs_free_reserved_extent(root, ordered->start, | 5962 | btrfs_free_reserved_extent(root, ordered->start, |
@@ -6306,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6306 | ei->ordered_data_close = 0; | 6501 | ei->ordered_data_close = 0; |
6307 | ei->orphan_meta_reserved = 0; | 6502 | ei->orphan_meta_reserved = 0; |
6308 | ei->dummy_inode = 0; | 6503 | ei->dummy_inode = 0; |
6309 | ei->force_compress = 0; | 6504 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6310 | 6505 | ||
6311 | inode = &ei->vfs_inode; | 6506 | inode = &ei->vfs_inode; |
6312 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6507 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); |
@@ -6322,6 +6517,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6322 | return inode; | 6517 | return inode; |
6323 | } | 6518 | } |
6324 | 6519 | ||
6520 | static void btrfs_i_callback(struct rcu_head *head) | ||
6521 | { | ||
6522 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
6523 | INIT_LIST_HEAD(&inode->i_dentry); | ||
6524 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | ||
6525 | } | ||
6526 | |||
6325 | void btrfs_destroy_inode(struct inode *inode) | 6527 | void btrfs_destroy_inode(struct inode *inode) |
6326 | { | 6528 | { |
6327 | struct btrfs_ordered_extent *ordered; | 6529 | struct btrfs_ordered_extent *ordered; |
@@ -6391,7 +6593,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6391 | inode_tree_del(inode); | 6593 | inode_tree_del(inode); |
6392 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | 6594 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
6393 | free: | 6595 | free: |
6394 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 6596 | call_rcu(&inode->i_rcu, btrfs_i_callback); |
6395 | } | 6597 | } |
6396 | 6598 | ||
6397 | int btrfs_drop_inode(struct inode *inode) | 6599 | int btrfs_drop_inode(struct inode *inode) |
@@ -6607,8 +6809,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6607 | BUG_ON(ret); | 6809 | BUG_ON(ret); |
6608 | 6810 | ||
6609 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 6811 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
6610 | btrfs_log_new_name(trans, old_inode, old_dir, | 6812 | struct dentry *parent = dget_parent(new_dentry); |
6611 | new_dentry->d_parent); | 6813 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
6814 | dput(parent); | ||
6612 | btrfs_end_log_trans(root); | 6815 | btrfs_end_log_trans(root); |
6613 | } | 6816 | } |
6614 | out_fail: | 6817 | out_fail: |
@@ -6758,8 +6961,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6758 | btrfs_set_trans_block_group(trans, dir); | 6961 | btrfs_set_trans_block_group(trans, dir); |
6759 | 6962 | ||
6760 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6963 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
6761 | dentry->d_name.len, | 6964 | dentry->d_name.len, dir->i_ino, objectid, |
6762 | dentry->d_parent->d_inode->i_ino, objectid, | ||
6763 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 6965 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, |
6764 | &index); | 6966 | &index); |
6765 | err = PTR_ERR(inode); | 6967 | err = PTR_ERR(inode); |
@@ -6773,7 +6975,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6773 | } | 6975 | } |
6774 | 6976 | ||
6775 | btrfs_set_trans_block_group(trans, inode); | 6977 | btrfs_set_trans_block_group(trans, inode); |
6776 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 6978 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
6777 | if (err) | 6979 | if (err) |
6778 | drop_inode = 1; | 6980 | drop_inode = 1; |
6779 | else { | 6981 | else { |
@@ -6844,6 +7046,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
6844 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7046 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6845 | struct btrfs_key ins; | 7047 | struct btrfs_key ins; |
6846 | u64 cur_offset = start; | 7048 | u64 cur_offset = start; |
7049 | u64 i_size; | ||
6847 | int ret = 0; | 7050 | int ret = 0; |
6848 | bool own_trans = true; | 7051 | bool own_trans = true; |
6849 | 7052 | ||
@@ -6885,11 +7088,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
6885 | (actual_len > inode->i_size) && | 7088 | (actual_len > inode->i_size) && |
6886 | (cur_offset > inode->i_size)) { | 7089 | (cur_offset > inode->i_size)) { |
6887 | if (cur_offset > actual_len) | 7090 | if (cur_offset > actual_len) |
6888 | i_size_write(inode, actual_len); | 7091 | i_size = actual_len; |
6889 | else | 7092 | else |
6890 | i_size_write(inode, cur_offset); | 7093 | i_size = cur_offset; |
6891 | i_size_write(inode, cur_offset); | 7094 | i_size_write(inode, i_size); |
6892 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); | 7095 | btrfs_ordered_update_i_size(inode, i_size, NULL); |
6893 | } | 7096 | } |
6894 | 7097 | ||
6895 | ret = btrfs_update_inode(trans, root, inode); | 7098 | ret = btrfs_update_inode(trans, root, inode); |
@@ -6919,118 +7122,20 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, | |||
6919 | min_size, actual_len, alloc_hint, trans); | 7122 | min_size, actual_len, alloc_hint, trans); |
6920 | } | 7123 | } |
6921 | 7124 | ||
6922 | static long btrfs_fallocate(struct inode *inode, int mode, | ||
6923 | loff_t offset, loff_t len) | ||
6924 | { | ||
6925 | struct extent_state *cached_state = NULL; | ||
6926 | u64 cur_offset; | ||
6927 | u64 last_byte; | ||
6928 | u64 alloc_start; | ||
6929 | u64 alloc_end; | ||
6930 | u64 alloc_hint = 0; | ||
6931 | u64 locked_end; | ||
6932 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
6933 | struct extent_map *em; | ||
6934 | int ret; | ||
6935 | |||
6936 | alloc_start = offset & ~mask; | ||
6937 | alloc_end = (offset + len + mask) & ~mask; | ||
6938 | |||
6939 | /* | ||
6940 | * wait for ordered IO before we have any locks. We'll loop again | ||
6941 | * below with the locks held. | ||
6942 | */ | ||
6943 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
6944 | |||
6945 | mutex_lock(&inode->i_mutex); | ||
6946 | if (alloc_start > inode->i_size) { | ||
6947 | ret = btrfs_cont_expand(inode, alloc_start); | ||
6948 | if (ret) | ||
6949 | goto out; | ||
6950 | } | ||
6951 | |||
6952 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
6953 | if (ret) | ||
6954 | goto out; | ||
6955 | |||
6956 | locked_end = alloc_end - 1; | ||
6957 | while (1) { | ||
6958 | struct btrfs_ordered_extent *ordered; | ||
6959 | |||
6960 | /* the extent lock is ordered inside the running | ||
6961 | * transaction | ||
6962 | */ | ||
6963 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
6964 | locked_end, 0, &cached_state, GFP_NOFS); | ||
6965 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
6966 | alloc_end - 1); | ||
6967 | if (ordered && | ||
6968 | ordered->file_offset + ordered->len > alloc_start && | ||
6969 | ordered->file_offset < alloc_end) { | ||
6970 | btrfs_put_ordered_extent(ordered); | ||
6971 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
6972 | alloc_start, locked_end, | ||
6973 | &cached_state, GFP_NOFS); | ||
6974 | /* | ||
6975 | * we can't wait on the range with the transaction | ||
6976 | * running or with the extent lock held | ||
6977 | */ | ||
6978 | btrfs_wait_ordered_range(inode, alloc_start, | ||
6979 | alloc_end - alloc_start); | ||
6980 | } else { | ||
6981 | if (ordered) | ||
6982 | btrfs_put_ordered_extent(ordered); | ||
6983 | break; | ||
6984 | } | ||
6985 | } | ||
6986 | |||
6987 | cur_offset = alloc_start; | ||
6988 | while (1) { | ||
6989 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
6990 | alloc_end - cur_offset, 0); | ||
6991 | BUG_ON(IS_ERR(em) || !em); | ||
6992 | last_byte = min(extent_map_end(em), alloc_end); | ||
6993 | last_byte = (last_byte + mask) & ~mask; | ||
6994 | if (em->block_start == EXTENT_MAP_HOLE || | ||
6995 | (cur_offset >= inode->i_size && | ||
6996 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
6997 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
6998 | last_byte - cur_offset, | ||
6999 | 1 << inode->i_blkbits, | ||
7000 | offset + len, | ||
7001 | &alloc_hint); | ||
7002 | if (ret < 0) { | ||
7003 | free_extent_map(em); | ||
7004 | break; | ||
7005 | } | ||
7006 | } | ||
7007 | free_extent_map(em); | ||
7008 | |||
7009 | cur_offset = last_byte; | ||
7010 | if (cur_offset >= alloc_end) { | ||
7011 | ret = 0; | ||
7012 | break; | ||
7013 | } | ||
7014 | } | ||
7015 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
7016 | &cached_state, GFP_NOFS); | ||
7017 | |||
7018 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
7019 | out: | ||
7020 | mutex_unlock(&inode->i_mutex); | ||
7021 | return ret; | ||
7022 | } | ||
7023 | |||
7024 | static int btrfs_set_page_dirty(struct page *page) | 7125 | static int btrfs_set_page_dirty(struct page *page) |
7025 | { | 7126 | { |
7026 | return __set_page_dirty_nobuffers(page); | 7127 | return __set_page_dirty_nobuffers(page); |
7027 | } | 7128 | } |
7028 | 7129 | ||
7029 | static int btrfs_permission(struct inode *inode, int mask) | 7130 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
7030 | { | 7131 | { |
7132 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7133 | |||
7134 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7135 | return -EROFS; | ||
7031 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7136 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
7032 | return -EACCES; | 7137 | return -EACCES; |
7033 | return generic_permission(inode, mask, btrfs_check_acl); | 7138 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
7034 | } | 7139 | } |
7035 | 7140 | ||
7036 | static const struct inode_operations btrfs_dir_inode_operations = { | 7141 | static const struct inode_operations btrfs_dir_inode_operations = { |
@@ -7123,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7123 | .listxattr = btrfs_listxattr, | 7228 | .listxattr = btrfs_listxattr, |
7124 | .removexattr = btrfs_removexattr, | 7229 | .removexattr = btrfs_removexattr, |
7125 | .permission = btrfs_permission, | 7230 | .permission = btrfs_permission, |
7126 | .fallocate = btrfs_fallocate, | ||
7127 | .fiemap = btrfs_fiemap, | 7231 | .fiemap = btrfs_fiemap, |
7128 | }; | 7232 | }; |
7129 | static const struct inode_operations btrfs_special_inode_operations = { | 7233 | static const struct inode_operations btrfs_special_inode_operations = { |
@@ -7139,6 +7243,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7139 | .readlink = generic_readlink, | 7243 | .readlink = generic_readlink, |
7140 | .follow_link = page_follow_link_light, | 7244 | .follow_link = page_follow_link_light, |
7141 | .put_link = page_put_link, | 7245 | .put_link = page_put_link, |
7246 | .getattr = btrfs_getattr, | ||
7142 | .permission = btrfs_permission, | 7247 | .permission = btrfs_permission, |
7143 | .setxattr = btrfs_setxattr, | 7248 | .setxattr = btrfs_setxattr, |
7144 | .getxattr = btrfs_getxattr, | 7249 | .getxattr = btrfs_getxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 463d91b4dd3a..a506a22b522a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
147 | unsigned int flags, oldflags; | 147 | unsigned int flags, oldflags; |
148 | int ret; | 148 | int ret; |
149 | 149 | ||
150 | if (btrfs_root_readonly(root)) | ||
151 | return -EROFS; | ||
152 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 153 | if (copy_from_user(&flags, arg, sizeof(flags))) |
151 | return -EFAULT; | 154 | return -EFAULT; |
152 | 155 | ||
@@ -233,7 +236,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
233 | struct btrfs_inode_item *inode_item; | 236 | struct btrfs_inode_item *inode_item; |
234 | struct extent_buffer *leaf; | 237 | struct extent_buffer *leaf; |
235 | struct btrfs_root *new_root; | 238 | struct btrfs_root *new_root; |
236 | struct inode *dir = dentry->d_parent->d_inode; | 239 | struct dentry *parent = dget_parent(dentry); |
240 | struct inode *dir; | ||
237 | int ret; | 241 | int ret; |
238 | int err; | 242 | int err; |
239 | u64 objectid; | 243 | u64 objectid; |
@@ -242,8 +246,13 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
242 | 246 | ||
243 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | 247 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, |
244 | 0, &objectid); | 248 | 0, &objectid); |
245 | if (ret) | 249 | if (ret) { |
250 | dput(parent); | ||
246 | return ret; | 251 | return ret; |
252 | } | ||
253 | |||
254 | dir = parent->d_inode; | ||
255 | |||
247 | /* | 256 | /* |
248 | * 1 - inode item | 257 | * 1 - inode item |
249 | * 2 - refs | 258 | * 2 - refs |
@@ -251,8 +260,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
251 | * 2 - dir items | 260 | * 2 - dir items |
252 | */ | 261 | */ |
253 | trans = btrfs_start_transaction(root, 6); | 262 | trans = btrfs_start_transaction(root, 6); |
254 | if (IS_ERR(trans)) | 263 | if (IS_ERR(trans)) { |
264 | dput(parent); | ||
255 | return PTR_ERR(trans); | 265 | return PTR_ERR(trans); |
266 | } | ||
256 | 267 | ||
257 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 268 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
258 | 0, objectid, NULL, 0, 0, 0); | 269 | 0, objectid, NULL, 0, 0, 0); |
@@ -339,6 +350,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
339 | 350 | ||
340 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 351 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
341 | fail: | 352 | fail: |
353 | dput(parent); | ||
342 | if (async_transid) { | 354 | if (async_transid) { |
343 | *async_transid = trans->transid; | 355 | *async_transid = trans->transid; |
344 | err = btrfs_commit_transaction_async(trans, root, 1); | 356 | err = btrfs_commit_transaction_async(trans, root, 1); |
@@ -351,9 +363,11 @@ fail: | |||
351 | } | 363 | } |
352 | 364 | ||
353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 365 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
354 | char *name, int namelen, u64 *async_transid) | 366 | char *name, int namelen, u64 *async_transid, |
367 | bool readonly) | ||
355 | { | 368 | { |
356 | struct inode *inode; | 369 | struct inode *inode; |
370 | struct dentry *parent; | ||
357 | struct btrfs_pending_snapshot *pending_snapshot; | 371 | struct btrfs_pending_snapshot *pending_snapshot; |
358 | struct btrfs_trans_handle *trans; | 372 | struct btrfs_trans_handle *trans; |
359 | int ret; | 373 | int ret; |
@@ -368,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
368 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 382 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
369 | pending_snapshot->dentry = dentry; | 383 | pending_snapshot->dentry = dentry; |
370 | pending_snapshot->root = root; | 384 | pending_snapshot->root = root; |
385 | pending_snapshot->readonly = readonly; | ||
371 | 386 | ||
372 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 387 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
373 | if (IS_ERR(trans)) { | 388 | if (IS_ERR(trans)) { |
@@ -396,7 +411,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
396 | 411 | ||
397 | btrfs_orphan_cleanup(pending_snapshot->snap); | 412 | btrfs_orphan_cleanup(pending_snapshot->snap); |
398 | 413 | ||
399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 414 | parent = dget_parent(dentry); |
415 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
416 | dput(parent); | ||
400 | if (IS_ERR(inode)) { | 417 | if (IS_ERR(inode)) { |
401 | ret = PTR_ERR(inode); | 418 | ret = PTR_ERR(inode); |
402 | goto fail; | 419 | goto fail; |
@@ -497,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
497 | static noinline int btrfs_mksubvol(struct path *parent, | 514 | static noinline int btrfs_mksubvol(struct path *parent, |
498 | char *name, int namelen, | 515 | char *name, int namelen, |
499 | struct btrfs_root *snap_src, | 516 | struct btrfs_root *snap_src, |
500 | u64 *async_transid) | 517 | u64 *async_transid, bool readonly) |
501 | { | 518 | { |
502 | struct inode *dir = parent->dentry->d_inode; | 519 | struct inode *dir = parent->dentry->d_inode; |
503 | struct dentry *dentry; | 520 | struct dentry *dentry; |
@@ -529,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
529 | 546 | ||
530 | if (snap_src) { | 547 | if (snap_src) { |
531 | error = create_snapshot(snap_src, dentry, | 548 | error = create_snapshot(snap_src, dentry, |
532 | name, namelen, async_transid); | 549 | name, namelen, async_transid, readonly); |
533 | } else { | 550 | } else { |
534 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 551 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
535 | name, namelen, async_transid); | 552 | name, namelen, async_transid); |
@@ -626,9 +643,11 @@ static int btrfs_defrag_file(struct file *file, | |||
626 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 643 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
627 | struct btrfs_ordered_extent *ordered; | 644 | struct btrfs_ordered_extent *ordered; |
628 | struct page *page; | 645 | struct page *page; |
646 | struct btrfs_super_block *disk_super; | ||
629 | unsigned long last_index; | 647 | unsigned long last_index; |
630 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 648 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
631 | unsigned long total_read = 0; | 649 | unsigned long total_read = 0; |
650 | u64 features; | ||
632 | u64 page_start; | 651 | u64 page_start; |
633 | u64 page_end; | 652 | u64 page_end; |
634 | u64 last_len = 0; | 653 | u64 last_len = 0; |
@@ -636,6 +655,14 @@ static int btrfs_defrag_file(struct file *file, | |||
636 | u64 defrag_end = 0; | 655 | u64 defrag_end = 0; |
637 | unsigned long i; | 656 | unsigned long i; |
638 | int ret; | 657 | int ret; |
658 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
659 | |||
660 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
661 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
662 | return -EINVAL; | ||
663 | if (range->compress_type) | ||
664 | compress_type = range->compress_type; | ||
665 | } | ||
639 | 666 | ||
640 | if (inode->i_size == 0) | 667 | if (inode->i_size == 0) |
641 | return 0; | 668 | return 0; |
@@ -671,7 +698,7 @@ static int btrfs_defrag_file(struct file *file, | |||
671 | total_read++; | 698 | total_read++; |
672 | mutex_lock(&inode->i_mutex); | 699 | mutex_lock(&inode->i_mutex); |
673 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 700 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
674 | BTRFS_I(inode)->force_compress = 1; | 701 | BTRFS_I(inode)->force_compress = compress_type; |
675 | 702 | ||
676 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 703 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
677 | if (ret) | 704 | if (ret) |
@@ -769,10 +796,17 @@ loop_unlock: | |||
769 | atomic_dec(&root->fs_info->async_submit_draining); | 796 | atomic_dec(&root->fs_info->async_submit_draining); |
770 | 797 | ||
771 | mutex_lock(&inode->i_mutex); | 798 | mutex_lock(&inode->i_mutex); |
772 | BTRFS_I(inode)->force_compress = 0; | 799 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
773 | mutex_unlock(&inode->i_mutex); | 800 | mutex_unlock(&inode->i_mutex); |
774 | } | 801 | } |
775 | 802 | ||
803 | disk_super = &root->fs_info->super_copy; | ||
804 | features = btrfs_super_incompat_flags(disk_super); | ||
805 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
806 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
807 | btrfs_set_super_incompat_flags(disk_super, features); | ||
808 | } | ||
809 | |||
776 | return 0; | 810 | return 0; |
777 | 811 | ||
778 | err_reservations: | 812 | err_reservations: |
@@ -889,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
889 | char *name, | 923 | char *name, |
890 | unsigned long fd, | 924 | unsigned long fd, |
891 | int subvol, | 925 | int subvol, |
892 | u64 *transid) | 926 | u64 *transid, |
927 | bool readonly) | ||
893 | { | 928 | { |
894 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 929 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
895 | struct file *src_file; | 930 | struct file *src_file; |
@@ -907,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
907 | 942 | ||
908 | if (subvol) { | 943 | if (subvol) { |
909 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 944 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
910 | NULL, transid); | 945 | NULL, transid, readonly); |
911 | } else { | 946 | } else { |
912 | struct inode *src_inode; | 947 | struct inode *src_inode; |
913 | src_file = fget(fd); | 948 | src_file = fget(fd); |
@@ -926,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
926 | } | 961 | } |
927 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 962 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
928 | BTRFS_I(src_inode)->root, | 963 | BTRFS_I(src_inode)->root, |
929 | transid); | 964 | transid, readonly); |
930 | fput(src_file); | 965 | fput(src_file); |
931 | } | 966 | } |
932 | out: | 967 | out: |
@@ -934,49 +969,142 @@ out: | |||
934 | } | 969 | } |
935 | 970 | ||
/*
 * Right-hand column: legacy (v1) snapshot/subvolume creation ioctl.
 * It copies a struct btrfs_ioctl_vol_args from user space, forces NUL
 * termination of the name, and calls btrfs_ioctl_snap_create_transid()
 * with a NULL transid pointer and readonly == false.
 * Left-hand column: the removed variant that also took an 'async' flag.
 */
936 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 971 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
937 | void __user *arg, int subvol, | 972 | void __user *arg, int subvol) |
938 | int async) | ||
939 | { | 973 | { |
940 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 974 | struct btrfs_ioctl_vol_args *vol_args; |
941 | struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; | ||
942 | char *name; | ||
943 | u64 fd; | ||
944 | u64 transid = 0; | ||
945 | int ret; | 975 | int ret; |
946 | 976 | ||
947 | if (async) { | 977 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
948 | async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); | 978 | if (IS_ERR(vol_args)) |
949 | if (IS_ERR(async_vol_args)) | 979 | return PTR_ERR(vol_args); |
950 | return PTR_ERR(async_vol_args); | 980 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
951 | 981 | ||
952 | name = async_vol_args->name; | 982 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
953 | fd = async_vol_args->fd; | 983 | vol_args->fd, subvol, |
954 | async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; | 984 | NULL, false); |
955 | } else { | ||
956 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
957 | if (IS_ERR(vol_args)) | ||
958 | return PTR_ERR(vol_args); | ||
959 | name = vol_args->name; | ||
960 | fd = vol_args->fd; | ||
961 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
962 | } | ||
963 | 985 | ||
964 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | 986 | kfree(vol_args); |
965 | subvol, &transid); | 987 | return ret; |
988 | } | ||
966 | 989 | ||
/*
 * Right-hand column: v2 creation ioctl taking a struct
 * btrfs_ioctl_vol_args_v2.
 *  - Any flag outside BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY is
 *    rejected with -EOPNOTSUPP.
 *  - A transid pointer is handed to btrfs_ioctl_snap_create_transid() only
 *    when CREATE_ASYNC is set; on success that value is copied back into
 *    the user's 'transid' field (-EFAULT if the copy fails).
 *  - RDONLY is forwarded as the 'readonly' argument.
 *  - The memdup_user() copy is freed on every path after it succeeded.
 */
967 | if (!ret && async) { | 990 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, |
968 | if (copy_to_user(arg + | 991 | void __user *arg, int subvol) |
969 | offsetof(struct btrfs_ioctl_async_vol_args, | 992 | { |
970 | transid), &transid, sizeof(transid))) | 993 | struct btrfs_ioctl_vol_args_v2 *vol_args; |
971 | return -EFAULT; | 994 | int ret; |
995 | u64 transid = 0; | ||
996 | u64 *ptr = NULL; | ||
997 | bool readonly = false; | ||
998 | |||
999 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
1000 | if (IS_ERR(vol_args)) | ||
1001 | return PTR_ERR(vol_args); | ||
1002 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
1003 | |||
1004 | if (vol_args->flags & | ||
1005 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { | ||
1006 | ret = -EOPNOTSUPP; | ||
1007 | goto out; | ||
972 | } | 1008 | } |
973 | 1009 | ||
1010 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1011 | ptr = &transid; | ||
1012 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1013 | readonly = true; | ||
1014 | |||
1015 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1016 | vol_args->fd, subvol, | ||
1017 | ptr, readonly); | ||
1018 | |||
1019 | if (ret == 0 && ptr && | ||
1020 | copy_to_user(arg + | ||
1021 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1022 | transid), ptr, sizeof(*ptr))) | ||
1023 | ret = -EFAULT; | ||
1024 | out: | ||
974 | kfree(vol_args); | 1025 | kfree(vol_args); |
975 | kfree(async_vol_args); | 1026 | return ret; |
1027 | } | ||
1028 | |||
/*
 * Right-hand column: report subvolume flags to user space as a u64.
 * Returns -EINVAL unless the file's inode number is
 * BTRFS_FIRST_FREE_OBJECTID (presumably the subvolume's top-level
 * directory - confirm).  BTRFS_SUBVOL_RDONLY is reported when
 * btrfs_root_readonly() is true, sampled under subvol_sem held for read.
 * Returns -EFAULT if the result cannot be copied out, otherwise 0.
 */
1029 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1030 | void __user *arg) | ||
1031 | { | ||
1032 | struct inode *inode = fdentry(file)->d_inode; | ||
1033 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1034 | int ret = 0; | ||
1035 | u64 flags = 0; | ||
1036 | |||
1037 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1038 | return -EINVAL; | ||
1039 | |||
1040 | down_read(&root->fs_info->subvol_sem); | ||
1041 | if (btrfs_root_readonly(root)) | ||
1042 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1043 | up_read(&root->fs_info->subvol_sem); | ||
1044 | |||
1045 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1046 | ret = -EFAULT; | ||
976 | 1047 | ||
977 | return ret; | 1048 | return ret; |
978 | } | 1049 | } |
979 | 1050 | ||
1051 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1052 | void __user *arg) | ||
1053 | { | ||
1054 | struct inode *inode = fdentry(file)->d_inode; | ||
1055 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1056 | struct btrfs_trans_handle *trans; | ||
1057 | u64 root_flags; | ||
1058 | u64 flags; | ||
1059 | int ret = 0; | ||
1060 | |||
1061 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1062 | return -EROFS; | ||
1063 | |||
1064 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1065 | return -EINVAL; | ||
1066 | |||
1067 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1068 | return -EFAULT; | ||
1069 | |||
1070 | if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) | ||
1071 | return -EINVAL; | ||
1072 | |||
1073 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1074 | return -EOPNOTSUPP; | ||
1075 | |||
1076 | down_write(&root->fs_info->subvol_sem); | ||
1077 | |||
1078 | /* nothing to do */ | ||
1079 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1080 | goto out; | ||
1081 | |||
1082 | root_flags = btrfs_root_flags(&root->root_item); | ||
1083 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1084 | btrfs_set_root_flags(&root->root_item, | ||
1085 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1086 | else | ||
1087 | btrfs_set_root_flags(&root->root_item, | ||
1088 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1089 | |||
1090 | trans = btrfs_start_transaction(root, 1); | ||
1091 | if (IS_ERR(trans)) { | ||
1092 | ret = PTR_ERR(trans); | ||
1093 | goto out_reset; | ||
1094 | } | ||
1095 | |||
1096 | ret = btrfs_update_root(trans, root, | ||
1097 | &root->root_key, &root->root_item); | ||
1098 | |||
1099 | btrfs_commit_transaction(trans, root); | ||
1100 | out_reset: | ||
1101 | if (ret) | ||
1102 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1103 | out: | ||
1104 | up_write(&root->fs_info->subvol_sem); | ||
1105 | return ret; | ||
1106 | } | ||
1107 | |||
980 | /* | 1108 | /* |
981 | * helper to check if the subvolume references other subvolumes | 1109 | * helper to check if the subvolume references other subvolumes |
982 | */ | 1110 | */ |
@@ -1485,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1485 | struct btrfs_ioctl_defrag_range_args *range; | 1613 | struct btrfs_ioctl_defrag_range_args *range; |
1486 | int ret; | 1614 | int ret; |
1487 | 1615 | ||
1616 | if (btrfs_root_readonly(root)) | ||
1617 | return -EROFS; | ||
1618 | |||
1488 | ret = mnt_want_write(file->f_path.mnt); | 1619 | ret = mnt_want_write(file->f_path.mnt); |
1489 | if (ret) | 1620 | if (ret) |
1490 | return ret; | 1621 | return ret; |
@@ -1613,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1613 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 1744 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1614 | return -EINVAL; | 1745 | return -EINVAL; |
1615 | 1746 | ||
1747 | if (btrfs_root_readonly(root)) | ||
1748 | return -EROFS; | ||
1749 | |||
1616 | ret = mnt_want_write(file->f_path.mnt); | 1750 | ret = mnt_want_write(file->f_path.mnt); |
1617 | if (ret) | 1751 | if (ret) |
1618 | return ret; | 1752 | return ret; |
@@ -1669,12 +1803,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1669 | olen = len = src->i_size - off; | 1803 | olen = len = src->i_size - off; |
1670 | /* if we extend to eof, continue to block boundary */ | 1804 | /* if we extend to eof, continue to block boundary */ |
1671 | if (off + len == src->i_size) | 1805 | if (off + len == src->i_size) |
1672 | len = ((src->i_size + bs-1) & ~(bs-1)) | 1806 | len = ALIGN(src->i_size, bs) - off; |
1673 | - off; | ||
1674 | 1807 | ||
1675 | /* verify the end result is block aligned */ | 1808 | /* verify the end result is block aligned */ |
1676 | if ((off & (bs-1)) || | 1809 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
1677 | ((off + len) & (bs-1))) | 1810 | !IS_ALIGNED(destoff, bs)) |
1678 | goto out_unlock; | 1811 | goto out_unlock; |
1679 | 1812 | ||
1680 | /* do any pending delalloc/csum calc on src, one way or | 1813 | /* do any pending delalloc/csum calc on src, one way or |
@@ -1874,8 +2007,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1874 | * but shouldn't round up the file size | 2007 | * but shouldn't round up the file size |
1875 | */ | 2008 | */ |
1876 | endoff = new_key.offset + datal; | 2009 | endoff = new_key.offset + datal; |
1877 | if (endoff > off+olen) | 2010 | if (endoff > destoff+olen) |
1878 | endoff = off+olen; | 2011 | endoff = destoff+olen; |
1879 | if (endoff > inode->i_size) | 2012 | if (endoff > inode->i_size) |
1880 | btrfs_i_size_write(inode, endoff); | 2013 | btrfs_i_size_write(inode, endoff); |
1881 | 2014 | ||
@@ -1935,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1935 | if (file->private_data) | 2068 | if (file->private_data) |
1936 | goto out; | 2069 | goto out; |
1937 | 2070 | ||
2071 | ret = -EROFS; | ||
2072 | if (btrfs_root_readonly(root)) | ||
2073 | goto out; | ||
2074 | |||
1938 | ret = mnt_want_write(file->f_path.mnt); | 2075 | ret = mnt_want_write(file->f_path.mnt); |
1939 | if (ret) | 2076 | if (ret) |
1940 | goto out; | 2077 | goto out; |
@@ -2234,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2234 | case FS_IOC_GETVERSION: | 2371 | case FS_IOC_GETVERSION: |
2235 | return btrfs_ioctl_getversion(file, argp); | 2372 | return btrfs_ioctl_getversion(file, argp); |
2236 | case BTRFS_IOC_SNAP_CREATE: | 2373 | case BTRFS_IOC_SNAP_CREATE: |
2237 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2374 | return btrfs_ioctl_snap_create(file, argp, 0); |
2238 | case BTRFS_IOC_SNAP_CREATE_ASYNC: | 2375 | case BTRFS_IOC_SNAP_CREATE_V2: |
2239 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2376 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
2240 | case BTRFS_IOC_SUBVOL_CREATE: | 2377 | case BTRFS_IOC_SUBVOL_CREATE: |
2241 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2378 | return btrfs_ioctl_snap_create(file, argp, 1); |
2242 | case BTRFS_IOC_SNAP_DESTROY: | 2379 | case BTRFS_IOC_SNAP_DESTROY: |
2243 | return btrfs_ioctl_snap_destroy(file, argp); | 2380 | return btrfs_ioctl_snap_destroy(file, argp); |
2381 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2382 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2383 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2384 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2244 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2385 | case BTRFS_IOC_DEFAULT_SUBVOL: |
2245 | return btrfs_ioctl_default_subvol(file, argp); | 2386 | return btrfs_ioctl_default_subvol(file, argp); |
2246 | case BTRFS_IOC_DEFRAG: | 2387 | case BTRFS_IOC_DEFRAG: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 17c99ebdf960..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -30,11 +30,16 @@ struct btrfs_ioctl_vol_args { | |||
30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
31 | }; | 31 | }; |
32 | 32 | ||
/*
 * Right-hand column: flag bits and argument block for the v2 create ioctl
 * (BTRFS_IOC_SNAP_CREATE_V2).
 *   fd      - passed through to btrfs_ioctl_snap_create_transid()
 *   transid - written back by the kernel when BTRFS_SUBVOL_CREATE_ASYNC
 *             is set and creation succeeded
 *   flags   - mask of BTRFS_SUBVOL_CREATE_ASYNC / BTRFS_SUBVOL_RDONLY
 *   name    - the kernel forces name[BTRFS_SUBVOL_NAME_MAX] to '\0'
 * NOTE(review): 7 * 8 + 4040 = 4096, so the name length looks chosen to
 * keep the structure at 4096 bytes - confirm.
 * Left-hand column: the removed btrfs_ioctl_async_vol_args layout.
 */
33 | #define BTRFS_SNAPSHOT_NAME_MAX 4079 | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | struct btrfs_ioctl_async_vol_args { | 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) |
35 | |||
36 | #define BTRFS_SUBVOL_NAME_MAX 4039 | ||
37 | struct btrfs_ioctl_vol_args_v2 { | ||
35 | __s64 fd; | 38 | __s64 fd; |
36 | __u64 transid; | 39 | __u64 transid; |
37 | char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; | 40 | __u64 flags; |
41 | __u64 unused[4]; | ||
42 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | ||
38 | }; | 43 | }; |
39 | 44 | ||
40 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 | 45 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 |
@@ -129,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { | |||
129 | */ | 134 | */ |
130 | __u32 extent_thresh; | 135 | __u32 extent_thresh; |
131 | 136 | ||
137 | /* | ||
138 | * which compression method to use if turning on compression | ||
139 | * for this defrag operation. If unspecified, zlib will | ||
140 | * be used | ||
141 | */ | ||
142 | __u32 compress_type; | ||
143 | |||
132 | /* spare for later */ | 144 | /* spare for later */ |
133 | __u32 unused[5]; | 145 | __u32 unused[4]; |
134 | }; | 146 | }; |
135 | 147 | ||
136 | struct btrfs_ioctl_space_info { | 148 | struct btrfs_ioctl_space_info { |
@@ -187,6 +199,8 @@ struct btrfs_ioctl_space_args { | |||
187 | struct btrfs_ioctl_space_args) | 199 | struct btrfs_ioctl_space_args) |
188 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) | 200 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) |
189 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
190 | #define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
191 | struct btrfs_ioctl_async_vol_args) | 203 | struct btrfs_ioctl_vol_args_v2) |
204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
192 | #endif | 206 | #endif |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
/* Per-operation scratch buffers for the LZO compress/decompress helpers. */
32 | struct workspace { | ||
33 | void *mem; /* LZO1X_MEM_COMPRESS bytes, passed to lzo1x_1_compress() */ | ||
34 | void *buf; /* where decompressed data goes (lzo1x_decompress_safe output) */ | ||
35 | void *cbuf; /* where compressed data goes (lzo1x_1_compress output; also staging for compressed input that spans pages) */ | ||
36 | struct list_head list; | ||
37 | }; | ||
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
86 | |||
/*
 * Compress up to @len bytes of page-cache data, starting at file offset
 * @start, into newly allocated pages that are stored in @pages.
 *
 * Output layout produced below: the first LZO_LEN bytes of pages[0] hold
 * the total output size (filled in last), and each compressed chunk is
 * preceded by its own LZO_LEN-byte length.  Input is handed to
 * lzo1x_1_compress() at most PAGE_CACHE_SIZE bytes at a time.
 *
 * @ws:            list head embedded in the struct workspace to use
 * @mapping:       address space the input pages are looked up in
 * @nr_dest_pages: capacity of @pages; needing more yields -1
 * @out_pages:     set to the number of pages placed in @pages on every
 *                 exit path
 * @total_in, @total_out: zeroed on entry, filled in only on the success
 *                 path
 * @max_out:       once the output grows beyond this, no more input is taken
 *
 * Returns 0 on success, -ENOMEM if an output page cannot be allocated, or
 * -1 if lzo1x_1_compress() fails or @pages is full.
 * NOTE(review): both "output is bigger than input" bail-outs jump to out
 * without changing ret or the totals - confirm how callers interpret that.
 * NOTE(review): find_get_page() results are handed to kmap() unchecked.
 */
87 | static int lzo_compress_pages(struct list_head *ws, | ||
88 | struct address_space *mapping, | ||
89 | u64 start, unsigned long len, | ||
90 | struct page **pages, | ||
91 | unsigned long nr_dest_pages, | ||
92 | unsigned long *out_pages, | ||
93 | unsigned long *total_in, | ||
94 | unsigned long *total_out, | ||
95 | unsigned long max_out) | ||
96 | { | ||
97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
98 | int ret = 0; | ||
99 | char *data_in; | ||
100 | char *cpage_out; | ||
101 | int nr_pages = 0; | ||
102 | struct page *in_page = NULL; | ||
103 | struct page *out_page = NULL; | ||
104 | unsigned long bytes_left; | ||
105 | |||
106 | size_t in_len; | ||
107 | size_t out_len; | ||
108 | char *buf; | ||
109 | unsigned long tot_in = 0; | ||
110 | unsigned long tot_out = 0; | ||
111 | unsigned long pg_bytes_left; | ||
112 | unsigned long out_offset; | ||
113 | unsigned long bytes; | ||
114 | |||
115 | *out_pages = 0; | ||
116 | *total_out = 0; | ||
117 | *total_in = 0; | ||
118 | |||
119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
120 | data_in = kmap(in_page); | ||
121 | |||
122 | /* | ||
123 | * store the size of all chunks of compressed data in | ||
124 | * the first 4 bytes | ||
125 | */ | ||
126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
127 | if (out_page == NULL) { | ||
128 | ret = -ENOMEM; | ||
129 | goto out; | ||
130 | } | ||
131 | cpage_out = kmap(out_page); | ||
/* the total-size header is written at the end; skip over its slot for now */
132 | out_offset = LZO_LEN; | ||
133 | tot_out = LZO_LEN; | ||
134 | pages[0] = out_page; | ||
135 | nr_pages = 1; | ||
136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
137 | |||
138 | /* compress at most one page of data each time */ | ||
139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
140 | while (tot_in < len) { | ||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
142 | &out_len, workspace->mem); | ||
143 | if (ret != LZO_E_OK) { | ||
144 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
145 | ret); | ||
146 | ret = -1; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | /* store the size of this chunk of compressed data */ | ||
151 | write_compress_length(cpage_out + out_offset, out_len); | ||
152 | tot_out += LZO_LEN; | ||
153 | out_offset += LZO_LEN; | ||
154 | pg_bytes_left -= LZO_LEN; | ||
155 | |||
156 | tot_in += in_len; | ||
157 | tot_out += out_len; | ||
158 | |||
159 | /* copy bytes from the working buffer into the pages */ | ||
160 | buf = workspace->cbuf; | ||
161 | while (out_len) { | ||
162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
163 | |||
164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
165 | |||
166 | out_len -= bytes; | ||
167 | pg_bytes_left -= bytes; | ||
168 | buf += bytes; | ||
169 | out_offset += bytes; | ||
170 | |||
171 | /* | ||
172 | * we need another page for writing out. | ||
173 | * | ||
174 | * Note if there's less than 4 bytes left, we just | ||
175 | * skip to a new page. | ||
176 | */ | ||
177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
178 | pg_bytes_left == 0) { | ||
/* zero the unusable tail and count it as output, so a length header never straddles two pages */
179 | if (pg_bytes_left) { | ||
180 | memset(cpage_out + out_offset, 0, | ||
181 | pg_bytes_left); | ||
182 | tot_out += pg_bytes_left; | ||
183 | } | ||
184 | |||
185 | /* we're done, don't allocate new page */ | ||
186 | if (out_len == 0 && tot_in >= len) | ||
187 | break; | ||
188 | |||
189 | kunmap(out_page); | ||
/* the page was just unmapped: clear out_page so the exit path does not kunmap it again */
190 | if (nr_pages == nr_dest_pages) { | ||
191 | out_page = NULL; | ||
192 | ret = -1; | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
197 | if (out_page == NULL) { | ||
198 | ret = -ENOMEM; | ||
199 | goto out; | ||
200 | } | ||
201 | cpage_out = kmap(out_page); | ||
202 | pages[nr_pages++] = out_page; | ||
203 | |||
204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
205 | out_offset = 0; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* we're making it bigger, give up */ | ||
210 | if (tot_in > 8192 && tot_in < tot_out) | ||
211 | goto out; | ||
212 | |||
213 | /* we're all done */ | ||
214 | if (tot_in >= len) | ||
215 | break; | ||
216 | |||
/* output budget exceeded: stop here; tot_in may be smaller than len */
217 | if (tot_out > max_out) | ||
218 | break; | ||
219 | |||
/* move on to the next input page */
220 | bytes_left = len - tot_in; | ||
221 | kunmap(in_page); | ||
222 | page_cache_release(in_page); | ||
223 | |||
224 | start += PAGE_CACHE_SIZE; | ||
225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
226 | data_in = kmap(in_page); | ||
227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
228 | } | ||
229 | |||
/* output (headers and padding included) is larger than the input consumed */
230 | if (tot_out > tot_in) | ||
231 | goto out; | ||
232 | |||
233 | /* store the size of all chunks of compressed data */ | ||
234 | cpage_out = kmap(pages[0]); | ||
235 | write_compress_length(cpage_out, tot_out); | ||
236 | |||
237 | kunmap(pages[0]); | ||
238 | |||
239 | ret = 0; | ||
240 | *total_out = tot_out; | ||
241 | *total_in = tot_in; | ||
242 | out: | ||
/* the page count is reported on every exit, including the error exits */
243 | *out_pages = nr_pages; | ||
244 | if (out_page) | ||
245 | kunmap(out_page); | ||
246 | |||
247 | if (in_page) { | ||
248 | kunmap(in_page); | ||
249 | page_cache_release(in_page); | ||
250 | } | ||
251 | |||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
256 | struct page **pages_in, | ||
257 | u64 disk_start, | ||
258 | struct bio_vec *bvec, | ||
259 | int vcnt, | ||
260 | size_t srclen) | ||
261 | { | ||
262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
263 | int ret = 0, ret2; | ||
264 | char *data_in; | ||
265 | unsigned long page_in_index = 0; | ||
266 | unsigned long page_out_index = 0; | ||
267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
268 | PAGE_CACHE_SIZE; | ||
269 | unsigned long buf_start; | ||
270 | unsigned long buf_offset = 0; | ||
271 | unsigned long bytes; | ||
272 | unsigned long working_bytes; | ||
273 | unsigned long pg_offset; | ||
274 | |||
275 | size_t in_len; | ||
276 | size_t out_len; | ||
277 | unsigned long in_offset; | ||
278 | unsigned long in_page_bytes_left; | ||
279 | unsigned long tot_in; | ||
280 | unsigned long tot_out; | ||
281 | unsigned long tot_len; | ||
282 | char *buf; | ||
283 | |||
284 | data_in = kmap(pages_in[0]); | ||
285 | tot_len = read_compress_length(data_in); | ||
286 | |||
287 | tot_in = LZO_LEN; | ||
288 | in_offset = LZO_LEN; | ||
289 | tot_len = min_t(size_t, srclen, tot_len); | ||
290 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
291 | |||
292 | tot_out = 0; | ||
293 | pg_offset = 0; | ||
294 | |||
295 | while (tot_in < tot_len) { | ||
296 | in_len = read_compress_length(data_in + in_offset); | ||
297 | in_page_bytes_left -= LZO_LEN; | ||
298 | in_offset += LZO_LEN; | ||
299 | tot_in += LZO_LEN; | ||
300 | |||
301 | tot_in += in_len; | ||
302 | working_bytes = in_len; | ||
303 | |||
304 | /* fast path: avoid using the working buffer */ | ||
305 | if (in_page_bytes_left >= in_len) { | ||
306 | buf = data_in + in_offset; | ||
307 | bytes = in_len; | ||
308 | goto cont; | ||
309 | } | ||
310 | |||
311 | /* copy bytes from the pages into the working buffer */ | ||
312 | buf = workspace->cbuf; | ||
313 | buf_offset = 0; | ||
314 | while (working_bytes) { | ||
315 | bytes = min(working_bytes, in_page_bytes_left); | ||
316 | |||
317 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
318 | buf_offset += bytes; | ||
319 | cont: | ||
320 | working_bytes -= bytes; | ||
321 | in_page_bytes_left -= bytes; | ||
322 | in_offset += bytes; | ||
323 | |||
324 | /* check if we need to pick another page */ | ||
325 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
326 | || in_page_bytes_left == 0) { | ||
327 | tot_in += in_page_bytes_left; | ||
328 | |||
329 | if (working_bytes == 0 && tot_in >= tot_len) | ||
330 | break; | ||
331 | |||
332 | kunmap(pages_in[page_in_index]); | ||
333 | page_in_index++; | ||
334 | if (page_in_index >= total_pages_in) { | ||
335 | ret = -1; | ||
336 | data_in = NULL; | ||
337 | goto done; | ||
338 | } | ||
339 | data_in = kmap(pages_in[page_in_index]); | ||
340 | |||
341 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
342 | in_offset = 0; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
347 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
348 | &out_len); | ||
349 | if (ret != LZO_E_OK) { | ||
350 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
351 | ret = -1; | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | buf_start = tot_out; | ||
356 | tot_out += out_len; | ||
357 | |||
358 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
359 | tot_out, disk_start, | ||
360 | bvec, vcnt, | ||
361 | &page_out_index, &pg_offset); | ||
362 | if (ret2 == 0) | ||
363 | break; | ||
364 | } | ||
365 | done: | ||
366 | if (data_in) | ||
367 | kunmap(pages_in[page_in_index]); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
372 | struct page *dest_page, | ||
373 | unsigned long start_byte, | ||
374 | size_t srclen, size_t destlen) | ||
375 | { | ||
376 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
377 | size_t in_len; | ||
378 | size_t out_len; | ||
379 | size_t tot_len; | ||
380 | int ret = 0; | ||
381 | char *kaddr; | ||
382 | unsigned long bytes; | ||
383 | |||
384 | BUG_ON(srclen < LZO_LEN); | ||
385 | |||
386 | tot_len = read_compress_length(data_in); | ||
387 | data_in += LZO_LEN; | ||
388 | |||
389 | in_len = read_compress_length(data_in); | ||
390 | data_in += LZO_LEN; | ||
391 | |||
392 | out_len = PAGE_CACHE_SIZE; | ||
393 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
394 | if (ret != LZO_E_OK) { | ||
395 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
396 | ret = -1; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
400 | if (out_len < start_byte) { | ||
401 | ret = -1; | ||
402 | goto out; | ||
403 | } | ||
404 | |||
405 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
406 | |||
407 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
408 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
409 | kunmap_atomic(kaddr, KM_USER0); | ||
410 | out: | ||
411 | return ret; | ||
412 | } | ||
413 | |||
414 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
415 | .alloc_workspace = lzo_alloc_workspace, | ||
416 | .free_workspace = lzo_free_workspace, | ||
417 | .compress_pages = lzo_compress_pages, | ||
418 | .decompress_biovec = lzo_decompress_biovec, | ||
419 | .decompress = lzo_decompress, | ||
420 | }; | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f4621f6deca1..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
172 | */ | 172 | */ |
173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
176 | { | 176 | { |
177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
178 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
191 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
194 | 195 | ||
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | u64 start, u64 len, u64 disk_len, int type) | 221 | u64 start, u64 len, u64 disk_len, int type) |
221 | { | 222 | { |
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 223 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
223 | disk_len, type, 0); | 224 | disk_len, type, 0, |
225 | BTRFS_COMPRESS_NONE); | ||
224 | } | 226 | } |
225 | 227 | ||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 228 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
227 | u64 start, u64 len, u64 disk_len, int type) | 229 | u64 start, u64 len, u64 disk_len, int type) |
228 | { | 230 | { |
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 231 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
230 | disk_len, type, 1); | 232 | disk_len, type, 1, |
233 | BTRFS_COMPRESS_NONE); | ||
234 | } | ||
235 | |||
236 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
237 | u64 start, u64 len, u64 disk_len, | ||
238 | int type, int compress_type) | ||
239 | { | ||
240 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
241 | disk_len, type, 0, | ||
242 | compress_type); | ||
231 | } | 243 | } |
232 | 244 | ||
233 | /* | 245 | /* |
@@ -250,6 +262,73 @@ int btrfs_add_ordered_sum(struct inode *inode, | |||
250 | 262 | ||
251 | /* | 263 | /* |
252 | * this is used to account for finished IO across a given range | 264 | * this is used to account for finished IO across a given range |
265 | * of the file. The IO may span ordered extents. If | ||
266 | * a given ordered_extent is completely done, 1 is returned, otherwise | ||
267 | * 0. | ||
268 | * | ||
269 | * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used | ||
270 | * to make sure this function only returns 1 once for a given ordered extent. | ||
271 | * | ||
272 | * file_offset is updated to one byte past the range that is recorded as | ||
273 | * complete. This allows you to walk forward in the file. | ||
274 | */ | ||
275 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
276 | struct btrfs_ordered_extent **cached, | ||
277 | u64 *file_offset, u64 io_size) | ||
278 | { | ||
279 | struct btrfs_ordered_inode_tree *tree; | ||
280 | struct rb_node *node; | ||
281 | struct btrfs_ordered_extent *entry = NULL; | ||
282 | int ret; | ||
283 | u64 dec_end; | ||
284 | u64 dec_start; | ||
285 | u64 to_dec; | ||
286 | |||
287 | tree = &BTRFS_I(inode)->ordered_tree; | ||
288 | spin_lock(&tree->lock); | ||
289 | node = tree_search(tree, *file_offset); | ||
290 | if (!node) { | ||
291 | ret = 1; | ||
292 | goto out; | ||
293 | } | ||
294 | |||
295 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
296 | if (!offset_in_entry(entry, *file_offset)) { | ||
297 | ret = 1; | ||
298 | goto out; | ||
299 | } | ||
300 | |||
301 | dec_start = max(*file_offset, entry->file_offset); | ||
302 | dec_end = min(*file_offset + io_size, entry->file_offset + | ||
303 | entry->len); | ||
304 | *file_offset = dec_end; | ||
305 | if (dec_start > dec_end) { | ||
306 | printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", | ||
307 | (unsigned long long)dec_start, | ||
308 | (unsigned long long)dec_end); | ||
309 | } | ||
310 | to_dec = dec_end - dec_start; | ||
311 | if (to_dec > entry->bytes_left) { | ||
312 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", | ||
313 | (unsigned long long)entry->bytes_left, | ||
314 | (unsigned long long)to_dec); | ||
315 | } | ||
316 | entry->bytes_left -= to_dec; | ||
317 | if (entry->bytes_left == 0) | ||
318 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | ||
319 | else | ||
320 | ret = 1; | ||
321 | out: | ||
322 | if (!ret && cached && entry) { | ||
323 | *cached = entry; | ||
324 | atomic_inc(&entry->refs); | ||
325 | } | ||
326 | spin_unlock(&tree->lock); | ||
327 | return ret == 0; | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * this is used to account for finished IO across a given range | ||
253 | * of the file. The IO should not span ordered extents. If | 332 | * of the file. The IO should not span ordered extents. If |
254 | * a given ordered_extent is completely done, 1 is returned, otherwise | 333 | * a given ordered_extent is completely done, 1 is returned, otherwise |
255 | * 0. | 334 | * 0. |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ac365492a3f..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
94 | unsigned long flags; | 94 | unsigned long flags; |
95 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
96 | /* reference count */ | 99 | /* reference count */ |
97 | atomic_t refs; | 100 | atomic_t refs; |
98 | 101 | ||
@@ -141,10 +144,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
141 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 144 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
142 | struct btrfs_ordered_extent **cached, | 145 | struct btrfs_ordered_extent **cached, |
143 | u64 file_offset, u64 io_size); | 146 | u64 file_offset, u64 io_size); |
147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
148 | struct btrfs_ordered_extent **cached, | ||
149 | u64 *file_offset, u64 io_size); | ||
144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
145 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
147 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
148 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
149 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
150 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 79cba5fbc28e..f8be250963a0 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
56 | return -ENOMEM; | 56 | return -ENOMEM; |
57 | 57 | ||
58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
59 | if (ret) | 59 | if (ret < 0) |
60 | goto out; | 60 | goto out; |
61 | if (ret) { | ||
62 | ret = -ENOENT; | ||
63 | goto out; | ||
64 | } | ||
61 | 65 | ||
62 | ret = btrfs_del_item(trans, root, path); | 66 | ret = btrfs_del_item(trans, root, path); |
63 | 67 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8299a25ffc8f..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -54,6 +54,90 @@ | |||
54 | 54 | ||
55 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
56 | 56 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
58 | char nbuf[16]) | ||
59 | { | ||
60 | char *errstr = NULL; | ||
61 | |||
62 | switch (errno) { | ||
63 | case -EIO: | ||
64 | errstr = "IO failure"; | ||
65 | break; | ||
66 | case -ENOMEM: | ||
67 | errstr = "Out of memory"; | ||
68 | break; | ||
69 | case -EROFS: | ||
70 | errstr = "Readonly filesystem"; | ||
71 | break; | ||
72 | default: | ||
73 | if (nbuf) { | ||
74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
75 | errstr = nbuf; | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | return errstr; | ||
81 | } | ||
82 | |||
83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
84 | { | ||
85 | /* | ||
86 | * today we only save the error info into ram. Long term we'll | ||
87 | * also send it down to the disk | ||
88 | */ | ||
89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
90 | } | ||
91 | |||
92 | /* NOTE: | ||
93 | * We move write_super stuff at umount in order to avoid deadlock | ||
94 | * for umount hold all lock. | ||
95 | */ | ||
96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
97 | { | ||
98 | __save_error_info(fs_info); | ||
99 | } | ||
100 | |||
101 | /* btrfs handles errors by forcing the filesystem readonly */ | ||
102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
103 | { | ||
104 | struct super_block *sb = fs_info->sb; | ||
105 | |||
106 | if (sb->s_flags & MS_RDONLY) | ||
107 | return; | ||
108 | |||
109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
110 | sb->s_flags |= MS_RDONLY; | ||
111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * __btrfs_std_error decodes expected errors from the caller and | ||
117 | * invokes the approciate error response. | ||
118 | */ | ||
119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
120 | unsigned int line, int errno) | ||
121 | { | ||
122 | struct super_block *sb = fs_info->sb; | ||
123 | char nbuf[16]; | ||
124 | const char *errstr; | ||
125 | |||
126 | /* | ||
127 | * Special case: if the error is EROFS, and we're already | ||
128 | * under MS_RDONLY, then it is safe here. | ||
129 | */ | ||
130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
131 | return; | ||
132 | |||
133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
135 | sb->s_id, function, line, errstr); | ||
136 | save_error_info(fs_info); | ||
137 | |||
138 | btrfs_handle_error(fs_info); | ||
139 | } | ||
140 | |||
57 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 142 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -69,9 +153,9 @@ enum { | |||
69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
74 | Opt_user_subvol_rm_allowed, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
75 | }; | 159 | }; |
76 | 160 | ||
77 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
@@ -86,7 +170,9 @@ static match_table_t tokens = { | |||
86 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
87 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
88 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
173 | {Opt_compress_type, "compress=%s"}, | ||
89 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
90 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
91 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
92 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
112 | char *p, *num, *orig; | 198 | char *p, *num, *orig; |
113 | int intarg; | 199 | int intarg; |
114 | int ret = 0; | 200 | int ret = 0; |
201 | char *compress_type; | ||
202 | bool compress_force = false; | ||
115 | 203 | ||
116 | if (!options) | 204 | if (!options) |
117 | return 0; | 205 | return 0; |
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
156 | break; | 244 | break; |
157 | case Opt_compress: | ||
158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
160 | break; | ||
161 | case Opt_compress_force: | 245 | case Opt_compress_force: |
162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
248 | case Opt_compress: | ||
249 | case Opt_compress_type: | ||
250 | if (token == Opt_compress || | ||
251 | token == Opt_compress_force || | ||
252 | strcmp(args[0].from, "zlib") == 0) { | ||
253 | compress_type = "zlib"; | ||
254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
256 | compress_type = "lzo"; | ||
257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
258 | } else { | ||
259 | ret = -EINVAL; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
264 | if (compress_force) { | ||
265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
266 | pr_info("btrfs: force %s compression\n", | ||
267 | compress_type); | ||
268 | } else | ||
269 | pr_info("btrfs: use %s compression\n", | ||
270 | compress_type); | ||
165 | break; | 271 | break; |
166 | case Opt_ssd: | 272 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -244,6 +350,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
244 | case Opt_space_cache: | 350 | case Opt_space_cache: |
245 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | 351 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); |
246 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 352 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
353 | break; | ||
247 | case Opt_clear_cache: | 354 | case Opt_clear_cache: |
248 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | 355 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); |
249 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | 356 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); |
@@ -459,6 +566,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
459 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 566 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
460 | sb->s_magic = BTRFS_SUPER_MAGIC; | 567 | sb->s_magic = BTRFS_SUPER_MAGIC; |
461 | sb->s_op = &btrfs_super_ops; | 568 | sb->s_op = &btrfs_super_ops; |
569 | sb->s_d_op = &btrfs_dentry_operations; | ||
462 | sb->s_export_op = &btrfs_export_ops; | 570 | sb->s_export_op = &btrfs_export_ops; |
463 | sb->s_xattr = btrfs_xattr_handlers; | 571 | sb->s_xattr = btrfs_xattr_handlers; |
464 | sb->s_time_gran = 1; | 572 | sb->s_time_gran = 1; |
@@ -562,12 +670,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
562 | 670 | ||
563 | static int btrfs_test_super(struct super_block *s, void *data) | 671 | static int btrfs_test_super(struct super_block *s, void *data) |
564 | { | 672 | { |
565 | struct btrfs_fs_devices *test_fs_devices = data; | 673 | struct btrfs_root *test_root = data; |
566 | struct btrfs_root *root = btrfs_sb(s); | 674 | struct btrfs_root *root = btrfs_sb(s); |
567 | 675 | ||
568 | return root->fs_info->fs_devices == test_fs_devices; | 676 | /* |
677 | * If this super block is going away, return false as it | ||
678 | * can't match as an existing super block. | ||
679 | */ | ||
680 | if (!atomic_read(&s->s_active)) | ||
681 | return 0; | ||
682 | return root->fs_info->fs_devices == test_root->fs_info->fs_devices; | ||
569 | } | 683 | } |
570 | 684 | ||
685 | static int btrfs_set_super(struct super_block *s, void *data) | ||
686 | { | ||
687 | s->s_fs_info = data; | ||
688 | |||
689 | return set_anon_super(s, data); | ||
690 | } | ||
691 | |||
692 | |||
571 | /* | 693 | /* |
572 | * Find a superblock for the given device / mount point. | 694 | * Find a superblock for the given device / mount point. |
573 | * | 695 | * |
@@ -581,6 +703,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
581 | struct super_block *s; | 703 | struct super_block *s; |
582 | struct dentry *root; | 704 | struct dentry *root; |
583 | struct btrfs_fs_devices *fs_devices = NULL; | 705 | struct btrfs_fs_devices *fs_devices = NULL; |
706 | struct btrfs_root *tree_root = NULL; | ||
707 | struct btrfs_fs_info *fs_info = NULL; | ||
584 | fmode_t mode = FMODE_READ; | 708 | fmode_t mode = FMODE_READ; |
585 | char *subvol_name = NULL; | 709 | char *subvol_name = NULL; |
586 | u64 subvol_objectid = 0; | 710 | u64 subvol_objectid = 0; |
@@ -608,8 +732,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
608 | goto error_close_devices; | 732 | goto error_close_devices; |
609 | } | 733 | } |
610 | 734 | ||
735 | /* | ||
736 | * Setup a dummy root and fs_info for test/set super. This is because | ||
737 | * we don't actually fill this stuff out until open_ctree, but we need | ||
738 | * it for searching for existing supers, so this lets us do that and | ||
739 | * then open_ctree will properly initialize everything later. | ||
740 | */ | ||
741 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | ||
742 | tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
743 | if (!fs_info || !tree_root) { | ||
744 | error = -ENOMEM; | ||
745 | goto error_close_devices; | ||
746 | } | ||
747 | fs_info->tree_root = tree_root; | ||
748 | fs_info->fs_devices = fs_devices; | ||
749 | tree_root->fs_info = fs_info; | ||
750 | |||
611 | bdev = fs_devices->latest_bdev; | 751 | bdev = fs_devices->latest_bdev; |
612 | s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); | 752 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
613 | if (IS_ERR(s)) | 753 | if (IS_ERR(s)) |
614 | goto error_s; | 754 | goto error_s; |
615 | 755 | ||
@@ -652,9 +792,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
652 | mutex_unlock(&root->d_inode->i_mutex); | 792 | mutex_unlock(&root->d_inode->i_mutex); |
653 | 793 | ||
654 | if (IS_ERR(new_root)) { | 794 | if (IS_ERR(new_root)) { |
795 | dput(root); | ||
655 | deactivate_locked_super(s); | 796 | deactivate_locked_super(s); |
656 | error = PTR_ERR(new_root); | 797 | error = PTR_ERR(new_root); |
657 | dput(root); | ||
658 | goto error_free_subvol_name; | 798 | goto error_free_subvol_name; |
659 | } | 799 | } |
660 | if (!new_root->d_inode) { | 800 | if (!new_root->d_inode) { |
@@ -675,6 +815,8 @@ error_s: | |||
675 | error = PTR_ERR(s); | 815 | error = PTR_ERR(s); |
676 | error_close_devices: | 816 | error_close_devices: |
677 | btrfs_close_devices(fs_devices); | 817 | btrfs_close_devices(fs_devices); |
818 | kfree(fs_info); | ||
819 | kfree(tree_root); | ||
678 | error_free_subvol_name: | 820 | error_free_subvol_name: |
679 | kfree(subvol_name); | 821 | kfree(subvol_name); |
680 | return ERR_PTR(error); | 822 | return ERR_PTR(error); |
@@ -717,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
717 | return 0; | 859 | return 0; |
718 | } | 860 | } |
719 | 861 | ||
862 | /* | ||
863 | * The helper to calc the free space on the devices that can be used to store | ||
864 | * file data. | ||
865 | */ | ||
866 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
867 | { | ||
868 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
869 | struct btrfs_device_info *devices_info; | ||
870 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
871 | struct btrfs_device *device; | ||
872 | u64 skip_space; | ||
873 | u64 type; | ||
874 | u64 avail_space; | ||
875 | u64 used_space; | ||
876 | u64 min_stripe_size; | ||
877 | int min_stripes = 1; | ||
878 | int i = 0, nr_devices; | ||
879 | int ret; | ||
880 | |||
881 | nr_devices = fs_info->fs_devices->rw_devices; | ||
882 | BUG_ON(!nr_devices); | ||
883 | |||
884 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
885 | GFP_NOFS); | ||
886 | if (!devices_info) | ||
887 | return -ENOMEM; | ||
888 | |||
889 | /* calc min stripe number for data space allocation */ | ||
890 | type = btrfs_get_alloc_profile(root, 1); | ||
891 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
892 | min_stripes = 2; | ||
893 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
894 | min_stripes = 2; | ||
895 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
896 | min_stripes = 4; | ||
897 | |||
898 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
899 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
900 | else | ||
901 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
902 | |||
903 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
904 | if (!device->in_fs_metadata) | ||
905 | continue; | ||
906 | |||
907 | avail_space = device->total_bytes - device->bytes_used; | ||
908 | |||
909 | /* align with stripe_len */ | ||
910 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
911 | avail_space *= BTRFS_STRIPE_LEN; | ||
912 | |||
913 | /* | ||
914 | * In order to avoid overwriting the superblock on the drive, | ||
915 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
916 | * allocation. | ||
917 | */ | ||
918 | skip_space = 1024 * 1024; | ||
919 | |||
920 | /* user can set the offset in fs_info->alloc_start. */ | ||
921 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
922 | device->total_bytes) | ||
923 | skip_space = max(fs_info->alloc_start, skip_space); | ||
924 | |||
925 | /* | ||
926 | * btrfs can not use the free space in [0, skip_space - 1], | ||
927 | * we must subtract it from the total. In order to implement | ||
928 | * it, we account the used space in this range first. | ||
929 | */ | ||
930 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
931 | &used_space); | ||
932 | if (ret) { | ||
933 | kfree(devices_info); | ||
934 | return ret; | ||
935 | } | ||
936 | |||
937 | /* calc the free space in [0, skip_space - 1] */ | ||
938 | skip_space -= used_space; | ||
939 | |||
940 | /* | ||
941 | * we can not use the free space in [0, skip_space - 1], subtract | ||
942 | * it from the total. | ||
943 | */ | ||
944 | if (avail_space && avail_space >= skip_space) | ||
945 | avail_space -= skip_space; | ||
946 | else | ||
947 | avail_space = 0; | ||
948 | |||
949 | if (avail_space < min_stripe_size) | ||
950 | continue; | ||
951 | |||
952 | devices_info[i].dev = device; | ||
953 | devices_info[i].max_avail = avail_space; | ||
954 | |||
955 | i++; | ||
956 | } | ||
957 | |||
958 | nr_devices = i; | ||
959 | |||
960 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
961 | |||
962 | i = nr_devices - 1; | ||
963 | avail_space = 0; | ||
964 | while (nr_devices >= min_stripes) { | ||
965 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
966 | int j; | ||
967 | u64 alloc_size; | ||
968 | |||
969 | avail_space += devices_info[i].max_avail * min_stripes; | ||
970 | alloc_size = devices_info[i].max_avail; | ||
971 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
972 | devices_info[j].max_avail -= alloc_size; | ||
973 | } | ||
974 | i--; | ||
975 | nr_devices--; | ||
976 | } | ||
977 | |||
978 | kfree(devices_info); | ||
979 | *free_bytes = avail_space; | ||
980 | return 0; | ||
981 | } | ||
982 | |||
720 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 983 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
721 | { | 984 | { |
722 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 985 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -724,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
724 | struct list_head *head = &root->fs_info->space_info; | 987 | struct list_head *head = &root->fs_info->space_info; |
725 | struct btrfs_space_info *found; | 988 | struct btrfs_space_info *found; |
726 | u64 total_used = 0; | 989 | u64 total_used = 0; |
727 | u64 total_used_data = 0; | 990 | u64 total_free_data = 0; |
728 | int bits = dentry->d_sb->s_blocksize_bits; | 991 | int bits = dentry->d_sb->s_blocksize_bits; |
729 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 992 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
993 | int ret; | ||
730 | 994 | ||
995 | /* holding chunk_mutex to avoid allocating new chunks */ | ||
996 | mutex_lock(&root->fs_info->chunk_mutex); | ||
731 | rcu_read_lock(); | 997 | rcu_read_lock(); |
732 | list_for_each_entry_rcu(found, head, list) { | 998 | list_for_each_entry_rcu(found, head, list) { |
733 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 999 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
734 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1000 | total_free_data += found->disk_total - found->disk_used; |
735 | total_used_data += found->disk_total; | 1001 | total_free_data -= |
736 | else | 1002 | btrfs_account_ro_block_groups_free_space(found); |
737 | total_used_data += found->disk_used; | 1003 | } |
1004 | |||
738 | total_used += found->disk_used; | 1005 | total_used += found->disk_used; |
739 | } | 1006 | } |
740 | rcu_read_unlock(); | 1007 | rcu_read_unlock(); |
@@ -742,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
742 | buf->f_namelen = BTRFS_NAME_LEN; | 1009 | buf->f_namelen = BTRFS_NAME_LEN; |
743 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1010 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
744 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1011 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
745 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
746 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1012 | buf->f_bsize = dentry->d_sb->s_blocksize; |
747 | buf->f_type = BTRFS_SUPER_MAGIC; | 1013 | buf->f_type = BTRFS_SUPER_MAGIC; |
1014 | buf->f_bavail = total_free_data; | ||
1015 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1016 | if (ret) { | ||
1017 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1018 | return ret; | ||
1019 | } | ||
1020 | buf->f_bavail += total_free_data; | ||
1021 | buf->f_bavail = buf->f_bavail >> bits; | ||
1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
748 | 1023 | ||
749 | /* We treat it as constant endianness (it doesn't matter _which_) | 1024 | /* We treat it as constant endianness (it doesn't matter _which_) |
750 | because we want the fsid to come out the same whether mounted | 1025 | because we want the fsid to come out the same whether mounted |
@@ -861,10 +1136,14 @@ static int __init init_btrfs_fs(void) | |||
861 | if (err) | 1136 | if (err) |
862 | return err; | 1137 | return err; |
863 | 1138 | ||
864 | err = btrfs_init_cachep(); | 1139 | err = btrfs_init_compress(); |
865 | if (err) | 1140 | if (err) |
866 | goto free_sysfs; | 1141 | goto free_sysfs; |
867 | 1142 | ||
1143 | err = btrfs_init_cachep(); | ||
1144 | if (err) | ||
1145 | goto free_compress; | ||
1146 | |||
868 | err = extent_io_init(); | 1147 | err = extent_io_init(); |
869 | if (err) | 1148 | if (err) |
870 | goto free_cachep; | 1149 | goto free_cachep; |
@@ -892,6 +1171,8 @@ free_extent_io: | |||
892 | extent_io_exit(); | 1171 | extent_io_exit(); |
893 | free_cachep: | 1172 | free_cachep: |
894 | btrfs_destroy_cachep(); | 1173 | btrfs_destroy_cachep(); |
1174 | free_compress: | ||
1175 | btrfs_exit_compress(); | ||
895 | free_sysfs: | 1176 | free_sysfs: |
896 | btrfs_exit_sysfs(); | 1177 | btrfs_exit_sysfs(); |
897 | return err; | 1178 | return err; |
@@ -906,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) | |||
906 | unregister_filesystem(&btrfs_fs_type); | 1187 | unregister_filesystem(&btrfs_fs_type); |
907 | btrfs_exit_sysfs(); | 1188 | btrfs_exit_sysfs(); |
908 | btrfs_cleanup_fs_uuids(); | 1189 | btrfs_cleanup_fs_uuids(); |
909 | btrfs_zlib_exit(); | 1190 | btrfs_exit_compress(); |
910 | } | 1191 | } |
911 | 1192 | ||
912 | module_init(init_btrfs_fs) | 1193 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1fffbc017bdf..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
181 | struct btrfs_trans_handle *h; | 181 | struct btrfs_trans_handle *h; |
182 | struct btrfs_transaction *cur_trans; | 182 | struct btrfs_transaction *cur_trans; |
183 | int ret; | 183 | int ret; |
184 | |||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
186 | return ERR_PTR(-EROFS); | ||
184 | again: | 187 | again: |
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
186 | if (!h) | 189 | if (!h) |
@@ -902,6 +905,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
902 | struct btrfs_root *root = pending->root; | 905 | struct btrfs_root *root = pending->root; |
903 | struct btrfs_root *parent_root; | 906 | struct btrfs_root *parent_root; |
904 | struct inode *parent_inode; | 907 | struct inode *parent_inode; |
908 | struct dentry *parent; | ||
905 | struct dentry *dentry; | 909 | struct dentry *dentry; |
906 | struct extent_buffer *tmp; | 910 | struct extent_buffer *tmp; |
907 | struct extent_buffer *old; | 911 | struct extent_buffer *old; |
@@ -909,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
909 | u64 to_reserve = 0; | 913 | u64 to_reserve = 0; |
910 | u64 index = 0; | 914 | u64 index = 0; |
911 | u64 objectid; | 915 | u64 objectid; |
916 | u64 root_flags; | ||
912 | 917 | ||
913 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 918 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
914 | if (!new_root_item) { | 919 | if (!new_root_item) { |
@@ -941,7 +946,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
941 | trans->block_rsv = &pending->block_rsv; | 946 | trans->block_rsv = &pending->block_rsv; |
942 | 947 | ||
943 | dentry = pending->dentry; | 948 | dentry = pending->dentry; |
944 | parent_inode = dentry->d_parent->d_inode; | 949 | parent = dget_parent(dentry); |
950 | parent_inode = parent->d_inode; | ||
945 | parent_root = BTRFS_I(parent_inode)->root; | 951 | parent_root = BTRFS_I(parent_inode)->root; |
946 | record_root_in_trans(trans, parent_root); | 952 | record_root_in_trans(trans, parent_root); |
947 | 953 | ||
@@ -965,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
965 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
966 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
967 | 973 | ||
974 | root_flags = btrfs_root_flags(new_root_item); | ||
975 | if (pending->readonly) | ||
976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
977 | else | ||
978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
980 | |||
968 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
969 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
970 | btrfs_set_lock_blocking(old); | 983 | btrfs_set_lock_blocking(old); |
@@ -989,6 +1002,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
989 | parent_inode->i_ino, index, | 1002 | parent_inode->i_ino, index, |
990 | dentry->d_name.name, dentry->d_name.len); | 1003 | dentry->d_name.name, dentry->d_name.len); |
991 | BUG_ON(ret); | 1004 | BUG_ON(ret); |
1005 | dput(parent); | ||
992 | 1006 | ||
993 | key.offset = (u64)-1; | 1007 | key.offset = (u64)-1; |
994 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 1008 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { | |||
62 | struct btrfs_block_rsv block_rsv; | 62 | struct btrfs_block_rsv block_rsv; |
63 | /* extra metadata reservation for relocation */ | 63 | /* extra metadata reservation for relocation */ |
64 | int error; | 64 | int error; |
65 | bool readonly; | ||
65 | struct list_head list; | 66 | struct list_head list; |
66 | }; | 67 | }; |
67 | 68 | ||
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a29f19384a27..054744ac5719 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2869 | { | 2869 | { |
2870 | int ret = 0; | 2870 | int ret = 0; |
2871 | struct btrfs_root *root; | 2871 | struct btrfs_root *root; |
2872 | struct dentry *old_parent = NULL; | ||
2872 | 2873 | ||
2873 | /* | 2874 | /* |
2874 | * for regular files, if its inode is already on disk, we don't | 2875 | * for regular files, if its inode is already on disk, we don't |
@@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2910 | if (IS_ROOT(parent)) | 2911 | if (IS_ROOT(parent)) |
2911 | break; | 2912 | break; |
2912 | 2913 | ||
2913 | parent = parent->d_parent; | 2914 | parent = dget_parent(parent); |
2915 | dput(old_parent); | ||
2916 | old_parent = parent; | ||
2914 | inode = parent->d_inode; | 2917 | inode = parent->d_inode; |
2915 | 2918 | ||
2916 | } | 2919 | } |
2920 | dput(old_parent); | ||
2917 | out: | 2921 | out: |
2918 | return ret; | 2922 | return ret; |
2919 | } | 2923 | } |
@@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2945 | { | 2949 | { |
2946 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 2950 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2947 | struct super_block *sb; | 2951 | struct super_block *sb; |
2952 | struct dentry *old_parent = NULL; | ||
2948 | int ret = 0; | 2953 | int ret = 0; |
2949 | u64 last_committed = root->fs_info->last_trans_committed; | 2954 | u64 last_committed = root->fs_info->last_trans_committed; |
2950 | 2955 | ||
@@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3016 | if (IS_ROOT(parent)) | 3021 | if (IS_ROOT(parent)) |
3017 | break; | 3022 | break; |
3018 | 3023 | ||
3019 | parent = parent->d_parent; | 3024 | parent = dget_parent(parent); |
3025 | dput(old_parent); | ||
3026 | old_parent = parent; | ||
3020 | } | 3027 | } |
3021 | ret = 0; | 3028 | ret = 0; |
3022 | end_trans: | 3029 | end_trans: |
3030 | dput(old_parent); | ||
3023 | if (ret < 0) { | 3031 | if (ret < 0) { |
3024 | BUG_ON(ret != -ENOSPC); | 3032 | BUG_ON(ret != -ENOSPC); |
3025 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3033 | root->fs_info->last_trans_log_full_commit = trans->transid; |
@@ -3039,8 +3047,13 @@ end_no_trans: | |||
3039 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 3047 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
3040 | struct btrfs_root *root, struct dentry *dentry) | 3048 | struct btrfs_root *root, struct dentry *dentry) |
3041 | { | 3049 | { |
3042 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, | 3050 | struct dentry *parent = dget_parent(dentry); |
3043 | dentry->d_parent, 0); | 3051 | int ret; |
3052 | |||
3053 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | ||
3054 | dput(parent); | ||
3055 | |||
3056 | return ret; | ||
3044 | } | 3057 | } |
3045 | 3058 | ||
3046 | /* | 3059 | /* |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc04dc1445d6..d158530233b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -412,12 +413,16 @@ static noinline int device_list_add(const char *path, | |||
412 | 413 | ||
413 | device->fs_devices = fs_devices; | 414 | device->fs_devices = fs_devices; |
414 | fs_devices->num_devices++; | 415 | fs_devices->num_devices++; |
415 | } else if (strcmp(device->name, path)) { | 416 | } else if (!device->name || strcmp(device->name, path)) { |
416 | name = kstrdup(path, GFP_NOFS); | 417 | name = kstrdup(path, GFP_NOFS); |
417 | if (!name) | 418 | if (!name) |
418 | return -ENOMEM; | 419 | return -ENOMEM; |
419 | kfree(device->name); | 420 | kfree(device->name); |
420 | device->name = name; | 421 | device->name = name; |
422 | if (device->missing) { | ||
423 | fs_devices->missing_devices--; | ||
424 | device->missing = 0; | ||
425 | } | ||
421 | } | 426 | } |
422 | 427 | ||
423 | if (found_transid > fs_devices->latest_trans) { | 428 | if (found_transid > fs_devices->latest_trans) { |
@@ -489,7 +494,7 @@ again: | |||
489 | continue; | 494 | continue; |
490 | 495 | ||
491 | if (device->bdev) { | 496 | if (device->bdev) { |
492 | close_bdev_exclusive(device->bdev, device->mode); | 497 | blkdev_put(device->bdev, device->mode); |
493 | device->bdev = NULL; | 498 | device->bdev = NULL; |
494 | fs_devices->open_devices--; | 499 | fs_devices->open_devices--; |
495 | } | 500 | } |
@@ -523,7 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
523 | 528 | ||
524 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 529 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
525 | if (device->bdev) { | 530 | if (device->bdev) { |
526 | close_bdev_exclusive(device->bdev, device->mode); | 531 | blkdev_put(device->bdev, device->mode); |
527 | fs_devices->open_devices--; | 532 | fs_devices->open_devices--; |
528 | } | 533 | } |
529 | if (device->writeable) { | 534 | if (device->writeable) { |
@@ -580,13 +585,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
580 | int seeding = 1; | 585 | int seeding = 1; |
581 | int ret = 0; | 586 | int ret = 0; |
582 | 587 | ||
588 | flags |= FMODE_EXCL; | ||
589 | |||
583 | list_for_each_entry(device, head, dev_list) { | 590 | list_for_each_entry(device, head, dev_list) { |
584 | if (device->bdev) | 591 | if (device->bdev) |
585 | continue; | 592 | continue; |
586 | if (!device->name) | 593 | if (!device->name) |
587 | continue; | 594 | continue; |
588 | 595 | ||
589 | bdev = open_bdev_exclusive(device->name, flags, holder); | 596 | bdev = blkdev_get_by_path(device->name, flags, holder); |
590 | if (IS_ERR(bdev)) { | 597 | if (IS_ERR(bdev)) { |
591 | printk(KERN_INFO "open %s failed\n", device->name); | 598 | printk(KERN_INFO "open %s failed\n", device->name); |
592 | goto error; | 599 | goto error; |
@@ -594,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
594 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
595 | 602 | ||
596 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
597 | if (!bh) | 604 | if (!bh) { |
605 | ret = -EINVAL; | ||
598 | goto error_close; | 606 | goto error_close; |
607 | } | ||
599 | 608 | ||
600 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
601 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -638,7 +647,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
638 | error_brelse: | 647 | error_brelse: |
639 | brelse(bh); | 648 | brelse(bh); |
640 | error_close: | 649 | error_close: |
641 | close_bdev_exclusive(bdev, FMODE_READ); | 650 | blkdev_put(bdev, flags); |
642 | error: | 651 | error: |
643 | continue; | 652 | continue; |
644 | } | 653 | } |
@@ -684,7 +693,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
684 | 693 | ||
685 | mutex_lock(&uuid_mutex); | 694 | mutex_lock(&uuid_mutex); |
686 | 695 | ||
687 | bdev = open_bdev_exclusive(path, flags, holder); | 696 | flags |= FMODE_EXCL; |
697 | bdev = blkdev_get_by_path(path, flags, holder); | ||
688 | 698 | ||
689 | if (IS_ERR(bdev)) { | 699 | if (IS_ERR(bdev)) { |
690 | ret = PTR_ERR(bdev); | 700 | ret = PTR_ERR(bdev); |
@@ -696,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
696 | goto error_close; | 706 | goto error_close; |
697 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
698 | if (!bh) { | 708 | if (!bh) { |
699 | ret = -EIO; | 709 | ret = -EINVAL; |
700 | goto error_close; | 710 | goto error_close; |
701 | } | 711 | } |
702 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -716,65 +726,173 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
716 | 726 | ||
717 | brelse(bh); | 727 | brelse(bh); |
718 | error_close: | 728 | error_close: |
719 | close_bdev_exclusive(bdev, flags); | 729 | blkdev_put(bdev, flags); |
720 | error: | 730 | error: |
721 | mutex_unlock(&uuid_mutex); | 731 | mutex_unlock(&uuid_mutex); |
722 | return ret; | 732 | return ret; |
723 | } | 733 | } |
724 | 734 | ||
735 | /* helper to account the used device space in the range */ | ||
736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
737 | u64 end, u64 *length) | ||
738 | { | ||
739 | struct btrfs_key key; | ||
740 | struct btrfs_root *root = device->dev_root; | ||
741 | struct btrfs_dev_extent *dev_extent; | ||
742 | struct btrfs_path *path; | ||
743 | u64 extent_end; | ||
744 | int ret; | ||
745 | int slot; | ||
746 | struct extent_buffer *l; | ||
747 | |||
748 | *length = 0; | ||
749 | |||
750 | if (start >= device->total_bytes) | ||
751 | return 0; | ||
752 | |||
753 | path = btrfs_alloc_path(); | ||
754 | if (!path) | ||
755 | return -ENOMEM; | ||
756 | path->reada = 2; | ||
757 | |||
758 | key.objectid = device->devid; | ||
759 | key.offset = start; | ||
760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
761 | |||
762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
763 | if (ret < 0) | ||
764 | goto out; | ||
765 | if (ret > 0) { | ||
766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
767 | if (ret < 0) | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | while (1) { | ||
772 | l = path->nodes[0]; | ||
773 | slot = path->slots[0]; | ||
774 | if (slot >= btrfs_header_nritems(l)) { | ||
775 | ret = btrfs_next_leaf(root, path); | ||
776 | if (ret == 0) | ||
777 | continue; | ||
778 | if (ret < 0) | ||
779 | goto out; | ||
780 | |||
781 | break; | ||
782 | } | ||
783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
784 | |||
785 | if (key.objectid < device->devid) | ||
786 | goto next; | ||
787 | |||
788 | if (key.objectid > device->devid) | ||
789 | break; | ||
790 | |||
791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
792 | goto next; | ||
793 | |||
794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
796 | dev_extent); | ||
797 | if (key.offset <= start && extent_end > end) { | ||
798 | *length = end - start + 1; | ||
799 | break; | ||
800 | } else if (key.offset <= start && extent_end > start) | ||
801 | *length += extent_end - start; | ||
802 | else if (key.offset > start && extent_end <= end) | ||
803 | *length += extent_end - key.offset; | ||
804 | else if (key.offset > start && key.offset <= end) { | ||
805 | *length += end - key.offset + 1; | ||
806 | break; | ||
807 | } else if (key.offset > end) | ||
808 | break; | ||
809 | |||
810 | next: | ||
811 | path->slots[0]++; | ||
812 | } | ||
813 | ret = 0; | ||
814 | out: | ||
815 | btrfs_free_path(path); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
725 | /* | 819 | /* |
820 | * find_free_dev_extent - find free space in the specified device | ||
821 | * @trans: transaction handler | ||
822 | * @device: the device which we search the free space in | ||
823 | * @num_bytes: the size of the free space that we need | ||
824 | * @start: store the start of the free space. | ||
825 | * @len: the size of the free space that we find, or the size of the max | ||
826 | * free space if we don't find suitable free space | ||
827 | * | ||
726 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
727 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
728 | * of extents | 830 | * of extents |
831 | * | ||
832 | * @start is used to store the start of the free space if found. But if we | ||
833 | * don't find suitable free space, it will be used to store the start position | ||
834 | * of the max free space. | ||
835 | * | ||
836 | * @len is used to store the size of the free space that we find. | ||
837 | * But if we don't find suitable free space, it is used to store the size of | ||
838 | * the max free space. | ||
729 | */ | 839 | */ |
730 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
731 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
732 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
733 | { | 843 | { |
734 | struct btrfs_key key; | 844 | struct btrfs_key key; |
735 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
736 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
737 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
738 | u64 hole_size = 0; | 848 | u64 hole_size; |
739 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
740 | u64 search_start = 0; | 850 | u64 max_hole_size; |
851 | u64 extent_end; | ||
852 | u64 search_start; | ||
741 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
742 | int ret; | 854 | int ret; |
743 | int slot = 0; | 855 | int slot; |
744 | int start_found; | ||
745 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
746 | 857 | ||
747 | path = btrfs_alloc_path(); | ||
748 | if (!path) | ||
749 | return -ENOMEM; | ||
750 | path->reada = 2; | ||
751 | start_found = 0; | ||
752 | |||
753 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
754 | 859 | ||
755 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
756 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
757 | */ | 862 | */ |
758 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
759 | 864 | ||
760 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
761 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
762 | 867 | ||
868 | max_hole_start = search_start; | ||
869 | max_hole_size = 0; | ||
870 | |||
871 | if (search_start >= search_end) { | ||
872 | ret = -ENOSPC; | ||
873 | goto error; | ||
874 | } | ||
875 | |||
876 | path = btrfs_alloc_path(); | ||
877 | if (!path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto error; | ||
880 | } | ||
881 | path->reada = 2; | ||
882 | |||
763 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
764 | key.offset = search_start; | 884 | key.offset = search_start; |
765 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
886 | |||
766 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
767 | if (ret < 0) | 888 | if (ret < 0) |
768 | goto error; | 889 | goto out; |
769 | if (ret > 0) { | 890 | if (ret > 0) { |
770 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
771 | if (ret < 0) | 892 | if (ret < 0) |
772 | goto error; | 893 | goto out; |
773 | if (ret > 0) | ||
774 | start_found = 1; | ||
775 | } | 894 | } |
776 | l = path->nodes[0]; | 895 | |
777 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
778 | while (1) { | 896 | while (1) { |
779 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
780 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
@@ -783,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
783 | if (ret == 0) | 901 | if (ret == 0) |
784 | continue; | 902 | continue; |
785 | if (ret < 0) | 903 | if (ret < 0) |
786 | goto error; | 904 | goto out; |
787 | no_more_items: | 905 | |
788 | if (!start_found) { | 906 | break; |
789 | if (search_start >= search_end) { | ||
790 | ret = -ENOSPC; | ||
791 | goto error; | ||
792 | } | ||
793 | *start = search_start; | ||
794 | start_found = 1; | ||
795 | goto check_pending; | ||
796 | } | ||
797 | *start = last_byte > search_start ? | ||
798 | last_byte : search_start; | ||
799 | if (search_end <= *start) { | ||
800 | ret = -ENOSPC; | ||
801 | goto error; | ||
802 | } | ||
803 | goto check_pending; | ||
804 | } | 907 | } |
805 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
806 | 909 | ||
@@ -808,48 +911,62 @@ no_more_items: | |||
808 | goto next; | 911 | goto next; |
809 | 912 | ||
810 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
811 | goto no_more_items; | 914 | break; |
812 | 915 | ||
813 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
814 | start_found) { | 917 | goto next; |
815 | if (last_byte < search_start) | ||
816 | last_byte = search_start; | ||
817 | hole_size = key.offset - last_byte; | ||
818 | 918 | ||
819 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
820 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
921 | |||
922 | if (hole_size > max_hole_size) { | ||
923 | max_hole_start = search_start; | ||
924 | max_hole_size = hole_size; | ||
925 | } | ||
821 | 926 | ||
822 | if (key.offset > last_byte && | 927 | /* |
823 | hole_size >= num_bytes) { | 928 | * If this free space is greater than which we need, |
824 | *start = last_byte; | 929 | * it must be the max free space that we have found |
825 | goto check_pending; | 930 | * until now, so max_hole_start must point to the start |
931 | * of this free space and the length of this free space | ||
932 | * is stored in max_hole_size. Thus, we return | ||
933 | * max_hole_start and max_hole_size and go back to the | ||
934 | * caller. | ||
935 | */ | ||
936 | if (hole_size >= num_bytes) { | ||
937 | ret = 0; | ||
938 | goto out; | ||
826 | } | 939 | } |
827 | } | 940 | } |
828 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
829 | goto next; | ||
830 | 941 | ||
831 | start_found = 1; | ||
832 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
833 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
944 | dev_extent); | ||
945 | if (extent_end > search_start) | ||
946 | search_start = extent_end; | ||
834 | next: | 947 | next: |
835 | path->slots[0]++; | 948 | path->slots[0]++; |
836 | cond_resched(); | 949 | cond_resched(); |
837 | } | 950 | } |
838 | check_pending: | ||
839 | /* we have to make sure we didn't find an extent that has already | ||
840 | * been allocated by the map tree or the original allocation | ||
841 | */ | ||
842 | BUG_ON(*start < search_start); | ||
843 | 951 | ||
844 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end- search_start; |
845 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
846 | goto error; | 954 | max_hole_start = search_start; |
955 | max_hole_size = hole_size; | ||
847 | } | 956 | } |
848 | /* check for pending inserts here */ | ||
849 | ret = 0; | ||
850 | 957 | ||
851 | error: | 958 | /* See above. */ |
959 | if (hole_size < num_bytes) | ||
960 | ret = -ENOSPC; | ||
961 | else | ||
962 | ret = 0; | ||
963 | |||
964 | out: | ||
852 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
966 | error: | ||
967 | *start = max_hole_start; | ||
968 | if (len) | ||
969 | *len = max_hole_size; | ||
853 | return ret; | 970 | return ret; |
854 | } | 971 | } |
855 | 972 | ||
@@ -1179,8 +1296,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1179 | goto out; | 1296 | goto out; |
1180 | } | 1297 | } |
1181 | } else { | 1298 | } else { |
1182 | bdev = open_bdev_exclusive(device_path, FMODE_READ, | 1299 | bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, |
1183 | root->fs_info->bdev_holder); | 1300 | root->fs_info->bdev_holder); |
1184 | if (IS_ERR(bdev)) { | 1301 | if (IS_ERR(bdev)) { |
1185 | ret = PTR_ERR(bdev); | 1302 | ret = PTR_ERR(bdev); |
1186 | goto out; | 1303 | goto out; |
@@ -1189,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1189 | set_blocksize(bdev, 4096); | 1306 | set_blocksize(bdev, 4096); |
1190 | bh = btrfs_read_dev_super(bdev); | 1307 | bh = btrfs_read_dev_super(bdev); |
1191 | if (!bh) { | 1308 | if (!bh) { |
1192 | ret = -EIO; | 1309 | ret = -EINVAL; |
1193 | goto error_close; | 1310 | goto error_close; |
1194 | } | 1311 | } |
1195 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1312 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1236,6 +1353,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1236 | 1353 | ||
1237 | device->fs_devices->num_devices--; | 1354 | device->fs_devices->num_devices--; |
1238 | 1355 | ||
1356 | if (device->missing) | ||
1357 | root->fs_info->fs_devices->missing_devices--; | ||
1358 | |||
1239 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1359 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
1240 | struct btrfs_device, dev_list); | 1360 | struct btrfs_device, dev_list); |
1241 | if (device->bdev == root->fs_info->sb->s_bdev) | 1361 | if (device->bdev == root->fs_info->sb->s_bdev) |
@@ -1244,7 +1364,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1244 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; | 1364 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; |
1245 | 1365 | ||
1246 | if (device->bdev) { | 1366 | if (device->bdev) { |
1247 | close_bdev_exclusive(device->bdev, device->mode); | 1367 | blkdev_put(device->bdev, device->mode); |
1248 | device->bdev = NULL; | 1368 | device->bdev = NULL; |
1249 | device->fs_devices->open_devices--; | 1369 | device->fs_devices->open_devices--; |
1250 | } | 1370 | } |
@@ -1287,7 +1407,7 @@ error_brelse: | |||
1287 | brelse(bh); | 1407 | brelse(bh); |
1288 | error_close: | 1408 | error_close: |
1289 | if (bdev) | 1409 | if (bdev) |
1290 | close_bdev_exclusive(bdev, FMODE_READ); | 1410 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1291 | out: | 1411 | out: |
1292 | mutex_unlock(&root->fs_info->volume_mutex); | 1412 | mutex_unlock(&root->fs_info->volume_mutex); |
1293 | mutex_unlock(&uuid_mutex); | 1413 | mutex_unlock(&uuid_mutex); |
@@ -1439,7 +1559,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1439 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1559 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1440 | return -EINVAL; | 1560 | return -EINVAL; |
1441 | 1561 | ||
1442 | bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); | 1562 | bdev = blkdev_get_by_path(device_path, FMODE_EXCL, |
1563 | root->fs_info->bdev_holder); | ||
1443 | if (IS_ERR(bdev)) | 1564 | if (IS_ERR(bdev)) |
1444 | return PTR_ERR(bdev); | 1565 | return PTR_ERR(bdev); |
1445 | 1566 | ||
@@ -1565,7 +1686,7 @@ out: | |||
1565 | mutex_unlock(&root->fs_info->volume_mutex); | 1686 | mutex_unlock(&root->fs_info->volume_mutex); |
1566 | return ret; | 1687 | return ret; |
1567 | error: | 1688 | error: |
1568 | close_bdev_exclusive(bdev, 0); | 1689 | blkdev_put(bdev, FMODE_EXCL); |
1569 | if (seeding_dev) { | 1690 | if (seeding_dev) { |
1570 | mutex_unlock(&uuid_mutex); | 1691 | mutex_unlock(&uuid_mutex); |
1571 | up_write(&sb->s_umount); | 1692 | up_write(&sb->s_umount); |
@@ -1905,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1905 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2026 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1906 | return -EROFS; | 2027 | return -EROFS; |
1907 | 2028 | ||
2029 | if (!capable(CAP_SYS_ADMIN)) | ||
2030 | return -EPERM; | ||
2031 | |||
1908 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2032 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1909 | dev_root = dev_root->fs_info->dev_root; | 2033 | dev_root = dev_root->fs_info->dev_root; |
1910 | 2034 | ||
@@ -2143,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2143 | return calc_size * num_stripes; | 2267 | return calc_size * num_stripes; |
2144 | } | 2268 | } |
2145 | 2269 | ||
2146 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2270 | /* Used to sort the devices by max_avail(descending sort) */ |
2147 | struct btrfs_root *extent_root, | 2271 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2148 | struct map_lookup **map_ret, | ||
2149 | u64 *num_bytes, u64 *stripe_size, | ||
2150 | u64 start, u64 type) | ||
2151 | { | 2272 | { |
2152 | struct btrfs_fs_info *info = extent_root->fs_info; | 2273 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2153 | struct btrfs_device *device = NULL; | 2274 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2154 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2275 | return -1; |
2155 | struct list_head *cur; | 2276 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2156 | struct map_lookup *map = NULL; | 2277 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2157 | struct extent_map_tree *em_tree; | 2278 | return 1; |
2158 | struct extent_map *em; | 2279 | else |
2159 | struct list_head private_devs; | 2280 | return 0; |
2160 | int min_stripe_size = 1 * 1024 * 1024; | 2281 | } |
2161 | u64 calc_size = 1024 * 1024 * 1024; | ||
2162 | u64 max_chunk_size = calc_size; | ||
2163 | u64 min_free; | ||
2164 | u64 avail; | ||
2165 | u64 max_avail = 0; | ||
2166 | u64 dev_offset; | ||
2167 | int num_stripes = 1; | ||
2168 | int min_stripes = 1; | ||
2169 | int sub_stripes = 0; | ||
2170 | int looped = 0; | ||
2171 | int ret; | ||
2172 | int index; | ||
2173 | int stripe_len = 64 * 1024; | ||
2174 | 2282 | ||
2175 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2283 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2176 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2284 | int *num_stripes, int *min_stripes, |
2177 | WARN_ON(1); | 2285 | int *sub_stripes) |
2178 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2286 | { |
2179 | } | 2287 | *num_stripes = 1; |
2180 | if (list_empty(&fs_devices->alloc_list)) | 2288 | *min_stripes = 1; |
2181 | return -ENOSPC; | 2289 | *sub_stripes = 0; |
2182 | 2290 | ||
2183 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2291 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2184 | num_stripes = fs_devices->rw_devices; | 2292 | *num_stripes = fs_devices->rw_devices; |
2185 | min_stripes = 2; | 2293 | *min_stripes = 2; |
2186 | } | 2294 | } |
2187 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2295 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2188 | num_stripes = 2; | 2296 | *num_stripes = 2; |
2189 | min_stripes = 2; | 2297 | *min_stripes = 2; |
2190 | } | 2298 | } |
2191 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2299 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2192 | if (fs_devices->rw_devices < 2) | 2300 | if (fs_devices->rw_devices < 2) |
2193 | return -ENOSPC; | 2301 | return -ENOSPC; |
2194 | num_stripes = 2; | 2302 | *num_stripes = 2; |
2195 | min_stripes = 2; | 2303 | *min_stripes = 2; |
2196 | } | 2304 | } |
2197 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2305 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2198 | num_stripes = fs_devices->rw_devices; | 2306 | *num_stripes = fs_devices->rw_devices; |
2199 | if (num_stripes < 4) | 2307 | if (*num_stripes < 4) |
2200 | return -ENOSPC; | 2308 | return -ENOSPC; |
2201 | num_stripes &= ~(u32)1; | 2309 | *num_stripes &= ~(u32)1; |
2202 | sub_stripes = 2; | 2310 | *sub_stripes = 2; |
2203 | min_stripes = 4; | 2311 | *min_stripes = 4; |
2204 | } | 2312 | } |
2205 | 2313 | ||
2314 | return 0; | ||
2315 | } | ||
2316 | |||
2317 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2318 | u64 proposed_size, u64 type, | ||
2319 | int num_stripes, int small_stripe) | ||
2320 | { | ||
2321 | int min_stripe_size = 1 * 1024 * 1024; | ||
2322 | u64 calc_size = proposed_size; | ||
2323 | u64 max_chunk_size = calc_size; | ||
2324 | int ncopies = 1; | ||
2325 | |||
2326 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2327 | BTRFS_BLOCK_GROUP_DUP | | ||
2328 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2329 | ncopies = 2; | ||
2330 | |||
2206 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2331 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2207 | max_chunk_size = 10 * calc_size; | 2332 | max_chunk_size = 10 * calc_size; |
2208 | min_stripe_size = 64 * 1024 * 1024; | 2333 | min_stripe_size = 64 * 1024 * 1024; |
@@ -2219,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2219 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2344 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2220 | max_chunk_size); | 2345 | max_chunk_size); |
2221 | 2346 | ||
2222 | again: | 2347 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2223 | max_avail = 0; | 2348 | calc_size = max_chunk_size * ncopies; |
2224 | if (!map || map->num_stripes != num_stripes) { | ||
2225 | kfree(map); | ||
2226 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2227 | if (!map) | ||
2228 | return -ENOMEM; | ||
2229 | map->num_stripes = num_stripes; | ||
2230 | } | ||
2231 | |||
2232 | if (calc_size * num_stripes > max_chunk_size) { | ||
2233 | calc_size = max_chunk_size; | ||
2234 | do_div(calc_size, num_stripes); | 2349 | do_div(calc_size, num_stripes); |
2235 | do_div(calc_size, stripe_len); | 2350 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2236 | calc_size *= stripe_len; | 2351 | calc_size *= BTRFS_STRIPE_LEN; |
2237 | } | 2352 | } |
2238 | 2353 | ||
2239 | /* we don't want tiny stripes */ | 2354 | /* we don't want tiny stripes */ |
2240 | if (!looped) | 2355 | if (!small_stripe) |
2241 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2356 | calc_size = max_t(u64, min_stripe_size, calc_size); |
2242 | 2357 | ||
2243 | /* | 2358 | /* |
2244 | * we're about to do_div by the stripe_len so lets make sure | 2359 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure |
2245 | * we end up with something bigger than a stripe | 2360 | * we end up with something bigger than a stripe |
2246 | */ | 2361 | */ |
2247 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2362 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
2363 | |||
2364 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2365 | calc_size *= BTRFS_STRIPE_LEN; | ||
2366 | |||
2367 | return calc_size; | ||
2368 | } | ||
2248 | 2369 | ||
2249 | do_div(calc_size, stripe_len); | 2370 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, |
2250 | calc_size *= stripe_len; | 2371 | int num_stripes) |
2372 | { | ||
2373 | struct map_lookup *new; | ||
2374 | size_t len = map_lookup_size(num_stripes); | ||
2375 | |||
2376 | BUG_ON(map->num_stripes < num_stripes); | ||
2377 | |||
2378 | if (map->num_stripes == num_stripes) | ||
2379 | return map; | ||
2380 | |||
2381 | new = kmalloc(len, GFP_NOFS); | ||
2382 | if (!new) { | ||
2383 | /* just change map->num_stripes */ | ||
2384 | map->num_stripes = num_stripes; | ||
2385 | return map; | ||
2386 | } | ||
2387 | |||
2388 | memcpy(new, map, len); | ||
2389 | new->num_stripes = num_stripes; | ||
2390 | kfree(map); | ||
2391 | return new; | ||
2392 | } | ||
2393 | |||
2394 | /* | ||
2395 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
2396 | * max free space information of every device. It is used when we can not | ||
2397 | * allocate chunks by default size. | ||
2398 | * | ||
2399 | * By this helper, we can allocate a new chunk as larger as possible. | ||
2400 | */ | ||
2401 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2402 | struct btrfs_fs_devices *fs_devices, | ||
2403 | struct btrfs_device_info *devices, | ||
2404 | int nr_device, u64 type, | ||
2405 | struct map_lookup **map_lookup, | ||
2406 | int min_stripes, u64 *stripe_size) | ||
2407 | { | ||
2408 | int i, index, sort_again = 0; | ||
2409 | int min_devices = min_stripes; | ||
2410 | u64 max_avail, min_free; | ||
2411 | struct map_lookup *map = *map_lookup; | ||
2412 | int ret; | ||
2413 | |||
2414 | if (nr_device < min_stripes) | ||
2415 | return -ENOSPC; | ||
2416 | |||
2417 | btrfs_descending_sort_devices(devices, nr_device); | ||
2418 | |||
2419 | max_avail = devices[0].max_avail; | ||
2420 | if (!max_avail) | ||
2421 | return -ENOSPC; | ||
2422 | |||
2423 | for (i = 0; i < nr_device; i++) { | ||
2424 | /* | ||
2425 | * if dev_offset = 0, it means the free space of this device | ||
2426 | * is less than what we need, and we didn't search max avail | ||
2427 | * extent on this device, so do it now. | ||
2428 | */ | ||
2429 | if (!devices[i].dev_offset) { | ||
2430 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2431 | max_avail, | ||
2432 | &devices[i].dev_offset, | ||
2433 | &devices[i].max_avail); | ||
2434 | if (ret != 0 && ret != -ENOSPC) | ||
2435 | return ret; | ||
2436 | sort_again = 1; | ||
2437 | } | ||
2438 | } | ||
2439 | |||
2440 | /* we update the max avail free extent of each devices, sort again */ | ||
2441 | if (sort_again) | ||
2442 | btrfs_descending_sort_devices(devices, nr_device); | ||
2443 | |||
2444 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2445 | min_devices = 1; | ||
2446 | |||
2447 | if (!devices[min_devices - 1].max_avail) | ||
2448 | return -ENOSPC; | ||
2449 | |||
2450 | max_avail = devices[min_devices - 1].max_avail; | ||
2451 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2452 | do_div(max_avail, 2); | ||
2453 | |||
2454 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2455 | min_stripes, 1); | ||
2456 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2457 | min_free = max_avail * 2; | ||
2458 | else | ||
2459 | min_free = max_avail; | ||
2460 | |||
2461 | if (min_free > devices[min_devices - 1].max_avail) | ||
2462 | return -ENOSPC; | ||
2463 | |||
2464 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2465 | *stripe_size = max_avail; | ||
2466 | |||
2467 | index = 0; | ||
2468 | for (i = 0; i < min_stripes; i++) { | ||
2469 | map->stripes[i].dev = devices[index].dev; | ||
2470 | map->stripes[i].physical = devices[index].dev_offset; | ||
2471 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2472 | i++; | ||
2473 | map->stripes[i].dev = devices[index].dev; | ||
2474 | map->stripes[i].physical = devices[index].dev_offset + | ||
2475 | max_avail; | ||
2476 | } | ||
2477 | index++; | ||
2478 | } | ||
2479 | *map_lookup = map; | ||
2480 | |||
2481 | return 0; | ||
2482 | } | ||
2483 | |||
2484 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
2485 | struct btrfs_root *extent_root, | ||
2486 | struct map_lookup **map_ret, | ||
2487 | u64 *num_bytes, u64 *stripe_size, | ||
2488 | u64 start, u64 type) | ||
2489 | { | ||
2490 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2491 | struct btrfs_device *device = NULL; | ||
2492 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2493 | struct list_head *cur; | ||
2494 | struct map_lookup *map; | ||
2495 | struct extent_map_tree *em_tree; | ||
2496 | struct extent_map *em; | ||
2497 | struct btrfs_device_info *devices_info; | ||
2498 | struct list_head private_devs; | ||
2499 | u64 calc_size = 1024 * 1024 * 1024; | ||
2500 | u64 min_free; | ||
2501 | u64 avail; | ||
2502 | u64 dev_offset; | ||
2503 | int num_stripes; | ||
2504 | int min_stripes; | ||
2505 | int sub_stripes; | ||
2506 | int min_devices; /* the min number of devices we need */ | ||
2507 | int i; | ||
2508 | int ret; | ||
2509 | int index; | ||
2510 | |||
2511 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
2512 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
2513 | WARN_ON(1); | ||
2514 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2515 | } | ||
2516 | if (list_empty(&fs_devices->alloc_list)) | ||
2517 | return -ENOSPC; | ||
2518 | |||
2519 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2520 | &min_stripes, &sub_stripes); | ||
2521 | if (ret) | ||
2522 | return ret; | ||
2523 | |||
2524 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2525 | GFP_NOFS); | ||
2526 | if (!devices_info) | ||
2527 | return -ENOMEM; | ||
2528 | |||
2529 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2530 | if (!map) { | ||
2531 | ret = -ENOMEM; | ||
2532 | goto error; | ||
2533 | } | ||
2534 | map->num_stripes = num_stripes; | ||
2251 | 2535 | ||
2252 | cur = fs_devices->alloc_list.next; | 2536 | cur = fs_devices->alloc_list.next; |
2253 | index = 0; | 2537 | index = 0; |
2538 | i = 0; | ||
2254 | 2539 | ||
2255 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2540 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2541 | num_stripes, 0); | ||
2542 | |||
2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2256 | min_free = calc_size * 2; | 2544 | min_free = calc_size * 2; |
2257 | else | 2545 | min_devices = 1; |
2546 | } else { | ||
2258 | min_free = calc_size; | 2547 | min_free = calc_size; |
2259 | 2548 | min_devices = min_stripes; | |
2260 | /* | 2549 | } |
2261 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2262 | * we've looped, then we are likely allocating the maximum amount of | ||
2263 | * space left already | ||
2264 | */ | ||
2265 | if (!looped) | ||
2266 | min_free += 1024 * 1024; | ||
2267 | 2550 | ||
2268 | INIT_LIST_HEAD(&private_devs); | 2551 | INIT_LIST_HEAD(&private_devs); |
2269 | while (index < num_stripes) { | 2552 | while (index < num_stripes) { |
@@ -2276,27 +2559,39 @@ again: | |||
2276 | cur = cur->next; | 2559 | cur = cur->next; |
2277 | 2560 | ||
2278 | if (device->in_fs_metadata && avail >= min_free) { | 2561 | if (device->in_fs_metadata && avail >= min_free) { |
2279 | ret = find_free_dev_extent(trans, device, | 2562 | ret = find_free_dev_extent(trans, device, min_free, |
2280 | min_free, &dev_offset, | 2563 | &devices_info[i].dev_offset, |
2281 | &max_avail); | 2564 | &devices_info[i].max_avail); |
2282 | if (ret == 0) { | 2565 | if (ret == 0) { |
2283 | list_move_tail(&device->dev_alloc_list, | 2566 | list_move_tail(&device->dev_alloc_list, |
2284 | &private_devs); | 2567 | &private_devs); |
2285 | map->stripes[index].dev = device; | 2568 | map->stripes[index].dev = device; |
2286 | map->stripes[index].physical = dev_offset; | 2569 | map->stripes[index].physical = |
2570 | devices_info[i].dev_offset; | ||
2287 | index++; | 2571 | index++; |
2288 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2572 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2289 | map->stripes[index].dev = device; | 2573 | map->stripes[index].dev = device; |
2290 | map->stripes[index].physical = | 2574 | map->stripes[index].physical = |
2291 | dev_offset + calc_size; | 2575 | devices_info[i].dev_offset + |
2576 | calc_size; | ||
2292 | index++; | 2577 | index++; |
2293 | } | 2578 | } |
2294 | } | 2579 | } else if (ret != -ENOSPC) |
2295 | } else if (device->in_fs_metadata && avail > max_avail) | 2580 | goto error; |
2296 | max_avail = avail; | 2581 | |
2582 | devices_info[i].dev = device; | ||
2583 | i++; | ||
2584 | } else if (device->in_fs_metadata && | ||
2585 | avail >= BTRFS_STRIPE_LEN) { | ||
2586 | devices_info[i].dev = device; | ||
2587 | devices_info[i].max_avail = avail; | ||
2588 | i++; | ||
2589 | } | ||
2590 | |||
2297 | if (cur == &fs_devices->alloc_list) | 2591 | if (cur == &fs_devices->alloc_list) |
2298 | break; | 2592 | break; |
2299 | } | 2593 | } |
2594 | |||
2300 | list_splice(&private_devs, &fs_devices->alloc_list); | 2595 | list_splice(&private_devs, &fs_devices->alloc_list); |
2301 | if (index < num_stripes) { | 2596 | if (index < num_stripes) { |
2302 | if (index >= min_stripes) { | 2597 | if (index >= min_stripes) { |
@@ -2305,34 +2600,36 @@ again: | |||
2305 | num_stripes /= sub_stripes; | 2600 | num_stripes /= sub_stripes; |
2306 | num_stripes *= sub_stripes; | 2601 | num_stripes *= sub_stripes; |
2307 | } | 2602 | } |
2308 | looped = 1; | 2603 | |
2309 | goto again; | 2604 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2310 | } | 2605 | } else if (i >= min_devices) { |
2311 | if (!looped && max_avail > 0) { | 2606 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2312 | looped = 1; | 2607 | devices_info, i, type, |
2313 | calc_size = max_avail; | 2608 | &map, min_stripes, |
2314 | goto again; | 2609 | &calc_size); |
2610 | if (ret) | ||
2611 | goto error; | ||
2612 | } else { | ||
2613 | ret = -ENOSPC; | ||
2614 | goto error; | ||
2315 | } | 2615 | } |
2316 | kfree(map); | ||
2317 | return -ENOSPC; | ||
2318 | } | 2616 | } |
2319 | map->sector_size = extent_root->sectorsize; | 2617 | map->sector_size = extent_root->sectorsize; |
2320 | map->stripe_len = stripe_len; | 2618 | map->stripe_len = BTRFS_STRIPE_LEN; |
2321 | map->io_align = stripe_len; | 2619 | map->io_align = BTRFS_STRIPE_LEN; |
2322 | map->io_width = stripe_len; | 2620 | map->io_width = BTRFS_STRIPE_LEN; |
2323 | map->type = type; | 2621 | map->type = type; |
2324 | map->num_stripes = num_stripes; | ||
2325 | map->sub_stripes = sub_stripes; | 2622 | map->sub_stripes = sub_stripes; |
2326 | 2623 | ||
2327 | *map_ret = map; | 2624 | *map_ret = map; |
2328 | *stripe_size = calc_size; | 2625 | *stripe_size = calc_size; |
2329 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2626 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2330 | num_stripes, sub_stripes); | 2627 | map->num_stripes, sub_stripes); |
2331 | 2628 | ||
2332 | em = alloc_extent_map(GFP_NOFS); | 2629 | em = alloc_extent_map(GFP_NOFS); |
2333 | if (!em) { | 2630 | if (!em) { |
2334 | kfree(map); | 2631 | ret = -ENOMEM; |
2335 | return -ENOMEM; | 2632 | goto error; |
2336 | } | 2633 | } |
2337 | em->bdev = (struct block_device *)map; | 2634 | em->bdev = (struct block_device *)map; |
2338 | em->start = start; | 2635 | em->start = start; |
@@ -2365,7 +2662,13 @@ again: | |||
2365 | index++; | 2662 | index++; |
2366 | } | 2663 | } |
2367 | 2664 | ||
2665 | kfree(devices_info); | ||
2368 | return 0; | 2666 | return 0; |
2667 | |||
2668 | error: | ||
2669 | kfree(map); | ||
2670 | kfree(devices_info); | ||
2671 | return ret; | ||
2369 | } | 2672 | } |
2370 | 2673 | ||
2371 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2674 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
@@ -3080,7 +3383,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3080 | device->devid = devid; | 3383 | device->devid = devid; |
3081 | device->work.func = pending_bios_fn; | 3384 | device->work.func = pending_bios_fn; |
3082 | device->fs_devices = fs_devices; | 3385 | device->fs_devices = fs_devices; |
3386 | device->missing = 1; | ||
3083 | fs_devices->num_devices++; | 3387 | fs_devices->num_devices++; |
3388 | fs_devices->missing_devices++; | ||
3084 | spin_lock_init(&device->io_lock); | 3389 | spin_lock_init(&device->io_lock); |
3085 | INIT_LIST_HEAD(&device->dev_alloc_list); | 3390 | INIT_LIST_HEAD(&device->dev_alloc_list); |
3086 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); | 3391 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); |
@@ -3278,6 +3583,15 @@ static int read_one_dev(struct btrfs_root *root, | |||
3278 | device = add_missing_dev(root, devid, dev_uuid); | 3583 | device = add_missing_dev(root, devid, dev_uuid); |
3279 | if (!device) | 3584 | if (!device) |
3280 | return -ENOMEM; | 3585 | return -ENOMEM; |
3586 | } else if (!device->missing) { | ||
3587 | /* | ||
3588 | * this happens when a device that was properly setup | ||
3589 | * in the device info lists suddenly goes bad. | ||
3590 | * device->bdev is NULL, and so we have to set | ||
3591 | * device->missing to one here | ||
3592 | */ | ||
3593 | root->fs_info->fs_devices->missing_devices++; | ||
3594 | device->missing = 1; | ||
3281 | } | 3595 | } |
3282 | } | 3596 | } |
3283 | 3597 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2b638b6e4eea..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -44,12 +47,13 @@ struct btrfs_device { | |||
44 | 47 | ||
45 | int writeable; | 48 | int writeable; |
46 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | ||
47 | 51 | ||
48 | spinlock_t io_lock; | 52 | spinlock_t io_lock; |
49 | 53 | ||
50 | struct block_device *bdev; | 54 | struct block_device *bdev; |
51 | 55 | ||
52 | /* the mode sent to open_bdev_exclusive */ | 56 | /* the mode sent to blkdev_get */ |
53 | fmode_t mode; | 57 | fmode_t mode; |
54 | 58 | ||
55 | char *name; | 59 | char *name; |
@@ -93,6 +97,7 @@ struct btrfs_fs_devices { | |||
93 | u64 num_devices; | 97 | u64 num_devices; |
94 | u64 open_devices; | 98 | u64 open_devices; |
95 | u64 rw_devices; | 99 | u64 rw_devices; |
100 | u64 missing_devices; | ||
96 | u64 total_rw_bytes; | 101 | u64 total_rw_bytes; |
97 | struct block_device *latest_bdev; | 102 | struct block_device *latest_bdev; |
98 | 103 | ||
@@ -134,6 +139,30 @@ struct btrfs_multi_bio { | |||
134 | struct btrfs_bio_stripe stripes[]; | 139 | struct btrfs_bio_stripe stripes[]; |
135 | }; | 140 | }; |
136 | 141 | ||
142 | struct btrfs_device_info { | ||
143 | struct btrfs_device *dev; | ||
144 | u64 dev_offset; | ||
145 | u64 max_avail; | ||
146 | }; | ||
147 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | ||
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
150 | |||
151 | /* | ||
152 | * sort the devices by max_avail, in which max free extent size of each device | ||
153 | * is stored.(Descending Sort) | ||
154 | */ | ||
155 | static inline void btrfs_descending_sort_devices( | ||
156 | struct btrfs_device_info *devices, | ||
157 | size_t nr_devices) | ||
158 | { | ||
159 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
160 | btrfs_cmp_device_free_bytes, NULL); | ||
161 | } | ||
162 | |||
163 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
164 | u64 end, u64 *length); | ||
165 | |||
137 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 166 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
138 | (sizeof(struct btrfs_bio_stripe) * (n))) | 167 | (sizeof(struct btrfs_bio_stripe) * (n))) |
139 | 168 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
317 | size_t size, int flags) | 317 | size_t size, int flags) |
318 | { | 318 | { |
319 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
320 | |||
321 | /* | ||
322 | * The permission on security.* and system.* is not checked | ||
323 | * in permission(). | ||
324 | */ | ||
325 | if (btrfs_root_readonly(root)) | ||
326 | return -EROFS; | ||
327 | |||
319 | /* | 328 | /* |
320 | * If this is a request for a synthetic attribute in the system.* | 329 | * If this is a request for a synthetic attribute in the system.* |
321 | * namespace use the generic infrastructure to resolve a handler | 330 | * namespace use the generic infrastructure to resolve a handler |
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
336 | 345 | ||
337 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 346 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
338 | { | 347 | { |
348 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
349 | |||
350 | /* | ||
351 | * The permission on security.* and system.* is not checked | ||
352 | * in permission(). | ||
353 | */ | ||
354 | if (btrfs_root_readonly(root)) | ||
355 | return -EROFS; | ||
356 | |||
339 | /* | 357 | /* |
340 | * If this is a request for a synthetic attribute in the system.* | 358 | * If this is a request for a synthetic attribute in the system.* |
341 | * namespace use the generic infrastructure to resolve a handler | 359 | * namespace use the generic infrastructure to resolve a handler |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,152 +39,51 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | 43 | { |
53 | static unsigned long num_workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | 45 | ||
57 | /* | 46 | vfree(workspace->def_strm.workspace); |
58 | * this finds an available zlib workspace or allocates a new one | 47 | vfree(workspace->inf_strm.workspace); |
59 | * NULL or an ERR_PTR is returned if things go bad. | 48 | kfree(workspace->buf); |
60 | */ | 49 | kfree(workspace); |
61 | static struct workspace *find_zlib_workspace(void) | 50 | } |
51 | |||
52 | static struct list_head *zlib_alloc_workspace(void) | ||
62 | { | 53 | { |
63 | struct workspace *workspace; | 54 | struct workspace *workspace; |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | 55 | ||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
89 | if (!workspace) { | 57 | if (!workspace) |
90 | ret = -ENOMEM; | 58 | return ERR_PTR(-ENOMEM); |
91 | goto fail; | ||
92 | } | ||
93 | 59 | ||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
105 | if (!workspace->buf) { | 63 | if (!workspace->def_strm.workspace || |
106 | ret = -ENOMEM; | 64 | !workspace->inf_strm.workspace || !workspace->buf) |
107 | goto fail_kmalloc; | 65 | goto fail; |
108 | } | ||
109 | return workspace; | ||
110 | |||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | ||
139 | vfree(workspace->inf_strm.workspace); | ||
140 | kfree(workspace->buf); | ||
141 | kfree(workspace); | ||
142 | 66 | ||
143 | atomic_dec(&alloc_workspace); | 67 | INIT_LIST_HEAD(&workspace->list); |
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | ||
148 | 68 | ||
149 | /* | 69 | return &workspace->list; |
150 | * cleanup function for module exit | 70 | fail: |
151 | */ | 71 | zlib_free_workspace(&workspace->list); |
152 | static void free_workspaces(void) | 72 | return ERR_PTR(-ENOMEM); |
153 | { | ||
154 | struct workspace *workspace; | ||
155 | while (!list_empty(&idle_workspace)) { | ||
156 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
157 | list); | ||
158 | list_del(&workspace->list); | ||
159 | vfree(workspace->def_strm.workspace); | ||
160 | vfree(workspace->inf_strm.workspace); | ||
161 | kfree(workspace->buf); | ||
162 | kfree(workspace); | ||
163 | atomic_dec(&alloc_workspace); | ||
164 | } | ||
165 | } | 73 | } |
166 | 74 | ||
167 | /* | 75 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 76 | struct address_space *mapping, |
169 | * | 77 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 78 | struct page **pages, |
171 | * in 'pages' | 79 | unsigned long nr_dest_pages, |
172 | * | 80 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 81 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 82 | unsigned long *total_out, |
175 | * | 83 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller then len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 84 | { |
85 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 86 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 87 | char *data_in; |
198 | char *cpage_out; | 88 | char *cpage_out; |
199 | int nr_pages = 0; | 89 | int nr_pages = 0; |
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
205 | *total_out = 0; | 95 | *total_out = 0; |
206 | *total_in = 0; | 96 | *total_in = 0; |
207 | 97 | ||
208 | workspace = find_zlib_workspace(); | ||
209 | if (IS_ERR(workspace)) | ||
210 | return -1; | ||
211 | |||
212 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 98 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
213 | printk(KERN_WARNING "deflateInit failed\n"); | 99 | printk(KERN_WARNING "deflateInit failed\n"); |
214 | ret = -1; | 100 | ret = -1; |
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
222 | data_in = kmap(in_page); | 108 | data_in = kmap(in_page); |
223 | 109 | ||
224 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 110 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
111 | if (out_page == NULL) { | ||
112 | ret = -1; | ||
113 | goto out; | ||
114 | } | ||
225 | cpage_out = kmap(out_page); | 115 | cpage_out = kmap(out_page); |
226 | pages[0] = out_page; | 116 | pages[0] = out_page; |
227 | nr_pages = 1; | 117 | nr_pages = 1; |
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
260 | goto out; | 150 | goto out; |
261 | } | 151 | } |
262 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 152 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
153 | if (out_page == NULL) { | ||
154 | ret = -1; | ||
155 | goto out; | ||
156 | } | ||
263 | cpage_out = kmap(out_page); | 157 | cpage_out = kmap(out_page); |
264 | pages[nr_pages] = out_page; | 158 | pages[nr_pages] = out_page; |
265 | nr_pages++; | 159 | nr_pages++; |
@@ -314,55 +208,26 @@ out: | |||
314 | kunmap(in_page); | 208 | kunmap(in_page); |
315 | page_cache_release(in_page); | 209 | page_cache_release(in_page); |
316 | } | 210 | } |
317 | free_workspace(workspace); | ||
318 | return ret; | 211 | return ret; |
319 | } | 212 | } |
320 | 213 | ||
321 | /* | 214 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
322 | * pages_in is an array of pages with compressed data. | 215 | u64 disk_start, |
323 | * | 216 | struct bio_vec *bvec, |
324 | * disk_start is the starting logical offset of this array in the file | 217 | int vcnt, |
325 | * | 218 | size_t srclen) |
326 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
327 | * | ||
328 | * vcnt is the count of pages in the biovec | ||
329 | * | ||
330 | * srclen is the number of bytes in pages_in | ||
331 | * | ||
332 | * The basic idea is that we have a bio that was created by readpages. | ||
333 | * The pages in the bio are for the uncompressed data, and they may not | ||
334 | * be contiguous. They all correspond to the range of bytes covered by | ||
335 | * the compressed extent. | ||
336 | */ | ||
337 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
338 | u64 disk_start, | ||
339 | struct bio_vec *bvec, | ||
340 | int vcnt, | ||
341 | size_t srclen) | ||
342 | { | 219 | { |
343 | int ret = 0; | 220 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
221 | int ret = 0, ret2; | ||
344 | int wbits = MAX_WBITS; | 222 | int wbits = MAX_WBITS; |
345 | struct workspace *workspace; | ||
346 | char *data_in; | 223 | char *data_in; |
347 | size_t total_out = 0; | 224 | size_t total_out = 0; |
348 | unsigned long page_bytes_left; | ||
349 | unsigned long page_in_index = 0; | 225 | unsigned long page_in_index = 0; |
350 | unsigned long page_out_index = 0; | 226 | unsigned long page_out_index = 0; |
351 | struct page *page_out; | ||
352 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 227 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
353 | PAGE_CACHE_SIZE; | 228 | PAGE_CACHE_SIZE; |
354 | unsigned long buf_start; | 229 | unsigned long buf_start; |
355 | unsigned long buf_offset; | ||
356 | unsigned long bytes; | ||
357 | unsigned long working_bytes; | ||
358 | unsigned long pg_offset; | 230 | unsigned long pg_offset; |
359 | unsigned long start_byte; | ||
360 | unsigned long current_buf_start; | ||
361 | char *kaddr; | ||
362 | |||
363 | workspace = find_zlib_workspace(); | ||
364 | if (IS_ERR(workspace)) | ||
365 | return -ENOMEM; | ||
366 | 231 | ||
367 | data_in = kmap(pages_in[page_in_index]); | 232 | data_in = kmap(pages_in[page_in_index]); |
368 | workspace->inf_strm.next_in = data_in; | 233 | workspace->inf_strm.next_in = data_in; |
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
372 | workspace->inf_strm.total_out = 0; | 237 | workspace->inf_strm.total_out = 0; |
373 | workspace->inf_strm.next_out = workspace->buf; | 238 | workspace->inf_strm.next_out = workspace->buf; |
374 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 239 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
375 | page_out = bvec[page_out_index].bv_page; | ||
376 | page_bytes_left = PAGE_CACHE_SIZE; | ||
377 | pg_offset = 0; | 240 | pg_offset = 0; |
378 | 241 | ||
379 | /* If it's deflate, and it's got no preset dictionary, then | 242 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
389 | 252 | ||
390 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 253 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
391 | printk(KERN_WARNING "inflateInit failed\n"); | 254 | printk(KERN_WARNING "inflateInit failed\n"); |
392 | ret = -1; | 255 | return -1; |
393 | goto out; | ||
394 | } | 256 | } |
395 | while (workspace->inf_strm.total_in < srclen) { | 257 | while (workspace->inf_strm.total_in < srclen) { |
396 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 258 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
397 | if (ret != Z_OK && ret != Z_STREAM_END) | 259 | if (ret != Z_OK && ret != Z_STREAM_END) |
398 | break; | 260 | break; |
399 | /* | ||
400 | * buf start is the byte offset we're of the start of | ||
401 | * our workspace buffer | ||
402 | */ | ||
403 | buf_start = total_out; | ||
404 | 261 | ||
405 | /* total_out is the last byte of the workspace buffer */ | 262 | buf_start = total_out; |
406 | total_out = workspace->inf_strm.total_out; | 263 | total_out = workspace->inf_strm.total_out; |
407 | 264 | ||
408 | working_bytes = total_out - buf_start; | 265 | /* we didn't make progress in this inflate call, we're done */ |
409 | 266 | if (buf_start == total_out) | |
410 | /* | ||
411 | * start byte is the first byte of the page we're currently | ||
412 | * copying into relative to the start of the compressed data. | ||
413 | */ | ||
414 | start_byte = page_offset(page_out) - disk_start; | ||
415 | |||
416 | if (working_bytes == 0) { | ||
417 | /* we didn't make progress in this inflate | ||
418 | * call, we're done | ||
419 | */ | ||
420 | if (ret != Z_STREAM_END) | ||
421 | ret = -1; | ||
422 | break; | 267 | break; |
423 | } | ||
424 | 268 | ||
425 | /* we haven't yet hit data corresponding to this page */ | 269 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
426 | if (total_out <= start_byte) | 270 | total_out, disk_start, |
427 | goto next; | 271 | bvec, vcnt, |
428 | 272 | &page_out_index, &pg_offset); | |
429 | /* | 273 | if (ret2 == 0) { |
430 | * the start of the data we care about is offset into | 274 | ret = 0; |
431 | * the middle of our working buffer | 275 | goto done; |
432 | */ | ||
433 | if (total_out > start_byte && buf_start < start_byte) { | ||
434 | buf_offset = start_byte - buf_start; | ||
435 | working_bytes -= buf_offset; | ||
436 | } else { | ||
437 | buf_offset = 0; | ||
438 | } | ||
439 | current_buf_start = buf_start; | ||
440 | |||
441 | /* copy bytes from the working buffer into the pages */ | ||
442 | while (working_bytes > 0) { | ||
443 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
444 | PAGE_CACHE_SIZE - buf_offset); | ||
445 | bytes = min(bytes, working_bytes); | ||
446 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
447 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
448 | bytes); | ||
449 | kunmap_atomic(kaddr, KM_USER0); | ||
450 | flush_dcache_page(page_out); | ||
451 | |||
452 | pg_offset += bytes; | ||
453 | page_bytes_left -= bytes; | ||
454 | buf_offset += bytes; | ||
455 | working_bytes -= bytes; | ||
456 | current_buf_start += bytes; | ||
457 | |||
458 | /* check if we need to pick another page */ | ||
459 | if (page_bytes_left == 0) { | ||
460 | page_out_index++; | ||
461 | if (page_out_index >= vcnt) { | ||
462 | ret = 0; | ||
463 | goto done; | ||
464 | } | ||
465 | |||
466 | page_out = bvec[page_out_index].bv_page; | ||
467 | pg_offset = 0; | ||
468 | page_bytes_left = PAGE_CACHE_SIZE; | ||
469 | start_byte = page_offset(page_out) - disk_start; | ||
470 | |||
471 | /* | ||
472 | * make sure our new page is covered by this | ||
473 | * working buffer | ||
474 | */ | ||
475 | if (total_out <= start_byte) | ||
476 | goto next; | ||
477 | |||
478 | /* the next page in the biovec might not | ||
479 | * be adjacent to the last page, but it | ||
480 | * might still be found inside this working | ||
481 | * buffer. bump our offset pointer | ||
482 | */ | ||
483 | if (total_out > start_byte && | ||
484 | current_buf_start < start_byte) { | ||
485 | buf_offset = start_byte - buf_start; | ||
486 | working_bytes = total_out - start_byte; | ||
487 | current_buf_start = buf_start + | ||
488 | buf_offset; | ||
489 | } | ||
490 | } | ||
491 | } | 276 | } |
492 | next: | 277 | |
493 | workspace->inf_strm.next_out = workspace->buf; | 278 | workspace->inf_strm.next_out = workspace->buf; |
494 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 279 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
495 | 280 | ||
@@ -516,35 +301,21 @@ done: | |||
516 | zlib_inflateEnd(&workspace->inf_strm); | 301 | zlib_inflateEnd(&workspace->inf_strm); |
517 | if (data_in) | 302 | if (data_in) |
518 | kunmap(pages_in[page_in_index]); | 303 | kunmap(pages_in[page_in_index]); |
519 | out: | ||
520 | free_workspace(workspace); | ||
521 | return ret; | 304 | return ret; |
522 | } | 305 | } |
523 | 306 | ||
524 | /* | 307 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
525 | * a less complex decompression routine. Our compressed data fits in a | 308 | struct page *dest_page, |
526 | * single page, and we want to read a single page out of it. | 309 | unsigned long start_byte, |
527 | * start_byte tells us the offset into the compressed data we're interested in | 310 | size_t srclen, size_t destlen) |
528 | */ | ||
529 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
530 | struct page *dest_page, | ||
531 | unsigned long start_byte, | ||
532 | size_t srclen, size_t destlen) | ||
533 | { | 311 | { |
312 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
534 | int ret = 0; | 313 | int ret = 0; |
535 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
536 | struct workspace *workspace; | ||
537 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left = destlen; |
538 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
539 | char *kaddr; | 317 | char *kaddr; |
540 | 318 | ||
541 | if (destlen > PAGE_CACHE_SIZE) | ||
542 | return -ENOMEM; | ||
543 | |||
544 | workspace = find_zlib_workspace(); | ||
545 | if (IS_ERR(workspace)) | ||
546 | return -ENOMEM; | ||
547 | |||
548 | workspace->inf_strm.next_in = data_in; | 319 | workspace->inf_strm.next_in = data_in; |
549 | workspace->inf_strm.avail_in = srclen; | 320 | workspace->inf_strm.avail_in = srclen; |
550 | workspace->inf_strm.total_in = 0; | 321 | workspace->inf_strm.total_in = 0; |
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
565 | 336 | ||
566 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 337 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
567 | printk(KERN_WARNING "inflateInit failed\n"); | 338 | printk(KERN_WARNING "inflateInit failed\n"); |
568 | ret = -1; | 339 | return -1; |
569 | goto out; | ||
570 | } | 340 | } |
571 | 341 | ||
572 | while (bytes_left > 0) { | 342 | while (bytes_left > 0) { |
@@ -616,12 +386,13 @@ next: | |||
616 | ret = 0; | 386 | ret = 0; |
617 | 387 | ||
618 | zlib_inflateEnd(&workspace->inf_strm); | 388 | zlib_inflateEnd(&workspace->inf_strm); |
619 | out: | ||
620 | free_workspace(workspace); | ||
621 | return ret; | 389 | return ret; |
622 | } | 390 | } |
623 | 391 | ||
624 | void btrfs_zlib_exit(void) | 392 | struct btrfs_compress_op btrfs_zlib_compress = { |
625 | { | 393 | .alloc_workspace = zlib_alloc_workspace, |
626 | free_workspaces(); | 394 | .free_workspace = zlib_free_workspace, |
627 | } | 395 | .compress_pages = zlib_compress_pages, |
396 | .decompress_biovec = zlib_decompress_biovec, | ||
397 | .decompress = zlib_decompress, | ||
398 | }; | ||