diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 406 |
1 files changed, 311 insertions, 95 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c02033596f02..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,10 +24,12 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
30 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
32 | #include <linux/slab.h> | ||
31 | #include "ctree.h" | 33 | #include "ctree.h" |
32 | #include "disk-io.h" | 34 | #include "disk-io.h" |
33 | #include "transaction.h" | 35 | #include "transaction.h" |
@@ -45,32 +47,46 @@ | |||
45 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
46 | int write_bytes, | 48 | int write_bytes, |
47 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
48 | const char __user *buf) | 50 | struct iov_iter *i) |
49 | { | 51 | { |
50 | long page_fault = 0; | 52 | size_t copied = 0; |
51 | int i; | 53 | int pg = 0; |
52 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 54 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
55 | int total_copied = 0; | ||
53 | 56 | ||
54 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 57 | while (write_bytes > 0) { |
55 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
56 | PAGE_CACHE_SIZE - offset, write_bytes); | 59 | PAGE_CACHE_SIZE - offset, write_bytes); |
57 | struct page *page = prepared_pages[i]; | 60 | struct page *page = prepared_pages[pg]; |
58 | fault_in_pages_readable(buf, count); | 61 | /* |
62 | * Copy data from userspace to the current page | ||
63 | * | ||
64 | * Disable pagefault to avoid recursive lock since | ||
65 | * the pages are already locked | ||
66 | */ | ||
67 | pagefault_disable(); | ||
68 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | ||
69 | pagefault_enable(); | ||
59 | 70 | ||
60 | /* Copy data from userspace to the current page */ | ||
61 | kmap(page); | ||
62 | page_fault = __copy_from_user(page_address(page) + offset, | ||
63 | buf, count); | ||
64 | /* Flush processor's dcache for this page */ | 71 | /* Flush processor's dcache for this page */ |
65 | flush_dcache_page(page); | 72 | flush_dcache_page(page); |
66 | kunmap(page); | 73 | iov_iter_advance(i, copied); |
67 | buf += count; | 74 | write_bytes -= copied; |
68 | write_bytes -= count; | 75 | total_copied += copied; |
69 | 76 | ||
70 | if (page_fault) | 77 | /* Return to btrfs_file_aio_write to fault page */ |
78 | if (unlikely(copied == 0)) { | ||
71 | break; | 79 | break; |
80 | } | ||
81 | |||
82 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
83 | offset += copied; | ||
84 | } else { | ||
85 | pg++; | ||
86 | offset = 0; | ||
87 | } | ||
72 | } | 88 | } |
73 | return page_fault ? -EFAULT : 0; | 89 | return total_copied; |
74 | } | 90 | } |
75 | 91 | ||
76 | /* | 92 | /* |
@@ -123,9 +139,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 139 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
124 | 140 | ||
125 | end_of_last_block = start_pos + num_bytes - 1; | 141 | end_of_last_block = start_pos + num_bytes - 1; |
126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 142 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
127 | if (err) | 143 | NULL); |
128 | return err; | 144 | BUG_ON(err); |
129 | 145 | ||
130 | for (i = 0; i < num_pages; i++) { | 146 | for (i = 0; i < num_pages; i++) { |
131 | struct page *p = pages[i]; | 147 | struct page *p = pages[i]; |
@@ -140,7 +156,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
140 | * at this time. | 156 | * at this time. |
141 | */ | 157 | */ |
142 | } | 158 | } |
143 | return err; | 159 | return 0; |
144 | } | 160 | } |
145 | 161 | ||
146 | /* | 162 | /* |
@@ -209,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
209 | 225 | ||
210 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
211 | split->flags = flags; | 227 | split->flags = flags; |
228 | split->compress_type = em->compress_type; | ||
212 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
213 | BUG_ON(ret); | 230 | BUG_ON(ret); |
214 | free_extent_map(split); | 231 | free_extent_map(split); |
@@ -223,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
223 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
224 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
225 | split->flags = flags; | 242 | split->flags = flags; |
243 | split->compress_type = em->compress_type; | ||
226 | 244 | ||
227 | if (compressed) { | 245 | if (compressed) { |
228 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
@@ -720,13 +738,15 @@ again: | |||
720 | inode->i_ino, orig_offset); | 738 | inode->i_ino, orig_offset); |
721 | BUG_ON(ret); | 739 | BUG_ON(ret); |
722 | } | 740 | } |
723 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
724 | struct btrfs_file_extent_item); | ||
725 | if (del_nr == 0) { | 741 | if (del_nr == 0) { |
742 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
743 | struct btrfs_file_extent_item); | ||
726 | btrfs_set_file_extent_type(leaf, fi, | 744 | btrfs_set_file_extent_type(leaf, fi, |
727 | BTRFS_FILE_EXTENT_REG); | 745 | BTRFS_FILE_EXTENT_REG); |
728 | btrfs_mark_buffer_dirty(leaf); | 746 | btrfs_mark_buffer_dirty(leaf); |
729 | } else { | 747 | } else { |
748 | fi = btrfs_item_ptr(leaf, del_slot - 1, | ||
749 | struct btrfs_file_extent_item); | ||
730 | btrfs_set_file_extent_type(leaf, fi, | 750 | btrfs_set_file_extent_type(leaf, fi, |
731 | BTRFS_FILE_EXTENT_REG); | 751 | BTRFS_FILE_EXTENT_REG); |
732 | btrfs_set_file_extent_num_bytes(leaf, fi, | 752 | btrfs_set_file_extent_num_bytes(leaf, fi, |
@@ -751,6 +771,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
751 | loff_t pos, unsigned long first_index, | 771 | loff_t pos, unsigned long first_index, |
752 | unsigned long last_index, size_t write_bytes) | 772 | unsigned long last_index, size_t write_bytes) |
753 | { | 773 | { |
774 | struct extent_state *cached_state = NULL; | ||
754 | int i; | 775 | int i; |
755 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 776 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
756 | struct inode *inode = fdentry(file)->d_inode; | 777 | struct inode *inode = fdentry(file)->d_inode; |
@@ -779,16 +800,18 @@ again: | |||
779 | } | 800 | } |
780 | if (start_pos < inode->i_size) { | 801 | if (start_pos < inode->i_size) { |
781 | struct btrfs_ordered_extent *ordered; | 802 | struct btrfs_ordered_extent *ordered; |
782 | lock_extent(&BTRFS_I(inode)->io_tree, | 803 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
783 | start_pos, last_pos - 1, GFP_NOFS); | 804 | start_pos, last_pos - 1, 0, &cached_state, |
805 | GFP_NOFS); | ||
784 | ordered = btrfs_lookup_first_ordered_extent(inode, | 806 | ordered = btrfs_lookup_first_ordered_extent(inode, |
785 | last_pos - 1); | 807 | last_pos - 1); |
786 | if (ordered && | 808 | if (ordered && |
787 | ordered->file_offset + ordered->len > start_pos && | 809 | ordered->file_offset + ordered->len > start_pos && |
788 | ordered->file_offset < last_pos) { | 810 | ordered->file_offset < last_pos) { |
789 | btrfs_put_ordered_extent(ordered); | 811 | btrfs_put_ordered_extent(ordered); |
790 | unlock_extent(&BTRFS_I(inode)->io_tree, | 812 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
791 | start_pos, last_pos - 1, GFP_NOFS); | 813 | start_pos, last_pos - 1, |
814 | &cached_state, GFP_NOFS); | ||
792 | for (i = 0; i < num_pages; i++) { | 815 | for (i = 0; i < num_pages; i++) { |
793 | unlock_page(pages[i]); | 816 | unlock_page(pages[i]); |
794 | page_cache_release(pages[i]); | 817 | page_cache_release(pages[i]); |
@@ -800,12 +823,13 @@ again: | |||
800 | if (ordered) | 823 | if (ordered) |
801 | btrfs_put_ordered_extent(ordered); | 824 | btrfs_put_ordered_extent(ordered); |
802 | 825 | ||
803 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | 826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, |
804 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 827 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
805 | EXTENT_DO_ACCOUNTING, | 828 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, |
806 | GFP_NOFS); | 829 | GFP_NOFS); |
807 | unlock_extent(&BTRFS_I(inode)->io_tree, | 830 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
808 | start_pos, last_pos - 1, GFP_NOFS); | 831 | start_pos, last_pos - 1, &cached_state, |
832 | GFP_NOFS); | ||
809 | } | 833 | } |
810 | for (i = 0; i < num_pages; i++) { | 834 | for (i = 0; i < num_pages; i++) { |
811 | clear_page_dirty_for_io(pages[i]); | 835 | clear_page_dirty_for_io(pages[i]); |
@@ -815,45 +839,48 @@ again: | |||
815 | return 0; | 839 | return 0; |
816 | } | 840 | } |
817 | 841 | ||
818 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 842 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
819 | size_t count, loff_t *ppos) | 843 | const struct iovec *iov, |
844 | unsigned long nr_segs, loff_t pos) | ||
820 | { | 845 | { |
821 | loff_t pos; | 846 | struct file *file = iocb->ki_filp; |
847 | struct inode *inode = fdentry(file)->d_inode; | ||
848 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
849 | struct page *pinned[2]; | ||
850 | struct page **pages = NULL; | ||
851 | struct iov_iter i; | ||
852 | loff_t *ppos = &iocb->ki_pos; | ||
822 | loff_t start_pos; | 853 | loff_t start_pos; |
823 | ssize_t num_written = 0; | 854 | ssize_t num_written = 0; |
824 | ssize_t err = 0; | 855 | ssize_t err = 0; |
856 | size_t count; | ||
857 | size_t ocount; | ||
825 | int ret = 0; | 858 | int ret = 0; |
826 | struct inode *inode = fdentry(file)->d_inode; | ||
827 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
828 | struct page **pages = NULL; | ||
829 | int nrptrs; | 859 | int nrptrs; |
830 | struct page *pinned[2]; | ||
831 | unsigned long first_index; | 860 | unsigned long first_index; |
832 | unsigned long last_index; | 861 | unsigned long last_index; |
833 | int will_write; | 862 | int will_write; |
863 | int buffered = 0; | ||
864 | int copied = 0; | ||
865 | int dirty_pages = 0; | ||
834 | 866 | ||
835 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 867 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
836 | (file->f_flags & O_DIRECT)); | 868 | (file->f_flags & O_DIRECT)); |
837 | 869 | ||
838 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
839 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
840 | pinned[0] = NULL; | 870 | pinned[0] = NULL; |
841 | pinned[1] = NULL; | 871 | pinned[1] = NULL; |
842 | 872 | ||
843 | pos = *ppos; | ||
844 | start_pos = pos; | 873 | start_pos = pos; |
845 | 874 | ||
846 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 875 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
847 | 876 | ||
848 | /* do the reserve before the mutex lock in case we have to do some | ||
849 | * flushing. We wouldn't deadlock, but this is more polite. | ||
850 | */ | ||
851 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
852 | if (err) | ||
853 | goto out_nolock; | ||
854 | |||
855 | mutex_lock(&inode->i_mutex); | 877 | mutex_lock(&inode->i_mutex); |
856 | 878 | ||
879 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
880 | if (err) | ||
881 | goto out; | ||
882 | count = ocount; | ||
883 | |||
857 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 884 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
858 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 885 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
859 | if (err) | 886 | if (err) |
@@ -866,16 +893,65 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
866 | if (err) | 893 | if (err) |
867 | goto out; | 894 | goto out; |
868 | 895 | ||
896 | /* | ||
897 | * If BTRFS flips readonly due to some impossible error | ||
898 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
899 | * although we have opened a file as writable, we have | ||
900 | * to stop this write operation to ensure FS consistency. | ||
901 | */ | ||
902 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
903 | err = -EROFS; | ||
904 | goto out; | ||
905 | } | ||
906 | |||
869 | file_update_time(file); | 907 | file_update_time(file); |
908 | BTRFS_I(inode)->sequence++; | ||
909 | |||
910 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
911 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
912 | pos, ppos, count, | ||
913 | ocount); | ||
914 | /* | ||
915 | * the generic O_DIRECT will update in-memory i_size after the | ||
916 | * DIOs are done. But our endio handlers that update the on | ||
917 | * disk i_size never update past the in memory i_size. So we | ||
918 | * need one more update here to catch any additions to the | ||
919 | * file | ||
920 | */ | ||
921 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
922 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
923 | mark_inode_dirty(inode); | ||
924 | } | ||
925 | |||
926 | if (num_written < 0) { | ||
927 | ret = num_written; | ||
928 | num_written = 0; | ||
929 | goto out; | ||
930 | } else if (num_written == count) { | ||
931 | /* pick up pos changes done by the generic code */ | ||
932 | pos = *ppos; | ||
933 | goto out; | ||
934 | } | ||
935 | /* | ||
936 | * We are going to do buffered for the rest of the range, so we | ||
937 | * need to make sure to invalidate the buffered pages when we're | ||
938 | * done. | ||
939 | */ | ||
940 | buffered = 1; | ||
941 | pos += num_written; | ||
942 | } | ||
870 | 943 | ||
944 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
945 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
946 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
947 | (sizeof(struct page *))); | ||
871 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 948 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
872 | 949 | ||
873 | /* generic_write_checks can change our pos */ | 950 | /* generic_write_checks can change our pos */ |
874 | start_pos = pos; | 951 | start_pos = pos; |
875 | 952 | ||
876 | BTRFS_I(inode)->sequence++; | ||
877 | first_index = pos >> PAGE_CACHE_SHIFT; | 953 | first_index = pos >> PAGE_CACHE_SHIFT; |
878 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 954 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
879 | 955 | ||
880 | /* | 956 | /* |
881 | * there are lots of better ways to do this, but this code | 957 | * there are lots of better ways to do this, but this code |
@@ -892,7 +968,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
892 | unlock_page(pinned[0]); | 968 | unlock_page(pinned[0]); |
893 | } | 969 | } |
894 | } | 970 | } |
895 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 971 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
896 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 972 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
897 | if (!PageUptodate(pinned[1])) { | 973 | if (!PageUptodate(pinned[1])) { |
898 | ret = btrfs_readpage(NULL, pinned[1]); | 974 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -903,10 +979,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
903 | } | 979 | } |
904 | } | 980 | } |
905 | 981 | ||
906 | while (count > 0) { | 982 | while (iov_iter_count(&i) > 0) { |
907 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 983 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
908 | size_t write_bytes = min(count, nrptrs * | 984 | size_t write_bytes = min(iov_iter_count(&i), |
909 | (size_t)PAGE_CACHE_SIZE - | 985 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
910 | offset); | 986 | offset); |
911 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 987 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
912 | PAGE_CACHE_SHIFT; | 988 | PAGE_CACHE_SHIFT; |
@@ -914,7 +990,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
914 | WARN_ON(num_pages > nrptrs); | 990 | WARN_ON(num_pages > nrptrs); |
915 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 991 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
916 | 992 | ||
917 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 993 | /* |
994 | * Fault pages before locking them in prepare_pages | ||
995 | * to avoid recursive lock | ||
996 | */ | ||
997 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | ||
998 | ret = -EFAULT; | ||
999 | goto out; | ||
1000 | } | ||
1001 | |||
1002 | ret = btrfs_delalloc_reserve_space(inode, | ||
1003 | num_pages << PAGE_CACHE_SHIFT); | ||
918 | if (ret) | 1004 | if (ret) |
919 | goto out; | 1005 | goto out; |
920 | 1006 | ||
@@ -922,45 +1008,49 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | pos, first_index, last_index, | 1008 | pos, first_index, last_index, |
923 | write_bytes); | 1009 | write_bytes); |
924 | if (ret) { | 1010 | if (ret) { |
925 | btrfs_free_reserved_data_space(root, inode, | 1011 | btrfs_delalloc_release_space(inode, |
926 | write_bytes); | 1012 | num_pages << PAGE_CACHE_SHIFT); |
927 | goto out; | 1013 | goto out; |
928 | } | 1014 | } |
929 | 1015 | ||
930 | ret = btrfs_copy_from_user(pos, num_pages, | 1016 | copied = btrfs_copy_from_user(pos, num_pages, |
931 | write_bytes, pages, buf); | 1017 | write_bytes, pages, &i); |
932 | if (ret) { | 1018 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> |
933 | btrfs_free_reserved_data_space(root, inode, | 1019 | PAGE_CACHE_SHIFT; |
934 | write_bytes); | 1020 | |
935 | btrfs_drop_pages(pages, num_pages); | 1021 | if (num_pages > dirty_pages) { |
936 | goto out; | 1022 | if (copied > 0) |
1023 | atomic_inc( | ||
1024 | &BTRFS_I(inode)->outstanding_extents); | ||
1025 | btrfs_delalloc_release_space(inode, | ||
1026 | (num_pages - dirty_pages) << | ||
1027 | PAGE_CACHE_SHIFT); | ||
937 | } | 1028 | } |
938 | 1029 | ||
939 | ret = dirty_and_release_pages(NULL, root, file, pages, | 1030 | if (copied > 0) { |
940 | num_pages, pos, write_bytes); | 1031 | dirty_and_release_pages(NULL, root, file, pages, |
941 | btrfs_drop_pages(pages, num_pages); | 1032 | dirty_pages, pos, copied); |
942 | if (ret) { | ||
943 | btrfs_free_reserved_data_space(root, inode, | ||
944 | write_bytes); | ||
945 | goto out; | ||
946 | } | 1033 | } |
947 | 1034 | ||
948 | if (will_write) { | 1035 | btrfs_drop_pages(pages, num_pages); |
949 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1036 | |
950 | pos + write_bytes - 1); | 1037 | if (copied > 0) { |
951 | } else { | 1038 | if (will_write) { |
952 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1039 | filemap_fdatawrite_range(inode->i_mapping, pos, |
953 | num_pages); | 1040 | pos + copied - 1); |
954 | if (num_pages < | 1041 | } else { |
955 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1042 | balance_dirty_pages_ratelimited_nr( |
956 | btrfs_btree_balance_dirty(root, 1); | 1043 | inode->i_mapping, |
957 | btrfs_throttle(root); | 1044 | dirty_pages); |
1045 | if (dirty_pages < | ||
1046 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1047 | btrfs_btree_balance_dirty(root, 1); | ||
1048 | btrfs_throttle(root); | ||
1049 | } | ||
958 | } | 1050 | } |
959 | 1051 | ||
960 | buf += write_bytes; | 1052 | pos += copied; |
961 | count -= write_bytes; | 1053 | num_written += copied; |
962 | pos += write_bytes; | ||
963 | num_written += write_bytes; | ||
964 | 1054 | ||
965 | cond_resched(); | 1055 | cond_resched(); |
966 | } | 1056 | } |
@@ -968,9 +1058,7 @@ out: | |||
968 | mutex_unlock(&inode->i_mutex); | 1058 | mutex_unlock(&inode->i_mutex); |
969 | if (ret) | 1059 | if (ret) |
970 | err = ret; | 1060 | err = ret; |
971 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
972 | 1061 | ||
973 | out_nolock: | ||
974 | kfree(pages); | 1062 | kfree(pages); |
975 | if (pinned[0]) | 1063 | if (pinned[0]) |
976 | page_cache_release(pinned[0]); | 1064 | page_cache_release(pinned[0]); |
@@ -1000,9 +1088,15 @@ out_nolock: | |||
1000 | num_written = err; | 1088 | num_written = err; |
1001 | 1089 | ||
1002 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1090 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1003 | trans = btrfs_start_transaction(root, 1); | 1091 | trans = btrfs_start_transaction(root, 0); |
1092 | if (IS_ERR(trans)) { | ||
1093 | num_written = PTR_ERR(trans); | ||
1094 | goto done; | ||
1095 | } | ||
1096 | mutex_lock(&inode->i_mutex); | ||
1004 | ret = btrfs_log_dentry_safe(trans, root, | 1097 | ret = btrfs_log_dentry_safe(trans, root, |
1005 | file->f_dentry); | 1098 | file->f_dentry); |
1099 | mutex_unlock(&inode->i_mutex); | ||
1006 | if (ret == 0) { | 1100 | if (ret == 0) { |
1007 | ret = btrfs_sync_log(trans, root); | 1101 | ret = btrfs_sync_log(trans, root); |
1008 | if (ret == 0) | 1102 | if (ret == 0) |
@@ -1015,12 +1109,13 @@ out_nolock: | |||
1015 | btrfs_end_transaction(trans, root); | 1109 | btrfs_end_transaction(trans, root); |
1016 | } | 1110 | } |
1017 | } | 1111 | } |
1018 | if (file->f_flags & O_DIRECT) { | 1112 | if (file->f_flags & O_DIRECT && buffered) { |
1019 | invalidate_mapping_pages(inode->i_mapping, | 1113 | invalidate_mapping_pages(inode->i_mapping, |
1020 | start_pos >> PAGE_CACHE_SHIFT, | 1114 | start_pos >> PAGE_CACHE_SHIFT, |
1021 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1115 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
1022 | } | 1116 | } |
1023 | } | 1117 | } |
1118 | done: | ||
1024 | current->backing_dev_info = NULL; | 1119 | current->backing_dev_info = NULL; |
1025 | return num_written ? num_written : err; | 1120 | return num_written ? num_written : err; |
1026 | } | 1121 | } |
@@ -1055,8 +1150,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1055 | * important optimization for directories because holding the mutex prevents | 1150 | * important optimization for directories because holding the mutex prevents |
1056 | * new operations on the dir while we write to disk. | 1151 | * new operations on the dir while we write to disk. |
1057 | */ | 1152 | */ |
1058 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 1153 | int btrfs_sync_file(struct file *file, int datasync) |
1059 | { | 1154 | { |
1155 | struct dentry *dentry = file->f_path.dentry; | ||
1060 | struct inode *inode = dentry->d_inode; | 1156 | struct inode *inode = dentry->d_inode; |
1061 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1157 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1062 | int ret = 0; | 1158 | int ret = 0; |
@@ -1093,12 +1189,12 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1093 | /* | 1189 | /* |
1094 | * ok we haven't committed the transaction yet, lets do a commit | 1190 | * ok we haven't committed the transaction yet, lets do a commit |
1095 | */ | 1191 | */ |
1096 | if (file && file->private_data) | 1192 | if (file->private_data) |
1097 | btrfs_ioctl_trans_end(file); | 1193 | btrfs_ioctl_trans_end(file); |
1098 | 1194 | ||
1099 | trans = btrfs_start_transaction(root, 1); | 1195 | trans = btrfs_start_transaction(root, 0); |
1100 | if (!trans) { | 1196 | if (IS_ERR(trans)) { |
1101 | ret = -ENOMEM; | 1197 | ret = PTR_ERR(trans); |
1102 | goto out; | 1198 | goto out; |
1103 | } | 1199 | } |
1104 | 1200 | ||
@@ -1133,7 +1229,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1133 | } | 1229 | } |
1134 | mutex_lock(&dentry->d_inode->i_mutex); | 1230 | mutex_lock(&dentry->d_inode->i_mutex); |
1135 | out: | 1231 | out: |
1136 | return ret > 0 ? EIO : ret; | 1232 | return ret > 0 ? -EIO : ret; |
1137 | } | 1233 | } |
1138 | 1234 | ||
1139 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 1235 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
@@ -1143,21 +1239,141 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { | |||
1143 | 1239 | ||
1144 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 1240 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
1145 | { | 1241 | { |
1146 | vma->vm_ops = &btrfs_file_vm_ops; | 1242 | struct address_space *mapping = filp->f_mapping; |
1243 | |||
1244 | if (!mapping->a_ops->readpage) | ||
1245 | return -ENOEXEC; | ||
1246 | |||
1147 | file_accessed(filp); | 1247 | file_accessed(filp); |
1248 | vma->vm_ops = &btrfs_file_vm_ops; | ||
1249 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
1250 | |||
1148 | return 0; | 1251 | return 0; |
1149 | } | 1252 | } |
1150 | 1253 | ||
1254 | static long btrfs_fallocate(struct file *file, int mode, | ||
1255 | loff_t offset, loff_t len) | ||
1256 | { | ||
1257 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1258 | struct extent_state *cached_state = NULL; | ||
1259 | u64 cur_offset; | ||
1260 | u64 last_byte; | ||
1261 | u64 alloc_start; | ||
1262 | u64 alloc_end; | ||
1263 | u64 alloc_hint = 0; | ||
1264 | u64 locked_end; | ||
1265 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1266 | struct extent_map *em; | ||
1267 | int ret; | ||
1268 | |||
1269 | alloc_start = offset & ~mask; | ||
1270 | alloc_end = (offset + len + mask) & ~mask; | ||
1271 | |||
1272 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1273 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1274 | return -EOPNOTSUPP; | ||
1275 | |||
1276 | /* | ||
1277 | * wait for ordered IO before we have any locks. We'll loop again | ||
1278 | * below with the locks held. | ||
1279 | */ | ||
1280 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1281 | |||
1282 | mutex_lock(&inode->i_mutex); | ||
1283 | ret = inode_newsize_ok(inode, alloc_end); | ||
1284 | if (ret) | ||
1285 | goto out; | ||
1286 | |||
1287 | if (alloc_start > inode->i_size) { | ||
1288 | ret = btrfs_cont_expand(inode, alloc_start); | ||
1289 | if (ret) | ||
1290 | goto out; | ||
1291 | } | ||
1292 | |||
1293 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1294 | if (ret) | ||
1295 | goto out; | ||
1296 | |||
1297 | locked_end = alloc_end - 1; | ||
1298 | while (1) { | ||
1299 | struct btrfs_ordered_extent *ordered; | ||
1300 | |||
1301 | /* the extent lock is ordered inside the running | ||
1302 | * transaction | ||
1303 | */ | ||
1304 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1305 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1306 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1307 | alloc_end - 1); | ||
1308 | if (ordered && | ||
1309 | ordered->file_offset + ordered->len > alloc_start && | ||
1310 | ordered->file_offset < alloc_end) { | ||
1311 | btrfs_put_ordered_extent(ordered); | ||
1312 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1313 | alloc_start, locked_end, | ||
1314 | &cached_state, GFP_NOFS); | ||
1315 | /* | ||
1316 | * we can't wait on the range with the transaction | ||
1317 | * running or with the extent lock held | ||
1318 | */ | ||
1319 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1320 | alloc_end - alloc_start); | ||
1321 | } else { | ||
1322 | if (ordered) | ||
1323 | btrfs_put_ordered_extent(ordered); | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | |||
1328 | cur_offset = alloc_start; | ||
1329 | while (1) { | ||
1330 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1331 | alloc_end - cur_offset, 0); | ||
1332 | BUG_ON(IS_ERR(em) || !em); | ||
1333 | last_byte = min(extent_map_end(em), alloc_end); | ||
1334 | last_byte = (last_byte + mask) & ~mask; | ||
1335 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1336 | (cur_offset >= inode->i_size && | ||
1337 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1338 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1339 | last_byte - cur_offset, | ||
1340 | 1 << inode->i_blkbits, | ||
1341 | offset + len, | ||
1342 | &alloc_hint); | ||
1343 | if (ret < 0) { | ||
1344 | free_extent_map(em); | ||
1345 | break; | ||
1346 | } | ||
1347 | } | ||
1348 | free_extent_map(em); | ||
1349 | |||
1350 | cur_offset = last_byte; | ||
1351 | if (cur_offset >= alloc_end) { | ||
1352 | ret = 0; | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1357 | &cached_state, GFP_NOFS); | ||
1358 | |||
1359 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1360 | out: | ||
1361 | mutex_unlock(&inode->i_mutex); | ||
1362 | return ret; | ||
1363 | } | ||
1364 | |||
1151 | const struct file_operations btrfs_file_operations = { | 1365 | const struct file_operations btrfs_file_operations = { |
1152 | .llseek = generic_file_llseek, | 1366 | .llseek = generic_file_llseek, |
1153 | .read = do_sync_read, | 1367 | .read = do_sync_read, |
1368 | .write = do_sync_write, | ||
1154 | .aio_read = generic_file_aio_read, | 1369 | .aio_read = generic_file_aio_read, |
1155 | .splice_read = generic_file_splice_read, | 1370 | .splice_read = generic_file_splice_read, |
1156 | .write = btrfs_file_write, | 1371 | .aio_write = btrfs_file_aio_write, |
1157 | .mmap = btrfs_file_mmap, | 1372 | .mmap = btrfs_file_mmap, |
1158 | .open = generic_file_open, | 1373 | .open = generic_file_open, |
1159 | .release = btrfs_release_file, | 1374 | .release = btrfs_release_file, |
1160 | .fsync = btrfs_sync_file, | 1375 | .fsync = btrfs_sync_file, |
1376 | .fallocate = btrfs_fallocate, | ||
1161 | .unlocked_ioctl = btrfs_ioctl, | 1377 | .unlocked_ioctl = btrfs_ioctl, |
1162 | #ifdef CONFIG_COMPAT | 1378 | #ifdef CONFIG_COMPAT |
1163 | .compat_ioctl = btrfs_ioctl, | 1379 | .compat_ioctl = btrfs_ioctl, |