path: root/fs/btrfs/file.c
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--  fs/btrfs/file.c | 406
1 file changed, 311 insertions(+), 95 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c02033596f02..c800d58f3013 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,10 +24,12 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
+#include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
+#include <linux/slab.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -45,32 +47,46 @@
 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 					 int write_bytes,
 					 struct page **prepared_pages,
-					 const char __user *buf)
+					 struct iov_iter *i)
 {
-	long page_fault = 0;
-	int i;
+	size_t copied = 0;
+	int pg = 0;
 	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int total_copied = 0;
 
-	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
+	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
 				     PAGE_CACHE_SIZE - offset, write_bytes);
-		struct page *page = prepared_pages[i];
-		fault_in_pages_readable(buf, count);
+		struct page *page = prepared_pages[pg];
+		/*
+		 * Copy data from userspace to the current page
+		 *
+		 * Disable pagefault to avoid recursive lock since
+		 * the pages are already locked
+		 */
+		pagefault_disable();
+		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+		pagefault_enable();
 
-		/* Copy data from userspace to the current page */
-		kmap(page);
-		page_fault = __copy_from_user(page_address(page) + offset,
-					      buf, count);
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
-		kunmap(page);
-		buf += count;
-		write_bytes -= count;
+		iov_iter_advance(i, copied);
+		write_bytes -= copied;
+		total_copied += copied;
 
-		if (page_fault)
+		/* Return to btrfs_file_aio_write to fault page */
+		if (unlikely(copied == 0)) {
 			break;
+		}
+
+		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
+			offset += copied;
+		} else {
+			pg++;
+			offset = 0;
+		}
 	}
-	return page_fault ? -EFAULT : 0;
+	return total_copied;
 }
 
 /*
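The rewritten copy loop no longer turns a fault into a hard -EFAULT: a zero-byte atomic copy just breaks out so the caller can fault the page in and retry, and a short copy retries the same page at an advanced offset. A minimal userspace model of that bookkeeping (plain C; PAGE_SIZE stands in for PAGE_CACHE_SIZE, and try_copy() is a hypothetical stand-in for iov_iter_copy_from_user_atomic(), which may copy fewer bytes than requested):

#include <stddef.h>

#define PAGE_SIZE 4096

/*
 * Hypothetical stand-in for iov_iter_copy_from_user_atomic(): copies up
 * to count bytes into page 'pg' at 'offset' and may return short (even 0)
 * when the source data isn't resident.
 */
extern size_t try_copy(int pg, size_t offset, size_t count);

static size_t copy_loop(size_t pos, size_t write_bytes)
{
	size_t offset = pos & (PAGE_SIZE - 1);
	size_t total_copied = 0;
	int pg = 0;

	while (write_bytes > 0) {
		size_t count = PAGE_SIZE - offset;
		size_t copied;

		if (count > write_bytes)
			count = write_bytes;

		copied = try_copy(pg, offset, count);
		write_bytes -= copied;
		total_copied += copied;

		/* nothing landed: caller must fault the source in and retry */
		if (copied == 0)
			break;

		if (copied < PAGE_SIZE - offset) {
			offset += copied;	/* short copy: retry same page */
		} else {
			pg++;			/* page filled: advance */
			offset = 0;
		}
	}
	return total_copied;
}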
@@ -123,9 +139,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 			root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
 	end_of_last_block = start_pos + num_bytes - 1;
-	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
-	if (err)
-		return err;
+	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
+					NULL);
+	BUG_ON(err);
 
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = pages[i];
@@ -140,7 +156,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	 * at this time.
 	 */
 	}
-	return err;
+	return 0;
 }
 
 /*
@@ -209,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 
 		split->bdev = em->bdev;
 		split->flags = flags;
+		split->compress_type = em->compress_type;
 		ret = add_extent_mapping(em_tree, split);
 		BUG_ON(ret);
 		free_extent_map(split);
@@ -223,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		split->len = em->start + em->len - (start + len);
 		split->bdev = em->bdev;
 		split->flags = flags;
+		split->compress_type = em->compress_type;
 
 		if (compressed) {
 			split->block_len = em->block_len;
@@ -720,13 +738,15 @@ again:
 				   inode->i_ino, orig_offset);
 		BUG_ON(ret);
 	}
-	fi = btrfs_item_ptr(leaf, path->slots[0],
-			    struct btrfs_file_extent_item);
 	if (del_nr == 0) {
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_mark_buffer_dirty(leaf);
 	} else {
+		fi = btrfs_item_ptr(leaf, del_slot - 1,
+				    struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_set_file_extent_num_bytes(leaf, fi,
@@ -751,6 +771,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 			 loff_t pos, unsigned long first_index,
 			 unsigned long last_index, size_t write_bytes)
 {
+	struct extent_state *cached_state = NULL;
 	int i;
 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
 	struct inode *inode = fdentry(file)->d_inode;
@@ -779,16 +800,18 @@ again:
 	}
 	if (start_pos < inode->i_size) {
 		struct btrfs_ordered_extent *ordered;
-		lock_extent(&BTRFS_I(inode)->io_tree,
-			    start_pos, last_pos - 1, GFP_NOFS);
+		lock_extent_bits(&BTRFS_I(inode)->io_tree,
+				 start_pos, last_pos - 1, 0, &cached_state,
+				 GFP_NOFS);
 		ordered = btrfs_lookup_first_ordered_extent(inode,
 							    last_pos - 1);
 		if (ordered &&
 		    ordered->file_offset + ordered->len > start_pos &&
 		    ordered->file_offset < last_pos) {
 			btrfs_put_ordered_extent(ordered);
-			unlock_extent(&BTRFS_I(inode)->io_tree,
-				      start_pos, last_pos - 1, GFP_NOFS);
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+					     start_pos, last_pos - 1,
+					     &cached_state, GFP_NOFS);
 			for (i = 0; i < num_pages; i++) {
 				unlock_page(pages[i]);
 				page_cache_release(pages[i]);
@@ -800,12 +823,13 @@ again:
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
 
-		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
 				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
-				  EXTENT_DO_ACCOUNTING,
+				  EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
 				  GFP_NOFS);
-		unlock_extent(&BTRFS_I(inode)->io_tree,
-			      start_pos, last_pos - 1, GFP_NOFS);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+				     start_pos, last_pos - 1, &cached_state,
+				     GFP_NOFS);
 	}
 	for (i = 0; i < num_pages; i++) {
 		clear_page_dirty_for_io(pages[i]);
@@ -815,45 +839,48 @@ again:
 	return 0;
 }
 
-static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+				    const struct iovec *iov,
+				    unsigned long nr_segs, loff_t pos)
 {
-	loff_t pos;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct page *pinned[2];
+	struct page **pages = NULL;
+	struct iov_iter i;
+	loff_t *ppos = &iocb->ki_pos;
 	loff_t start_pos;
 	ssize_t num_written = 0;
 	ssize_t err = 0;
+	size_t count;
+	size_t ocount;
 	int ret = 0;
-	struct inode *inode = fdentry(file)->d_inode;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct page **pages = NULL;
 	int nrptrs;
-	struct page *pinned[2];
 	unsigned long first_index;
 	unsigned long last_index;
 	int will_write;
+	int buffered = 0;
+	int copied = 0;
+	int dirty_pages = 0;
 
 	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
 		      (file->f_flags & O_DIRECT));
 
-	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
-		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
 	pinned[0] = NULL;
 	pinned[1] = NULL;
 
-	pos = *ppos;
 	start_pos = pos;
 
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
-	/* do the reserve before the mutex lock in case we have to do some
-	 * flushing.  We wouldn't deadlock, but this is more polite.
-	 */
-	err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
-	if (err)
-		goto out_nolock;
-
 	mutex_lock(&inode->i_mutex);
 
+	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+	if (err)
+		goto out;
+	count = ocount;
+
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
@@ -866,16 +893,65 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	if (err)
 		goto out;
 
+	/*
+	 * If BTRFS flips readonly due to some impossible error
+	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+	 * although we have opened a file as writable, we have
+	 * to stop this write operation to ensure FS consistency.
+	 */
+	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		err = -EROFS;
+		goto out;
+	}
+
 	file_update_time(file);
+	BTRFS_I(inode)->sequence++;
+
+	if (unlikely(file->f_flags & O_DIRECT)) {
+		num_written = generic_file_direct_write(iocb, iov, &nr_segs,
+							pos, ppos, count,
+							ocount);
+		/*
+		 * the generic O_DIRECT will update in-memory i_size after the
+		 * DIOs are done.  But our endio handlers that update the on
+		 * disk i_size never update past the in memory i_size.  So we
+		 * need one more update here to catch any additions to the
+		 * file
+		 */
+		if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+			btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+			mark_inode_dirty(inode);
+		}
+
+		if (num_written < 0) {
+			ret = num_written;
+			num_written = 0;
+			goto out;
+		} else if (num_written == count) {
+			/* pick up pos changes done by the generic code */
+			pos = *ppos;
+			goto out;
+		}
+		/*
+		 * We are going to do buffered for the rest of the range, so we
+		 * need to make sure to invalidate the buffered pages when we're
+		 * done.
+		 */
+		buffered = 1;
+		pos += num_written;
+	}
 
+	iov_iter_init(&i, iov, nr_segs, count, num_written);
+	nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
+		     (sizeof(struct page *)));
 	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
 
 	/* generic_write_checks can change our pos */
 	start_pos = pos;
 
-	BTRFS_I(inode)->sequence++;
 	first_index = pos >> PAGE_CACHE_SHIFT;
-	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+	last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
 
 	/*
 	 * there are lots of better ways to do this, but this code
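This hunk fronts the buffered loop with generic_file_direct_write() for O_DIRECT, and falls back to buffered I/O for whatever the direct path could not finish (an unaligned tail, say), setting buffered = 1 so the now-stale page cache over the range is invalidated at the end. A compact userspace sketch of that fallback shape, with direct_write() and buffered_write() as hypothetical stand-ins that return bytes written or a negative errno:

#include <sys/types.h>

extern ssize_t direct_write(int fd, const void *buf, size_t count, off_t pos);
extern ssize_t buffered_write(int fd, const void *buf, size_t count, off_t pos);

static ssize_t write_direct_then_buffered(int fd, const void *buf,
					  size_t count, off_t pos)
{
	ssize_t done = direct_write(fd, buf, count, pos);
	ssize_t more;

	if (done < 0)
		return done;		/* direct path failed outright */
	if ((size_t)done == count)
		return done;		/* everything went direct */

	/*
	 * Short direct write: push the remainder through the page cache.
	 * The patch remembers this case (buffered = 1) so it can
	 * invalidate the cached pages over the range afterwards.
	 */
	more = buffered_write(fd, (const char *)buf + done,
			      count - done, pos + done);
	if (more < 0)
		return done ? done : more;	/* report partial progress */
	return done + more;
}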
@@ -892,7 +968,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 			unlock_page(pinned[0]);
 		}
 	}
-	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
+	if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
 		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
 		if (!PageUptodate(pinned[1])) {
 			ret = btrfs_readpage(NULL, pinned[1]);
@@ -903,10 +979,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 		}
 	}
 
-	while (count > 0) {
+	while (iov_iter_count(&i) > 0) {
 		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-		size_t write_bytes = min(count, nrptrs *
-					 (size_t)PAGE_CACHE_SIZE -
+		size_t write_bytes = min(iov_iter_count(&i),
+					 nrptrs * (size_t)PAGE_CACHE_SIZE -
 					 offset);
 		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
 					PAGE_CACHE_SHIFT;
@@ -914,7 +990,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-		ret = btrfs_check_data_free_space(root, inode, write_bytes);
+		/*
+		 * Fault pages before locking them in prepare_pages
+		 * to avoid recursive lock
+		 */
+		if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = btrfs_delalloc_reserve_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 		if (ret)
 			goto out;
 
@@ -922,45 +1008,49 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 				    pos, first_index, last_index,
 				    write_bytes);
 		if (ret) {
-			btrfs_free_reserved_data_space(root, inode,
-						       write_bytes);
+			btrfs_delalloc_release_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 			goto out;
 		}
 
-		ret = btrfs_copy_from_user(pos, num_pages,
-					   write_bytes, pages, buf);
-		if (ret) {
-			btrfs_free_reserved_data_space(root, inode,
-						       write_bytes);
-			btrfs_drop_pages(pages, num_pages);
-			goto out;
+		copied = btrfs_copy_from_user(pos, num_pages,
+					   write_bytes, pages, &i);
+		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+				PAGE_CACHE_SHIFT;
+
+		if (num_pages > dirty_pages) {
+			if (copied > 0)
+				atomic_inc(
+					&BTRFS_I(inode)->outstanding_extents);
+			btrfs_delalloc_release_space(inode,
+					(num_pages - dirty_pages) <<
+					PAGE_CACHE_SHIFT);
 		}
 
-		ret = dirty_and_release_pages(NULL, root, file, pages,
-					      num_pages, pos, write_bytes);
-		btrfs_drop_pages(pages, num_pages);
-		if (ret) {
-			btrfs_free_reserved_data_space(root, inode,
-						       write_bytes);
-			goto out;
+		if (copied > 0) {
+			dirty_and_release_pages(NULL, root, file, pages,
+						dirty_pages, pos, copied);
 		}
 
-		if (will_write) {
-			filemap_fdatawrite_range(inode->i_mapping, pos,
-						 pos + write_bytes - 1);
-		} else {
-			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-							   num_pages);
-			if (num_pages <
-			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-				btrfs_btree_balance_dirty(root, 1);
-			btrfs_throttle(root);
+		btrfs_drop_pages(pages, num_pages);
+
+		if (copied > 0) {
+			if (will_write) {
+				filemap_fdatawrite_range(inode->i_mapping, pos,
+							 pos + copied - 1);
+			} else {
+				balance_dirty_pages_ratelimited_nr(
+							inode->i_mapping,
+							dirty_pages);
+				if (dirty_pages <
+				    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+					btrfs_btree_balance_dirty(root, 1);
+				btrfs_throttle(root);
+			}
 		}
 
-		buf += write_bytes;
-		count -= write_bytes;
-		pos += write_bytes;
-		num_written += write_bytes;
+		pos += copied;
+		num_written += copied;
 
 		cond_resched();
 	}
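One detail worth tracing here: delalloc space is reserved for the full num_pages up front, but a short copy dirties only dirty_pages, so the difference is handed back via btrfs_delalloc_release_space(). A small runnable C check of the page arithmetic (4096-byte pages assumed, matching PAGE_CACHE_SIZE on x86):

#include <stdio.h>

#define PAGE_CACHE_SIZE  4096u
#define PAGE_CACHE_SHIFT 12

int main(void)
{
	size_t write_bytes = 10000, copied = 5000;
	size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	size_t dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	/* 3 pages reserved, 2 dirtied: 1 page of reservation goes back */
	printf("reserved %zu, dirtied %zu, released %zu page(s)\n",
	       num_pages, dirty_pages, num_pages - dirty_pages);
	return 0;
}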
@@ -968,9 +1058,7 @@ out:
 	mutex_unlock(&inode->i_mutex);
 	if (ret)
 		err = ret;
-	btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
 
-out_nolock:
 	kfree(pages);
 	if (pinned[0])
 		page_cache_release(pinned[0]);
@@ -1000,9 +1088,15 @@ out_nolock:
 		num_written = err;
 
 	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-		trans = btrfs_start_transaction(root, 1);
+		trans = btrfs_start_transaction(root, 0);
+		if (IS_ERR(trans)) {
+			num_written = PTR_ERR(trans);
+			goto done;
+		}
+		mutex_lock(&inode->i_mutex);
 		ret = btrfs_log_dentry_safe(trans, root,
 					    file->f_dentry);
+		mutex_unlock(&inode->i_mutex);
 		if (ret == 0) {
 			ret = btrfs_sync_log(trans, root);
 			if (ret == 0)
@@ -1015,12 +1109,13 @@ out_nolock:
 			btrfs_end_transaction(trans, root);
 		}
 	}
-	if (file->f_flags & O_DIRECT) {
+	if (file->f_flags & O_DIRECT && buffered) {
 		invalidate_mapping_pages(inode->i_mapping,
 		      start_pos >> PAGE_CACHE_SHIFT,
 		     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
 	}
 	}
+done:
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
 }
@@ -1055,8 +1150,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
  * important optimization for directories because holding the mutex prevents
  * new operations on the dir while we write to disk.
  */
-int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int btrfs_sync_file(struct file *file, int datasync)
 {
+	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -1093,12 +1189,12 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	/*
 	 * ok we haven't committed the transaction yet, lets do a commit
 	 */
-	if (file && file->private_data)
+	if (file->private_data)
 		btrfs_ioctl_trans_end(file);
 
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans) {
-		ret = -ENOMEM;
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
 		goto out;
 	}
 
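The fsync path switches from a NULL test to IS_ERR()/PTR_ERR(), since btrfs_start_transaction() reports failure through the kernel's ERR_PTR convention: a negative errno encoded in the top 4095 values of the pointer range. The helpers behave roughly like this (a sketch of the pattern, not the kernel's exact headers):

#include <errno.h>

#define MAX_ERRNO 4095

/* encode a negative errno as a pointer */
static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

/* recover the errno from such a pointer */
static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

/* true only for the last MAX_ERRNO values of the address space */
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}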
@@ -1133,7 +1229,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	}
 	mutex_lock(&dentry->d_inode->i_mutex);
 out:
-	return ret > 0 ? EIO : ret;
+	return ret > 0 ? -EIO : ret;
 }
 
 static const struct vm_operations_struct btrfs_file_vm_ops = {
@@ -1143,21 +1239,141 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
 
 static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-	vma->vm_ops = &btrfs_file_vm_ops;
+	struct address_space *mapping = filp->f_mapping;
+
+	if (!mapping->a_ops->readpage)
+		return -ENOEXEC;
+
 	file_accessed(filp);
+	vma->vm_ops = &btrfs_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+
 	return 0;
 }
 
+static long btrfs_fallocate(struct file *file, int mode,
+			    loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct extent_state *cached_state = NULL;
+	u64 cur_offset;
+	u64 last_byte;
+	u64 alloc_start;
+	u64 alloc_end;
+	u64 alloc_hint = 0;
+	u64 locked_end;
+	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+	struct extent_map *em;
+	int ret;
+
+	alloc_start = offset & ~mask;
+	alloc_end = (offset + len + mask) & ~mask;
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	/*
+	 * wait for ordered IO before we have any locks.  We'll loop again
+	 * below with the locks held.
+	 */
+	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, alloc_end);
+	if (ret)
+		goto out;
+
+	if (alloc_start > inode->i_size) {
+		ret = btrfs_cont_expand(inode, alloc_start);
+		if (ret)
+			goto out;
+	}
+
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
+	locked_end = alloc_end - 1;
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+
+		/* the extent lock is ordered inside the running
+		 * transaction
+		 */
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+				 locked_end, 0, &cached_state, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode,
+							    alloc_end - 1);
+		if (ordered &&
+		    ordered->file_offset + ordered->len > alloc_start &&
+		    ordered->file_offset < alloc_end) {
+			btrfs_put_ordered_extent(ordered);
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+					     alloc_start, locked_end,
+					     &cached_state, GFP_NOFS);
+			/*
+			 * we can't wait on the range with the transaction
+			 * running or with the extent lock held
+			 */
+			btrfs_wait_ordered_range(inode, alloc_start,
+						 alloc_end - alloc_start);
+		} else {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+	}
+
+	cur_offset = alloc_start;
+	while (1) {
+		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+				      alloc_end - cur_offset, 0);
+		BUG_ON(IS_ERR(em) || !em);
+		last_byte = min(extent_map_end(em), alloc_end);
+		last_byte = (last_byte + mask) & ~mask;
+		if (em->block_start == EXTENT_MAP_HOLE ||
+		    (cur_offset >= inode->i_size &&
+		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+							last_byte - cur_offset,
+							1 << inode->i_blkbits,
+							offset + len,
+							&alloc_hint);
+			if (ret < 0) {
+				free_extent_map(em);
+				break;
+			}
+		}
+		free_extent_map(em);
+
+		cur_offset = last_byte;
+		if (cur_offset >= alloc_end) {
+			ret = 0;
+			break;
+		}
+	}
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+			     &cached_state, GFP_NOFS);
+
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
+	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
 	.splice_read	= generic_file_splice_read,
-	.write		= btrfs_file_write,
+	.aio_write	= btrfs_file_aio_write,
 	.mmap		= btrfs_file_mmap,
 	.open		= generic_file_open,
 	.release	= btrfs_release_file,
 	.fsync		= btrfs_sync_file,
+	.fallocate	= btrfs_fallocate,
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_ioctl,
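The new btrfs_fallocate() rounds the request outward to sector boundaries before allocating: alloc_start = offset & ~mask rounds down, alloc_end = (offset + len + mask) & ~mask rounds up. A quick runnable check of that rounding (4096-byte sectors assumed for illustration):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = 4096 - 1;	/* sectorsize - 1 */
	uint64_t offset = 5000, len = 3000;

	uint64_t alloc_start = offset & ~mask;		    /* round down */
	uint64_t alloc_end = (offset + len + mask) & ~mask; /* round up   */

	assert(alloc_start == 4096);	/* 5000 rounded down to a sector */
	assert(alloc_end == 8192);	/* 8000 rounded up to a sector   */
	return 0;
}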