diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/extent_io.c | 11 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 176 |
2 files changed, 104 insertions, 83 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1a57c17d4029..a53aca338c7f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
| @@ -2017,6 +2017,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2017 | sector_t sector; | 2017 | sector_t sector; |
| 2018 | struct extent_map *em; | 2018 | struct extent_map *em; |
| 2019 | struct block_device *bdev; | 2019 | struct block_device *bdev; |
| 2020 | struct btrfs_ordered_extent *ordered; | ||
| 2020 | int ret; | 2021 | int ret; |
| 2021 | int nr = 0; | 2022 | int nr = 0; |
| 2022 | size_t page_offset = 0; | 2023 | size_t page_offset = 0; |
| @@ -2028,7 +2029,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2028 | set_page_extent_mapped(page); | 2029 | set_page_extent_mapped(page); |
| 2029 | 2030 | ||
| 2030 | end = page_end; | 2031 | end = page_end; |
| 2031 | lock_extent(tree, start, end, GFP_NOFS); | 2032 | while (1) { |
| 2033 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 2034 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
| 2035 | if (!ordered) | ||
| 2036 | break; | ||
| 2037 | unlock_extent(tree, start, end, GFP_NOFS); | ||
| 2038 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 2039 | btrfs_put_ordered_extent(ordered); | ||
| 2040 | } | ||
| 2032 | 2041 | ||
| 2033 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2042 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
| 2034 | char *userpage; | 2043 | char *userpage; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a28810abfb98..233aea2e5ef2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
| @@ -46,32 +46,42 @@ | |||
| 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
| 47 | int write_bytes, | 47 | int write_bytes, |
| 48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
| 49 | const char __user *buf) | 49 | struct iov_iter *i) |
| 50 | { | 50 | { |
| 51 | long page_fault = 0; | 51 | size_t copied; |
| 52 | int i; | 52 | int pg = 0; |
| 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
| 54 | 54 | ||
| 55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
| 56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
| 57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
| 58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
| 59 | fault_in_pages_readable(buf, count); | 59 | again: |
| 60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
| 61 | return -EFAULT; | ||
| 60 | 62 | ||
| 61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
| 62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
| 63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
| 64 | buf, count); | ||
| 65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
| 66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
| 67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
| 68 | buf += count; | 69 | write_bytes -= copied; |
| 69 | write_bytes -= count; | ||
| 70 | 70 | ||
| 71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
| 72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
| 73 | iov_iter_single_seg_count(i)); | ||
| 74 | goto again; | ||
| 75 | } | ||
| 76 | |||
| 77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
| 78 | offset += copied; | ||
| 79 | } else { | ||
| 80 | pg++; | ||
| 81 | offset = 0; | ||
| 82 | } | ||
| 73 | } | 83 | } |
| 74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
| 75 | } | 85 | } |
| 76 | 86 | ||
| 77 | /* | 87 | /* |
| @@ -822,60 +832,24 @@ again: | |||
| 822 | return 0; | 832 | return 0; |
| 823 | } | 833 | } |
| 824 | 834 | ||
| 825 | /* Copied from read-write.c */ | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
| 826 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) | 836 | const struct iovec *iov, |
| 827 | { | 837 | unsigned long nr_segs, loff_t pos) |
| 828 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 829 | if (!kiocbIsKicked(iocb)) | ||
| 830 | schedule(); | ||
| 831 | else | ||
| 832 | kiocbClearKicked(iocb); | ||
| 833 | __set_current_state(TASK_RUNNING); | ||
| 834 | } | ||
| 835 | |||
| 836 | /* | ||
| 837 | * Just a copy of what do_sync_write does. | ||
| 838 | */ | ||
| 839 | static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf, | ||
| 840 | size_t count, loff_t pos, loff_t *ppos) | ||
| 841 | { | 838 | { |
| 842 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; | 839 | struct file *file = iocb->ki_filp; |
| 843 | unsigned long nr_segs = 1; | 840 | struct inode *inode = fdentry(file)->d_inode; |
| 844 | struct kiocb kiocb; | 841 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 845 | ssize_t ret; | 842 | struct page *pinned[2]; |
| 846 | 843 | struct page **pages = NULL; | |
| 847 | init_sync_kiocb(&kiocb, file); | 844 | struct iov_iter i; |
| 848 | kiocb.ki_pos = pos; | 845 | loff_t *ppos = &iocb->ki_pos; |
| 849 | kiocb.ki_left = count; | ||
| 850 | kiocb.ki_nbytes = count; | ||
| 851 | |||
| 852 | while (1) { | ||
| 853 | ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos, | ||
| 854 | ppos, count, count); | ||
| 855 | if (ret != -EIOCBRETRY) | ||
| 856 | break; | ||
| 857 | wait_on_retry_sync_kiocb(&kiocb); | ||
| 858 | } | ||
| 859 | |||
| 860 | if (ret == -EIOCBQUEUED) | ||
| 861 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 862 | *ppos = kiocb.ki_pos; | ||
| 863 | return ret; | ||
| 864 | } | ||
| 865 | |||
| 866 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | ||
| 867 | size_t count, loff_t *ppos) | ||
| 868 | { | ||
| 869 | loff_t pos; | ||
| 870 | loff_t start_pos; | 846 | loff_t start_pos; |
| 871 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
| 872 | ssize_t err = 0; | 848 | ssize_t err = 0; |
| 849 | size_t count; | ||
| 850 | size_t ocount; | ||
| 873 | int ret = 0; | 851 | int ret = 0; |
| 874 | struct inode *inode = fdentry(file)->d_inode; | ||
| 875 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 876 | struct page **pages = NULL; | ||
| 877 | int nrptrs; | 852 | int nrptrs; |
| 878 | struct page *pinned[2]; | ||
| 879 | unsigned long first_index; | 853 | unsigned long first_index; |
| 880 | unsigned long last_index; | 854 | unsigned long last_index; |
| 881 | int will_write; | 855 | int will_write; |
| @@ -887,13 +861,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 887 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
| 888 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
| 889 | 863 | ||
| 890 | pos = *ppos; | ||
| 891 | start_pos = pos; | 864 | start_pos = pos; |
| 892 | 865 | ||
| 893 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 894 | 867 | ||
| 895 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
| 896 | 869 | ||
| 870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 871 | if (err) | ||
| 872 | goto out; | ||
| 873 | count = ocount; | ||
| 874 | |||
| 897 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 898 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 899 | if (err) | 877 | if (err) |
| @@ -910,14 +888,48 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 910 | BTRFS_I(inode)->sequence++; | 888 | BTRFS_I(inode)->sequence++; |
| 911 | 889 | ||
| 912 | if (unlikely(file->f_flags & O_DIRECT)) { | 890 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 913 | num_written = __btrfs_direct_write(file, buf, count, pos, | 891 | ret = btrfs_delalloc_reserve_space(inode, count); |
| 914 | ppos); | 892 | if (ret) |
| 915 | pos += num_written; | 893 | goto out; |
| 916 | count -= num_written; | ||
| 917 | 894 | ||
| 918 | /* We've written everything we wanted to, exit */ | 895 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, |
| 919 | if (num_written < 0 || !count) | 896 | pos, ppos, count, |
| 897 | ocount); | ||
| 898 | |||
| 899 | /* | ||
| 900 | * the generic O_DIRECT will update in-memory i_size after the | ||
| 901 | * DIOs are done. But our endio handlers that update the on | ||
| 902 | * disk i_size never update past the in memory i_size. So we | ||
| 903 | * need one more update here to catch any additions to the | ||
| 904 | * file | ||
| 905 | */ | ||
| 906 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
| 907 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
| 908 | mark_inode_dirty(inode); | ||
| 909 | } | ||
| 910 | |||
| 911 | if (num_written < 0) { | ||
| 912 | if (num_written != -EIOCBQUEUED) { | ||
| 913 | /* | ||
| 914 | * aio land will take care of releasing the | ||
| 915 | * delalloc | ||
| 916 | */ | ||
| 917 | btrfs_delalloc_release_space(inode, count); | ||
| 918 | } | ||
| 919 | ret = num_written; | ||
| 920 | num_written = 0; | ||
| 920 | goto out; | 921 | goto out; |
| 922 | } else if (num_written == count) { | ||
| 923 | /* pick up pos changes done by the generic code */ | ||
| 924 | pos = *ppos; | ||
| 925 | goto out; | ||
| 926 | } | ||
| 927 | |||
| 928 | /* | ||
| 929 | * the buffered IO will reserve bytes for the rest of the | ||
| 930 | * range, don't double count them here | ||
| 931 | */ | ||
| 932 | btrfs_delalloc_release_space(inode, count - num_written); | ||
| 921 | 933 | ||
| 922 | /* | 934 | /* |
| 923 | * We are going to do buffered for the rest of the range, so we | 935 | * We are going to do buffered for the rest of the range, so we |
| @@ -925,18 +937,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 925 | * done. | 937 | * done. |
| 926 | */ | 938 | */ |
| 927 | buffered = 1; | 939 | buffered = 1; |
| 928 | buf += num_written; | 940 | pos += num_written; |
| 929 | } | 941 | } |
| 930 | 942 | ||
| 931 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | 943 | iov_iter_init(&i, iov, nr_segs, count, num_written); |
| 932 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | 944 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / |
| 945 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
| 946 | (sizeof(struct page *))); | ||
| 933 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 947 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 934 | 948 | ||
| 935 | /* generic_write_checks can change our pos */ | 949 | /* generic_write_checks can change our pos */ |
| 936 | start_pos = pos; | 950 | start_pos = pos; |
| 937 | 951 | ||
| 938 | first_index = pos >> PAGE_CACHE_SHIFT; | 952 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 939 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 953 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
| 940 | 954 | ||
| 941 | /* | 955 | /* |
| 942 | * there are lots of better ways to do this, but this code | 956 | * there are lots of better ways to do this, but this code |
| @@ -953,7 +967,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 953 | unlock_page(pinned[0]); | 967 | unlock_page(pinned[0]); |
| 954 | } | 968 | } |
| 955 | } | 969 | } |
| 956 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 970 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
| 957 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 971 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
| 958 | if (!PageUptodate(pinned[1])) { | 972 | if (!PageUptodate(pinned[1])) { |
| 959 | ret = btrfs_readpage(NULL, pinned[1]); | 973 | ret = btrfs_readpage(NULL, pinned[1]); |
| @@ -964,10 +978,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 964 | } | 978 | } |
| 965 | } | 979 | } |
| 966 | 980 | ||
| 967 | while (count > 0) { | 981 | while (iov_iter_count(&i) > 0) { |
| 968 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 982 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
| 969 | size_t write_bytes = min(count, nrptrs * | 983 | size_t write_bytes = min(iov_iter_count(&i), |
| 970 | (size_t)PAGE_CACHE_SIZE - | 984 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
| 971 | offset); | 985 | offset); |
| 972 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 986 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
| 973 | PAGE_CACHE_SHIFT; | 987 | PAGE_CACHE_SHIFT; |
| @@ -988,7 +1002,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 988 | } | 1002 | } |
| 989 | 1003 | ||
| 990 | ret = btrfs_copy_from_user(pos, num_pages, | 1004 | ret = btrfs_copy_from_user(pos, num_pages, |
| 991 | write_bytes, pages, buf); | 1005 | write_bytes, pages, &i); |
| 992 | if (ret == 0) { | 1006 | if (ret == 0) { |
| 993 | dirty_and_release_pages(NULL, root, file, pages, | 1007 | dirty_and_release_pages(NULL, root, file, pages, |
| 994 | num_pages, pos, write_bytes); | 1008 | num_pages, pos, write_bytes); |
| @@ -1012,8 +1026,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 1012 | btrfs_throttle(root); | 1026 | btrfs_throttle(root); |
| 1013 | } | 1027 | } |
| 1014 | 1028 | ||
| 1015 | buf += write_bytes; | ||
| 1016 | count -= write_bytes; | ||
| 1017 | pos += write_bytes; | 1029 | pos += write_bytes; |
| 1018 | num_written += write_bytes; | 1030 | num_written += write_bytes; |
| 1019 | 1031 | ||
| @@ -1206,7 +1218,7 @@ const struct file_operations btrfs_file_operations = { | |||
| 1206 | .read = do_sync_read, | 1218 | .read = do_sync_read, |
| 1207 | .aio_read = generic_file_aio_read, | 1219 | .aio_read = generic_file_aio_read, |
| 1208 | .splice_read = generic_file_splice_read, | 1220 | .splice_read = generic_file_splice_read, |
| 1209 | .write = btrfs_file_write, | 1221 | .aio_write = btrfs_file_aio_write, |
| 1210 | .mmap = btrfs_file_mmap, | 1222 | .mmap = btrfs_file_mmap, |
| 1211 | .open = generic_file_open, | 1223 | .open = generic_file_open, |
| 1212 | .release = btrfs_release_file, | 1224 | .release = btrfs_release_file, |
