aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Josef Bacik <josef@redhat.com> 2010-05-23 11:07:21 -0400
committer: Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:57 -0400
commit: 11c65dccf70be9ace5dbd3906778e1a099b1fee1 (patch)
tree: 1289f139ddf652e39672374b6f9051994c21ce57 /fs/btrfs
parent: 4b46fce23349bfca781a32e2707a18328ca5ae22 (diff)
Btrfs: do aio_write instead of write
In order for AIO to work, we need to implement aio_write. This patch converts our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and nothing broke, and the AIO stuff magically started working. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent_io.c 11
-rw-r--r-- fs/btrfs/file.c 176
2 files changed, 104 insertions, 83 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1a57c17d402..a53aca338c7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2017,6 +2017,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2017 sector_t sector; 2017 sector_t sector;
2018 struct extent_map *em; 2018 struct extent_map *em;
2019 struct block_device *bdev; 2019 struct block_device *bdev;
2020 struct btrfs_ordered_extent *ordered;
2020 int ret; 2021 int ret;
2021 int nr = 0; 2022 int nr = 0;
2022 size_t page_offset = 0; 2023 size_t page_offset = 0;
@@ -2028,7 +2029,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2028 set_page_extent_mapped(page); 2029 set_page_extent_mapped(page);
2029 2030
2030 end = page_end; 2031 end = page_end;
2031 lock_extent(tree, start, end, GFP_NOFS); 2032 while (1) {
2033 lock_extent(tree, start, end, GFP_NOFS);
2034 ordered = btrfs_lookup_ordered_extent(inode, start);
2035 if (!ordered)
2036 break;
2037 unlock_extent(tree, start, end, GFP_NOFS);
2038 btrfs_start_ordered_extent(inode, ordered, 1);
2039 btrfs_put_ordered_extent(ordered);
2040 }
2032 2041
2033 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 2042 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2034 char *userpage; 2043 char *userpage;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a28810abfb9..233aea2e5ef 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -46,32 +46,42 @@
46static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, 46static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
47 int write_bytes, 47 int write_bytes,
48 struct page **prepared_pages, 48 struct page **prepared_pages,
49 const char __user *buf) 49 struct iov_iter *i)
50{ 50{
51 long page_fault = 0; 51 size_t copied;
52 int i; 52 int pg = 0;
53 int offset = pos & (PAGE_CACHE_SIZE - 1); 53 int offset = pos & (PAGE_CACHE_SIZE - 1);
54 54
55 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { 55 while (write_bytes > 0) {
56 size_t count = min_t(size_t, 56 size_t count = min_t(size_t,
57 PAGE_CACHE_SIZE - offset, write_bytes); 57 PAGE_CACHE_SIZE - offset, write_bytes);
58 struct page *page = prepared_pages[i]; 58 struct page *page = prepared_pages[pg];
59 fault_in_pages_readable(buf, count); 59again:
60 if (unlikely(iov_iter_fault_in_readable(i, count)))
61 return -EFAULT;
60 62
61 /* Copy data from userspace to the current page */ 63 /* Copy data from userspace to the current page */
62 kmap(page); 64 copied = iov_iter_copy_from_user(page, i, offset, count);
63 page_fault = __copy_from_user(page_address(page) + offset, 65
64 buf, count);
65 /* Flush processor's dcache for this page */ 66 /* Flush processor's dcache for this page */
66 flush_dcache_page(page); 67 flush_dcache_page(page);
67 kunmap(page); 68 iov_iter_advance(i, copied);
68 buf += count; 69 write_bytes -= copied;
69 write_bytes -= count;
70 70
71 if (page_fault) 71 if (unlikely(copied == 0)) {
72 break; 72 count = min_t(size_t, PAGE_CACHE_SIZE - offset,
73 iov_iter_single_seg_count(i));
74 goto again;
75 }
76
77 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
78 offset += copied;
79 } else {
80 pg++;
81 offset = 0;
82 }
73 } 83 }
74 return page_fault ? -EFAULT : 0; 84 return 0;
75} 85}
76 86
77/* 87/*
@@ -822,60 +832,24 @@ again:
822 return 0; 832 return 0;
823} 833}
824 834
825/* Copied from read-write.c */ 835static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
826static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 836 const struct iovec *iov,
827{ 837 unsigned long nr_segs, loff_t pos)
828 set_current_state(TASK_UNINTERRUPTIBLE);
829 if (!kiocbIsKicked(iocb))
830 schedule();
831 else
832 kiocbClearKicked(iocb);
833 __set_current_state(TASK_RUNNING);
834}
835
836/*
837 * Just a copy of what do_sync_write does.
838 */
839static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
840 size_t count, loff_t pos, loff_t *ppos)
841{ 838{
842 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 839 struct file *file = iocb->ki_filp;
843 unsigned long nr_segs = 1; 840 struct inode *inode = fdentry(file)->d_inode;
844 struct kiocb kiocb; 841 struct btrfs_root *root = BTRFS_I(inode)->root;
845 ssize_t ret; 842 struct page *pinned[2];
846 843 struct page **pages = NULL;
847 init_sync_kiocb(&kiocb, file); 844 struct iov_iter i;
848 kiocb.ki_pos = pos; 845 loff_t *ppos = &iocb->ki_pos;
849 kiocb.ki_left = count;
850 kiocb.ki_nbytes = count;
851
852 while (1) {
853 ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
854 ppos, count, count);
855 if (ret != -EIOCBRETRY)
856 break;
857 wait_on_retry_sync_kiocb(&kiocb);
858 }
859
860 if (ret == -EIOCBQUEUED)
861 ret = wait_on_sync_kiocb(&kiocb);
862 *ppos = kiocb.ki_pos;
863 return ret;
864}
865
866static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
867 size_t count, loff_t *ppos)
868{
869 loff_t pos;
870 loff_t start_pos; 846 loff_t start_pos;
871 ssize_t num_written = 0; 847 ssize_t num_written = 0;
872 ssize_t err = 0; 848 ssize_t err = 0;
849 size_t count;
850 size_t ocount;
873 int ret = 0; 851 int ret = 0;
874 struct inode *inode = fdentry(file)->d_inode;
875 struct btrfs_root *root = BTRFS_I(inode)->root;
876 struct page **pages = NULL;
877 int nrptrs; 852 int nrptrs;
878 struct page *pinned[2];
879 unsigned long first_index; 853 unsigned long first_index;
880 unsigned long last_index; 854 unsigned long last_index;
881 int will_write; 855 int will_write;
@@ -887,13 +861,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
887 pinned[0] = NULL; 861 pinned[0] = NULL;
888 pinned[1] = NULL; 862 pinned[1] = NULL;
889 863
890 pos = *ppos;
891 start_pos = pos; 864 start_pos = pos;
892 865
893 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 866 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
894 867
895 mutex_lock(&inode->i_mutex); 868 mutex_lock(&inode->i_mutex);
896 869
870 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
871 if (err)
872 goto out;
873 count = ocount;
874
897 current->backing_dev_info = inode->i_mapping->backing_dev_info; 875 current->backing_dev_info = inode->i_mapping->backing_dev_info;
898 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 876 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
899 if (err) 877 if (err)
@@ -910,14 +888,48 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
910 BTRFS_I(inode)->sequence++; 888 BTRFS_I(inode)->sequence++;
911 889
912 if (unlikely(file->f_flags & O_DIRECT)) { 890 if (unlikely(file->f_flags & O_DIRECT)) {
913 num_written = __btrfs_direct_write(file, buf, count, pos, 891 ret = btrfs_delalloc_reserve_space(inode, count);
914 ppos); 892 if (ret)
915 pos += num_written; 893 goto out;
916 count -= num_written;
917 894
918 /* We've written everything we wanted to, exit */ 895 num_written = generic_file_direct_write(iocb, iov, &nr_segs,
919 if (num_written < 0 || !count) 896 pos, ppos, count,
897 ocount);
898
899 /*
900 * the generic O_DIRECT will update in-memory i_size after the
901 * DIOs are done. But our endio handlers that update the on
902 * disk i_size never update past the in memory i_size. So we
903 * need one more update here to catch any additions to the
904 * file
905 */
906 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
907 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
908 mark_inode_dirty(inode);
909 }
910
911 if (num_written < 0) {
912 if (num_written != -EIOCBQUEUED) {
913 /*
914 * aio land will take care of releasing the
915 * delalloc
916 */
917 btrfs_delalloc_release_space(inode, count);
918 }
919 ret = num_written;
920 num_written = 0;
920 goto out; 921 goto out;
922 } else if (num_written == count) {
923 /* pick up pos changes done by the generic code */
924 pos = *ppos;
925 goto out;
926 }
927
928 /*
929 * the buffered IO will reserve bytes for the rest of the
930 * range, don't double count them here
931 */
932 btrfs_delalloc_release_space(inode, count - num_written);
921 933
922 /* 934 /*
923 * We are going to do buffered for the rest of the range, so we 935 * We are going to do buffered for the rest of the range, so we
@@ -925,18 +937,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
925 * done. 937 * done.
926 */ 938 */
927 buffered = 1; 939 buffered = 1;
928 buf += num_written; 940 pos += num_written;
929 } 941 }
930 942
931 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 943 iov_iter_init(&i, iov, nr_segs, count, num_written);
932 PAGE_CACHE_SIZE / (sizeof(struct page *))); 944 nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
945 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
946 (sizeof(struct page *)));
933 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 947 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
934 948
935 /* generic_write_checks can change our pos */ 949 /* generic_write_checks can change our pos */
936 start_pos = pos; 950 start_pos = pos;
937 951
938 first_index = pos >> PAGE_CACHE_SHIFT; 952 first_index = pos >> PAGE_CACHE_SHIFT;
939 last_index = (pos + count) >> PAGE_CACHE_SHIFT; 953 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
940 954
941 /* 955 /*
942 * there are lots of better ways to do this, but this code 956 * there are lots of better ways to do this, but this code
@@ -953,7 +967,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
953 unlock_page(pinned[0]); 967 unlock_page(pinned[0]);
954 } 968 }
955 } 969 }
956 if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { 970 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
957 pinned[1] = grab_cache_page(inode->i_mapping, last_index); 971 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
958 if (!PageUptodate(pinned[1])) { 972 if (!PageUptodate(pinned[1])) {
959 ret = btrfs_readpage(NULL, pinned[1]); 973 ret = btrfs_readpage(NULL, pinned[1]);
@@ -964,10 +978,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
964 } 978 }
965 } 979 }
966 980
967 while (count > 0) { 981 while (iov_iter_count(&i) > 0) {
968 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 982 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
969 size_t write_bytes = min(count, nrptrs * 983 size_t write_bytes = min(iov_iter_count(&i),
970 (size_t)PAGE_CACHE_SIZE - 984 nrptrs * (size_t)PAGE_CACHE_SIZE -
971 offset); 985 offset);
972 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 986 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
973 PAGE_CACHE_SHIFT; 987 PAGE_CACHE_SHIFT;
@@ -988,7 +1002,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
988 } 1002 }
989 1003
990 ret = btrfs_copy_from_user(pos, num_pages, 1004 ret = btrfs_copy_from_user(pos, num_pages,
991 write_bytes, pages, buf); 1005 write_bytes, pages, &i);
992 if (ret == 0) { 1006 if (ret == 0) {
993 dirty_and_release_pages(NULL, root, file, pages, 1007 dirty_and_release_pages(NULL, root, file, pages,
994 num_pages, pos, write_bytes); 1008 num_pages, pos, write_bytes);
@@ -1012,8 +1026,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1012 btrfs_throttle(root); 1026 btrfs_throttle(root);
1013 } 1027 }
1014 1028
1015 buf += write_bytes;
1016 count -= write_bytes;
1017 pos += write_bytes; 1029 pos += write_bytes;
1018 num_written += write_bytes; 1030 num_written += write_bytes;
1019 1031
@@ -1206,7 +1218,7 @@ const struct file_operations btrfs_file_operations = {
1206 .read = do_sync_read, 1218 .read = do_sync_read,
1207 .aio_read = generic_file_aio_read, 1219 .aio_read = generic_file_aio_read,
1208 .splice_read = generic_file_splice_read, 1220 .splice_read = generic_file_splice_read,
1209 .write = btrfs_file_write, 1221 .aio_write = btrfs_file_aio_write,
1210 .mmap = btrfs_file_mmap, 1222 .mmap = btrfs_file_mmap,
1211 .open = generic_file_open, 1223 .open = generic_file_open,
1212 .release = btrfs_release_file, 1224 .release = btrfs_release_file,