aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2010-05-23 11:00:55 -0400
committerChris Mason <chris.mason@oracle.com>2010-05-25 10:34:57 -0400
commit4b46fce23349bfca781a32e2707a18328ca5ae22 (patch)
tree68f1200f2bc82d3f35218aef38e6d5d92bff4aca /fs/btrfs/file.c
parentc2c6ca417e2db7a519e6e92c82f4a933d940d076 (diff)
Btrfs: add basic DIO read/write support
This provides basic DIO support for reading and writing. It does not do the work to recover from mismatching checksums; that will come later. A few design changes have been made from Jim's code (sorry Jim!): 1) Use the generic direct-io code. Jim originally re-wrote all the generic DIO code in order to account for all of BTRFS's oddities, but thanks to that work it seems like the best bet is to just ignore compression and such and just opt to fall back on buffered IO. 2) Fall back on buffered IO for compressed or inline extents. Jim's code did its own buffering to make dio with compressed extents work. Now we just fall back onto normal buffered IO. 3) Use ordered extents for the writes so that all of the lock_extent() lookup_ordered() type checks continue to work. 4) Do the lock_extent() lookup_ordered() loop in readpage so we don't race with DIO writes. I've tested this with fsx and everything works great. This patch depends on my dio and filemap.c patches to work. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c69
1 files changed, 65 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 6d8f817eadb5..a28810abfb98 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -822,6 +822,47 @@ again:
822 return 0; 822 return 0;
823} 823}
824 824
825/* Copied from read-write.c */
826static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
827{
828 set_current_state(TASK_UNINTERRUPTIBLE);
829 if (!kiocbIsKicked(iocb))
830 schedule();
831 else
832 kiocbClearKicked(iocb);
833 __set_current_state(TASK_RUNNING);
834}
835
836/*
837 * Just a copy of what do_sync_write does.
838 */
839static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
840 size_t count, loff_t pos, loff_t *ppos)
841{
842 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
843 unsigned long nr_segs = 1;
844 struct kiocb kiocb;
845 ssize_t ret;
846
847 init_sync_kiocb(&kiocb, file);
848 kiocb.ki_pos = pos;
849 kiocb.ki_left = count;
850 kiocb.ki_nbytes = count;
851
852 while (1) {
853 ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
854 ppos, count, count);
855 if (ret != -EIOCBRETRY)
856 break;
857 wait_on_retry_sync_kiocb(&kiocb);
858 }
859
860 if (ret == -EIOCBQUEUED)
861 ret = wait_on_sync_kiocb(&kiocb);
862 *ppos = kiocb.ki_pos;
863 return ret;
864}
865
825static ssize_t btrfs_file_write(struct file *file, const char __user *buf, 866static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
826 size_t count, loff_t *ppos) 867 size_t count, loff_t *ppos)
827{ 868{
@@ -838,12 +879,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
838 unsigned long first_index; 879 unsigned long first_index;
839 unsigned long last_index; 880 unsigned long last_index;
840 int will_write; 881 int will_write;
882 int buffered = 0;
841 883
842 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 884 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
843 (file->f_flags & O_DIRECT)); 885 (file->f_flags & O_DIRECT));
844 886
845 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
846 PAGE_CACHE_SIZE / (sizeof(struct page *)));
847 pinned[0] = NULL; 887 pinned[0] = NULL;
848 pinned[1] = NULL; 888 pinned[1] = NULL;
849 889
@@ -867,13 +907,34 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
867 goto out; 907 goto out;
868 908
869 file_update_time(file); 909 file_update_time(file);
910 BTRFS_I(inode)->sequence++;
911
912 if (unlikely(file->f_flags & O_DIRECT)) {
913 num_written = __btrfs_direct_write(file, buf, count, pos,
914 ppos);
915 pos += num_written;
916 count -= num_written;
917
918 /* We've written everything we wanted to, exit */
919 if (num_written < 0 || !count)
920 goto out;
870 921
922 /*
923 * We are going to do buffered for the rest of the range, so we
924 * need to make sure to invalidate the buffered pages when we're
925 * done.
926 */
927 buffered = 1;
928 buf += num_written;
929 }
930
931 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
932 PAGE_CACHE_SIZE / (sizeof(struct page *)));
871 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 933 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
872 934
873 /* generic_write_checks can change our pos */ 935 /* generic_write_checks can change our pos */
874 start_pos = pos; 936 start_pos = pos;
875 937
876 BTRFS_I(inode)->sequence++;
877 first_index = pos >> PAGE_CACHE_SHIFT; 938 first_index = pos >> PAGE_CACHE_SHIFT;
878 last_index = (pos + count) >> PAGE_CACHE_SHIFT; 939 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
879 940
@@ -1007,7 +1068,7 @@ out:
1007 btrfs_end_transaction(trans, root); 1068 btrfs_end_transaction(trans, root);
1008 } 1069 }
1009 } 1070 }
1010 if (file->f_flags & O_DIRECT) { 1071 if (file->f_flags & O_DIRECT && buffered) {
1011 invalidate_mapping_pages(inode->i_mapping, 1072 invalidate_mapping_pages(inode->i_mapping,
1012 start_pos >> PAGE_CACHE_SHIFT, 1073 start_pos >> PAGE_CACHE_SHIFT,
1013 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 1074 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);