aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-10-03 12:30:02 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-10-03 12:30:02 -0400
commit: cb843a6f513a1a91c54951005e60bd9b95bdf973 (patch)
tree: 12edfb1154691f1a8aaeeadb97899397574aa785 /fs
parent: 323ac95bce442bbde514e3ce57e840402f80d909 (diff)
Btrfs: O_DIRECT writes via buffered writes + invalidate
This reworks the btrfs O_DIRECT write code a bit. It had always fallen back to buffered IO and done an invalidate, but needed to be updated for the data=ordered code. The invalidate wasn't actually removing pages because they were still inside an ordered extent. This also combines the O_DIRECT/O_SYNC paths where possible, and kicks off IO in the main btrfs_file_write loop to keep the pipe down to the disk full as we process long writes. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/file.c          64
-rw-r--r--  fs/btrfs/ordered-data.c   3
-rw-r--r--  fs/btrfs/ordered-data.h   2
3 files changed, 38 insertions, 31 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3088a1184483..a03d1bbb19ad 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
905 struct page *pinned[2]; 905 struct page *pinned[2];
906 unsigned long first_index; 906 unsigned long first_index;
907 unsigned long last_index; 907 unsigned long last_index;
908 int will_write;
909
910 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
911 (file->f_flags & O_DIRECT));
908 912
909 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 913 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
910 PAGE_CACHE_SIZE / (sizeof(struct page *))); 914 PAGE_CACHE_SIZE / (sizeof(struct page *)));
@@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1001 if (ret) 1005 if (ret)
1002 goto out; 1006 goto out;
1003 1007
1008 if (will_write) {
1009 btrfs_fdatawrite_range(inode->i_mapping, pos,
1010 pos + write_bytes - 1,
1011 WB_SYNC_NONE);
1012 } else {
1013 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1014 num_pages);
1015 if (num_pages <
1016 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1017 btrfs_btree_balance_dirty(root, 1);
1018 btrfs_throttle(root);
1019 }
1020
1004 buf += write_bytes; 1021 buf += write_bytes;
1005 count -= write_bytes; 1022 count -= write_bytes;
1006 pos += write_bytes; 1023 pos += write_bytes;
1007 num_written += write_bytes; 1024 num_written += write_bytes;
1008 1025
1009 balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
1010 if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1011 btrfs_btree_balance_dirty(root, 1);
1012 btrfs_throttle(root);
1013 cond_resched(); 1026 cond_resched();
1014 } 1027 }
1015out: 1028out:
@@ -1023,36 +1036,29 @@ out_nolock:
1023 page_cache_release(pinned[1]); 1036 page_cache_release(pinned[1]);
1024 *ppos = pos; 1037 *ppos = pos;
1025 1038
1026 if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 1039 if (num_written > 0 && will_write) {
1027 struct btrfs_trans_handle *trans; 1040 struct btrfs_trans_handle *trans;
1028 1041
1029 err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, 1042 err = btrfs_wait_ordered_range(inode, start_pos, num_written);
1030 start_pos + num_written -1, 1043 if (err)
1031 WB_SYNC_NONE);
1032 if (err < 0)
1033 num_written = err;
1034
1035 err = btrfs_wait_on_page_writeback_range(inode->i_mapping,
1036 start_pos, start_pos + num_written - 1);
1037 if (err < 0)
1038 num_written = err; 1044 num_written = err;
1039 1045
1040 trans = btrfs_start_transaction(root, 1); 1046 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
1041 ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); 1047 trans = btrfs_start_transaction(root, 1);
1042 if (ret == 0) { 1048 ret = btrfs_log_dentry_safe(trans, root,
1043 btrfs_sync_log(trans, root); 1049 file->f_dentry);
1044 btrfs_end_transaction(trans, root); 1050 if (ret == 0) {
1045 } else { 1051 btrfs_sync_log(trans, root);
1046 btrfs_commit_transaction(trans, root); 1052 btrfs_end_transaction(trans, root);
1053 } else {
1054 btrfs_commit_transaction(trans, root);
1055 }
1056 }
1057 if (file->f_flags & O_DIRECT) {
1058 invalidate_mapping_pages(inode->i_mapping,
1059 start_pos >> PAGE_CACHE_SHIFT,
1060 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1047 } 1061 }
1048 } else if (num_written > 0 && (file->f_flags & O_DIRECT)) {
1049 do_sync_mapping_range(inode->i_mapping, start_pos,
1050 start_pos + num_written - 1,
1051 SYNC_FILE_RANGE_WRITE |
1052 SYNC_FILE_RANGE_WAIT_AFTER);
1053 invalidate_mapping_pages(inode->i_mapping,
1054 start_pos >> PAGE_CACHE_SHIFT,
1055 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1056 } 1062 }
1057 current->backing_dev_info = NULL; 1063 current->backing_dev_info = NULL;
1058 return num_written ? num_written : err; 1064 return num_written ? num_written : err;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dcc1730dd837..2eb6caba57c2 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
397/* 397/*
398 * Used to wait on ordered extents across a large range of bytes. 398 * Used to wait on ordered extents across a large range of bytes.
399 */ 399 */
400void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 400int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
401{ 401{
402 u64 end; 402 u64 end;
403 u64 orig_end; 403 u64 orig_end;
@@ -451,6 +451,7 @@ again:
451 (unsigned long long)orig_end); 451 (unsigned long long)orig_end);
452 goto again; 452 goto again;
453 } 453 }
454 return 0;
454} 455}
455 456
456/* 457/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index fd45519f30a8..f50f8870a144 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
135 u64 file_offset); 135 u64 file_offset);
136void btrfs_start_ordered_extent(struct inode *inode, 136void btrfs_start_ordered_extent(struct inode *inode,
137 struct btrfs_ordered_extent *entry, int wait); 137 struct btrfs_ordered_extent *entry, int wait);
138void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 138int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
139struct btrfs_ordered_extent * 139struct btrfs_ordered_extent *
140btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 140btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
141int btrfs_ordered_update_i_size(struct inode *inode, 141int btrfs_ordered_update_i_size(struct inode *inode,