diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-10-03 12:30:02 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-03 12:30:02 -0400 |
commit | cb843a6f513a1a91c54951005e60bd9b95bdf973 (patch) | |
tree | 12edfb1154691f1a8aaeeadb97899397574aa785 | |
parent | 323ac95bce442bbde514e3ce57e840402f80d909 (diff) |
Btrfs: O_DIRECT writes via buffered writes + invalidate
This reworks the btrfs O_DIRECT write code a bit. It had always fallen
back to buffered IO and done an invalidate, but needed to be updated
for the data=ordered code. The invalidate wasn't actually removing pages
because they were still inside an ordered extent.
This also combines the O_DIRECT/O_SYNC paths where possible, and kicks
off IO in the main btrfs_file_write loop to keep the pipe down to the
disk full as we process long writes.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/file.c | 64 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 2 |
3 files changed, 38 insertions, 31 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3088a1184483..a03d1bbb19ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
905 | struct page *pinned[2]; | 905 | struct page *pinned[2]; |
906 | unsigned long first_index; | 906 | unsigned long first_index; |
907 | unsigned long last_index; | 907 | unsigned long last_index; |
908 | int will_write; | ||
909 | |||
910 | will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || | ||
911 | (file->f_flags & O_DIRECT)); | ||
908 | 912 | ||
909 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | 913 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, |
910 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | 914 | PAGE_CACHE_SIZE / (sizeof(struct page *))); |
@@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1001 | if (ret) | 1005 | if (ret) |
1002 | goto out; | 1006 | goto out; |
1003 | 1007 | ||
1008 | if (will_write) { | ||
1009 | btrfs_fdatawrite_range(inode->i_mapping, pos, | ||
1010 | pos + write_bytes - 1, | ||
1011 | WB_SYNC_NONE); | ||
1012 | } else { | ||
1013 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
1014 | num_pages); | ||
1015 | if (num_pages < | ||
1016 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1017 | btrfs_btree_balance_dirty(root, 1); | ||
1018 | btrfs_throttle(root); | ||
1019 | } | ||
1020 | |||
1004 | buf += write_bytes; | 1021 | buf += write_bytes; |
1005 | count -= write_bytes; | 1022 | count -= write_bytes; |
1006 | pos += write_bytes; | 1023 | pos += write_bytes; |
1007 | num_written += write_bytes; | 1024 | num_written += write_bytes; |
1008 | 1025 | ||
1009 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); | ||
1010 | if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1011 | btrfs_btree_balance_dirty(root, 1); | ||
1012 | btrfs_throttle(root); | ||
1013 | cond_resched(); | 1026 | cond_resched(); |
1014 | } | 1027 | } |
1015 | out: | 1028 | out: |
@@ -1023,36 +1036,29 @@ out_nolock: | |||
1023 | page_cache_release(pinned[1]); | 1036 | page_cache_release(pinned[1]); |
1024 | *ppos = pos; | 1037 | *ppos = pos; |
1025 | 1038 | ||
1026 | if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 1039 | if (num_written > 0 && will_write) { |
1027 | struct btrfs_trans_handle *trans; | 1040 | struct btrfs_trans_handle *trans; |
1028 | 1041 | ||
1029 | err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, | 1042 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); |
1030 | start_pos + num_written -1, | 1043 | if (err) |
1031 | WB_SYNC_NONE); | ||
1032 | if (err < 0) | ||
1033 | num_written = err; | ||
1034 | |||
1035 | err = btrfs_wait_on_page_writeback_range(inode->i_mapping, | ||
1036 | start_pos, start_pos + num_written - 1); | ||
1037 | if (err < 0) | ||
1038 | num_written = err; | 1044 | num_written = err; |
1039 | 1045 | ||
1040 | trans = btrfs_start_transaction(root, 1); | 1046 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
1041 | ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); | 1047 | trans = btrfs_start_transaction(root, 1); |
1042 | if (ret == 0) { | 1048 | ret = btrfs_log_dentry_safe(trans, root, |
1043 | btrfs_sync_log(trans, root); | 1049 | file->f_dentry); |
1044 | btrfs_end_transaction(trans, root); | 1050 | if (ret == 0) { |
1045 | } else { | 1051 | btrfs_sync_log(trans, root); |
1046 | btrfs_commit_transaction(trans, root); | 1052 | btrfs_end_transaction(trans, root); |
1053 | } else { | ||
1054 | btrfs_commit_transaction(trans, root); | ||
1055 | } | ||
1056 | } | ||
1057 | if (file->f_flags & O_DIRECT) { | ||
1058 | invalidate_mapping_pages(inode->i_mapping, | ||
1059 | start_pos >> PAGE_CACHE_SHIFT, | ||
1060 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1047 | } | 1061 | } |
1048 | } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { | ||
1049 | do_sync_mapping_range(inode->i_mapping, start_pos, | ||
1050 | start_pos + num_written - 1, | ||
1051 | SYNC_FILE_RANGE_WRITE | | ||
1052 | SYNC_FILE_RANGE_WAIT_AFTER); | ||
1053 | invalidate_mapping_pages(inode->i_mapping, | ||
1054 | start_pos >> PAGE_CACHE_SHIFT, | ||
1055 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1056 | } | 1062 | } |
1057 | current->backing_dev_info = NULL; | 1063 | current->backing_dev_info = NULL; |
1058 | return num_written ? num_written : err; | 1064 | return num_written ? num_written : err; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dcc1730dd837..2eb6caba57c2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
397 | /* | 397 | /* |
398 | * Used to wait on ordered extents across a large range of bytes. | 398 | * Used to wait on ordered extents across a large range of bytes. |
399 | */ | 399 | */ |
400 | void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | 400 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) |
401 | { | 401 | { |
402 | u64 end; | 402 | u64 end; |
403 | u64 orig_end; | 403 | u64 orig_end; |
@@ -451,6 +451,7 @@ again: | |||
451 | (unsigned long long)orig_end); | 451 | (unsigned long long)orig_end); |
452 | goto again; | 452 | goto again; |
453 | } | 453 | } |
454 | return 0; | ||
454 | } | 455 | } |
455 | 456 | ||
456 | /* | 457 | /* |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index fd45519f30a8..f50f8870a144 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
135 | u64 file_offset); | 135 | u64 file_offset); |
136 | void btrfs_start_ordered_extent(struct inode *inode, | 136 | void btrfs_start_ordered_extent(struct inode *inode, |
137 | struct btrfs_ordered_extent *entry, int wait); | 137 | struct btrfs_ordered_extent *entry, int wait); |
138 | void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 138 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
139 | struct btrfs_ordered_extent * | 139 | struct btrfs_ordered_extent * |
140 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 140 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
141 | int btrfs_ordered_update_i_size(struct inode *inode, | 141 | int btrfs_ordered_update_i_size(struct inode *inode, |