diff options
author | Josef Bacik <josef@redhat.com> | 2010-05-23 11:00:55 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:57 -0400 |
commit | 4b46fce23349bfca781a32e2707a18328ca5ae22 (patch) | |
tree | 68f1200f2bc82d3f35218aef38e6d5d92bff4aca /fs/btrfs/ordered-data.c | |
parent | c2c6ca417e2db7a519e6e92c82f4a933d940d076 (diff) |
Btrfs: add basic DIO read/write support
This provides basic DIO support for reading and writing. It does not do the
work to recover from mismatching checksums; that will come later. A few design
changes have been made from Jim's code (sorry Jim!):
1) Use the generic direct-io code. Jim originally re-wrote all the generic DIO
code in order to account for all of BTRFS's oddities, but thanks to that work it
seems like the best bet is to just ignore compression and such and just opt to
fall back on buffered IO.
2) Fall back on buffered IO for compressed or inline extents. Jim's code did
its own buffering to make dio with compressed extents work. Now we just
fall back onto normal buffered IO.
3) Use ordered extents for the writes so that all of the
lock_extent()
lookup_ordered()
type checks continue to work.
4) Do the lock_extent() lookup_ordered() loop in readpage so we don't race with
DIO writes.
I've tested this with fsx and everything works great. This patch depends on my
dio and filemap.c patches to work. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- | fs/btrfs/ordered-data.c | 75 |
1 files changed, 72 insertions, 3 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c9f1020572f2..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
129 | * the first one less than this offset | 138 | * the first one less than this offset |
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
162 | * inserted. | 171 | * inserted. |
163 | */ | 172 | */ |
164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | ||
166 | { | 176 | { |
167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
168 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
184 | 194 | ||
195 | if (dio) | ||
196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
197 | |||
185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
203 | return 0; | 216 | return 0; |
204 | } | 217 | } |
205 | 218 | ||
219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
220 | u64 start, u64 len, u64 disk_len, int type) | ||
221 | { | ||
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
223 | disk_len, type, 0); | ||
224 | } | ||
225 | |||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
227 | u64 start, u64 len, u64 disk_len, int type) | ||
228 | { | ||
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
230 | disk_len, type, 1); | ||
231 | } | ||
232 | |||
206 | /* | 233 | /* |
207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
@@ -484,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
484 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
485 | * for pdflush to find them | 512 | * for pdflush to find them |
486 | */ | 513 | */ |
487 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
488 | if (wait) { | 516 | if (wait) { |
489 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
490 | &entry->flags)); | 518 | &entry->flags)); |
@@ -581,6 +609,47 @@ out: | |||
581 | return entry; | 609 | return entry; |
582 | } | 610 | } |
583 | 611 | ||
612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
613 | * extents that exist in the range, rather than just the start of the range. | ||
614 | */ | ||
615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
616 | u64 file_offset, | ||
617 | u64 len) | ||
618 | { | ||
619 | struct btrfs_ordered_inode_tree *tree; | ||
620 | struct rb_node *node; | ||
621 | struct btrfs_ordered_extent *entry = NULL; | ||
622 | |||
623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
624 | spin_lock(&tree->lock); | ||
625 | node = tree_search(tree, file_offset); | ||
626 | if (!node) { | ||
627 | node = tree_search(tree, file_offset + len); | ||
628 | if (!node) | ||
629 | goto out; | ||
630 | } | ||
631 | |||
632 | while (1) { | ||
633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
634 | if (range_overlaps(entry, file_offset, len)) | ||
635 | break; | ||
636 | |||
637 | if (entry->file_offset >= file_offset + len) { | ||
638 | entry = NULL; | ||
639 | break; | ||
640 | } | ||
641 | entry = NULL; | ||
642 | node = rb_next(node); | ||
643 | if (!node) | ||
644 | break; | ||
645 | } | ||
646 | out: | ||
647 | if (entry) | ||
648 | atomic_inc(&entry->refs); | ||
649 | spin_unlock(&tree->lock); | ||
650 | return entry; | ||
651 | } | ||
652 | |||
584 | /* | 653 | /* |
585 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
586 | * if none is found | 655 | * if none is found |