aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c209
1 files changed, 177 insertions, 32 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5c2a9e78a949..2b61e1ddcd99 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/gfp.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/writeback.h> 21#include <linux/writeback.h>
@@ -125,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
125 return 1; 124 return 1;
126} 125}
127 126
127static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
128 u64 len)
129{
130 if (file_offset + len <= entry->file_offset ||
131 entry->file_offset + entry->len <= file_offset)
132 return 0;
133 return 1;
134}
135
128/* 136/*
129 * look find the first ordered struct that has this offset, otherwise 137 * look find the first ordered struct that has this offset, otherwise
130 * the first one less than this offset 138 * the first one less than this offset
@@ -162,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
162 * The tree is given a single reference on the ordered extent that was 170 * The tree is given a single reference on the ordered extent that was
163 * inserted. 171 * inserted.
164 */ 172 */
165int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 173static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
166 u64 start, u64 len, u64 disk_len, int type) 174 u64 start, u64 len, u64 disk_len,
175 int type, int dio, int compress_type)
167{ 176{
168 struct btrfs_ordered_inode_tree *tree; 177 struct btrfs_ordered_inode_tree *tree;
169 struct rb_node *node; 178 struct rb_node *node;
@@ -174,36 +183,65 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174 if (!entry) 183 if (!entry)
175 return -ENOMEM; 184 return -ENOMEM;
176 185
177 mutex_lock(&tree->mutex);
178 entry->file_offset = file_offset; 186 entry->file_offset = file_offset;
179 entry->start = start; 187 entry->start = start;
180 entry->len = len; 188 entry->len = len;
181 entry->disk_len = disk_len; 189 entry->disk_len = disk_len;
182 entry->bytes_left = len; 190 entry->bytes_left = len;
183 entry->inode = inode; 191 entry->inode = inode;
192 entry->compress_type = compress_type;
184 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 193 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
185 set_bit(type, &entry->flags); 194 set_bit(type, &entry->flags);
186 195
196 if (dio)
197 set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
198
187 /* one ref for the tree */ 199 /* one ref for the tree */
188 atomic_set(&entry->refs, 1); 200 atomic_set(&entry->refs, 1);
189 init_waitqueue_head(&entry->wait); 201 init_waitqueue_head(&entry->wait);
190 INIT_LIST_HEAD(&entry->list); 202 INIT_LIST_HEAD(&entry->list);
191 INIT_LIST_HEAD(&entry->root_extent_list); 203 INIT_LIST_HEAD(&entry->root_extent_list);
192 204
205 spin_lock(&tree->lock);
193 node = tree_insert(&tree->tree, file_offset, 206 node = tree_insert(&tree->tree, file_offset,
194 &entry->rb_node); 207 &entry->rb_node);
195 BUG_ON(node); 208 BUG_ON(node);
209 spin_unlock(&tree->lock);
196 210
197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 211 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
198 list_add_tail(&entry->root_extent_list, 212 list_add_tail(&entry->root_extent_list,
199 &BTRFS_I(inode)->root->fs_info->ordered_extents); 213 &BTRFS_I(inode)->root->fs_info->ordered_extents);
200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 214 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
201 215
202 mutex_unlock(&tree->mutex);
203 BUG_ON(node); 216 BUG_ON(node);
204 return 0; 217 return 0;
205} 218}
206 219
220int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
221 u64 start, u64 len, u64 disk_len, int type)
222{
223 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
224 disk_len, type, 0,
225 BTRFS_COMPRESS_NONE);
226}
227
228int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
229 u64 start, u64 len, u64 disk_len, int type)
230{
231 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
232 disk_len, type, 1,
233 BTRFS_COMPRESS_NONE);
234}
235
236int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
237 u64 start, u64 len, u64 disk_len,
238 int type, int compress_type)
239{
240 return __btrfs_add_ordered_extent(inode, file_offset, start, len,
241 disk_len, type, 0,
242 compress_type);
243}
244
207/* 245/*
208 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted 246 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
209 * when an ordered extent is finished. If the list covers more than one 247 * when an ordered extent is finished. If the list covers more than one
@@ -216,14 +254,81 @@ int btrfs_add_ordered_sum(struct inode *inode,
216 struct btrfs_ordered_inode_tree *tree; 254 struct btrfs_ordered_inode_tree *tree;
217 255
218 tree = &BTRFS_I(inode)->ordered_tree; 256 tree = &BTRFS_I(inode)->ordered_tree;
219 mutex_lock(&tree->mutex); 257 spin_lock(&tree->lock);
220 list_add_tail(&sum->list, &entry->list); 258 list_add_tail(&sum->list, &entry->list);
221 mutex_unlock(&tree->mutex); 259 spin_unlock(&tree->lock);
222 return 0; 260 return 0;
223} 261}
224 262
225/* 263/*
226 * this is used to account for finished IO across a given range 264 * this is used to account for finished IO across a given range
265 * of the file. The IO may span ordered extents. If
266 * a given ordered_extent is completely done, 1 is returned, otherwise
267 * 0.
268 *
269 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
270 * to make sure this function only returns 1 once for a given ordered extent.
271 *
272 * file_offset is updated to one byte past the range that is recorded as
273 * complete. This allows you to walk forward in the file.
274 */
275int btrfs_dec_test_first_ordered_pending(struct inode *inode,
276 struct btrfs_ordered_extent **cached,
277 u64 *file_offset, u64 io_size)
278{
279 struct btrfs_ordered_inode_tree *tree;
280 struct rb_node *node;
281 struct btrfs_ordered_extent *entry = NULL;
282 int ret;
283 u64 dec_end;
284 u64 dec_start;
285 u64 to_dec;
286
287 tree = &BTRFS_I(inode)->ordered_tree;
288 spin_lock(&tree->lock);
289 node = tree_search(tree, *file_offset);
290 if (!node) {
291 ret = 1;
292 goto out;
293 }
294
295 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
296 if (!offset_in_entry(entry, *file_offset)) {
297 ret = 1;
298 goto out;
299 }
300
301 dec_start = max(*file_offset, entry->file_offset);
302 dec_end = min(*file_offset + io_size, entry->file_offset +
303 entry->len);
304 *file_offset = dec_end;
305 if (dec_start > dec_end) {
306 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
307 (unsigned long long)dec_start,
308 (unsigned long long)dec_end);
309 }
310 to_dec = dec_end - dec_start;
311 if (to_dec > entry->bytes_left) {
312 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
313 (unsigned long long)entry->bytes_left,
314 (unsigned long long)to_dec);
315 }
316 entry->bytes_left -= to_dec;
317 if (entry->bytes_left == 0)
318 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
319 else
320 ret = 1;
321out:
322 if (!ret && cached && entry) {
323 *cached = entry;
324 atomic_inc(&entry->refs);
325 }
326 spin_unlock(&tree->lock);
327 return ret == 0;
328}
329
330/*
331 * this is used to account for finished IO across a given range
227 * of the file. The IO should not span ordered extents. If 332 * of the file. The IO should not span ordered extents. If
228 * a given ordered_extent is completely done, 1 is returned, otherwise 333 * a given ordered_extent is completely done, 1 is returned, otherwise
229 * 0. 334 * 0.
@@ -232,15 +337,16 @@ int btrfs_add_ordered_sum(struct inode *inode,
232 * to make sure this function only returns 1 once for a given ordered extent. 337 * to make sure this function only returns 1 once for a given ordered extent.
233 */ 338 */
234int btrfs_dec_test_ordered_pending(struct inode *inode, 339int btrfs_dec_test_ordered_pending(struct inode *inode,
340 struct btrfs_ordered_extent **cached,
235 u64 file_offset, u64 io_size) 341 u64 file_offset, u64 io_size)
236{ 342{
237 struct btrfs_ordered_inode_tree *tree; 343 struct btrfs_ordered_inode_tree *tree;
238 struct rb_node *node; 344 struct rb_node *node;
239 struct btrfs_ordered_extent *entry; 345 struct btrfs_ordered_extent *entry = NULL;
240 int ret; 346 int ret;
241 347
242 tree = &BTRFS_I(inode)->ordered_tree; 348 tree = &BTRFS_I(inode)->ordered_tree;
243 mutex_lock(&tree->mutex); 349 spin_lock(&tree->lock);
244 node = tree_search(tree, file_offset); 350 node = tree_search(tree, file_offset);
245 if (!node) { 351 if (!node) {
246 ret = 1; 352 ret = 1;
@@ -264,7 +370,11 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
264 else 370 else
265 ret = 1; 371 ret = 1;
266out: 372out:
267 mutex_unlock(&tree->mutex); 373 if (!ret && cached && entry) {
374 *cached = entry;
375 atomic_inc(&entry->refs);
376 }
377 spin_unlock(&tree->lock);
268 return ret == 0; 378 return ret == 0;
269} 379}
270 380
@@ -291,13 +401,14 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 401
292/* 402/*
293 * remove an ordered extent from the tree. No references are dropped 403 * remove an ordered extent from the tree. No references are dropped
294 * and you must wake_up entry->wait. You must hold the tree mutex 404 * and you must wake_up entry->wait. You must hold the tree lock
295 * while you call this function. 405 * while you call this function.
296 */ 406 */
297static int __btrfs_remove_ordered_extent(struct inode *inode, 407static int __btrfs_remove_ordered_extent(struct inode *inode,
298 struct btrfs_ordered_extent *entry) 408 struct btrfs_ordered_extent *entry)
299{ 409{
300 struct btrfs_ordered_inode_tree *tree; 410 struct btrfs_ordered_inode_tree *tree;
411 struct btrfs_root *root = BTRFS_I(inode)->root;
301 struct rb_node *node; 412 struct rb_node *node;
302 413
303 tree = &BTRFS_I(inode)->ordered_tree; 414 tree = &BTRFS_I(inode)->ordered_tree;
@@ -306,13 +417,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
306 tree->last = NULL; 417 tree->last = NULL;
307 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 418 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
308 419
309 spin_lock(&BTRFS_I(inode)->accounting_lock); 420 spin_lock(&root->fs_info->ordered_extent_lock);
310 BTRFS_I(inode)->outstanding_extents--;
311 spin_unlock(&BTRFS_I(inode)->accounting_lock);
312 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
313 inode, 1);
314
315 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
316 list_del_init(&entry->root_extent_list); 421 list_del_init(&entry->root_extent_list);
317 422
318 /* 423 /*
@@ -324,7 +429,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
324 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 429 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
325 list_del_init(&BTRFS_I(inode)->ordered_operations); 430 list_del_init(&BTRFS_I(inode)->ordered_operations);
326 } 431 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 432 spin_unlock(&root->fs_info->ordered_extent_lock);
328 433
329 return 0; 434 return 0;
330} 435}
@@ -340,9 +445,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
340 int ret; 445 int ret;
341 446
342 tree = &BTRFS_I(inode)->ordered_tree; 447 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex); 448 spin_lock(&tree->lock);
344 ret = __btrfs_remove_ordered_extent(inode, entry); 449 ret = __btrfs_remove_ordered_extent(inode, entry);
345 mutex_unlock(&tree->mutex); 450 spin_unlock(&tree->lock);
346 wake_up(&entry->wait); 451 wake_up(&entry->wait);
347 452
348 return ret; 453 return ret;
@@ -485,7 +590,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
485 * start IO on any dirty ones so the wait doesn't stall waiting 590 * start IO on any dirty ones so the wait doesn't stall waiting
486 * for pdflush to find them 591 * for pdflush to find them
487 */ 592 */
488 filemap_fdatawrite_range(inode->i_mapping, start, end); 593 if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
594 filemap_fdatawrite_range(inode->i_mapping, start, end);
489 if (wait) { 595 if (wait) {
490 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, 596 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
491 &entry->flags)); 597 &entry->flags));
@@ -499,7 +605,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
499{ 605{
500 u64 end; 606 u64 end;
501 u64 orig_end; 607 u64 orig_end;
502 u64 wait_end;
503 struct btrfs_ordered_extent *ordered; 608 struct btrfs_ordered_extent *ordered;
504 int found; 609 int found;
505 610
@@ -510,7 +615,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
510 if (orig_end > INT_LIMIT(loff_t)) 615 if (orig_end > INT_LIMIT(loff_t))
511 orig_end = INT_LIMIT(loff_t); 616 orig_end = INT_LIMIT(loff_t);
512 } 617 }
513 wait_end = orig_end;
514again: 618again:
515 /* start IO across the range first to instantiate any delalloc 619 /* start IO across the range first to instantiate any delalloc
516 * extents 620 * extents
@@ -567,7 +671,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
567 struct btrfs_ordered_extent *entry = NULL; 671 struct btrfs_ordered_extent *entry = NULL;
568 672
569 tree = &BTRFS_I(inode)->ordered_tree; 673 tree = &BTRFS_I(inode)->ordered_tree;
570 mutex_lock(&tree->mutex); 674 spin_lock(&tree->lock);
571 node = tree_search(tree, file_offset); 675 node = tree_search(tree, file_offset);
572 if (!node) 676 if (!node)
573 goto out; 677 goto out;
@@ -578,7 +682,48 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
578 if (entry) 682 if (entry)
579 atomic_inc(&entry->refs); 683 atomic_inc(&entry->refs);
580out: 684out:
581 mutex_unlock(&tree->mutex); 685 spin_unlock(&tree->lock);
686 return entry;
687}
688
689/* Since the DIO code tries to lock a wide area we need to look for any ordered
690 * extents that exist in the range, rather than just the start of the range.
691 */
692struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
693 u64 file_offset,
694 u64 len)
695{
696 struct btrfs_ordered_inode_tree *tree;
697 struct rb_node *node;
698 struct btrfs_ordered_extent *entry = NULL;
699
700 tree = &BTRFS_I(inode)->ordered_tree;
701 spin_lock(&tree->lock);
702 node = tree_search(tree, file_offset);
703 if (!node) {
704 node = tree_search(tree, file_offset + len);
705 if (!node)
706 goto out;
707 }
708
709 while (1) {
710 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
711 if (range_overlaps(entry, file_offset, len))
712 break;
713
714 if (entry->file_offset >= file_offset + len) {
715 entry = NULL;
716 break;
717 }
718 entry = NULL;
719 node = rb_next(node);
720 if (!node)
721 break;
722 }
723out:
724 if (entry)
725 atomic_inc(&entry->refs);
726 spin_unlock(&tree->lock);
582 return entry; 727 return entry;
583} 728}
584 729
@@ -594,7 +739,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
594 struct btrfs_ordered_extent *entry = NULL; 739 struct btrfs_ordered_extent *entry = NULL;
595 740
596 tree = &BTRFS_I(inode)->ordered_tree; 741 tree = &BTRFS_I(inode)->ordered_tree;
597 mutex_lock(&tree->mutex); 742 spin_lock(&tree->lock);
598 node = tree_search(tree, file_offset); 743 node = tree_search(tree, file_offset);
599 if (!node) 744 if (!node)
600 goto out; 745 goto out;
@@ -602,7 +747,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
602 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 747 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
603 atomic_inc(&entry->refs); 748 atomic_inc(&entry->refs);
604out: 749out:
605 mutex_unlock(&tree->mutex); 750 spin_unlock(&tree->lock);
606 return entry; 751 return entry;
607} 752}
608 753
@@ -629,7 +774,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
629 else 774 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 775 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
631 776
632 mutex_lock(&tree->mutex); 777 spin_lock(&tree->lock);
633 disk_i_size = BTRFS_I(inode)->disk_i_size; 778 disk_i_size = BTRFS_I(inode)->disk_i_size;
634 779
635 /* truncate file */ 780 /* truncate file */
@@ -735,7 +880,7 @@ out:
735 */ 880 */
736 if (ordered) 881 if (ordered)
737 __btrfs_remove_ordered_extent(inode, ordered); 882 __btrfs_remove_ordered_extent(inode, ordered);
738 mutex_unlock(&tree->mutex); 883 spin_unlock(&tree->lock);
739 if (ordered) 884 if (ordered)
740 wake_up(&ordered->wait); 885 wake_up(&ordered->wait);
741 return ret; 886 return ret;
@@ -762,7 +907,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
762 if (!ordered) 907 if (!ordered)
763 return 1; 908 return 1;
764 909
765 mutex_lock(&tree->mutex); 910 spin_lock(&tree->lock);
766 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 911 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
767 if (disk_bytenr >= ordered_sum->bytenr) { 912 if (disk_bytenr >= ordered_sum->bytenr) {
768 num_sectors = ordered_sum->len / sectorsize; 913 num_sectors = ordered_sum->len / sectorsize;
@@ -777,7 +922,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
777 } 922 }
778 } 923 }
779out: 924out:
780 mutex_unlock(&tree->mutex); 925 spin_unlock(&tree->lock);
781 btrfs_put_ordered_extent(ordered); 926 btrfs_put_ordered_extent(ordered);
782 return ret; 927 return ret;
783} 928}