diff options
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- | fs/btrfs/ordered-data.c | 124 |
1 files changed, 120 insertions, 4 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a20940170274..d6f0806c682f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
310 | 310 | ||
311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
312 | list_del_init(&entry->root_extent_list); | 312 | list_del_init(&entry->root_extent_list); |
313 | |||
314 | /* | ||
315 | * we have no more ordered extents for this inode and | ||
316 | * no dirty pages. We can safely remove it from the | ||
317 | * list of ordered operations | |
318 | */ | ||
319 | if (RB_EMPTY_ROOT(&tree->tree) && | ||
320 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | ||
321 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
322 | } | ||
313 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 323 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
314 | 324 | ||
315 | mutex_unlock(&tree->mutex); | 325 | mutex_unlock(&tree->mutex); |
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | |||
370 | } | 380 | } |
371 | 381 | ||
372 | /* | 382 | /* |
383 | * this is used during transaction commit to write all the inodes | ||
384 | * added to the ordered operation list. These files must be fully on | ||
385 | * disk before the transaction commits. | ||
386 | * | ||
387 | * we have two modes here, one is to just start the IO via filemap_flush | ||
388 | * and the other is to wait for all the io. When we wait, we have an | ||
389 | * extra check to make sure the ordered operation list really is empty | ||
390 | * before we return | ||
391 | */ | ||
392 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | ||
393 | { | ||
394 | struct btrfs_inode *btrfs_inode; | ||
395 | struct inode *inode; | ||
396 | struct list_head splice; | ||
397 | |||
398 | INIT_LIST_HEAD(&splice); | ||
399 | |||
400 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
401 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
402 | again: | ||
403 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
404 | |||
405 | while (!list_empty(&splice)) { | ||
406 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
407 | ordered_operations); | ||
408 | |||
409 | inode = &btrfs_inode->vfs_inode; | ||
410 | |||
411 | list_del_init(&btrfs_inode->ordered_operations); | ||
412 | |||
413 | /* | ||
414 | * the inode may be getting freed (in sys_unlink path). | ||
415 | */ | ||
416 | inode = igrab(inode); | ||
417 | |||
418 | if (!wait && inode) { | ||
419 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
420 | &root->fs_info->ordered_operations); | ||
421 | } | ||
422 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
423 | |||
424 | if (inode) { | ||
425 | if (wait) | ||
426 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
427 | else | ||
428 | filemap_flush(inode->i_mapping); | ||
429 | iput(inode); | ||
430 | } | ||
431 | |||
432 | cond_resched(); | ||
433 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
434 | } | ||
435 | if (wait && !list_empty(&root->fs_info->ordered_operations)) | ||
436 | goto again; | ||
437 | |||
438 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
439 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
440 | |||
441 | return 0; | ||
442 | } | ||
443 | |||
444 | /* | ||
373 | * Used to start IO or wait for a given ordered extent to finish. | 445 | * Used to start IO or wait for a given ordered extent to finish. |
374 | * | 446 | * |
375 | * If wait is one, this effectively waits on page writeback for all the pages | 447 | * If wait is one, this effectively waits on page writeback for all the pages |
@@ -417,7 +489,7 @@ again: | |||
417 | /* start IO across the range first to instantiate any delalloc | 489 | /* start IO across the range first to instantiate any delalloc |
418 | * extents | 490 | * extents |
419 | */ | 491 | */ |
420 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); |
421 | 493 | ||
422 | /* The compression code will leave pages locked but return from | 494 | /* The compression code will leave pages locked but return from |
423 | * writepage without setting the page writeback. Starting again | 495 | * writepage without setting the page writeback. Starting again |
@@ -613,7 +685,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
613 | struct btrfs_sector_sum *sector_sums; | 685 | struct btrfs_sector_sum *sector_sums; |
614 | struct btrfs_ordered_extent *ordered; | 686 | struct btrfs_ordered_extent *ordered; |
615 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 687 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
616 | struct list_head *cur; | ||
617 | unsigned long num_sectors; | 688 | unsigned long num_sectors; |
618 | unsigned long i; | 689 | unsigned long i; |
619 | u32 sectorsize = BTRFS_I(inode)->root->sectorsize; | 690 | u32 sectorsize = BTRFS_I(inode)->root->sectorsize; |
@@ -624,8 +695,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
624 | return 1; | 695 | return 1; |
625 | 696 | ||
626 | mutex_lock(&tree->mutex); | 697 | mutex_lock(&tree->mutex); |
627 | list_for_each_prev(cur, &ordered->list) { | 698 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { |
628 | ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); | ||
629 | if (disk_bytenr >= ordered_sum->bytenr) { | 699 | if (disk_bytenr >= ordered_sum->bytenr) { |
630 | num_sectors = ordered_sum->len / sectorsize; | 700 | num_sectors = ordered_sum->len / sectorsize; |
631 | sector_sums = ordered_sum->sums; | 701 | sector_sums = ordered_sum->sums; |
@@ -728,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
728 | 798 | ||
729 | return ret; | 799 | return ret; |
730 | } | 800 | } |
801 | |||
802 | /* | ||
803 | * add a given inode to the list of inodes that must be fully on | ||
804 | * disk before a transaction commit finishes. | ||
805 | * | ||
806 | * This basically gives us the ext3 style data=ordered mode, and it is mostly | ||
807 | * used to make sure renamed files are fully on disk. | ||
808 | * | ||
809 | * It is a noop if the inode is already fully on disk. | ||
810 | * | ||
811 | * If trans is not null, we'll do a friendly check for a transaction that | ||
812 | * is already flushing things and force the IO down ourselves. | ||
813 | */ | ||
814 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
815 | struct btrfs_root *root, | ||
816 | struct inode *inode) | ||
817 | { | ||
818 | u64 last_mod; | ||
819 | |||
820 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); | ||
821 | |||
822 | /* | ||
823 | * if this file hasn't been changed since the last transaction | ||
824 | * commit, we can safely return without doing anything | ||
825 | */ | ||
826 | if (last_mod < root->fs_info->last_trans_committed) | ||
827 | return 0; | ||
828 | |||
829 | /* | ||
830 | * the transaction is already committing. Just write out and | |
831 | * wait on the IO now; don't bother with all of this list nonsense | |
832 | */ | ||
833 | if (trans && root->fs_info->running_transaction->blocked) { | ||
834 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
839 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
840 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
841 | &root->fs_info->ordered_operations); | ||
842 | } | ||
843 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
844 | |||
845 | return 0; | ||
846 | } | ||