diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-28 18:31:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-28 18:31:05 -0400 |
commit | 212a17ab878305600e607f637d2d8a49d9f7ef25 (patch) | |
tree | 64ad97fcc3d5c49ebd735f5508643c798aeecddf /fs | |
parent | baaca1a61497d97cec595fedce03b0a23b983e64 (diff) | |
parent | d9d04879321af570ea7285c6dad92d9c3cd108a1 (diff) |
Merge branch 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (45 commits)
Btrfs: fix __btrfs_map_block on 32 bit machines
btrfs: fix possible deadlock by clearing __GFP_FS flag
btrfs: check link counter overflow in link(2)
btrfs: don't mess with i_nlink of unlocked inode in rename()
Btrfs: check return value of btrfs_alloc_path()
Btrfs: fix OOPS of empty filesystem after balance
Btrfs: fix memory leak of empty filesystem after balance
Btrfs: fix return value of setflags ioctl
Btrfs: fix uncheck memory allocations
btrfs: make inode ref log recovery faster
Btrfs: add btrfs_trim_fs() to handle FITRIM
Btrfs: adjust btrfs_discard_extent() return errors and trimmed bytes
Btrfs: make btrfs_map_block() return entire free extent for each device of RAID0/1/10/DUP
Btrfs: make update_reserved_bytes() public
btrfs: return EXDEV when linking from different subvolumes
Btrfs: Per file/directory controls for COW and compression
Btrfs: add datacow flag in inode flag
btrfs: use GFP_NOFS instead of GFP_KERNEL
Btrfs: check return value of read_tree_block()
btrfs: properly access unaligned checksum buffer
...
Fix up trivial conflicts in fs/btrfs/volumes.c due to plug removal in
the block layer.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 3 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 17 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 159 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 19 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 6 | ||||
-rw-r--r-- | fs/btrfs/dir-item.c | 45 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 132 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 229 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 3 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 5 | ||||
-rw-r--r-- | fs/btrfs/file.c | 388 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 510 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.h | 2 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 3 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 366 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 100 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 8 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 8 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 6 | ||||
-rw-r--r-- | fs/btrfs/super.c | 5 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 14 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 57 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 164 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 12 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 2 |
26 files changed, 1460 insertions, 804 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ccc991c542df..57c3bb2884ce 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -136,9 +136,8 @@ struct btrfs_inode { | |||
136 | * items we think we'll end up using, and reserved_extents is the number | 136 | * items we think we'll end up using, and reserved_extents is the number |
137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
138 | */ | 138 | */ |
139 | spinlock_t accounting_lock; | ||
140 | atomic_t outstanding_extents; | 139 | atomic_t outstanding_extents; |
141 | int reserved_extents; | 140 | atomic_t reserved_extents; |
142 | 141 | ||
143 | /* | 142 | /* |
144 | * ordered_data_close is set by truncate when a file that used | 143 | * ordered_data_close is set by truncate when a file that used |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4d2110eafe29..41d1d7c70e29 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
340 | 340 | ||
341 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); | 341 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); |
342 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 342 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
343 | if (!cb) | ||
344 | return -ENOMEM; | ||
343 | atomic_set(&cb->pending_bios, 0); | 345 | atomic_set(&cb->pending_bios, 0); |
344 | cb->errors = 0; | 346 | cb->errors = 0; |
345 | cb->inode = inode; | 347 | cb->inode = inode; |
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
354 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 356 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
355 | 357 | ||
356 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); | 358 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); |
359 | if(!bio) { | ||
360 | kfree(cb); | ||
361 | return -ENOMEM; | ||
362 | } | ||
357 | bio->bi_private = cb; | 363 | bio->bi_private = cb; |
358 | bio->bi_end_io = end_compressed_bio_write; | 364 | bio->bi_end_io = end_compressed_bio_write; |
359 | atomic_inc(&cb->pending_bios); | 365 | atomic_inc(&cb->pending_bios); |
@@ -657,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
657 | atomic_inc(&cb->pending_bios); | 663 | atomic_inc(&cb->pending_bios); |
658 | 664 | ||
659 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | 665 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
660 | btrfs_lookup_bio_sums(root, inode, comp_bio, | 666 | ret = btrfs_lookup_bio_sums(root, inode, |
661 | sums); | 667 | comp_bio, sums); |
668 | BUG_ON(ret); | ||
662 | } | 669 | } |
663 | sums += (comp_bio->bi_size + root->sectorsize - 1) / | 670 | sums += (comp_bio->bi_size + root->sectorsize - 1) / |
664 | root->sectorsize; | 671 | root->sectorsize; |
@@ -683,8 +690,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
683 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | 690 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); |
684 | BUG_ON(ret); | 691 | BUG_ON(ret); |
685 | 692 | ||
686 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) | 693 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
687 | btrfs_lookup_bio_sums(root, inode, comp_bio, sums); | 694 | ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums); |
695 | BUG_ON(ret); | ||
696 | } | ||
688 | 697 | ||
689 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); | 698 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); |
690 | BUG_ON(ret); | 699 | BUG_ON(ret); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b5baff0dccfe..84d7ca1fe0ba 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | |||
147 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | 147 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) |
148 | { | 148 | { |
149 | struct extent_buffer *eb; | 149 | struct extent_buffer *eb; |
150 | spin_lock(&root->node_lock); | 150 | |
151 | eb = root->node; | 151 | rcu_read_lock(); |
152 | eb = rcu_dereference(root->node); | ||
152 | extent_buffer_get(eb); | 153 | extent_buffer_get(eb); |
153 | spin_unlock(&root->node_lock); | 154 | rcu_read_unlock(); |
154 | return eb; | 155 | return eb; |
155 | } | 156 | } |
156 | 157 | ||
@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |||
165 | while (1) { | 166 | while (1) { |
166 | eb = btrfs_root_node(root); | 167 | eb = btrfs_root_node(root); |
167 | btrfs_tree_lock(eb); | 168 | btrfs_tree_lock(eb); |
168 | 169 | if (eb == root->node) | |
169 | spin_lock(&root->node_lock); | ||
170 | if (eb == root->node) { | ||
171 | spin_unlock(&root->node_lock); | ||
172 | break; | 170 | break; |
173 | } | ||
174 | spin_unlock(&root->node_lock); | ||
175 | |||
176 | btrfs_tree_unlock(eb); | 171 | btrfs_tree_unlock(eb); |
177 | free_extent_buffer(eb); | 172 | free_extent_buffer(eb); |
178 | } | 173 | } |
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
458 | else | 453 | else |
459 | parent_start = 0; | 454 | parent_start = 0; |
460 | 455 | ||
461 | spin_lock(&root->node_lock); | ||
462 | root->node = cow; | ||
463 | extent_buffer_get(cow); | 456 | extent_buffer_get(cow); |
464 | spin_unlock(&root->node_lock); | 457 | rcu_assign_pointer(root->node, cow); |
465 | 458 | ||
466 | btrfs_free_tree_block(trans, root, buf, parent_start, | 459 | btrfs_free_tree_block(trans, root, buf, parent_start, |
467 | last_ref); | 460 | last_ref); |
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
542 | 535 | ||
543 | ret = __btrfs_cow_block(trans, root, buf, parent, | 536 | ret = __btrfs_cow_block(trans, root, buf, parent, |
544 | parent_slot, cow_ret, search_start, 0); | 537 | parent_slot, cow_ret, search_start, 0); |
538 | |||
539 | trace_btrfs_cow_block(root, buf, *cow_ret); | ||
540 | |||
545 | return ret; | 541 | return ret; |
546 | } | 542 | } |
547 | 543 | ||
@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
686 | if (!cur) { | 682 | if (!cur) { |
687 | cur = read_tree_block(root, blocknr, | 683 | cur = read_tree_block(root, blocknr, |
688 | blocksize, gen); | 684 | blocksize, gen); |
685 | if (!cur) | ||
686 | return -EIO; | ||
689 | } else if (!uptodate) { | 687 | } else if (!uptodate) { |
690 | btrfs_read_buffer(cur, gen); | 688 | btrfs_read_buffer(cur, gen); |
691 | } | 689 | } |
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, | |||
732 | return btrfs_item_offset_nr(leaf, nr - 1); | 730 | return btrfs_item_offset_nr(leaf, nr - 1); |
733 | } | 731 | } |
734 | 732 | ||
735 | /* | ||
736 | * extra debugging checks to make sure all the items in a key are | ||
737 | * well formed and in the proper order | ||
738 | */ | ||
739 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, | ||
740 | int level) | ||
741 | { | ||
742 | struct extent_buffer *parent = NULL; | ||
743 | struct extent_buffer *node = path->nodes[level]; | ||
744 | struct btrfs_disk_key parent_key; | ||
745 | struct btrfs_disk_key node_key; | ||
746 | int parent_slot; | ||
747 | int slot; | ||
748 | struct btrfs_key cpukey; | ||
749 | u32 nritems = btrfs_header_nritems(node); | ||
750 | |||
751 | if (path->nodes[level + 1]) | ||
752 | parent = path->nodes[level + 1]; | ||
753 | |||
754 | slot = path->slots[level]; | ||
755 | BUG_ON(nritems == 0); | ||
756 | if (parent) { | ||
757 | parent_slot = path->slots[level + 1]; | ||
758 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
759 | btrfs_node_key(node, &node_key, 0); | ||
760 | BUG_ON(memcmp(&parent_key, &node_key, | ||
761 | sizeof(struct btrfs_disk_key))); | ||
762 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
763 | btrfs_header_bytenr(node)); | ||
764 | } | ||
765 | BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); | ||
766 | if (slot != 0) { | ||
767 | btrfs_node_key_to_cpu(node, &cpukey, slot - 1); | ||
768 | btrfs_node_key(node, &node_key, slot); | ||
769 | BUG_ON(comp_keys(&node_key, &cpukey) <= 0); | ||
770 | } | ||
771 | if (slot < nritems - 1) { | ||
772 | btrfs_node_key_to_cpu(node, &cpukey, slot + 1); | ||
773 | btrfs_node_key(node, &node_key, slot); | ||
774 | BUG_ON(comp_keys(&node_key, &cpukey) >= 0); | ||
775 | } | ||
776 | return 0; | ||
777 | } | ||
778 | |||
779 | /* | ||
780 | * extra checking to make sure all the items in a leaf are | ||
781 | * well formed and in the proper order | ||
782 | */ | ||
783 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | ||
784 | int level) | ||
785 | { | ||
786 | struct extent_buffer *leaf = path->nodes[level]; | ||
787 | struct extent_buffer *parent = NULL; | ||
788 | int parent_slot; | ||
789 | struct btrfs_key cpukey; | ||
790 | struct btrfs_disk_key parent_key; | ||
791 | struct btrfs_disk_key leaf_key; | ||
792 | int slot = path->slots[0]; | ||
793 | |||
794 | u32 nritems = btrfs_header_nritems(leaf); | ||
795 | |||
796 | if (path->nodes[level + 1]) | ||
797 | parent = path->nodes[level + 1]; | ||
798 | |||
799 | if (nritems == 0) | ||
800 | return 0; | ||
801 | |||
802 | if (parent) { | ||
803 | parent_slot = path->slots[level + 1]; | ||
804 | btrfs_node_key(parent, &parent_key, parent_slot); | ||
805 | btrfs_item_key(leaf, &leaf_key, 0); | ||
806 | |||
807 | BUG_ON(memcmp(&parent_key, &leaf_key, | ||
808 | sizeof(struct btrfs_disk_key))); | ||
809 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | ||
810 | btrfs_header_bytenr(leaf)); | ||
811 | } | ||
812 | if (slot != 0 && slot < nritems - 1) { | ||
813 | btrfs_item_key(leaf, &leaf_key, slot); | ||
814 | btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); | ||
815 | if (comp_keys(&leaf_key, &cpukey) <= 0) { | ||
816 | btrfs_print_leaf(root, leaf); | ||
817 | printk(KERN_CRIT "slot %d offset bad key\n", slot); | ||
818 | BUG_ON(1); | ||
819 | } | ||
820 | if (btrfs_item_offset_nr(leaf, slot - 1) != | ||
821 | btrfs_item_end_nr(leaf, slot)) { | ||
822 | btrfs_print_leaf(root, leaf); | ||
823 | printk(KERN_CRIT "slot %d offset bad\n", slot); | ||
824 | BUG_ON(1); | ||
825 | } | ||
826 | } | ||
827 | if (slot < nritems - 1) { | ||
828 | btrfs_item_key(leaf, &leaf_key, slot); | ||
829 | btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); | ||
830 | BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); | ||
831 | if (btrfs_item_offset_nr(leaf, slot) != | ||
832 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
833 | btrfs_print_leaf(root, leaf); | ||
834 | printk(KERN_CRIT "slot %d offset bad\n", slot); | ||
835 | BUG_ON(1); | ||
836 | } | ||
837 | } | ||
838 | BUG_ON(btrfs_item_offset_nr(leaf, 0) + | ||
839 | btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static noinline int check_block(struct btrfs_root *root, | ||
844 | struct btrfs_path *path, int level) | ||
845 | { | ||
846 | return 0; | ||
847 | if (level == 0) | ||
848 | return check_leaf(root, path, level); | ||
849 | return check_node(root, path, level); | ||
850 | } | ||
851 | 733 | ||
852 | /* | 734 | /* |
853 | * search for key in the extent_buffer. The items start at offset p, | 735 | * search for key in the extent_buffer. The items start at offset p, |
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1046 | goto enospc; | 928 | goto enospc; |
1047 | } | 929 | } |
1048 | 930 | ||
1049 | spin_lock(&root->node_lock); | 931 | rcu_assign_pointer(root->node, child); |
1050 | root->node = child; | ||
1051 | spin_unlock(&root->node_lock); | ||
1052 | 932 | ||
1053 | add_root_to_dirty_list(root); | 933 | add_root_to_dirty_list(root); |
1054 | btrfs_tree_unlock(child); | 934 | btrfs_tree_unlock(child); |
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1188 | } | 1068 | } |
1189 | } | 1069 | } |
1190 | /* double check we haven't messed things up */ | 1070 | /* double check we haven't messed things up */ |
1191 | check_block(root, path, level); | ||
1192 | if (orig_ptr != | 1071 | if (orig_ptr != |
1193 | btrfs_node_blockptr(path->nodes[level], path->slots[level])) | 1072 | btrfs_node_blockptr(path->nodes[level], path->slots[level])) |
1194 | BUG(); | 1073 | BUG(); |
@@ -1798,12 +1677,6 @@ cow_done: | |||
1798 | if (!cow) | 1677 | if (!cow) |
1799 | btrfs_unlock_up_safe(p, level + 1); | 1678 | btrfs_unlock_up_safe(p, level + 1); |
1800 | 1679 | ||
1801 | ret = check_block(root, p, level); | ||
1802 | if (ret) { | ||
1803 | ret = -1; | ||
1804 | goto done; | ||
1805 | } | ||
1806 | |||
1807 | ret = bin_search(b, key, level, &slot); | 1680 | ret = bin_search(b, key, level, &slot); |
1808 | 1681 | ||
1809 | if (level != 0) { | 1682 | if (level != 0) { |
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2130 | 2003 | ||
2131 | btrfs_mark_buffer_dirty(c); | 2004 | btrfs_mark_buffer_dirty(c); |
2132 | 2005 | ||
2133 | spin_lock(&root->node_lock); | ||
2134 | old = root->node; | 2006 | old = root->node; |
2135 | root->node = c; | 2007 | rcu_assign_pointer(root->node, c); |
2136 | spin_unlock(&root->node_lock); | ||
2137 | 2008 | ||
2138 | /* the super has an extra ref to root->node */ | 2009 | /* the super has an extra ref to root->node */ |
2139 | free_extent_buffer(old); | 2010 | free_extent_buffer(old); |
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3840 | unsigned long ptr; | 3711 | unsigned long ptr; |
3841 | 3712 | ||
3842 | path = btrfs_alloc_path(); | 3713 | path = btrfs_alloc_path(); |
3843 | BUG_ON(!path); | 3714 | if (!path) |
3715 | return -ENOMEM; | ||
3844 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); | 3716 | ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); |
3845 | if (!ret) { | 3717 | if (!ret) { |
3846 | leaf = path->nodes[0]; | 3718 | leaf = path->nodes[0]; |
@@ -4217,6 +4089,7 @@ find_next_key: | |||
4217 | } | 4089 | } |
4218 | btrfs_set_path_blocking(path); | 4090 | btrfs_set_path_blocking(path); |
4219 | cur = read_node_slot(root, cur, slot); | 4091 | cur = read_node_slot(root, cur, slot); |
4092 | BUG_ON(!cur); | ||
4220 | 4093 | ||
4221 | btrfs_tree_lock(cur); | 4094 | btrfs_tree_lock(cur); |
4222 | 4095 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7f78cc78fdd0..d47ce8307854 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/kobject.h> | 30 | #include <linux/kobject.h> |
31 | #include <trace/events/btrfs.h> | ||
31 | #include <asm/kmap_types.h> | 32 | #include <asm/kmap_types.h> |
32 | #include "extent_io.h" | 33 | #include "extent_io.h" |
33 | #include "extent_map.h" | 34 | #include "extent_map.h" |
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; | |||
40 | extern struct kmem_cache *btrfs_transaction_cachep; | 41 | extern struct kmem_cache *btrfs_transaction_cachep; |
41 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 42 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
42 | extern struct kmem_cache *btrfs_path_cachep; | 43 | extern struct kmem_cache *btrfs_path_cachep; |
44 | extern struct kmem_cache *btrfs_free_space_cachep; | ||
43 | struct btrfs_ordered_sum; | 45 | struct btrfs_ordered_sum; |
44 | 46 | ||
45 | #define BTRFS_MAGIC "_BHRfS_M" | 47 | #define BTRFS_MAGIC "_BHRfS_M" |
@@ -782,9 +784,6 @@ struct btrfs_free_cluster { | |||
782 | /* first extent starting offset */ | 784 | /* first extent starting offset */ |
783 | u64 window_start; | 785 | u64 window_start; |
784 | 786 | ||
785 | /* if this cluster simply points at a bitmap in the block group */ | ||
786 | bool points_to_bitmap; | ||
787 | |||
788 | struct btrfs_block_group_cache *block_group; | 787 | struct btrfs_block_group_cache *block_group; |
789 | /* | 788 | /* |
790 | * when a cluster is allocated from a block group, we put the | 789 | * when a cluster is allocated from a block group, we put the |
@@ -1283,6 +1282,7 @@ struct btrfs_root { | |||
1283 | #define BTRFS_INODE_NODUMP (1 << 8) | 1282 | #define BTRFS_INODE_NODUMP (1 << 8) |
1284 | #define BTRFS_INODE_NOATIME (1 << 9) | 1283 | #define BTRFS_INODE_NOATIME (1 << 9) |
1285 | #define BTRFS_INODE_DIRSYNC (1 << 10) | 1284 | #define BTRFS_INODE_DIRSYNC (1 << 10) |
1285 | #define BTRFS_INODE_COMPRESS (1 << 11) | ||
1286 | 1286 | ||
1287 | /* some macros to generate set/get funcs for the struct fields. This | 1287 | /* some macros to generate set/get funcs for the struct fields. This |
1288 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple | 1288 | * assumes there is a lefoo_to_cpu for every type, so lets make a simple |
@@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2157 | u64 root_objectid, u64 owner, u64 offset); | 2157 | u64 root_objectid, u64 owner, u64 offset); |
2158 | 2158 | ||
2159 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2159 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
2160 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
2161 | u64 num_bytes, int reserve, int sinfo); | ||
2160 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 2162 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
2161 | struct btrfs_root *root); | 2163 | struct btrfs_root *root); |
2162 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2164 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
@@ -2227,10 +2229,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | |||
2227 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | 2229 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, |
2228 | u64 start, u64 end); | 2230 | u64 start, u64 end); |
2229 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | 2231 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, |
2230 | u64 num_bytes); | 2232 | u64 num_bytes, u64 *actual_bytes); |
2231 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | 2233 | int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, |
2232 | struct btrfs_root *root, u64 type); | 2234 | struct btrfs_root *root, u64 type); |
2235 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | ||
2233 | 2236 | ||
2237 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | ||
2234 | /* ctree.c */ | 2238 | /* ctree.c */ |
2235 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2239 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2236 | int level, int *slot); | 2240 | int level, int *slot); |
@@ -2392,6 +2396,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
2392 | struct btrfs_path *path, u64 dir, | 2396 | struct btrfs_path *path, u64 dir, |
2393 | const char *name, u16 name_len, | 2397 | const char *name, u16 name_len, |
2394 | int mod); | 2398 | int mod); |
2399 | int verify_dir_item(struct btrfs_root *root, | ||
2400 | struct extent_buffer *leaf, | ||
2401 | struct btrfs_dir_item *dir_item); | ||
2395 | 2402 | ||
2396 | /* orphan.c */ | 2403 | /* orphan.c */ |
2397 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | 2404 | int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, |
@@ -2528,7 +2535,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2528 | struct inode *inode); | 2535 | struct inode *inode); |
2529 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2536 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2530 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2537 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2531 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2538 | int btrfs_orphan_cleanup(struct btrfs_root *root); |
2532 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | 2539 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, |
2533 | struct btrfs_pending_snapshot *pending, | 2540 | struct btrfs_pending_snapshot *pending, |
2534 | u64 *bytes_to_reserve); | 2541 | u64 *bytes_to_reserve); |
@@ -2536,7 +2543,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | |||
2536 | struct btrfs_pending_snapshot *pending); | 2543 | struct btrfs_pending_snapshot *pending); |
2537 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | 2544 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, |
2538 | struct btrfs_root *root); | 2545 | struct btrfs_root *root); |
2539 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2546 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); |
2540 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2547 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2541 | void btrfs_add_delayed_iput(struct inode *inode); | 2548 | void btrfs_add_delayed_iput(struct inode *inode); |
2542 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2549 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e807b143b857..bce28f653899 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | |||
483 | INIT_LIST_HEAD(&head_ref->cluster); | 483 | INIT_LIST_HEAD(&head_ref->cluster); |
484 | mutex_init(&head_ref->mutex); | 484 | mutex_init(&head_ref->mutex); |
485 | 485 | ||
486 | trace_btrfs_delayed_ref_head(ref, head_ref, action); | ||
487 | |||
486 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 488 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
487 | 489 | ||
488 | if (existing) { | 490 | if (existing) { |
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
537 | } | 539 | } |
538 | full_ref->level = level; | 540 | full_ref->level = level; |
539 | 541 | ||
542 | trace_btrfs_delayed_tree_ref(ref, full_ref, action); | ||
543 | |||
540 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 544 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
541 | 545 | ||
542 | if (existing) { | 546 | if (existing) { |
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
591 | full_ref->objectid = owner; | 595 | full_ref->objectid = owner; |
592 | full_ref->offset = offset; | 596 | full_ref->offset = offset; |
593 | 597 | ||
598 | trace_btrfs_delayed_data_ref(ref, full_ref, action); | ||
599 | |||
594 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | 600 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
595 | 601 | ||
596 | if (existing) { | 602 | if (existing) { |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f0cad5ae5be7..c62f02f6ae69 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
151 | ret = PTR_ERR(dir_item); | 151 | ret = PTR_ERR(dir_item); |
152 | if (ret == -EEXIST) | 152 | if (ret == -EEXIST) |
153 | goto second_insert; | 153 | goto second_insert; |
154 | goto out; | 154 | goto out_free; |
155 | } | 155 | } |
156 | 156 | ||
157 | leaf = path->nodes[0]; | 157 | leaf = path->nodes[0]; |
@@ -170,7 +170,7 @@ second_insert: | |||
170 | /* FIXME, use some real flag for selecting the extra index */ | 170 | /* FIXME, use some real flag for selecting the extra index */ |
171 | if (root == root->fs_info->tree_root) { | 171 | if (root == root->fs_info->tree_root) { |
172 | ret = 0; | 172 | ret = 0; |
173 | goto out; | 173 | goto out_free; |
174 | } | 174 | } |
175 | btrfs_release_path(root, path); | 175 | btrfs_release_path(root, path); |
176 | 176 | ||
@@ -180,7 +180,7 @@ second_insert: | |||
180 | name, name_len); | 180 | name, name_len); |
181 | if (IS_ERR(dir_item)) { | 181 | if (IS_ERR(dir_item)) { |
182 | ret2 = PTR_ERR(dir_item); | 182 | ret2 = PTR_ERR(dir_item); |
183 | goto out; | 183 | goto out_free; |
184 | } | 184 | } |
185 | leaf = path->nodes[0]; | 185 | leaf = path->nodes[0]; |
186 | btrfs_cpu_key_to_disk(&disk_key, location); | 186 | btrfs_cpu_key_to_disk(&disk_key, location); |
@@ -192,7 +192,9 @@ second_insert: | |||
192 | name_ptr = (unsigned long)(dir_item + 1); | 192 | name_ptr = (unsigned long)(dir_item + 1); |
193 | write_extent_buffer(leaf, name, name_ptr, name_len); | 193 | write_extent_buffer(leaf, name, name_ptr, name_len); |
194 | btrfs_mark_buffer_dirty(leaf); | 194 | btrfs_mark_buffer_dirty(leaf); |
195 | out: | 195 | |
196 | out_free: | ||
197 | |||
196 | btrfs_free_path(path); | 198 | btrfs_free_path(path); |
197 | if (ret) | 199 | if (ret) |
198 | return ret; | 200 | return ret; |
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | |||
377 | 379 | ||
378 | leaf = path->nodes[0]; | 380 | leaf = path->nodes[0]; |
379 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); | 381 | dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); |
382 | if (verify_dir_item(root, leaf, dir_item)) | ||
383 | return NULL; | ||
384 | |||
380 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); | 385 | total_len = btrfs_item_size_nr(leaf, path->slots[0]); |
381 | while (cur < total_len) { | 386 | while (cur < total_len) { |
382 | this_len = sizeof(*dir_item) + | 387 | this_len = sizeof(*dir_item) + |
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, | |||
429 | } | 434 | } |
430 | return ret; | 435 | return ret; |
431 | } | 436 | } |
437 | |||
438 | int verify_dir_item(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf, | ||
440 | struct btrfs_dir_item *dir_item) | ||
441 | { | ||
442 | u16 namelen = BTRFS_NAME_LEN; | ||
443 | u8 type = btrfs_dir_type(leaf, dir_item); | ||
444 | |||
445 | if (type >= BTRFS_FT_MAX) { | ||
446 | printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", | ||
447 | (int)type); | ||
448 | return 1; | ||
449 | } | ||
450 | |||
451 | if (type == BTRFS_FT_XATTR) | ||
452 | namelen = XATTR_NAME_MAX; | ||
453 | |||
454 | if (btrfs_dir_name_len(leaf, dir_item) > namelen) { | ||
455 | printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n", | ||
456 | (unsigned)btrfs_dir_data_len(leaf, dir_item)); | ||
457 | return 1; | ||
458 | } | ||
459 | |||
460 | /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ | ||
461 | if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { | ||
462 | printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", | ||
463 | (unsigned)btrfs_dir_data_len(leaf, dir_item)); | ||
464 | return 1; | ||
465 | } | ||
466 | |||
467 | return 0; | ||
468 | } | ||
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 830d261d0e6b..d7a7315bd031 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | 31 | #include <linux/migrate.h> |
32 | #include <asm/unaligned.h> | ||
32 | #include "compat.h" | 33 | #include "compat.h" |
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | |||
198 | 199 | ||
199 | void btrfs_csum_final(u32 crc, char *result) | 200 | void btrfs_csum_final(u32 crc, char *result) |
200 | { | 201 | { |
201 | *(__le32 *)result = ~cpu_to_le32(crc); | 202 | put_unaligned_le32(~crc, result); |
202 | } | 203 | } |
203 | 204 | ||
204 | /* | 205 | /* |
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
323 | int num_copies = 0; | 324 | int num_copies = 0; |
324 | int mirror_num = 0; | 325 | int mirror_num = 0; |
325 | 326 | ||
327 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
326 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 328 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
327 | while (1) { | 329 | while (1) { |
328 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 330 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
331 | !verify_parent_transid(io_tree, eb, parent_transid)) | 333 | !verify_parent_transid(io_tree, eb, parent_transid)) |
332 | return ret; | 334 | return ret; |
333 | 335 | ||
336 | /* | ||
337 | * This buffer's crc is fine, but its contents are corrupted, so | ||
338 | * there is no reason to read the other copies, they won't be | ||
339 | * any less wrong. | ||
340 | */ | ||
341 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | ||
342 | return ret; | ||
343 | |||
334 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 344 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
335 | eb->start, eb->len); | 345 | eb->start, eb->len); |
336 | if (num_copies == 1) | 346 | if (num_copies == 1) |
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
419 | return ret; | 429 | return ret; |
420 | } | 430 | } |
421 | 431 | ||
432 | #define CORRUPT(reason, eb, root, slot) \ | ||
433 | printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ | ||
434 | "root=%llu, slot=%d\n", reason, \ | ||
435 | (unsigned long long)btrfs_header_bytenr(eb), \ | ||
436 | (unsigned long long)root->objectid, slot) | ||
437 | |||
438 | static noinline int check_leaf(struct btrfs_root *root, | ||
439 | struct extent_buffer *leaf) | ||
440 | { | ||
441 | struct btrfs_key key; | ||
442 | struct btrfs_key leaf_key; | ||
443 | u32 nritems = btrfs_header_nritems(leaf); | ||
444 | int slot; | ||
445 | |||
446 | if (nritems == 0) | ||
447 | return 0; | ||
448 | |||
449 | /* Check the 0 item */ | ||
450 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
451 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
452 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
453 | return -EIO; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Check to make sure each item's keys are in the correct order and their | ||
458 | * offsets make sense. We only have to loop through nritems-1 because | ||
459 | * we check the current slot against the next slot, which verifies the | ||
460 | * next slot's offset+size makes sense and that the current slot's | ||
461 | * offset is correct. | ||
462 | */ | ||
463 | for (slot = 0; slot < nritems - 1; slot++) { | ||
464 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
465 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
466 | |||
467 | /* Make sure the keys are in the right order */ | ||
468 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
469 | CORRUPT("bad key order", leaf, root, slot); | ||
470 | return -EIO; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * Make sure the offset and ends are right, remember that the | ||
475 | * item data starts at the end of the leaf and grows towards the | ||
476 | * front. | ||
477 | */ | ||
478 | if (btrfs_item_offset_nr(leaf, slot) != | ||
479 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
480 | CORRUPT("slot offset bad", leaf, root, slot); | ||
481 | return -EIO; | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * Check to make sure that we don't point outside of the leaf, | ||
486 | * just in case all the items are consistent with each other, but | ||
487 | * all point outside of the leaf. | ||
488 | */ | ||
489 | if (btrfs_item_end_nr(leaf, slot) > | ||
490 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
491 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
492 | return -EIO; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
422 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 499 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
423 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | 500 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) |
424 | { | 501 | { |
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
485 | btrfs_set_buffer_lockdep_class(eb, found_level); | 562 | btrfs_set_buffer_lockdep_class(eb, found_level); |
486 | 563 | ||
487 | ret = csum_tree_block(root, eb, 1); | 564 | ret = csum_tree_block(root, eb, 1); |
488 | if (ret) | 565 | if (ret) { |
489 | ret = -EIO; | 566 | ret = -EIO; |
567 | goto err; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * If this is a leaf block and it is corrupt, set the corrupt bit so | ||
572 | * that we don't try and read the other copies of this block, just | ||
573 | * return -EIO. | ||
574 | */ | ||
575 | if (found_level == 0 && check_leaf(root, eb)) { | ||
576 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | ||
577 | ret = -EIO; | ||
578 | } | ||
490 | 579 | ||
491 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 580 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
492 | end = eb->start + end - 1; | 581 | end = eb->start + end - 1; |
@@ -1159,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1159 | root, fs_info, location->objectid); | 1248 | root, fs_info, location->objectid); |
1160 | 1249 | ||
1161 | path = btrfs_alloc_path(); | 1250 | path = btrfs_alloc_path(); |
1162 | BUG_ON(!path); | 1251 | if (!path) { |
1252 | kfree(root); | ||
1253 | return ERR_PTR(-ENOMEM); | ||
1254 | } | ||
1163 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1255 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1164 | if (ret == 0) { | 1256 | if (ret == 0) { |
1165 | l = path->nodes[0]; | 1257 | l = path->nodes[0]; |
@@ -1553,6 +1645,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1553 | goto fail_bdi; | 1645 | goto fail_bdi; |
1554 | } | 1646 | } |
1555 | 1647 | ||
1648 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | ||
1649 | |||
1556 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1650 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1557 | INIT_LIST_HEAD(&fs_info->trans_list); | 1651 | INIT_LIST_HEAD(&fs_info->trans_list); |
1558 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1652 | INIT_LIST_HEAD(&fs_info->dead_roots); |
@@ -1683,6 +1777,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1683 | 1777 | ||
1684 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 1778 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
1685 | 1779 | ||
1780 | /* | ||
1781 | * In the long term, we'll store the compression type in the super | ||
1782 | * block, and it'll be used for per file compression control. | ||
1783 | */ | ||
1784 | fs_info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
1785 | |||
1686 | ret = btrfs_parse_options(tree_root, options); | 1786 | ret = btrfs_parse_options(tree_root, options); |
1687 | if (ret) { | 1787 | if (ret) { |
1688 | err = ret; | 1788 | err = ret; |
@@ -1888,6 +1988,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1888 | fs_info->metadata_alloc_profile = (u64)-1; | 1988 | fs_info->metadata_alloc_profile = (u64)-1; |
1889 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1989 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1890 | 1990 | ||
1991 | ret = btrfs_init_space_info(fs_info); | ||
1992 | if (ret) { | ||
1993 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | ||
1994 | goto fail_block_groups; | ||
1995 | } | ||
1996 | |||
1891 | ret = btrfs_read_block_groups(extent_root); | 1997 | ret = btrfs_read_block_groups(extent_root); |
1892 | if (ret) { | 1998 | if (ret) { |
1893 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1999 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
@@ -1979,9 +2085,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1979 | 2085 | ||
1980 | if (!(sb->s_flags & MS_RDONLY)) { | 2086 | if (!(sb->s_flags & MS_RDONLY)) { |
1981 | down_read(&fs_info->cleanup_work_sem); | 2087 | down_read(&fs_info->cleanup_work_sem); |
1982 | btrfs_orphan_cleanup(fs_info->fs_root); | 2088 | err = btrfs_orphan_cleanup(fs_info->fs_root); |
1983 | btrfs_orphan_cleanup(fs_info->tree_root); | 2089 | if (!err) |
2090 | err = btrfs_orphan_cleanup(fs_info->tree_root); | ||
1984 | up_read(&fs_info->cleanup_work_sem); | 2091 | up_read(&fs_info->cleanup_work_sem); |
2092 | if (err) { | ||
2093 | close_ctree(tree_root); | ||
2094 | return ERR_PTR(err); | ||
2095 | } | ||
1985 | } | 2096 | } |
1986 | 2097 | ||
1987 | return tree_root; | 2098 | return tree_root; |
@@ -2356,8 +2467,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2356 | 2467 | ||
2357 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2468 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2358 | for (i = 0; i < ret; i++) { | 2469 | for (i = 0; i < ret; i++) { |
2470 | int err; | ||
2471 | |||
2359 | root_objectid = gang[i]->root_key.objectid; | 2472 | root_objectid = gang[i]->root_key.objectid; |
2360 | btrfs_orphan_cleanup(gang[i]); | 2473 | err = btrfs_orphan_cleanup(gang[i]); |
2474 | if (err) | ||
2475 | return err; | ||
2361 | } | 2476 | } |
2362 | root_objectid++; | 2477 | root_objectid++; |
2363 | } | 2478 | } |
@@ -2868,7 +2983,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
2868 | break; | 2983 | break; |
2869 | 2984 | ||
2870 | /* opt_discard */ | 2985 | /* opt_discard */ |
2871 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | 2986 | if (btrfs_test_opt(root, DISCARD)) |
2987 | ret = btrfs_error_discard_extent(root, start, | ||
2988 | end + 1 - start, | ||
2989 | NULL); | ||
2872 | 2990 | ||
2873 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 2991 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
2874 | btrfs_error_unpin_extent_range(root, start, end); | 2992 | btrfs_error_unpin_extent_range(root, start, end); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b3089b5c2df..f619c3cb13b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -36,8 +36,6 @@ | |||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc); | 38 | u64 bytenr, u64 num_bytes, int alloc); |
39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve, int sinfo); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 39 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 41 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
442 | * allocate blocks for the tree root we can't do the fast caching since | 440 | * allocate blocks for the tree root we can't do the fast caching since |
443 | * we likely hold important locks. | 441 | * we likely hold important locks. |
444 | */ | 442 | */ |
445 | if (!trans->transaction->in_commit && | 443 | if (trans && (!trans->transaction->in_commit) && |
446 | (root && root != root->fs_info->tree_root)) { | 444 | (root && root != root->fs_info->tree_root)) { |
447 | spin_lock(&cache->lock); | 445 | spin_lock(&cache->lock); |
448 | if (cache->cached != BTRFS_CACHE_NO) { | 446 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -471,7 +469,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
471 | if (load_cache_only) | 469 | if (load_cache_only) |
472 | return 0; | 470 | return 0; |
473 | 471 | ||
474 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 472 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
475 | BUG_ON(!caching_ctl); | 473 | BUG_ON(!caching_ctl); |
476 | 474 | ||
477 | INIT_LIST_HEAD(&caching_ctl->list); | 475 | INIT_LIST_HEAD(&caching_ctl->list); |
@@ -1740,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1740 | return ret; | 1738 | return ret; |
1741 | } | 1739 | } |
1742 | 1740 | ||
1743 | static void btrfs_issue_discard(struct block_device *bdev, | 1741 | static int btrfs_issue_discard(struct block_device *bdev, |
1744 | u64 start, u64 len) | 1742 | u64 start, u64 len) |
1745 | { | 1743 | { |
1746 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); | 1744 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); |
1747 | } | 1745 | } |
1748 | 1746 | ||
1749 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1747 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1750 | u64 num_bytes) | 1748 | u64 num_bytes, u64 *actual_bytes) |
1751 | { | 1749 | { |
1752 | int ret; | 1750 | int ret; |
1753 | u64 map_length = num_bytes; | 1751 | u64 discarded_bytes = 0; |
1754 | struct btrfs_multi_bio *multi = NULL; | 1752 | struct btrfs_multi_bio *multi = NULL; |
1755 | 1753 | ||
1756 | if (!btrfs_test_opt(root, DISCARD)) | ||
1757 | return 0; | ||
1758 | 1754 | ||
1759 | /* Tell the block device(s) that the sectors can be discarded */ | 1755 | /* Tell the block device(s) that the sectors can be discarded */ |
1760 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1756 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
1761 | bytenr, &map_length, &multi, 0); | 1757 | bytenr, &num_bytes, &multi, 0); |
1762 | if (!ret) { | 1758 | if (!ret) { |
1763 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1759 | struct btrfs_bio_stripe *stripe = multi->stripes; |
1764 | int i; | 1760 | int i; |
1765 | 1761 | ||
1766 | if (map_length > num_bytes) | ||
1767 | map_length = num_bytes; | ||
1768 | 1762 | ||
1769 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1763 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1770 | btrfs_issue_discard(stripe->dev->bdev, | 1764 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1771 | stripe->physical, | 1765 | stripe->physical, |
1772 | map_length); | 1766 | stripe->length); |
1767 | if (!ret) | ||
1768 | discarded_bytes += stripe->length; | ||
1769 | else if (ret != -EOPNOTSUPP) | ||
1770 | break; | ||
1773 | } | 1771 | } |
1774 | kfree(multi); | 1772 | kfree(multi); |
1775 | } | 1773 | } |
1774 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1775 | ret = 0; | ||
1776 | |||
1777 | if (actual_bytes) | ||
1778 | *actual_bytes = discarded_bytes; | ||
1779 | |||
1776 | 1780 | ||
1777 | return ret; | 1781 | return ret; |
1778 | } | 1782 | } |
@@ -3996,6 +4000,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3996 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4000 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3997 | u64 to_reserve; | 4001 | u64 to_reserve; |
3998 | int nr_extents; | 4002 | int nr_extents; |
4003 | int reserved_extents; | ||
3999 | int ret; | 4004 | int ret; |
4000 | 4005 | ||
4001 | if (btrfs_transaction_in_commit(root->fs_info)) | 4006 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -4003,25 +4008,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4003 | 4008 | ||
4004 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4009 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4005 | 4010 | ||
4006 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
4007 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4011 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
4008 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | 4012 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
4009 | nr_extents -= BTRFS_I(inode)->reserved_extents; | 4013 | |
4014 | if (nr_extents > reserved_extents) { | ||
4015 | nr_extents -= reserved_extents; | ||
4010 | to_reserve = calc_trans_metadata_size(root, nr_extents); | 4016 | to_reserve = calc_trans_metadata_size(root, nr_extents); |
4011 | } else { | 4017 | } else { |
4012 | nr_extents = 0; | 4018 | nr_extents = 0; |
4013 | to_reserve = 0; | 4019 | to_reserve = 0; |
4014 | } | 4020 | } |
4015 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4021 | |
4016 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4022 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
4017 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4023 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
4018 | if (ret) | 4024 | if (ret) |
4019 | return ret; | 4025 | return ret; |
4020 | 4026 | ||
4021 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4027 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); |
4022 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4023 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 4028 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
4024 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
4025 | 4029 | ||
4026 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4030 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4027 | 4031 | ||
@@ -4036,20 +4040,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4036 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4040 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4037 | u64 to_free; | 4041 | u64 to_free; |
4038 | int nr_extents; | 4042 | int nr_extents; |
4043 | int reserved_extents; | ||
4039 | 4044 | ||
4040 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4045 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4041 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4046 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4042 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | 4047 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); |
4043 | 4048 | ||
4044 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4049 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
4045 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4050 | do { |
4046 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | 4051 | int old, new; |
4047 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | 4052 | |
4048 | BTRFS_I(inode)->reserved_extents -= nr_extents; | 4053 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
4049 | } else { | 4054 | if (nr_extents >= reserved_extents) { |
4050 | nr_extents = 0; | 4055 | nr_extents = 0; |
4051 | } | 4056 | break; |
4052 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4057 | } |
4058 | old = reserved_extents; | ||
4059 | nr_extents = reserved_extents - nr_extents; | ||
4060 | new = reserved_extents - nr_extents; | ||
4061 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4062 | reserved_extents, new); | ||
4063 | if (likely(old == reserved_extents)) | ||
4064 | break; | ||
4065 | reserved_extents = old; | ||
4066 | } while (1); | ||
4053 | 4067 | ||
4054 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4068 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4055 | if (nr_extents > 0) | 4069 | if (nr_extents > 0) |
@@ -4223,8 +4237,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4223 | * update size of reserved extents. this function may return -EAGAIN | 4237 | * update size of reserved extents. this function may return -EAGAIN |
4224 | * if 'reserve' is true or 'sinfo' is false. | 4238 | * if 'reserve' is true or 'sinfo' is false. |
4225 | */ | 4239 | */ |
4226 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4240 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
4227 | u64 num_bytes, int reserve, int sinfo) | 4241 | u64 num_bytes, int reserve, int sinfo) |
4228 | { | 4242 | { |
4229 | int ret = 0; | 4243 | int ret = 0; |
4230 | if (sinfo) { | 4244 | if (sinfo) { |
@@ -4363,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4363 | if (ret) | 4377 | if (ret) |
4364 | break; | 4378 | break; |
4365 | 4379 | ||
4366 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 4380 | if (btrfs_test_opt(root, DISCARD)) |
4381 | ret = btrfs_discard_extent(root, start, | ||
4382 | end + 1 - start, NULL); | ||
4367 | 4383 | ||
4368 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 4384 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
4369 | unpin_extent_range(root, start, end); | 4385 | unpin_extent_range(root, start, end); |
@@ -4704,10 +4720,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4704 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4720 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4705 | 4721 | ||
4706 | btrfs_add_free_space(cache, buf->start, buf->len); | 4722 | btrfs_add_free_space(cache, buf->start, buf->len); |
4707 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | 4723 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); |
4708 | if (ret == -EAGAIN) { | 4724 | if (ret == -EAGAIN) { |
4709 | /* block group became read-only */ | 4725 | /* block group became read-only */ |
4710 | update_reserved_bytes(cache, buf->len, 0, 1); | 4726 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); |
4711 | goto out; | 4727 | goto out; |
4712 | } | 4728 | } |
4713 | 4729 | ||
@@ -4744,6 +4760,11 @@ pin: | |||
4744 | } | 4760 | } |
4745 | } | 4761 | } |
4746 | out: | 4762 | out: |
4763 | /* | ||
4764 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | ||
4765 | * anymore. | ||
4766 | */ | ||
4767 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
4747 | btrfs_put_block_group(cache); | 4768 | btrfs_put_block_group(cache); |
4748 | } | 4769 | } |
4749 | 4770 | ||
@@ -5191,7 +5212,7 @@ checks: | |||
5191 | search_start - offset); | 5212 | search_start - offset); |
5192 | BUG_ON(offset > search_start); | 5213 | BUG_ON(offset > search_start); |
5193 | 5214 | ||
5194 | ret = update_reserved_bytes(block_group, num_bytes, 1, | 5215 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, |
5195 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5216 | (data & BTRFS_BLOCK_GROUP_DATA)); |
5196 | if (ret == -EAGAIN) { | 5217 | if (ret == -EAGAIN) { |
5197 | btrfs_add_free_space(block_group, offset, num_bytes); | 5218 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -5397,6 +5418,8 @@ again: | |||
5397 | dump_space_info(sinfo, num_bytes, 1); | 5418 | dump_space_info(sinfo, num_bytes, 1); |
5398 | } | 5419 | } |
5399 | 5420 | ||
5421 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | ||
5422 | |||
5400 | return ret; | 5423 | return ret; |
5401 | } | 5424 | } |
5402 | 5425 | ||
@@ -5412,12 +5435,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5412 | return -ENOSPC; | 5435 | return -ENOSPC; |
5413 | } | 5436 | } |
5414 | 5437 | ||
5415 | ret = btrfs_discard_extent(root, start, len); | 5438 | if (btrfs_test_opt(root, DISCARD)) |
5439 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
5416 | 5440 | ||
5417 | btrfs_add_free_space(cache, start, len); | 5441 | btrfs_add_free_space(cache, start, len); |
5418 | update_reserved_bytes(cache, len, 0, 1); | 5442 | btrfs_update_reserved_bytes(cache, len, 0, 1); |
5419 | btrfs_put_block_group(cache); | 5443 | btrfs_put_block_group(cache); |
5420 | 5444 | ||
5445 | trace_btrfs_reserved_extent_free(root, start, len); | ||
5446 | |||
5421 | return ret; | 5447 | return ret; |
5422 | } | 5448 | } |
5423 | 5449 | ||
@@ -5444,7 +5470,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5444 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); | 5470 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
5445 | 5471 | ||
5446 | path = btrfs_alloc_path(); | 5472 | path = btrfs_alloc_path(); |
5447 | BUG_ON(!path); | 5473 | if (!path) |
5474 | return -ENOMEM; | ||
5448 | 5475 | ||
5449 | path->leave_spinning = 1; | 5476 | path->leave_spinning = 1; |
5450 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5477 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5614,7 +5641,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5614 | put_caching_control(caching_ctl); | 5641 | put_caching_control(caching_ctl); |
5615 | } | 5642 | } |
5616 | 5643 | ||
5617 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); | 5644 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); |
5618 | BUG_ON(ret); | 5645 | BUG_ON(ret); |
5619 | btrfs_put_block_group(block_group); | 5646 | btrfs_put_block_group(block_group); |
5620 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5647 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -6047,6 +6074,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | if (reada && level == 1) | 6074 | if (reada && level == 1) |
6048 | reada_walk_down(trans, root, wc, path); | 6075 | reada_walk_down(trans, root, wc, path); |
6049 | next = read_tree_block(root, bytenr, blocksize, generation); | 6076 | next = read_tree_block(root, bytenr, blocksize, generation); |
6077 | if (!next) | ||
6078 | return -EIO; | ||
6050 | btrfs_tree_lock(next); | 6079 | btrfs_tree_lock(next); |
6051 | btrfs_set_lock_blocking(next); | 6080 | btrfs_set_lock_blocking(next); |
6052 | } | 6081 | } |
@@ -6438,10 +6467,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6438 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 6467 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
6439 | 6468 | ||
6440 | path = btrfs_alloc_path(); | 6469 | path = btrfs_alloc_path(); |
6441 | BUG_ON(!path); | 6470 | if (!path) |
6471 | return -ENOMEM; | ||
6442 | 6472 | ||
6443 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6473 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6444 | BUG_ON(!wc); | 6474 | if (!wc) { |
6475 | btrfs_free_path(path); | ||
6476 | return -ENOMEM; | ||
6477 | } | ||
6445 | 6478 | ||
6446 | btrfs_assert_tree_locked(parent); | 6479 | btrfs_assert_tree_locked(parent); |
6447 | parent_level = btrfs_header_level(parent); | 6480 | parent_level = btrfs_header_level(parent); |
@@ -6899,7 +6932,11 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6899 | } | 6932 | } |
6900 | 6933 | ||
6901 | path = btrfs_alloc_path(); | 6934 | path = btrfs_alloc_path(); |
6902 | BUG_ON(!path); | 6935 | if (!path) { |
6936 | if (exts != *extents) | ||
6937 | kfree(exts); | ||
6938 | return -ENOMEM; | ||
6939 | } | ||
6903 | 6940 | ||
6904 | cur_pos = extent_key->objectid - offset; | 6941 | cur_pos = extent_key->objectid - offset; |
6905 | last_byte = extent_key->objectid + extent_key->offset; | 6942 | last_byte = extent_key->objectid + extent_key->offset; |
@@ -6941,6 +6978,10 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6941 | struct disk_extent *old = exts; | 6978 | struct disk_extent *old = exts; |
6942 | max *= 2; | 6979 | max *= 2; |
6943 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | 6980 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); |
6981 | if (!exts) { | ||
6982 | ret = -ENOMEM; | ||
6983 | goto out; | ||
6984 | } | ||
6944 | memcpy(exts, old, sizeof(*exts) * nr); | 6985 | memcpy(exts, old, sizeof(*exts) * nr); |
6945 | if (old != *extents) | 6986 | if (old != *extents) |
6946 | kfree(old); | 6987 | kfree(old); |
@@ -7423,7 +7464,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
7423 | int ret; | 7464 | int ret; |
7424 | 7465 | ||
7425 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | 7466 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); |
7426 | BUG_ON(!new_extent); | 7467 | if (!new_extent) |
7468 | return -ENOMEM; | ||
7427 | 7469 | ||
7428 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | 7470 | ref = btrfs_lookup_leaf_ref(root, leaf->start); |
7429 | BUG_ON(!ref); | 7471 | BUG_ON(!ref); |
@@ -7609,7 +7651,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | |||
7609 | 7651 | ||
7610 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 7652 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
7611 | BUG_ON(!reloc_root); | 7653 | BUG_ON(!reloc_root); |
7612 | btrfs_orphan_cleanup(reloc_root); | 7654 | ret = btrfs_orphan_cleanup(reloc_root); |
7655 | BUG_ON(ret); | ||
7613 | return 0; | 7656 | return 0; |
7614 | } | 7657 | } |
7615 | 7658 | ||
@@ -7627,7 +7670,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7627 | return 0; | 7670 | return 0; |
7628 | 7671 | ||
7629 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 7672 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
7630 | BUG_ON(!root_item); | 7673 | if (!root_item) |
7674 | return -ENOMEM; | ||
7631 | 7675 | ||
7632 | ret = btrfs_copy_root(trans, root, root->commit_root, | 7676 | ret = btrfs_copy_root(trans, root, root->commit_root, |
7633 | &eb, BTRFS_TREE_RELOC_OBJECTID); | 7677 | &eb, BTRFS_TREE_RELOC_OBJECTID); |
@@ -7653,7 +7697,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7653 | 7697 | ||
7654 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 7698 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, |
7655 | &root_key); | 7699 | &root_key); |
7656 | BUG_ON(!reloc_root); | 7700 | BUG_ON(IS_ERR(reloc_root)); |
7657 | reloc_root->last_trans = trans->transid; | 7701 | reloc_root->last_trans = trans->transid; |
7658 | reloc_root->commit_root = NULL; | 7702 | reloc_root->commit_root = NULL; |
7659 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | 7703 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; |
@@ -7906,6 +7950,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
7906 | 7950 | ||
7907 | eb = read_tree_block(found_root, block_start, | 7951 | eb = read_tree_block(found_root, block_start, |
7908 | block_size, 0); | 7952 | block_size, 0); |
7953 | if (!eb) { | ||
7954 | ret = -EIO; | ||
7955 | goto out; | ||
7956 | } | ||
7909 | btrfs_tree_lock(eb); | 7957 | btrfs_tree_lock(eb); |
7910 | BUG_ON(level != btrfs_header_level(eb)); | 7958 | BUG_ON(level != btrfs_header_level(eb)); |
7911 | 7959 | ||
@@ -8621,6 +8669,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8621 | BUG_ON(!block_group); | 8669 | BUG_ON(!block_group); |
8622 | BUG_ON(!block_group->ro); | 8670 | BUG_ON(!block_group->ro); |
8623 | 8671 | ||
8672 | /* | ||
8673 | * Free the reserved super bytes from this block group before | ||
8674 | * removing it. | ||
8675 | */ | ||
8676 | free_excluded_extents(root, block_group); | ||
8677 | |||
8624 | memcpy(&key, &block_group->key, sizeof(key)); | 8678 | memcpy(&key, &block_group->key, sizeof(key)); |
8625 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | 8679 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | |
8626 | BTRFS_BLOCK_GROUP_RAID1 | | 8680 | BTRFS_BLOCK_GROUP_RAID1 | |
@@ -8724,13 +8778,84 @@ out: | |||
8724 | return ret; | 8778 | return ret; |
8725 | } | 8779 | } |
8726 | 8780 | ||
8781 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
8782 | { | ||
8783 | struct btrfs_space_info *space_info; | ||
8784 | int ret; | ||
8785 | |||
8786 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, | ||
8787 | &space_info); | ||
8788 | if (ret) | ||
8789 | return ret; | ||
8790 | |||
8791 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, | ||
8792 | &space_info); | ||
8793 | if (ret) | ||
8794 | return ret; | ||
8795 | |||
8796 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, | ||
8797 | &space_info); | ||
8798 | if (ret) | ||
8799 | return ret; | ||
8800 | |||
8801 | return ret; | ||
8802 | } | ||
8803 | |||
8727 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | 8804 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
8728 | { | 8805 | { |
8729 | return unpin_extent_range(root, start, end); | 8806 | return unpin_extent_range(root, start, end); |
8730 | } | 8807 | } |
8731 | 8808 | ||
8732 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | 8809 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, |
8733 | u64 num_bytes) | 8810 | u64 num_bytes, u64 *actual_bytes) |
8811 | { | ||
8812 | return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); | ||
8813 | } | ||
8814 | |||
8815 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | ||
8734 | { | 8816 | { |
8735 | return btrfs_discard_extent(root, bytenr, num_bytes); | 8817 | struct btrfs_fs_info *fs_info = root->fs_info; |
8818 | struct btrfs_block_group_cache *cache = NULL; | ||
8819 | u64 group_trimmed; | ||
8820 | u64 start; | ||
8821 | u64 end; | ||
8822 | u64 trimmed = 0; | ||
8823 | int ret = 0; | ||
8824 | |||
8825 | cache = btrfs_lookup_block_group(fs_info, range->start); | ||
8826 | |||
8827 | while (cache) { | ||
8828 | if (cache->key.objectid >= (range->start + range->len)) { | ||
8829 | btrfs_put_block_group(cache); | ||
8830 | break; | ||
8831 | } | ||
8832 | |||
8833 | start = max(range->start, cache->key.objectid); | ||
8834 | end = min(range->start + range->len, | ||
8835 | cache->key.objectid + cache->key.offset); | ||
8836 | |||
8837 | if (end - start >= range->minlen) { | ||
8838 | if (!block_group_cache_done(cache)) { | ||
8839 | ret = cache_block_group(cache, NULL, root, 0); | ||
8840 | if (!ret) | ||
8841 | wait_block_group_cache_done(cache); | ||
8842 | } | ||
8843 | ret = btrfs_trim_block_group(cache, | ||
8844 | &group_trimmed, | ||
8845 | start, | ||
8846 | end, | ||
8847 | range->minlen); | ||
8848 | |||
8849 | trimmed += group_trimmed; | ||
8850 | if (ret) { | ||
8851 | btrfs_put_block_group(cache); | ||
8852 | break; | ||
8853 | } | ||
8854 | } | ||
8855 | |||
8856 | cache = next_block_group(fs_info->tree_root, cache); | ||
8857 | } | ||
8858 | |||
8859 | range->len = trimmed; | ||
8860 | return ret; | ||
8736 | } | 8861 | } |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b5b92824a271..20ddb28602a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2192,6 +2192,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2192 | else | 2192 | else |
2193 | write_flags = WRITE; | 2193 | write_flags = WRITE; |
2194 | 2194 | ||
2195 | trace___extent_writepage(page, inode, wbc); | ||
2196 | |||
2195 | WARN_ON(!PageLocked(page)); | 2197 | WARN_ON(!PageLocked(page)); |
2196 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2198 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2197 | if (page->index > end_index || | 2199 | if (page->index > end_index || |
@@ -3690,6 +3692,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3690 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 3692 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
3691 | eb->len, start, min_len); | 3693 | eb->len, start, min_len); |
3692 | WARN_ON(1); | 3694 | WARN_ON(1); |
3695 | return -EINVAL; | ||
3693 | } | 3696 | } |
3694 | 3697 | ||
3695 | p = extent_buffer_page(eb, i); | 3698 | p = extent_buffer_page(eb, i); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9318dfefd59c..f62c5442835d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
32 | #define EXTENT_BUFFER_BLOCKING 1 | 32 | #define EXTENT_BUFFER_BLOCKING 1 |
33 | #define EXTENT_BUFFER_DIRTY 2 | 33 | #define EXTENT_BUFFER_DIRTY 2 |
34 | #define EXTENT_BUFFER_CORRUPT 3 | ||
34 | 35 | ||
35 | /* these are flags for extent_clear_unlock_delalloc */ | 36 | /* these are flags for extent_clear_unlock_delalloc */ |
36 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 37 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4f19a3e1bf32..a6a9d4e8b491 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
48 | struct extent_buffer *leaf; | 48 | struct extent_buffer *leaf; |
49 | 49 | ||
50 | path = btrfs_alloc_path(); | 50 | path = btrfs_alloc_path(); |
51 | BUG_ON(!path); | 51 | if (!path) |
52 | return -ENOMEM; | ||
52 | file_key.objectid = objectid; | 53 | file_key.objectid = objectid; |
53 | file_key.offset = pos; | 54 | file_key.offset = pos; |
54 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 55 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
169 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 170 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
170 | 171 | ||
171 | path = btrfs_alloc_path(); | 172 | path = btrfs_alloc_path(); |
173 | if (!path) | ||
174 | return -ENOMEM; | ||
172 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) | 175 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) |
173 | path->reada = 2; | 176 | path->reada = 2; |
174 | 177 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f447b783bb84..656bc0a892b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -45,14 +45,14 @@ | |||
45 | * and be replaced with calls into generic code. | 45 | * and be replaced with calls into generic code. |
46 | */ | 46 | */ |
47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
48 | int write_bytes, | 48 | size_t write_bytes, |
49 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
50 | struct iov_iter *i) | 50 | struct iov_iter *i) |
51 | { | 51 | { |
52 | size_t copied = 0; | 52 | size_t copied = 0; |
53 | size_t total_copied = 0; | ||
53 | int pg = 0; | 54 | int pg = 0; |
54 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 55 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
55 | int total_copied = 0; | ||
56 | 56 | ||
57 | while (write_bytes > 0) { | 57 | while (write_bytes > 0) { |
58 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
88 | total_copied += copied; | 88 | total_copied += copied; |
89 | 89 | ||
90 | /* Return to btrfs_file_aio_write to fault page */ | 90 | /* Return to btrfs_file_aio_write to fault page */ |
91 | if (unlikely(copied == 0)) { | 91 | if (unlikely(copied == 0)) |
92 | break; | 92 | break; |
93 | } | ||
94 | 93 | ||
95 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 94 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
96 | offset += copied; | 95 | offset += copied; |
@@ -109,8 +108,6 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
109 | { | 108 | { |
110 | size_t i; | 109 | size_t i; |
111 | for (i = 0; i < num_pages; i++) { | 110 | for (i = 0; i < num_pages; i++) { |
112 | if (!pages[i]) | ||
113 | break; | ||
114 | /* page checked is some magic around finding pages that | 111 | /* page checked is some magic around finding pages that |
115 | * have been modified without going through btrfs_set_page_dirty | 112 | * have been modified without going through btrfs_set_page_dirty |
116 | * clear it here | 113 | * clear it here |
@@ -130,13 +127,12 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
130 | * this also makes the decision about creating an inline extent vs | 127 | * this also makes the decision about creating an inline extent vs |
131 | * doing real data extents, marking pages dirty and delalloc as required. | 128 | * doing real data extents, marking pages dirty and delalloc as required. |
132 | */ | 129 | */ |
133 | static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | 130 | static noinline int dirty_and_release_pages(struct btrfs_root *root, |
134 | struct btrfs_root *root, | 131 | struct file *file, |
135 | struct file *file, | 132 | struct page **pages, |
136 | struct page **pages, | 133 | size_t num_pages, |
137 | size_t num_pages, | 134 | loff_t pos, |
138 | loff_t pos, | 135 | size_t write_bytes) |
139 | size_t write_bytes) | ||
140 | { | 136 | { |
141 | int err = 0; | 137 | int err = 0; |
142 | int i; | 138 | int i; |
@@ -154,7 +150,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
154 | end_of_last_block = start_pos + num_bytes - 1; | 150 | end_of_last_block = start_pos + num_bytes - 1; |
155 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 151 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
156 | NULL); | 152 | NULL); |
157 | BUG_ON(err); | 153 | if (err) |
154 | return err; | ||
158 | 155 | ||
159 | for (i = 0; i < num_pages; i++) { | 156 | for (i = 0; i < num_pages; i++) { |
160 | struct page *p = pages[i]; | 157 | struct page *p = pages[i]; |
@@ -162,13 +159,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
162 | ClearPageChecked(p); | 159 | ClearPageChecked(p); |
163 | set_page_dirty(p); | 160 | set_page_dirty(p); |
164 | } | 161 | } |
165 | if (end_pos > isize) { | 162 | |
163 | /* | ||
164 | * we've only changed i_size in ram, and we haven't updated | ||
165 | * the disk i_size. There is no need to log the inode | ||
166 | * at this time. | ||
167 | */ | ||
168 | if (end_pos > isize) | ||
166 | i_size_write(inode, end_pos); | 169 | i_size_write(inode, end_pos); |
167 | /* we've only changed i_size in ram, and we haven't updated | ||
168 | * the disk i_size. There is no need to log the inode | ||
169 | * at this time. | ||
170 | */ | ||
171 | } | ||
172 | return 0; | 170 | return 0; |
173 | } | 171 | } |
174 | 172 | ||
@@ -610,6 +608,8 @@ again: | |||
610 | key.offset = split; | 608 | key.offset = split; |
611 | 609 | ||
612 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 610 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
611 | if (ret < 0) | ||
612 | goto out; | ||
613 | if (ret > 0 && path->slots[0] > 0) | 613 | if (ret > 0 && path->slots[0] > 0) |
614 | path->slots[0]--; | 614 | path->slots[0]--; |
615 | 615 | ||
@@ -819,12 +819,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
819 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 819 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
820 | 820 | ||
821 | if (start_pos > inode->i_size) { | 821 | if (start_pos > inode->i_size) { |
822 | err = btrfs_cont_expand(inode, start_pos); | 822 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); |
823 | if (err) | 823 | if (err) |
824 | return err; | 824 | return err; |
825 | } | 825 | } |
826 | 826 | ||
827 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
828 | again: | 827 | again: |
829 | for (i = 0; i < num_pages; i++) { | 828 | for (i = 0; i < num_pages; i++) { |
830 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 829 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
@@ -896,156 +895,71 @@ fail: | |||
896 | 895 | ||
897 | } | 896 | } |
898 | 897 | ||
899 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 898 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
900 | const struct iovec *iov, | 899 | struct iov_iter *i, |
901 | unsigned long nr_segs, loff_t pos) | 900 | loff_t pos) |
902 | { | 901 | { |
903 | struct file *file = iocb->ki_filp; | ||
904 | struct inode *inode = fdentry(file)->d_inode; | 902 | struct inode *inode = fdentry(file)->d_inode; |
905 | struct btrfs_root *root = BTRFS_I(inode)->root; | 903 | struct btrfs_root *root = BTRFS_I(inode)->root; |
906 | struct page **pages = NULL; | 904 | struct page **pages = NULL; |
907 | struct iov_iter i; | ||
908 | loff_t *ppos = &iocb->ki_pos; | ||
909 | loff_t start_pos; | ||
910 | ssize_t num_written = 0; | ||
911 | ssize_t err = 0; | ||
912 | size_t count; | ||
913 | size_t ocount; | ||
914 | int ret = 0; | ||
915 | int nrptrs; | ||
916 | unsigned long first_index; | 905 | unsigned long first_index; |
917 | unsigned long last_index; | 906 | unsigned long last_index; |
918 | int will_write; | 907 | size_t num_written = 0; |
919 | int buffered = 0; | 908 | int nrptrs; |
920 | int copied = 0; | 909 | int ret; |
921 | int dirty_pages = 0; | ||
922 | |||
923 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | ||
924 | (file->f_flags & O_DIRECT)); | ||
925 | |||
926 | start_pos = pos; | ||
927 | |||
928 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
929 | |||
930 | mutex_lock(&inode->i_mutex); | ||
931 | |||
932 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
933 | if (err) | ||
934 | goto out; | ||
935 | count = ocount; | ||
936 | |||
937 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
938 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
939 | if (err) | ||
940 | goto out; | ||
941 | |||
942 | if (count == 0) | ||
943 | goto out; | ||
944 | |||
945 | err = file_remove_suid(file); | ||
946 | if (err) | ||
947 | goto out; | ||
948 | |||
949 | /* | ||
950 | * If BTRFS flips readonly due to some impossible error | ||
951 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
952 | * although we have opened a file as writable, we have | ||
953 | * to stop this write operation to ensure FS consistency. | ||
954 | */ | ||
955 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
956 | err = -EROFS; | ||
957 | goto out; | ||
958 | } | ||
959 | |||
960 | file_update_time(file); | ||
961 | BTRFS_I(inode)->sequence++; | ||
962 | |||
963 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
964 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
965 | pos, ppos, count, | ||
966 | ocount); | ||
967 | /* | ||
968 | * the generic O_DIRECT will update in-memory i_size after the | ||
969 | * DIOs are done. But our endio handlers that update the on | ||
970 | * disk i_size never update past the in memory i_size. So we | ||
971 | * need one more update here to catch any additions to the | ||
972 | * file | ||
973 | */ | ||
974 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
975 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
976 | mark_inode_dirty(inode); | ||
977 | } | ||
978 | |||
979 | if (num_written < 0) { | ||
980 | ret = num_written; | ||
981 | num_written = 0; | ||
982 | goto out; | ||
983 | } else if (num_written == count) { | ||
984 | /* pick up pos changes done by the generic code */ | ||
985 | pos = *ppos; | ||
986 | goto out; | ||
987 | } | ||
988 | /* | ||
989 | * We are going to do buffered for the rest of the range, so we | ||
990 | * need to make sure to invalidate the buffered pages when we're | ||
991 | * done. | ||
992 | */ | ||
993 | buffered = 1; | ||
994 | pos += num_written; | ||
995 | } | ||
996 | 910 | ||
997 | iov_iter_init(&i, iov, nr_segs, count, num_written); | 911 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
998 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
999 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 912 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
1000 | (sizeof(struct page *))); | 913 | (sizeof(struct page *))); |
1001 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 914 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
1002 | if (!pages) { | 915 | if (!pages) |
1003 | ret = -ENOMEM; | 916 | return -ENOMEM; |
1004 | goto out; | ||
1005 | } | ||
1006 | |||
1007 | /* generic_write_checks can change our pos */ | ||
1008 | start_pos = pos; | ||
1009 | 917 | ||
1010 | first_index = pos >> PAGE_CACHE_SHIFT; | 918 | first_index = pos >> PAGE_CACHE_SHIFT; |
1011 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; | 919 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; |
1012 | 920 | ||
1013 | while (iov_iter_count(&i) > 0) { | 921 | while (iov_iter_count(i) > 0) { |
1014 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 922 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
1015 | size_t write_bytes = min(iov_iter_count(&i), | 923 | size_t write_bytes = min(iov_iter_count(i), |
1016 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 924 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
1017 | offset); | 925 | offset); |
1018 | size_t num_pages = (write_bytes + offset + | 926 | size_t num_pages = (write_bytes + offset + |
1019 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 927 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
928 | size_t dirty_pages; | ||
929 | size_t copied; | ||
1020 | 930 | ||
1021 | WARN_ON(num_pages > nrptrs); | 931 | WARN_ON(num_pages > nrptrs); |
1022 | memset(pages, 0, sizeof(struct page *) * nrptrs); | ||
1023 | 932 | ||
1024 | /* | 933 | /* |
1025 | * Fault pages before locking them in prepare_pages | 934 | * Fault pages before locking them in prepare_pages |
1026 | * to avoid recursive lock | 935 | * to avoid recursive lock |
1027 | */ | 936 | */ |
1028 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | 937 | if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) { |
1029 | ret = -EFAULT; | 938 | ret = -EFAULT; |
1030 | goto out; | 939 | break; |
1031 | } | 940 | } |
1032 | 941 | ||
1033 | ret = btrfs_delalloc_reserve_space(inode, | 942 | ret = btrfs_delalloc_reserve_space(inode, |
1034 | num_pages << PAGE_CACHE_SHIFT); | 943 | num_pages << PAGE_CACHE_SHIFT); |
1035 | if (ret) | 944 | if (ret) |
1036 | goto out; | 945 | break; |
1037 | 946 | ||
947 | /* | ||
948 | * This is going to setup the pages array with the number of | ||
949 | * pages we want, so we don't really need to worry about the | ||
950 | * contents of pages from loop to loop | ||
951 | */ | ||
1038 | ret = prepare_pages(root, file, pages, num_pages, | 952 | ret = prepare_pages(root, file, pages, num_pages, |
1039 | pos, first_index, last_index, | 953 | pos, first_index, last_index, |
1040 | write_bytes); | 954 | write_bytes); |
1041 | if (ret) { | 955 | if (ret) { |
1042 | btrfs_delalloc_release_space(inode, | 956 | btrfs_delalloc_release_space(inode, |
1043 | num_pages << PAGE_CACHE_SHIFT); | 957 | num_pages << PAGE_CACHE_SHIFT); |
1044 | goto out; | 958 | break; |
1045 | } | 959 | } |
1046 | 960 | ||
1047 | copied = btrfs_copy_from_user(pos, num_pages, | 961 | copied = btrfs_copy_from_user(pos, num_pages, |
1048 | write_bytes, pages, &i); | 962 | write_bytes, pages, i); |
1049 | 963 | ||
1050 | /* | 964 | /* |
1051 | * if we have trouble faulting in the pages, fall | 965 | * if we have trouble faulting in the pages, fall |
@@ -1061,6 +975,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1061 | PAGE_CACHE_SIZE - 1) >> | 975 | PAGE_CACHE_SIZE - 1) >> |
1062 | PAGE_CACHE_SHIFT; | 976 | PAGE_CACHE_SHIFT; |
1063 | 977 | ||
978 | /* | ||
979 | * If we had a short copy we need to release the excess delalloc | ||
980 | * bytes we reserved. We need to increment outstanding_extents | ||
981 | * because btrfs_delalloc_release_space will decrement it, but | ||
982 | * we still have an outstanding extent for the chunk we actually | ||
983 | * managed to copy. | ||
984 | */ | ||
1064 | if (num_pages > dirty_pages) { | 985 | if (num_pages > dirty_pages) { |
1065 | if (copied > 0) | 986 | if (copied > 0) |
1066 | atomic_inc( | 987 | atomic_inc( |
@@ -1071,39 +992,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1071 | } | 992 | } |
1072 | 993 | ||
1073 | if (copied > 0) { | 994 | if (copied > 0) { |
1074 | dirty_and_release_pages(NULL, root, file, pages, | 995 | ret = dirty_and_release_pages(root, file, pages, |
1075 | dirty_pages, pos, copied); | 996 | dirty_pages, pos, |
997 | copied); | ||
998 | if (ret) { | ||
999 | btrfs_delalloc_release_space(inode, | ||
1000 | dirty_pages << PAGE_CACHE_SHIFT); | ||
1001 | btrfs_drop_pages(pages, num_pages); | ||
1002 | break; | ||
1003 | } | ||
1076 | } | 1004 | } |
1077 | 1005 | ||
1078 | btrfs_drop_pages(pages, num_pages); | 1006 | btrfs_drop_pages(pages, num_pages); |
1079 | 1007 | ||
1080 | if (copied > 0) { | 1008 | cond_resched(); |
1081 | if (will_write) { | 1009 | |
1082 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1010 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1083 | pos + copied - 1); | 1011 | dirty_pages); |
1084 | } else { | 1012 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1085 | balance_dirty_pages_ratelimited_nr( | 1013 | btrfs_btree_balance_dirty(root, 1); |
1086 | inode->i_mapping, | 1014 | btrfs_throttle(root); |
1087 | dirty_pages); | ||
1088 | if (dirty_pages < | ||
1089 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1090 | btrfs_btree_balance_dirty(root, 1); | ||
1091 | btrfs_throttle(root); | ||
1092 | } | ||
1093 | } | ||
1094 | 1015 | ||
1095 | pos += copied; | 1016 | pos += copied; |
1096 | num_written += copied; | 1017 | num_written += copied; |
1018 | } | ||
1097 | 1019 | ||
1098 | cond_resched(); | 1020 | kfree(pages); |
1021 | |||
1022 | return num_written ? num_written : ret; | ||
1023 | } | ||
1024 | |||
1025 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | ||
1026 | const struct iovec *iov, | ||
1027 | unsigned long nr_segs, loff_t pos, | ||
1028 | loff_t *ppos, size_t count, size_t ocount) | ||
1029 | { | ||
1030 | struct file *file = iocb->ki_filp; | ||
1031 | struct inode *inode = fdentry(file)->d_inode; | ||
1032 | struct iov_iter i; | ||
1033 | ssize_t written; | ||
1034 | ssize_t written_buffered; | ||
1035 | loff_t endbyte; | ||
1036 | int err; | ||
1037 | |||
1038 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, | ||
1039 | count, ocount); | ||
1040 | |||
1041 | /* | ||
1042 | * the generic O_DIRECT will update in-memory i_size after the | ||
1043 | * DIOs are done. But our endio handlers that update the on | ||
1044 | * disk i_size never update past the in memory i_size. So we | ||
1045 | * need one more update here to catch any additions to the | ||
1046 | * file | ||
1047 | */ | ||
1048 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
1049 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
1050 | mark_inode_dirty(inode); | ||
1099 | } | 1051 | } |
1052 | |||
1053 | if (written < 0 || written == count) | ||
1054 | return written; | ||
1055 | |||
1056 | pos += written; | ||
1057 | count -= written; | ||
1058 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
1059 | written_buffered = __btrfs_buffered_write(file, &i, pos); | ||
1060 | if (written_buffered < 0) { | ||
1061 | err = written_buffered; | ||
1062 | goto out; | ||
1063 | } | ||
1064 | endbyte = pos + written_buffered - 1; | ||
1065 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | ||
1066 | if (err) | ||
1067 | goto out; | ||
1068 | written += written_buffered; | ||
1069 | *ppos = pos + written_buffered; | ||
1070 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | ||
1071 | endbyte >> PAGE_CACHE_SHIFT); | ||
1100 | out: | 1072 | out: |
1101 | mutex_unlock(&inode->i_mutex); | 1073 | return written ? written : err; |
1102 | if (ret) | 1074 | } |
1103 | err = ret; | ||
1104 | 1075 | ||
1105 | kfree(pages); | 1076 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
1106 | *ppos = pos; | 1077 | const struct iovec *iov, |
1078 | unsigned long nr_segs, loff_t pos) | ||
1079 | { | ||
1080 | struct file *file = iocb->ki_filp; | ||
1081 | struct inode *inode = fdentry(file)->d_inode; | ||
1082 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1083 | loff_t *ppos = &iocb->ki_pos; | ||
1084 | ssize_t num_written = 0; | ||
1085 | ssize_t err = 0; | ||
1086 | size_t count, ocount; | ||
1087 | |||
1088 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
1089 | |||
1090 | mutex_lock(&inode->i_mutex); | ||
1091 | |||
1092 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
1093 | if (err) { | ||
1094 | mutex_unlock(&inode->i_mutex); | ||
1095 | goto out; | ||
1096 | } | ||
1097 | count = ocount; | ||
1098 | |||
1099 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
1100 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
1101 | if (err) { | ||
1102 | mutex_unlock(&inode->i_mutex); | ||
1103 | goto out; | ||
1104 | } | ||
1105 | |||
1106 | if (count == 0) { | ||
1107 | mutex_unlock(&inode->i_mutex); | ||
1108 | goto out; | ||
1109 | } | ||
1110 | |||
1111 | err = file_remove_suid(file); | ||
1112 | if (err) { | ||
1113 | mutex_unlock(&inode->i_mutex); | ||
1114 | goto out; | ||
1115 | } | ||
1116 | |||
1117 | /* | ||
1118 | * If BTRFS flips readonly due to some impossible error | ||
1119 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
1120 | * although we have opened a file as writable, we have | ||
1121 | * to stop this write operation to ensure FS consistency. | ||
1122 | */ | ||
1123 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
1124 | mutex_unlock(&inode->i_mutex); | ||
1125 | err = -EROFS; | ||
1126 | goto out; | ||
1127 | } | ||
1128 | |||
1129 | file_update_time(file); | ||
1130 | BTRFS_I(inode)->sequence++; | ||
1131 | |||
1132 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
1133 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | ||
1134 | pos, ppos, count, ocount); | ||
1135 | } else { | ||
1136 | struct iov_iter i; | ||
1137 | |||
1138 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
1139 | |||
1140 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
1141 | if (num_written > 0) | ||
1142 | *ppos = pos + num_written; | ||
1143 | } | ||
1144 | |||
1145 | mutex_unlock(&inode->i_mutex); | ||
1107 | 1146 | ||
1108 | /* | 1147 | /* |
1109 | * we want to make sure fsync finds this change | 1148 | * we want to make sure fsync finds this change |
@@ -1118,43 +1157,12 @@ out: | |||
1118 | * one running right now. | 1157 | * one running right now. |
1119 | */ | 1158 | */ |
1120 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1159 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
1121 | 1160 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | |
1122 | if (num_written > 0 && will_write) { | 1161 | err = generic_write_sync(file, pos, num_written); |
1123 | struct btrfs_trans_handle *trans; | 1162 | if (err < 0 && num_written > 0) |
1124 | |||
1125 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); | ||
1126 | if (err) | ||
1127 | num_written = err; | 1163 | num_written = err; |
1128 | |||
1129 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
1130 | trans = btrfs_start_transaction(root, 0); | ||
1131 | if (IS_ERR(trans)) { | ||
1132 | num_written = PTR_ERR(trans); | ||
1133 | goto done; | ||
1134 | } | ||
1135 | mutex_lock(&inode->i_mutex); | ||
1136 | ret = btrfs_log_dentry_safe(trans, root, | ||
1137 | file->f_dentry); | ||
1138 | mutex_unlock(&inode->i_mutex); | ||
1139 | if (ret == 0) { | ||
1140 | ret = btrfs_sync_log(trans, root); | ||
1141 | if (ret == 0) | ||
1142 | btrfs_end_transaction(trans, root); | ||
1143 | else | ||
1144 | btrfs_commit_transaction(trans, root); | ||
1145 | } else if (ret != BTRFS_NO_LOG_SYNC) { | ||
1146 | btrfs_commit_transaction(trans, root); | ||
1147 | } else { | ||
1148 | btrfs_end_transaction(trans, root); | ||
1149 | } | ||
1150 | } | ||
1151 | if (file->f_flags & O_DIRECT && buffered) { | ||
1152 | invalidate_mapping_pages(inode->i_mapping, | ||
1153 | start_pos >> PAGE_CACHE_SHIFT, | ||
1154 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1155 | } | ||
1156 | } | 1164 | } |
1157 | done: | 1165 | out: |
1158 | current->backing_dev_info = NULL; | 1166 | current->backing_dev_info = NULL; |
1159 | return num_written ? num_written : err; | 1167 | return num_written ? num_written : err; |
1160 | } | 1168 | } |
@@ -1197,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1197 | int ret = 0; | 1205 | int ret = 0; |
1198 | struct btrfs_trans_handle *trans; | 1206 | struct btrfs_trans_handle *trans; |
1199 | 1207 | ||
1208 | trace_btrfs_sync_file(file, datasync); | ||
1200 | 1209 | ||
1201 | /* we wait first, since the writeback may change the inode */ | 1210 | /* we wait first, since the writeback may change the inode */ |
1202 | root->log_batch++; | 1211 | root->log_batch++; |
@@ -1324,7 +1333,8 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1324 | goto out; | 1333 | goto out; |
1325 | 1334 | ||
1326 | if (alloc_start > inode->i_size) { | 1335 | if (alloc_start > inode->i_size) { |
1327 | ret = btrfs_cont_expand(inode, alloc_start); | 1336 | ret = btrfs_cont_expand(inode, i_size_read(inode), |
1337 | alloc_start); | ||
1328 | if (ret) | 1338 | if (ret) |
1329 | goto out; | 1339 | goto out; |
1330 | } | 1340 | } |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a0390657451b..0037427d8a9d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -393,7 +393,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
393 | break; | 393 | break; |
394 | 394 | ||
395 | need_loop = 1; | 395 | need_loop = 1; |
396 | e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 396 | e = kmem_cache_zalloc(btrfs_free_space_cachep, |
397 | GFP_NOFS); | ||
397 | if (!e) { | 398 | if (!e) { |
398 | kunmap(page); | 399 | kunmap(page); |
399 | unlock_page(page); | 400 | unlock_page(page); |
@@ -405,7 +406,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
405 | e->bytes = le64_to_cpu(entry->bytes); | 406 | e->bytes = le64_to_cpu(entry->bytes); |
406 | if (!e->bytes) { | 407 | if (!e->bytes) { |
407 | kunmap(page); | 408 | kunmap(page); |
408 | kfree(e); | 409 | kmem_cache_free(btrfs_free_space_cachep, e); |
409 | unlock_page(page); | 410 | unlock_page(page); |
410 | page_cache_release(page); | 411 | page_cache_release(page); |
411 | goto free_cache; | 412 | goto free_cache; |
@@ -420,7 +421,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
420 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 421 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
421 | if (!e->bitmap) { | 422 | if (!e->bitmap) { |
422 | kunmap(page); | 423 | kunmap(page); |
423 | kfree(e); | 424 | kmem_cache_free( |
425 | btrfs_free_space_cachep, e); | ||
424 | unlock_page(page); | 426 | unlock_page(page); |
425 | page_cache_release(page); | 427 | page_cache_release(page); |
426 | goto free_cache; | 428 | goto free_cache; |
@@ -1187,7 +1189,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group, | |||
1187 | { | 1189 | { |
1188 | unlink_free_space(block_group, bitmap_info); | 1190 | unlink_free_space(block_group, bitmap_info); |
1189 | kfree(bitmap_info->bitmap); | 1191 | kfree(bitmap_info->bitmap); |
1190 | kfree(bitmap_info); | 1192 | kmem_cache_free(btrfs_free_space_cachep, bitmap_info); |
1191 | block_group->total_bitmaps--; | 1193 | block_group->total_bitmaps--; |
1192 | recalculate_thresholds(block_group); | 1194 | recalculate_thresholds(block_group); |
1193 | } | 1195 | } |
@@ -1285,9 +1287,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | |||
1285 | * If we are below the extents threshold then we can add this as an | 1287 | * If we are below the extents threshold then we can add this as an |
1286 | * extent, and don't have to deal with the bitmap | 1288 | * extent, and don't have to deal with the bitmap |
1287 | */ | 1289 | */ |
1288 | if (block_group->free_extents < block_group->extents_thresh && | 1290 | if (block_group->free_extents < block_group->extents_thresh) { |
1289 | info->bytes > block_group->sectorsize * 4) | 1291 | /* |
1290 | return 0; | 1292 | * If this block group has some small extents we don't want to |
1293 | * use up all of our free slots in the cache with them, we want | ||
1294 | * to reserve them to larger extents, however if we have plenty | ||
1295 | * of cache left then go ahead and add them, no sense in adding | ||
1296 | * the overhead of a bitmap if we don't have to. | ||
1297 | */ | ||
1298 | if (info->bytes <= block_group->sectorsize * 4) { | ||
1299 | if (block_group->free_extents * 2 <= | ||
1300 | block_group->extents_thresh) | ||
1301 | return 0; | ||
1302 | } else { | ||
1303 | return 0; | ||
1304 | } | ||
1305 | } | ||
1291 | 1306 | ||
1292 | /* | 1307 | /* |
1293 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1308 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
@@ -1342,8 +1357,8 @@ new_bitmap: | |||
1342 | 1357 | ||
1343 | /* no pre-allocated info, allocate a new one */ | 1358 | /* no pre-allocated info, allocate a new one */ |
1344 | if (!info) { | 1359 | if (!info) { |
1345 | info = kzalloc(sizeof(struct btrfs_free_space), | 1360 | info = kmem_cache_zalloc(btrfs_free_space_cachep, |
1346 | GFP_NOFS); | 1361 | GFP_NOFS); |
1347 | if (!info) { | 1362 | if (!info) { |
1348 | spin_lock(&block_group->tree_lock); | 1363 | spin_lock(&block_group->tree_lock); |
1349 | ret = -ENOMEM; | 1364 | ret = -ENOMEM; |
@@ -1365,7 +1380,7 @@ out: | |||
1365 | if (info) { | 1380 | if (info) { |
1366 | if (info->bitmap) | 1381 | if (info->bitmap) |
1367 | kfree(info->bitmap); | 1382 | kfree(info->bitmap); |
1368 | kfree(info); | 1383 | kmem_cache_free(btrfs_free_space_cachep, info); |
1369 | } | 1384 | } |
1370 | 1385 | ||
1371 | return ret; | 1386 | return ret; |
@@ -1398,7 +1413,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, | |||
1398 | else | 1413 | else |
1399 | __unlink_free_space(block_group, right_info); | 1414 | __unlink_free_space(block_group, right_info); |
1400 | info->bytes += right_info->bytes; | 1415 | info->bytes += right_info->bytes; |
1401 | kfree(right_info); | 1416 | kmem_cache_free(btrfs_free_space_cachep, right_info); |
1402 | merged = true; | 1417 | merged = true; |
1403 | } | 1418 | } |
1404 | 1419 | ||
@@ -1410,7 +1425,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, | |||
1410 | __unlink_free_space(block_group, left_info); | 1425 | __unlink_free_space(block_group, left_info); |
1411 | info->offset = left_info->offset; | 1426 | info->offset = left_info->offset; |
1412 | info->bytes += left_info->bytes; | 1427 | info->bytes += left_info->bytes; |
1413 | kfree(left_info); | 1428 | kmem_cache_free(btrfs_free_space_cachep, left_info); |
1414 | merged = true; | 1429 | merged = true; |
1415 | } | 1430 | } |
1416 | 1431 | ||
@@ -1423,7 +1438,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1423 | struct btrfs_free_space *info; | 1438 | struct btrfs_free_space *info; |
1424 | int ret = 0; | 1439 | int ret = 0; |
1425 | 1440 | ||
1426 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 1441 | info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); |
1427 | if (!info) | 1442 | if (!info) |
1428 | return -ENOMEM; | 1443 | return -ENOMEM; |
1429 | 1444 | ||
@@ -1450,7 +1465,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1450 | link: | 1465 | link: |
1451 | ret = link_free_space(block_group, info); | 1466 | ret = link_free_space(block_group, info); |
1452 | if (ret) | 1467 | if (ret) |
1453 | kfree(info); | 1468 | kmem_cache_free(btrfs_free_space_cachep, info); |
1454 | out: | 1469 | out: |
1455 | spin_unlock(&block_group->tree_lock); | 1470 | spin_unlock(&block_group->tree_lock); |
1456 | 1471 | ||
@@ -1520,7 +1535,7 @@ again: | |||
1520 | kfree(info->bitmap); | 1535 | kfree(info->bitmap); |
1521 | block_group->total_bitmaps--; | 1536 | block_group->total_bitmaps--; |
1522 | } | 1537 | } |
1523 | kfree(info); | 1538 | kmem_cache_free(btrfs_free_space_cachep, info); |
1524 | goto out_lock; | 1539 | goto out_lock; |
1525 | } | 1540 | } |
1526 | 1541 | ||
@@ -1556,7 +1571,7 @@ again: | |||
1556 | /* the hole we're creating ends at the end | 1571 | /* the hole we're creating ends at the end |
1557 | * of the info struct, just free the info | 1572 | * of the info struct, just free the info |
1558 | */ | 1573 | */ |
1559 | kfree(info); | 1574 | kmem_cache_free(btrfs_free_space_cachep, info); |
1560 | } | 1575 | } |
1561 | spin_unlock(&block_group->tree_lock); | 1576 | spin_unlock(&block_group->tree_lock); |
1562 | 1577 | ||
@@ -1629,30 +1644,28 @@ __btrfs_return_cluster_to_free_space( | |||
1629 | { | 1644 | { |
1630 | struct btrfs_free_space *entry; | 1645 | struct btrfs_free_space *entry; |
1631 | struct rb_node *node; | 1646 | struct rb_node *node; |
1632 | bool bitmap; | ||
1633 | 1647 | ||
1634 | spin_lock(&cluster->lock); | 1648 | spin_lock(&cluster->lock); |
1635 | if (cluster->block_group != block_group) | 1649 | if (cluster->block_group != block_group) |
1636 | goto out; | 1650 | goto out; |
1637 | 1651 | ||
1638 | bitmap = cluster->points_to_bitmap; | ||
1639 | cluster->block_group = NULL; | 1652 | cluster->block_group = NULL; |
1640 | cluster->window_start = 0; | 1653 | cluster->window_start = 0; |
1641 | list_del_init(&cluster->block_group_list); | 1654 | list_del_init(&cluster->block_group_list); |
1642 | cluster->points_to_bitmap = false; | ||
1643 | |||
1644 | if (bitmap) | ||
1645 | goto out; | ||
1646 | 1655 | ||
1647 | node = rb_first(&cluster->root); | 1656 | node = rb_first(&cluster->root); |
1648 | while (node) { | 1657 | while (node) { |
1658 | bool bitmap; | ||
1659 | |||
1649 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1660 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1650 | node = rb_next(&entry->offset_index); | 1661 | node = rb_next(&entry->offset_index); |
1651 | rb_erase(&entry->offset_index, &cluster->root); | 1662 | rb_erase(&entry->offset_index, &cluster->root); |
1652 | BUG_ON(entry->bitmap); | 1663 | |
1653 | try_merge_free_space(block_group, entry, false); | 1664 | bitmap = (entry->bitmap != NULL); |
1665 | if (!bitmap) | ||
1666 | try_merge_free_space(block_group, entry, false); | ||
1654 | tree_insert_offset(&block_group->free_space_offset, | 1667 | tree_insert_offset(&block_group->free_space_offset, |
1655 | entry->offset, &entry->offset_index, 0); | 1668 | entry->offset, &entry->offset_index, bitmap); |
1656 | } | 1669 | } |
1657 | cluster->root = RB_ROOT; | 1670 | cluster->root = RB_ROOT; |
1658 | 1671 | ||
@@ -1689,7 +1702,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
1689 | unlink_free_space(block_group, info); | 1702 | unlink_free_space(block_group, info); |
1690 | if (info->bitmap) | 1703 | if (info->bitmap) |
1691 | kfree(info->bitmap); | 1704 | kfree(info->bitmap); |
1692 | kfree(info); | 1705 | kmem_cache_free(btrfs_free_space_cachep, info); |
1693 | if (need_resched()) { | 1706 | if (need_resched()) { |
1694 | spin_unlock(&block_group->tree_lock); | 1707 | spin_unlock(&block_group->tree_lock); |
1695 | cond_resched(); | 1708 | cond_resched(); |
@@ -1722,7 +1735,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
1722 | entry->offset += bytes; | 1735 | entry->offset += bytes; |
1723 | entry->bytes -= bytes; | 1736 | entry->bytes -= bytes; |
1724 | if (!entry->bytes) | 1737 | if (!entry->bytes) |
1725 | kfree(entry); | 1738 | kmem_cache_free(btrfs_free_space_cachep, entry); |
1726 | else | 1739 | else |
1727 | link_free_space(block_group, entry); | 1740 | link_free_space(block_group, entry); |
1728 | } | 1741 | } |
@@ -1775,50 +1788,24 @@ int btrfs_return_cluster_to_free_space( | |||
1775 | 1788 | ||
1776 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | 1789 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, |
1777 | struct btrfs_free_cluster *cluster, | 1790 | struct btrfs_free_cluster *cluster, |
1791 | struct btrfs_free_space *entry, | ||
1778 | u64 bytes, u64 min_start) | 1792 | u64 bytes, u64 min_start) |
1779 | { | 1793 | { |
1780 | struct btrfs_free_space *entry; | ||
1781 | int err; | 1794 | int err; |
1782 | u64 search_start = cluster->window_start; | 1795 | u64 search_start = cluster->window_start; |
1783 | u64 search_bytes = bytes; | 1796 | u64 search_bytes = bytes; |
1784 | u64 ret = 0; | 1797 | u64 ret = 0; |
1785 | 1798 | ||
1786 | spin_lock(&block_group->tree_lock); | ||
1787 | spin_lock(&cluster->lock); | ||
1788 | |||
1789 | if (!cluster->points_to_bitmap) | ||
1790 | goto out; | ||
1791 | |||
1792 | if (cluster->block_group != block_group) | ||
1793 | goto out; | ||
1794 | |||
1795 | /* | ||
1796 | * search_start is the beginning of the bitmap, but at some point it may | ||
1797 | * be a good idea to point to the actual start of the free area in the | ||
1798 | * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only | ||
1799 | * to 1 to make sure we get the bitmap entry | ||
1800 | */ | ||
1801 | entry = tree_search_offset(block_group, | ||
1802 | offset_to_bitmap(block_group, search_start), | ||
1803 | 1, 0); | ||
1804 | if (!entry || !entry->bitmap) | ||
1805 | goto out; | ||
1806 | |||
1807 | search_start = min_start; | 1799 | search_start = min_start; |
1808 | search_bytes = bytes; | 1800 | search_bytes = bytes; |
1809 | 1801 | ||
1810 | err = search_bitmap(block_group, entry, &search_start, | 1802 | err = search_bitmap(block_group, entry, &search_start, |
1811 | &search_bytes); | 1803 | &search_bytes); |
1812 | if (err) | 1804 | if (err) |
1813 | goto out; | 1805 | return 0; |
1814 | 1806 | ||
1815 | ret = search_start; | 1807 | ret = search_start; |
1816 | bitmap_clear_bits(block_group, entry, ret, bytes); | 1808 | bitmap_clear_bits(block_group, entry, ret, bytes); |
1817 | if (entry->bytes == 0) | ||
1818 | free_bitmap(block_group, entry); | ||
1819 | out: | ||
1820 | spin_unlock(&cluster->lock); | ||
1821 | spin_unlock(&block_group->tree_lock); | ||
1822 | 1809 | ||
1823 | return ret; | 1810 | return ret; |
1824 | } | 1811 | } |
@@ -1836,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1836 | struct rb_node *node; | 1823 | struct rb_node *node; |
1837 | u64 ret = 0; | 1824 | u64 ret = 0; |
1838 | 1825 | ||
1839 | if (cluster->points_to_bitmap) | ||
1840 | return btrfs_alloc_from_bitmap(block_group, cluster, bytes, | ||
1841 | min_start); | ||
1842 | |||
1843 | spin_lock(&cluster->lock); | 1826 | spin_lock(&cluster->lock); |
1844 | if (bytes > cluster->max_size) | 1827 | if (bytes > cluster->max_size) |
1845 | goto out; | 1828 | goto out; |
@@ -1852,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1852 | goto out; | 1835 | goto out; |
1853 | 1836 | ||
1854 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1837 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1855 | |||
1856 | while(1) { | 1838 | while(1) { |
1857 | if (entry->bytes < bytes || entry->offset < min_start) { | 1839 | if (entry->bytes < bytes || |
1840 | (!entry->bitmap && entry->offset < min_start)) { | ||
1858 | struct rb_node *node; | 1841 | struct rb_node *node; |
1859 | 1842 | ||
1860 | node = rb_next(&entry->offset_index); | 1843 | node = rb_next(&entry->offset_index); |
@@ -1864,10 +1847,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1864 | offset_index); | 1847 | offset_index); |
1865 | continue; | 1848 | continue; |
1866 | } | 1849 | } |
1867 | ret = entry->offset; | ||
1868 | 1850 | ||
1869 | entry->offset += bytes; | 1851 | if (entry->bitmap) { |
1870 | entry->bytes -= bytes; | 1852 | ret = btrfs_alloc_from_bitmap(block_group, |
1853 | cluster, entry, bytes, | ||
1854 | min_start); | ||
1855 | if (ret == 0) { | ||
1856 | struct rb_node *node; | ||
1857 | node = rb_next(&entry->offset_index); | ||
1858 | if (!node) | ||
1859 | break; | ||
1860 | entry = rb_entry(node, struct btrfs_free_space, | ||
1861 | offset_index); | ||
1862 | continue; | ||
1863 | } | ||
1864 | } else { | ||
1865 | |||
1866 | ret = entry->offset; | ||
1867 | |||
1868 | entry->offset += bytes; | ||
1869 | entry->bytes -= bytes; | ||
1870 | } | ||
1871 | 1871 | ||
1872 | if (entry->bytes == 0) | 1872 | if (entry->bytes == 0) |
1873 | rb_erase(&entry->offset_index, &cluster->root); | 1873 | rb_erase(&entry->offset_index, &cluster->root); |
@@ -1884,7 +1884,12 @@ out: | |||
1884 | block_group->free_space -= bytes; | 1884 | block_group->free_space -= bytes; |
1885 | if (entry->bytes == 0) { | 1885 | if (entry->bytes == 0) { |
1886 | block_group->free_extents--; | 1886 | block_group->free_extents--; |
1887 | kfree(entry); | 1887 | if (entry->bitmap) { |
1888 | kfree(entry->bitmap); | ||
1889 | block_group->total_bitmaps--; | ||
1890 | recalculate_thresholds(block_group); | ||
1891 | } | ||
1892 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
1888 | } | 1893 | } |
1889 | 1894 | ||
1890 | spin_unlock(&block_group->tree_lock); | 1895 | spin_unlock(&block_group->tree_lock); |
@@ -1904,12 +1909,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
1904 | unsigned long found_bits; | 1909 | unsigned long found_bits; |
1905 | unsigned long start = 0; | 1910 | unsigned long start = 0; |
1906 | unsigned long total_found = 0; | 1911 | unsigned long total_found = 0; |
1912 | int ret; | ||
1907 | bool found = false; | 1913 | bool found = false; |
1908 | 1914 | ||
1909 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 1915 | i = offset_to_bit(entry->offset, block_group->sectorsize, |
1910 | max_t(u64, offset, entry->offset)); | 1916 | max_t(u64, offset, entry->offset)); |
1911 | search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 1917 | search_bits = bytes_to_bits(bytes, block_group->sectorsize); |
1912 | total_bits = bytes_to_bits(bytes, block_group->sectorsize); | 1918 | total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); |
1913 | 1919 | ||
1914 | again: | 1920 | again: |
1915 | found_bits = 0; | 1921 | found_bits = 0; |
@@ -1926,7 +1932,7 @@ again: | |||
1926 | } | 1932 | } |
1927 | 1933 | ||
1928 | if (!found_bits) | 1934 | if (!found_bits) |
1929 | return -1; | 1935 | return -ENOSPC; |
1930 | 1936 | ||
1931 | if (!found) { | 1937 | if (!found) { |
1932 | start = i; | 1938 | start = i; |
@@ -1950,189 +1956,208 @@ again: | |||
1950 | 1956 | ||
1951 | cluster->window_start = start * block_group->sectorsize + | 1957 | cluster->window_start = start * block_group->sectorsize + |
1952 | entry->offset; | 1958 | entry->offset; |
1953 | cluster->points_to_bitmap = true; | 1959 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
1960 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
1961 | &entry->offset_index, 1); | ||
1962 | BUG_ON(ret); | ||
1954 | 1963 | ||
1955 | return 0; | 1964 | return 0; |
1956 | } | 1965 | } |
1957 | 1966 | ||
1958 | /* | 1967 | /* |
1959 | * here we try to find a cluster of blocks in a block group. The goal | 1968 | * This searches the block group for just extents to fill the cluster with. |
1960 | * is to find at least bytes free and up to empty_size + bytes free. | ||
1961 | * We might not find them all in one contiguous area. | ||
1962 | * | ||
1963 | * returns zero and sets up cluster if things worked out, otherwise | ||
1964 | * it returns -enospc | ||
1965 | */ | 1969 | */ |
1966 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 1970 | static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
1967 | struct btrfs_root *root, | 1971 | struct btrfs_free_cluster *cluster, |
1968 | struct btrfs_block_group_cache *block_group, | 1972 | u64 offset, u64 bytes, u64 min_bytes) |
1969 | struct btrfs_free_cluster *cluster, | ||
1970 | u64 offset, u64 bytes, u64 empty_size) | ||
1971 | { | 1973 | { |
1974 | struct btrfs_free_space *first = NULL; | ||
1972 | struct btrfs_free_space *entry = NULL; | 1975 | struct btrfs_free_space *entry = NULL; |
1976 | struct btrfs_free_space *prev = NULL; | ||
1977 | struct btrfs_free_space *last; | ||
1973 | struct rb_node *node; | 1978 | struct rb_node *node; |
1974 | struct btrfs_free_space *next; | ||
1975 | struct btrfs_free_space *last = NULL; | ||
1976 | u64 min_bytes; | ||
1977 | u64 window_start; | 1979 | u64 window_start; |
1978 | u64 window_free; | 1980 | u64 window_free; |
1979 | u64 max_extent = 0; | 1981 | u64 max_extent; |
1980 | bool found_bitmap = false; | 1982 | u64 max_gap = 128 * 1024; |
1981 | int ret; | ||
1982 | 1983 | ||
1983 | /* for metadata, allow allocates with more holes */ | 1984 | entry = tree_search_offset(block_group, offset, 0, 1); |
1984 | if (btrfs_test_opt(root, SSD_SPREAD)) { | 1985 | if (!entry) |
1985 | min_bytes = bytes + empty_size; | 1986 | return -ENOSPC; |
1986 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
1987 | /* | ||
1988 | * we want to do larger allocations when we are | ||
1989 | * flushing out the delayed refs, it helps prevent | ||
1990 | * making more work as we go along. | ||
1991 | */ | ||
1992 | if (trans->transaction->delayed_refs.flushing) | ||
1993 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
1994 | else | ||
1995 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
1996 | } else | ||
1997 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
1998 | |||
1999 | spin_lock(&block_group->tree_lock); | ||
2000 | spin_lock(&cluster->lock); | ||
2001 | |||
2002 | /* someone already found a cluster, hooray */ | ||
2003 | if (cluster->block_group) { | ||
2004 | ret = 0; | ||
2005 | goto out; | ||
2006 | } | ||
2007 | again: | ||
2008 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); | ||
2009 | if (!entry) { | ||
2010 | ret = -ENOSPC; | ||
2011 | goto out; | ||
2012 | } | ||
2013 | 1987 | ||
2014 | /* | 1988 | /* |
2015 | * If found_bitmap is true, we exhausted our search for extent entries, | 1989 | * We don't want bitmaps, so just move along until we find a normal |
2016 | * and we just want to search all of the bitmaps that we can find, and | 1990 | * extent entry. |
2017 | * ignore any extent entries we find. | ||
2018 | */ | 1991 | */ |
2019 | while (entry->bitmap || found_bitmap || | 1992 | while (entry->bitmap) { |
2020 | (!entry->bitmap && entry->bytes < min_bytes)) { | 1993 | node = rb_next(&entry->offset_index); |
2021 | struct rb_node *node = rb_next(&entry->offset_index); | 1994 | if (!node) |
2022 | 1995 | return -ENOSPC; | |
2023 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | ||
2024 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | ||
2025 | offset, bytes + empty_size, | ||
2026 | min_bytes); | ||
2027 | if (!ret) | ||
2028 | goto got_it; | ||
2029 | } | ||
2030 | |||
2031 | if (!node) { | ||
2032 | ret = -ENOSPC; | ||
2033 | goto out; | ||
2034 | } | ||
2035 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1996 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2036 | } | 1997 | } |
2037 | 1998 | ||
2038 | /* | ||
2039 | * We already searched all the extent entries from the passed in offset | ||
2040 | * to the end and didn't find enough space for the cluster, and we also | ||
2041 | * didn't find any bitmaps that met our criteria, just go ahead and exit | ||
2042 | */ | ||
2043 | if (found_bitmap) { | ||
2044 | ret = -ENOSPC; | ||
2045 | goto out; | ||
2046 | } | ||
2047 | |||
2048 | cluster->points_to_bitmap = false; | ||
2049 | window_start = entry->offset; | 1999 | window_start = entry->offset; |
2050 | window_free = entry->bytes; | 2000 | window_free = entry->bytes; |
2051 | last = entry; | ||
2052 | max_extent = entry->bytes; | 2001 | max_extent = entry->bytes; |
2002 | first = entry; | ||
2003 | last = entry; | ||
2004 | prev = entry; | ||
2053 | 2005 | ||
2054 | while (1) { | 2006 | while (window_free <= min_bytes) { |
2055 | /* out window is just right, lets fill it */ | 2007 | node = rb_next(&entry->offset_index); |
2056 | if (window_free >= bytes + empty_size) | 2008 | if (!node) |
2057 | break; | 2009 | return -ENOSPC; |
2058 | 2010 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
2059 | node = rb_next(&last->offset_index); | ||
2060 | if (!node) { | ||
2061 | if (found_bitmap) | ||
2062 | goto again; | ||
2063 | ret = -ENOSPC; | ||
2064 | goto out; | ||
2065 | } | ||
2066 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2067 | 2011 | ||
2068 | /* | 2012 | if (entry->bitmap) |
2069 | * we found a bitmap, so if this search doesn't result in a | ||
2070 | * cluster, we know to go and search again for the bitmaps and | ||
2071 | * start looking for space there | ||
2072 | */ | ||
2073 | if (next->bitmap) { | ||
2074 | if (!found_bitmap) | ||
2075 | offset = next->offset; | ||
2076 | found_bitmap = true; | ||
2077 | last = next; | ||
2078 | continue; | 2013 | continue; |
2079 | } | ||
2080 | |||
2081 | /* | 2014 | /* |
2082 | * we haven't filled the empty size and the window is | 2015 | * we haven't filled the empty size and the window is |
2083 | * very large. reset and try again | 2016 | * very large. reset and try again |
2084 | */ | 2017 | */ |
2085 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || | 2018 | if (entry->offset - (prev->offset + prev->bytes) > max_gap || |
2086 | next->offset - window_start > (bytes + empty_size) * 2) { | 2019 | entry->offset - window_start > (min_bytes * 2)) { |
2087 | entry = next; | 2020 | first = entry; |
2088 | window_start = entry->offset; | 2021 | window_start = entry->offset; |
2089 | window_free = entry->bytes; | 2022 | window_free = entry->bytes; |
2090 | last = entry; | 2023 | last = entry; |
2091 | max_extent = entry->bytes; | 2024 | max_extent = entry->bytes; |
2092 | } else { | 2025 | } else { |
2093 | last = next; | 2026 | last = entry; |
2094 | window_free += next->bytes; | 2027 | window_free += entry->bytes; |
2095 | if (entry->bytes > max_extent) | 2028 | if (entry->bytes > max_extent) |
2096 | max_extent = entry->bytes; | 2029 | max_extent = entry->bytes; |
2097 | } | 2030 | } |
2031 | prev = entry; | ||
2098 | } | 2032 | } |
2099 | 2033 | ||
2100 | cluster->window_start = entry->offset; | 2034 | cluster->window_start = first->offset; |
2035 | |||
2036 | node = &first->offset_index; | ||
2101 | 2037 | ||
2102 | /* | 2038 | /* |
2103 | * now we've found our entries, pull them out of the free space | 2039 | * now we've found our entries, pull them out of the free space |
2104 | * cache and put them into the cluster rbtree | 2040 | * cache and put them into the cluster rbtree |
2105 | * | ||
2106 | * The cluster includes an rbtree, but only uses the offset index | ||
2107 | * of each free space cache entry. | ||
2108 | */ | 2041 | */ |
2109 | while (1) { | 2042 | do { |
2043 | int ret; | ||
2044 | |||
2045 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2110 | node = rb_next(&entry->offset_index); | 2046 | node = rb_next(&entry->offset_index); |
2111 | if (entry->bitmap && node) { | 2047 | if (entry->bitmap) |
2112 | entry = rb_entry(node, struct btrfs_free_space, | ||
2113 | offset_index); | ||
2114 | continue; | 2048 | continue; |
2115 | } else if (entry->bitmap && !node) { | ||
2116 | break; | ||
2117 | } | ||
2118 | 2049 | ||
2119 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | 2050 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
2120 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2051 | ret = tree_insert_offset(&cluster->root, entry->offset, |
2121 | &entry->offset_index, 0); | 2052 | &entry->offset_index, 0); |
2122 | BUG_ON(ret); | 2053 | BUG_ON(ret); |
2054 | } while (node && entry != last); | ||
2123 | 2055 | ||
2124 | if (!node || entry == last) | 2056 | cluster->max_size = max_extent; |
2125 | break; | 2057 | |
2058 | return 0; | ||
2059 | } | ||
2060 | |||
2061 | /* | ||
2062 | * This specifically looks for bitmaps that may work in the cluster, we assume | ||
2063 | * that we have already failed to find extents that will work. | ||
2064 | */ | ||
2065 | static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | ||
2066 | struct btrfs_free_cluster *cluster, | ||
2067 | u64 offset, u64 bytes, u64 min_bytes) | ||
2068 | { | ||
2069 | struct btrfs_free_space *entry; | ||
2070 | struct rb_node *node; | ||
2071 | int ret = -ENOSPC; | ||
2072 | |||
2073 | if (block_group->total_bitmaps == 0) | ||
2074 | return -ENOSPC; | ||
2126 | 2075 | ||
2076 | entry = tree_search_offset(block_group, | ||
2077 | offset_to_bitmap(block_group, offset), | ||
2078 | 0, 1); | ||
2079 | if (!entry) | ||
2080 | return -ENOSPC; | ||
2081 | |||
2082 | node = &entry->offset_index; | ||
2083 | do { | ||
2127 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2084 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2085 | node = rb_next(&entry->offset_index); | ||
2086 | if (!entry->bitmap) | ||
2087 | continue; | ||
2088 | if (entry->bytes < min_bytes) | ||
2089 | continue; | ||
2090 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2091 | bytes, min_bytes); | ||
2092 | } while (ret && node); | ||
2093 | |||
2094 | return ret; | ||
2095 | } | ||
2096 | |||
2097 | /* | ||
2098 | * here we try to find a cluster of blocks in a block group. The goal | ||
2099 | * is to find at least bytes free and up to empty_size + bytes free. | ||
2100 | * We might not find them all in one contiguous area. | ||
2101 | * | ||
2102 | * returns zero and sets up cluster if things worked out, otherwise | ||
2103 | * it returns -ENOSPC | ||
2104 | */ | ||
2105 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
2106 | struct btrfs_root *root, | ||
2107 | struct btrfs_block_group_cache *block_group, | ||
2108 | struct btrfs_free_cluster *cluster, | ||
2109 | u64 offset, u64 bytes, u64 empty_size) | ||
2110 | { | ||
2111 | u64 min_bytes; | ||
2112 | int ret; | ||
2113 | |||
2114 | /* for metadata, allow allocates with more holes */ | ||
2115 | if (btrfs_test_opt(root, SSD_SPREAD)) { | ||
2116 | min_bytes = bytes + empty_size; | ||
2117 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2118 | /* | ||
2119 | * we want to do larger allocations when we are | ||
2120 | * flushing out the delayed refs, it helps prevent | ||
2121 | * making more work as we go along. | ||
2122 | */ | ||
2123 | if (trans->transaction->delayed_refs.flushing) | ||
2124 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
2125 | else | ||
2126 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
2127 | } else | ||
2128 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
2129 | |||
2130 | spin_lock(&block_group->tree_lock); | ||
2131 | |||
2132 | /* | ||
2133 | * If we know we don't have enough space to make a cluster don't even | ||
2134 | * bother doing all the work to try and find one. | ||
2135 | */ | ||
2136 | if (block_group->free_space < min_bytes) { | ||
2137 | spin_unlock(&block_group->tree_lock); | ||
2138 | return -ENOSPC; | ||
2128 | } | 2139 | } |
2129 | 2140 | ||
2130 | cluster->max_size = max_extent; | 2141 | spin_lock(&cluster->lock); |
2131 | got_it: | 2142 | |
2132 | ret = 0; | 2143 | /* someone already found a cluster, hooray */ |
2133 | atomic_inc(&block_group->count); | 2144 | if (cluster->block_group) { |
2134 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | 2145 | ret = 0; |
2135 | cluster->block_group = block_group; | 2146 | goto out; |
2147 | } | ||
2148 | |||
2149 | ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, | ||
2150 | min_bytes); | ||
2151 | if (ret) | ||
2152 | ret = setup_cluster_bitmap(block_group, cluster, offset, | ||
2153 | bytes, min_bytes); | ||
2154 | |||
2155 | if (!ret) { | ||
2156 | atomic_inc(&block_group->count); | ||
2157 | list_add_tail(&cluster->block_group_list, | ||
2158 | &block_group->cluster_list); | ||
2159 | cluster->block_group = block_group; | ||
2160 | } | ||
2136 | out: | 2161 | out: |
2137 | spin_unlock(&cluster->lock); | 2162 | spin_unlock(&cluster->lock); |
2138 | spin_unlock(&block_group->tree_lock); | 2163 | spin_unlock(&block_group->tree_lock); |
@@ -2149,8 +2174,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2149 | spin_lock_init(&cluster->refill_lock); | 2174 | spin_lock_init(&cluster->refill_lock); |
2150 | cluster->root = RB_ROOT; | 2175 | cluster->root = RB_ROOT; |
2151 | cluster->max_size = 0; | 2176 | cluster->max_size = 0; |
2152 | cluster->points_to_bitmap = false; | ||
2153 | INIT_LIST_HEAD(&cluster->block_group_list); | 2177 | INIT_LIST_HEAD(&cluster->block_group_list); |
2154 | cluster->block_group = NULL; | 2178 | cluster->block_group = NULL; |
2155 | } | 2179 | } |
2156 | 2180 | ||
2181 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2182 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2183 | { | ||
2184 | struct btrfs_free_space *entry = NULL; | ||
2185 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
2186 | u64 bytes = 0; | ||
2187 | u64 actually_trimmed; | ||
2188 | int ret = 0; | ||
2189 | |||
2190 | *trimmed = 0; | ||
2191 | |||
2192 | while (start < end) { | ||
2193 | spin_lock(&block_group->tree_lock); | ||
2194 | |||
2195 | if (block_group->free_space < minlen) { | ||
2196 | spin_unlock(&block_group->tree_lock); | ||
2197 | break; | ||
2198 | } | ||
2199 | |||
2200 | entry = tree_search_offset(block_group, start, 0, 1); | ||
2201 | if (!entry) | ||
2202 | entry = tree_search_offset(block_group, | ||
2203 | offset_to_bitmap(block_group, | ||
2204 | start), | ||
2205 | 1, 1); | ||
2206 | |||
2207 | if (!entry || entry->offset >= end) { | ||
2208 | spin_unlock(&block_group->tree_lock); | ||
2209 | break; | ||
2210 | } | ||
2211 | |||
2212 | if (entry->bitmap) { | ||
2213 | ret = search_bitmap(block_group, entry, &start, &bytes); | ||
2214 | if (!ret) { | ||
2215 | if (start >= end) { | ||
2216 | spin_unlock(&block_group->tree_lock); | ||
2217 | break; | ||
2218 | } | ||
2219 | bytes = min(bytes, end - start); | ||
2220 | bitmap_clear_bits(block_group, entry, | ||
2221 | start, bytes); | ||
2222 | if (entry->bytes == 0) | ||
2223 | free_bitmap(block_group, entry); | ||
2224 | } else { | ||
2225 | start = entry->offset + BITS_PER_BITMAP * | ||
2226 | block_group->sectorsize; | ||
2227 | spin_unlock(&block_group->tree_lock); | ||
2228 | ret = 0; | ||
2229 | continue; | ||
2230 | } | ||
2231 | } else { | ||
2232 | start = entry->offset; | ||
2233 | bytes = min(entry->bytes, end - start); | ||
2234 | unlink_free_space(block_group, entry); | ||
2235 | kfree(entry); | ||
2236 | } | ||
2237 | |||
2238 | spin_unlock(&block_group->tree_lock); | ||
2239 | |||
2240 | if (bytes >= minlen) { | ||
2241 | int update_ret; | ||
2242 | update_ret = btrfs_update_reserved_bytes(block_group, | ||
2243 | bytes, 1, 1); | ||
2244 | |||
2245 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2246 | start, | ||
2247 | bytes, | ||
2248 | &actually_trimmed); | ||
2249 | |||
2250 | btrfs_add_free_space(block_group, | ||
2251 | start, bytes); | ||
2252 | if (!update_ret) | ||
2253 | btrfs_update_reserved_bytes(block_group, | ||
2254 | bytes, 0, 1); | ||
2255 | |||
2256 | if (ret) | ||
2257 | break; | ||
2258 | *trimmed += actually_trimmed; | ||
2259 | } | ||
2260 | start += bytes; | ||
2261 | bytes = 0; | ||
2262 | |||
2263 | if (fatal_signal_pending(current)) { | ||
2264 | ret = -ERESTARTSYS; | ||
2265 | break; | ||
2266 | } | ||
2267 | |||
2268 | cond_resched(); | ||
2269 | } | ||
2270 | |||
2271 | return ret; | ||
2272 | } | ||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
68 | int btrfs_return_cluster_to_free_space( | 68 | int btrfs_return_cluster_to_free_space( |
69 | struct btrfs_block_group_cache *block_group, | 69 | struct btrfs_block_group_cache *block_group, |
70 | struct btrfs_free_cluster *cluster); | 70 | struct btrfs_free_cluster *cluster); |
71 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
72 | u64 *trimmed, u64 start, u64 end, u64 minlen); | ||
71 | #endif | 73 | #endif |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..c05a08f4c411 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
30 | int slot; | 30 | int slot; |
31 | 31 | ||
32 | path = btrfs_alloc_path(); | 32 | path = btrfs_alloc_path(); |
33 | BUG_ON(!path); | 33 | if (!path) |
34 | return -ENOMEM; | ||
34 | 35 | ||
35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | 36 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
36 | search_key.type = -1; | 37 | search_key.type = -1; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 119520bdb9a5..93c28a1d6bdc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "compression.h" | 51 | #include "compression.h" |
52 | #include "locking.h" | 52 | #include "locking.h" |
53 | #include "free-space-cache.h" | ||
53 | 54 | ||
54 | struct btrfs_iget_args { | 55 | struct btrfs_iget_args { |
55 | u64 ino; | 56 | u64 ino; |
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep; | |||
70 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
71 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
72 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
74 | struct kmem_cache *btrfs_free_space_cachep; | ||
73 | 75 | ||
74 | #define S_SHIFT 12 | 76 | #define S_SHIFT 12 |
75 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | 77 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
82 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | 84 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
83 | }; | 85 | }; |
84 | 86 | ||
85 | static void btrfs_truncate(struct inode *inode); | 87 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
88 | static int btrfs_truncate(struct inode *inode); | ||
86 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 89 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); |
87 | static noinline int cow_file_range(struct inode *inode, | 90 | static noinline int cow_file_range(struct inode *inode, |
88 | struct page *locked_page, | 91 | struct page *locked_page, |
@@ -288,6 +291,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
288 | struct async_extent *async_extent; | 291 | struct async_extent *async_extent; |
289 | 292 | ||
290 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); | 293 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); |
294 | BUG_ON(!async_extent); | ||
291 | async_extent->start = start; | 295 | async_extent->start = start; |
292 | async_extent->ram_size = ram_size; | 296 | async_extent->ram_size = ram_size; |
293 | async_extent->compressed_size = compressed_size; | 297 | async_extent->compressed_size = compressed_size; |
@@ -382,9 +386,11 @@ again: | |||
382 | */ | 386 | */ |
383 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 387 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
384 | (btrfs_test_opt(root, COMPRESS) || | 388 | (btrfs_test_opt(root, COMPRESS) || |
385 | (BTRFS_I(inode)->force_compress))) { | 389 | (BTRFS_I(inode)->force_compress) || |
390 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
386 | WARN_ON(pages); | 391 | WARN_ON(pages); |
387 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 392 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
393 | BUG_ON(!pages); | ||
388 | 394 | ||
389 | if (BTRFS_I(inode)->force_compress) | 395 | if (BTRFS_I(inode)->force_compress) |
390 | compress_type = BTRFS_I(inode)->force_compress; | 396 | compress_type = BTRFS_I(inode)->force_compress; |
@@ -1254,7 +1260,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1254 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1260 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1255 | page_started, 0, nr_written); | 1261 | page_started, 0, nr_written); |
1256 | else if (!btrfs_test_opt(root, COMPRESS) && | 1262 | else if (!btrfs_test_opt(root, COMPRESS) && |
1257 | !(BTRFS_I(inode)->force_compress)) | 1263 | !(BTRFS_I(inode)->force_compress) && |
1264 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) | ||
1258 | ret = cow_file_range(inode, locked_page, start, end, | 1265 | ret = cow_file_range(inode, locked_page, start, end, |
1259 | page_started, nr_written, 1); | 1266 | page_started, nr_written, 1); |
1260 | else | 1267 | else |
@@ -1461,8 +1468,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1461 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1468 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1462 | return btrfs_submit_compressed_read(inode, bio, | 1469 | return btrfs_submit_compressed_read(inode, bio, |
1463 | mirror_num, bio_flags); | 1470 | mirror_num, bio_flags); |
1464 | } else if (!skip_sum) | 1471 | } else if (!skip_sum) { |
1465 | btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1472 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
1473 | if (ret) | ||
1474 | return ret; | ||
1475 | } | ||
1466 | goto mapit; | 1476 | goto mapit; |
1467 | } else if (!skip_sum) { | 1477 | } else if (!skip_sum) { |
1468 | /* csum items have already been cloned */ | 1478 | /* csum items have already been cloned */ |
@@ -1785,6 +1795,8 @@ out: | |||
1785 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1795 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1786 | struct extent_state *state, int uptodate) | 1796 | struct extent_state *state, int uptodate) |
1787 | { | 1797 | { |
1798 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | ||
1799 | |||
1788 | ClearPagePrivate2(page); | 1800 | ClearPagePrivate2(page); |
1789 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1801 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1790 | } | 1802 | } |
@@ -1895,10 +1907,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1895 | else | 1907 | else |
1896 | rw = READ; | 1908 | rw = READ; |
1897 | 1909 | ||
1898 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1910 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1899 | failrec->last_mirror, | 1911 | failrec->last_mirror, |
1900 | failrec->bio_flags, 0); | 1912 | failrec->bio_flags, 0); |
1901 | return 0; | 1913 | return ret; |
1902 | } | 1914 | } |
1903 | 1915 | ||
1904 | /* | 1916 | /* |
@@ -2282,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2282 | * this cleans up any orphans that may be left on the list from the last use | 2294 | * this cleans up any orphans that may be left on the list from the last use |
2283 | * of this root. | 2295 | * of this root. |
2284 | */ | 2296 | */ |
2285 | void btrfs_orphan_cleanup(struct btrfs_root *root) | 2297 | int btrfs_orphan_cleanup(struct btrfs_root *root) |
2286 | { | 2298 | { |
2287 | struct btrfs_path *path; | 2299 | struct btrfs_path *path; |
2288 | struct extent_buffer *leaf; | 2300 | struct extent_buffer *leaf; |
@@ -2292,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2292 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2304 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2293 | 2305 | ||
2294 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2306 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2295 | return; | 2307 | return 0; |
2296 | 2308 | ||
2297 | path = btrfs_alloc_path(); | 2309 | path = btrfs_alloc_path(); |
2298 | BUG_ON(!path); | 2310 | if (!path) { |
2311 | ret = -ENOMEM; | ||
2312 | goto out; | ||
2313 | } | ||
2299 | path->reada = -1; | 2314 | path->reada = -1; |
2300 | 2315 | ||
2301 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 2316 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
@@ -2304,11 +2319,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2304 | 2319 | ||
2305 | while (1) { | 2320 | while (1) { |
2306 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2321 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
2307 | if (ret < 0) { | 2322 | if (ret < 0) |
2308 | printk(KERN_ERR "Error searching slot for orphan: %d" | 2323 | goto out; |
2309 | "\n", ret); | ||
2310 | break; | ||
2311 | } | ||
2312 | 2324 | ||
2313 | /* | 2325 | /* |
2314 | * if ret == 0 means we found what we were searching for, which | 2326 | * if ret == 0 means we found what we were searching for, which |
@@ -2316,6 +2328,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2316 | * find the key and see if we have stuff that matches | 2328 | * find the key and see if we have stuff that matches |
2317 | */ | 2329 | */ |
2318 | if (ret > 0) { | 2330 | if (ret > 0) { |
2331 | ret = 0; | ||
2319 | if (path->slots[0] == 0) | 2332 | if (path->slots[0] == 0) |
2320 | break; | 2333 | break; |
2321 | path->slots[0]--; | 2334 | path->slots[0]--; |
@@ -2343,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2343 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2356 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2344 | found_key.offset = 0; | 2357 | found_key.offset = 0; |
2345 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2358 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2346 | BUG_ON(IS_ERR(inode)); | 2359 | if (IS_ERR(inode)) { |
2360 | ret = PTR_ERR(inode); | ||
2361 | goto out; | ||
2362 | } | ||
2347 | 2363 | ||
2348 | /* | 2364 | /* |
2349 | * add this inode to the orphan list so btrfs_orphan_del does | 2365 | * add this inode to the orphan list so btrfs_orphan_del does |
@@ -2361,7 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2361 | */ | 2377 | */ |
2362 | if (is_bad_inode(inode)) { | 2378 | if (is_bad_inode(inode)) { |
2363 | trans = btrfs_start_transaction(root, 0); | 2379 | trans = btrfs_start_transaction(root, 0); |
2364 | BUG_ON(IS_ERR(trans)); | 2380 | if (IS_ERR(trans)) { |
2381 | ret = PTR_ERR(trans); | ||
2382 | goto out; | ||
2383 | } | ||
2365 | btrfs_orphan_del(trans, inode); | 2384 | btrfs_orphan_del(trans, inode); |
2366 | btrfs_end_transaction(trans, root); | 2385 | btrfs_end_transaction(trans, root); |
2367 | iput(inode); | 2386 | iput(inode); |
@@ -2370,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2370 | 2389 | ||
2371 | /* if we have links, this was a truncate, lets do that */ | 2390 | /* if we have links, this was a truncate, lets do that */ |
2372 | if (inode->i_nlink) { | 2391 | if (inode->i_nlink) { |
2392 | if (!S_ISREG(inode->i_mode)) { | ||
2393 | WARN_ON(1); | ||
2394 | iput(inode); | ||
2395 | continue; | ||
2396 | } | ||
2373 | nr_truncate++; | 2397 | nr_truncate++; |
2374 | btrfs_truncate(inode); | 2398 | ret = btrfs_truncate(inode); |
2375 | } else { | 2399 | } else { |
2376 | nr_unlink++; | 2400 | nr_unlink++; |
2377 | } | 2401 | } |
2378 | 2402 | ||
2379 | /* this will do delete_inode and everything for us */ | 2403 | /* this will do delete_inode and everything for us */ |
2380 | iput(inode); | 2404 | iput(inode); |
2405 | if (ret) | ||
2406 | goto out; | ||
2381 | } | 2407 | } |
2382 | btrfs_free_path(path); | ||
2383 | |||
2384 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | 2408 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; |
2385 | 2409 | ||
2386 | if (root->orphan_block_rsv) | 2410 | if (root->orphan_block_rsv) |
@@ -2389,14 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2389 | 2413 | ||
2390 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2414 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2391 | trans = btrfs_join_transaction(root, 1); | 2415 | trans = btrfs_join_transaction(root, 1); |
2392 | BUG_ON(IS_ERR(trans)); | 2416 | if (!IS_ERR(trans)) |
2393 | btrfs_end_transaction(trans, root); | 2417 | btrfs_end_transaction(trans, root); |
2394 | } | 2418 | } |
2395 | 2419 | ||
2396 | if (nr_unlink) | 2420 | if (nr_unlink) |
2397 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2421 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2398 | if (nr_truncate) | 2422 | if (nr_truncate) |
2399 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2423 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2424 | |||
2425 | out: | ||
2426 | if (ret) | ||
2427 | printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); | ||
2428 | btrfs_free_path(path); | ||
2429 | return ret; | ||
2400 | } | 2430 | } |
2401 | 2431 | ||
2402 | /* | 2432 | /* |
@@ -2507,6 +2537,8 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2507 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | 2537 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); |
2508 | 2538 | ||
2509 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | 2539 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); |
2540 | if (location.objectid == BTRFS_FREE_SPACE_OBJECTID) | ||
2541 | inode->i_mapping->flags &= ~__GFP_FS; | ||
2510 | 2542 | ||
2511 | /* | 2543 | /* |
2512 | * try to precache a NULL acl entry for files that don't have | 2544 | * try to precache a NULL acl entry for files that don't have |
@@ -2635,10 +2667,10 @@ failed: | |||
2635 | * recovery code. It remove a link in a directory with a given name, and | 2667 | * recovery code. It remove a link in a directory with a given name, and |
2636 | * also drops the back refs in the inode to the directory | 2668 | * also drops the back refs in the inode to the directory |
2637 | */ | 2669 | */ |
2638 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2670 | static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
2639 | struct btrfs_root *root, | 2671 | struct btrfs_root *root, |
2640 | struct inode *dir, struct inode *inode, | 2672 | struct inode *dir, struct inode *inode, |
2641 | const char *name, int name_len) | 2673 | const char *name, int name_len) |
2642 | { | 2674 | { |
2643 | struct btrfs_path *path; | 2675 | struct btrfs_path *path; |
2644 | int ret = 0; | 2676 | int ret = 0; |
@@ -2710,12 +2742,25 @@ err: | |||
2710 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2742 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2711 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2743 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2712 | btrfs_update_inode(trans, root, dir); | 2744 | btrfs_update_inode(trans, root, dir); |
2713 | btrfs_drop_nlink(inode); | ||
2714 | ret = btrfs_update_inode(trans, root, inode); | ||
2715 | out: | 2745 | out: |
2716 | return ret; | 2746 | return ret; |
2717 | } | 2747 | } |
2718 | 2748 | ||
2749 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
2750 | struct btrfs_root *root, | ||
2751 | struct inode *dir, struct inode *inode, | ||
2752 | const char *name, int name_len) | ||
2753 | { | ||
2754 | int ret; | ||
2755 | ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | ||
2756 | if (!ret) { | ||
2757 | btrfs_drop_nlink(inode); | ||
2758 | ret = btrfs_update_inode(trans, root, inode); | ||
2759 | } | ||
2760 | return ret; | ||
2761 | } | ||
2762 | |||
2763 | |||
2719 | /* helper to check if there is any shared block in the path */ | 2764 | /* helper to check if there is any shared block in the path */ |
2720 | static int check_path_shared(struct btrfs_root *root, | 2765 | static int check_path_shared(struct btrfs_root *root, |
2721 | struct btrfs_path *path) | 2766 | struct btrfs_path *path) |
@@ -3537,7 +3582,13 @@ out: | |||
3537 | return ret; | 3582 | return ret; |
3538 | } | 3583 | } |
3539 | 3584 | ||
3540 | int btrfs_cont_expand(struct inode *inode, loff_t size) | 3585 | /* |
3586 | * This function puts in dummy file extents for the area we're creating a hole | ||
3587 | * for. So if we are truncating this file to a larger size we need to insert | ||
3588 | * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for | ||
3589 | * the range between oldsize and size | ||
3590 | */ | ||
3591 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | ||
3541 | { | 3592 | { |
3542 | struct btrfs_trans_handle *trans; | 3593 | struct btrfs_trans_handle *trans; |
3543 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3594 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3545,7 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3545 | struct extent_map *em = NULL; | 3596 | struct extent_map *em = NULL; |
3546 | struct extent_state *cached_state = NULL; | 3597 | struct extent_state *cached_state = NULL; |
3547 | u64 mask = root->sectorsize - 1; | 3598 | u64 mask = root->sectorsize - 1; |
3548 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3599 | u64 hole_start = (oldsize + mask) & ~mask; |
3549 | u64 block_end = (size + mask) & ~mask; | 3600 | u64 block_end = (size + mask) & ~mask; |
3550 | u64 last_byte; | 3601 | u64 last_byte; |
3551 | u64 cur_offset; | 3602 | u64 cur_offset; |
@@ -3590,13 +3641,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3590 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3641 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3591 | cur_offset + hole_size, | 3642 | cur_offset + hole_size, |
3592 | &hint_byte, 1); | 3643 | &hint_byte, 1); |
3593 | BUG_ON(err); | 3644 | if (err) |
3645 | break; | ||
3594 | 3646 | ||
3595 | err = btrfs_insert_file_extent(trans, root, | 3647 | err = btrfs_insert_file_extent(trans, root, |
3596 | inode->i_ino, cur_offset, 0, | 3648 | inode->i_ino, cur_offset, 0, |
3597 | 0, hole_size, 0, hole_size, | 3649 | 0, hole_size, 0, hole_size, |
3598 | 0, 0, 0); | 3650 | 0, 0, 0); |
3599 | BUG_ON(err); | 3651 | if (err) |
3652 | break; | ||
3600 | 3653 | ||
3601 | btrfs_drop_extent_cache(inode, hole_start, | 3654 | btrfs_drop_extent_cache(inode, hole_start, |
3602 | last_byte - 1, 0); | 3655 | last_byte - 1, 0); |
@@ -3616,81 +3669,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3616 | return err; | 3669 | return err; |
3617 | } | 3670 | } |
3618 | 3671 | ||
3619 | static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | 3672 | static int btrfs_setsize(struct inode *inode, loff_t newsize) |
3620 | { | 3673 | { |
3621 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3674 | loff_t oldsize = i_size_read(inode); |
3622 | struct btrfs_trans_handle *trans; | ||
3623 | unsigned long nr; | ||
3624 | int ret; | 3675 | int ret; |
3625 | 3676 | ||
3626 | if (attr->ia_size == inode->i_size) | 3677 | if (newsize == oldsize) |
3627 | return 0; | 3678 | return 0; |
3628 | 3679 | ||
3629 | if (attr->ia_size > inode->i_size) { | 3680 | if (newsize > oldsize) { |
3630 | unsigned long limit; | 3681 | i_size_write(inode, newsize); |
3631 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 3682 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
3632 | if (attr->ia_size > inode->i_sb->s_maxbytes) | 3683 | truncate_pagecache(inode, oldsize, newsize); |
3633 | return -EFBIG; | 3684 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
3634 | if (limit != RLIM_INFINITY && attr->ia_size > limit) { | ||
3635 | send_sig(SIGXFSZ, current, 0); | ||
3636 | return -EFBIG; | ||
3637 | } | ||
3638 | } | ||
3639 | |||
3640 | trans = btrfs_start_transaction(root, 5); | ||
3641 | if (IS_ERR(trans)) | ||
3642 | return PTR_ERR(trans); | ||
3643 | |||
3644 | btrfs_set_trans_block_group(trans, inode); | ||
3645 | |||
3646 | ret = btrfs_orphan_add(trans, inode); | ||
3647 | BUG_ON(ret); | ||
3648 | |||
3649 | nr = trans->blocks_used; | ||
3650 | btrfs_end_transaction(trans, root); | ||
3651 | btrfs_btree_balance_dirty(root, nr); | ||
3652 | |||
3653 | if (attr->ia_size > inode->i_size) { | ||
3654 | ret = btrfs_cont_expand(inode, attr->ia_size); | ||
3655 | if (ret) { | 3685 | if (ret) { |
3656 | btrfs_truncate(inode); | 3686 | btrfs_setsize(inode, oldsize); |
3657 | return ret; | 3687 | return ret; |
3658 | } | 3688 | } |
3659 | 3689 | ||
3660 | i_size_write(inode, attr->ia_size); | 3690 | mark_inode_dirty(inode); |
3661 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3691 | } else { |
3662 | 3692 | ||
3663 | trans = btrfs_start_transaction(root, 0); | 3693 | /* |
3664 | BUG_ON(IS_ERR(trans)); | 3694 | * We're truncating a file that used to have good data down to |
3665 | btrfs_set_trans_block_group(trans, inode); | 3695 | * zero. Make sure it gets into the ordered flush list so that |
3666 | trans->block_rsv = root->orphan_block_rsv; | 3696 | * any new writes get down to disk quickly. |
3667 | BUG_ON(!trans->block_rsv); | 3697 | */ |
3698 | if (newsize == 0) | ||
3699 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3668 | 3700 | ||
3669 | ret = btrfs_update_inode(trans, root, inode); | 3701 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3670 | BUG_ON(ret); | 3702 | truncate_setsize(inode, newsize); |
3671 | if (inode->i_nlink > 0) { | 3703 | ret = btrfs_truncate(inode); |
3672 | ret = btrfs_orphan_del(trans, inode); | ||
3673 | BUG_ON(ret); | ||
3674 | } | ||
3675 | nr = trans->blocks_used; | ||
3676 | btrfs_end_transaction(trans, root); | ||
3677 | btrfs_btree_balance_dirty(root, nr); | ||
3678 | return 0; | ||
3679 | } | 3704 | } |
3680 | 3705 | ||
3681 | /* | 3706 | return ret; |
3682 | * We're truncating a file that used to have good data down to | ||
3683 | * zero. Make sure it gets into the ordered flush list so that | ||
3684 | * any new writes get down to disk quickly. | ||
3685 | */ | ||
3686 | if (attr->ia_size == 0) | ||
3687 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3688 | |||
3689 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | ||
3690 | ret = vmtruncate(inode, attr->ia_size); | ||
3691 | BUG_ON(ret); | ||
3692 | |||
3693 | return 0; | ||
3694 | } | 3707 | } |
3695 | 3708 | ||
3696 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3709 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
@@ -3707,7 +3720,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3707 | return err; | 3720 | return err; |
3708 | 3721 | ||
3709 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 3722 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
3710 | err = btrfs_setattr_size(inode, attr); | 3723 | err = btrfs_setsize(inode, attr->ia_size); |
3711 | if (err) | 3724 | if (err) |
3712 | return err; | 3725 | return err; |
3713 | } | 3726 | } |
@@ -3730,6 +3743,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3730 | unsigned long nr; | 3743 | unsigned long nr; |
3731 | int ret; | 3744 | int ret; |
3732 | 3745 | ||
3746 | trace_btrfs_inode_evict(inode); | ||
3747 | |||
3733 | truncate_inode_pages(&inode->i_data, 0); | 3748 | truncate_inode_pages(&inode->i_data, 0); |
3734 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3749 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3735 | root == root->fs_info->tree_root)) | 3750 | root == root->fs_info->tree_root)) |
@@ -4072,7 +4087,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
4072 | BTRFS_I(inode)->root = root; | 4087 | BTRFS_I(inode)->root = root; |
4073 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 4088 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
4074 | btrfs_read_locked_inode(inode); | 4089 | btrfs_read_locked_inode(inode); |
4075 | |||
4076 | inode_tree_add(inode); | 4090 | inode_tree_add(inode); |
4077 | unlock_new_inode(inode); | 4091 | unlock_new_inode(inode); |
4078 | if (new) | 4092 | if (new) |
@@ -4147,8 +4161,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4147 | if (!IS_ERR(inode) && root != sub_root) { | 4161 | if (!IS_ERR(inode) && root != sub_root) { |
4148 | down_read(&root->fs_info->cleanup_work_sem); | 4162 | down_read(&root->fs_info->cleanup_work_sem); |
4149 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4163 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
4150 | btrfs_orphan_cleanup(sub_root); | 4164 | ret = btrfs_orphan_cleanup(sub_root); |
4151 | up_read(&root->fs_info->cleanup_work_sem); | 4165 | up_read(&root->fs_info->cleanup_work_sem); |
4166 | if (ret) | ||
4167 | inode = ERR_PTR(ret); | ||
4152 | } | 4168 | } |
4153 | 4169 | ||
4154 | return inode; | 4170 | return inode; |
@@ -4282,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4282 | while (di_cur < di_total) { | 4298 | while (di_cur < di_total) { |
4283 | struct btrfs_key location; | 4299 | struct btrfs_key location; |
4284 | 4300 | ||
4301 | if (verify_dir_item(root, leaf, di)) | ||
4302 | break; | ||
4303 | |||
4285 | name_len = btrfs_dir_name_len(leaf, di); | 4304 | name_len = btrfs_dir_name_len(leaf, di); |
4286 | if (name_len <= sizeof(tmp_name)) { | 4305 | if (name_len <= sizeof(tmp_name)) { |
4287 | name_ptr = tmp_name; | 4306 | name_ptr = tmp_name; |
@@ -4517,6 +4536,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4517 | return ERR_PTR(-ENOMEM); | 4536 | return ERR_PTR(-ENOMEM); |
4518 | 4537 | ||
4519 | if (dir) { | 4538 | if (dir) { |
4539 | trace_btrfs_inode_request(dir); | ||
4540 | |||
4520 | ret = btrfs_set_inode_index(dir, index); | 4541 | ret = btrfs_set_inode_index(dir, index); |
4521 | if (ret) { | 4542 | if (ret) { |
4522 | iput(inode); | 4543 | iput(inode); |
@@ -4585,12 +4606,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4585 | if ((mode & S_IFREG)) { | 4606 | if ((mode & S_IFREG)) { |
4586 | if (btrfs_test_opt(root, NODATASUM)) | 4607 | if (btrfs_test_opt(root, NODATASUM)) |
4587 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4608 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4588 | if (btrfs_test_opt(root, NODATACOW)) | 4609 | if (btrfs_test_opt(root, NODATACOW) || |
4610 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
4589 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4611 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
4590 | } | 4612 | } |
4591 | 4613 | ||
4592 | insert_inode_hash(inode); | 4614 | insert_inode_hash(inode); |
4593 | inode_tree_add(inode); | 4615 | inode_tree_add(inode); |
4616 | |||
4617 | trace_btrfs_inode_new(inode); | ||
4618 | |||
4594 | return inode; | 4619 | return inode; |
4595 | fail: | 4620 | fail: |
4596 | if (dir) | 4621 | if (dir) |
@@ -4809,7 +4834,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4809 | 4834 | ||
4810 | /* do not allow sys_link's with other subvols of the same device */ | 4835 | /* do not allow sys_link's with other subvols of the same device */ |
4811 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4836 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4812 | return -EPERM; | 4837 | return -EXDEV; |
4838 | |||
4839 | if (inode->i_nlink == ~0U) | ||
4840 | return -EMLINK; | ||
4813 | 4841 | ||
4814 | btrfs_inc_nlink(inode); | 4842 | btrfs_inc_nlink(inode); |
4815 | inode->i_ctime = CURRENT_TIME; | 4843 | inode->i_ctime = CURRENT_TIME; |
@@ -5265,6 +5293,9 @@ insert: | |||
5265 | } | 5293 | } |
5266 | write_unlock(&em_tree->lock); | 5294 | write_unlock(&em_tree->lock); |
5267 | out: | 5295 | out: |
5296 | |||
5297 | trace_btrfs_get_extent(root, em); | ||
5298 | |||
5268 | if (path) | 5299 | if (path) |
5269 | btrfs_free_path(path); | 5300 | btrfs_free_path(path); |
5270 | if (trans) { | 5301 | if (trans) { |
@@ -5748,6 +5779,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5748 | 5779 | ||
5749 | kfree(dip->csums); | 5780 | kfree(dip->csums); |
5750 | kfree(dip); | 5781 | kfree(dip); |
5782 | |||
5783 | /* If we had a csum failure make sure to clear the uptodate flag */ | ||
5784 | if (err) | ||
5785 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5751 | dio_end_io(bio, err); | 5786 | dio_end_io(bio, err); |
5752 | } | 5787 | } |
5753 | 5788 | ||
@@ -5849,6 +5884,10 @@ out_done: | |||
5849 | 5884 | ||
5850 | kfree(dip->csums); | 5885 | kfree(dip->csums); |
5851 | kfree(dip); | 5886 | kfree(dip); |
5887 | |||
5888 | /* If we had an error make sure to clear the uptodate flag */ | ||
5889 | if (err) | ||
5890 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5852 | dio_end_io(bio, err); | 5891 | dio_end_io(bio, err); |
5853 | } | 5892 | } |
5854 | 5893 | ||
@@ -5922,9 +5961,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
5922 | __btrfs_submit_bio_start_direct_io, | 5961 | __btrfs_submit_bio_start_direct_io, |
5923 | __btrfs_submit_bio_done); | 5962 | __btrfs_submit_bio_done); |
5924 | goto err; | 5963 | goto err; |
5925 | } else if (!skip_sum) | 5964 | } else if (!skip_sum) { |
5926 | btrfs_lookup_bio_sums_dio(root, inode, bio, | 5965 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, |
5927 | file_offset, csums); | 5966 | file_offset, csums); |
5967 | if (ret) | ||
5968 | goto err; | ||
5969 | } | ||
5928 | 5970 | ||
5929 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | 5971 | ret = btrfs_map_bio(root, rw, bio, 0, 1); |
5930 | err: | 5972 | err: |
@@ -5948,6 +5990,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5948 | int nr_pages = 0; | 5990 | int nr_pages = 0; |
5949 | u32 *csums = dip->csums; | 5991 | u32 *csums = dip->csums; |
5950 | int ret = 0; | 5992 | int ret = 0; |
5993 | int write = rw & REQ_WRITE; | ||
5951 | 5994 | ||
5952 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | 5995 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); |
5953 | if (!bio) | 5996 | if (!bio) |
@@ -5984,7 +6027,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5984 | goto out_err; | 6027 | goto out_err; |
5985 | } | 6028 | } |
5986 | 6029 | ||
5987 | if (!skip_sum) | 6030 | /* Write's use the ordered csums */ |
6031 | if (!write && !skip_sum) | ||
5988 | csums = csums + nr_pages; | 6032 | csums = csums + nr_pages; |
5989 | start_sector += submit_len >> 9; | 6033 | start_sector += submit_len >> 9; |
5990 | file_offset += submit_len; | 6034 | file_offset += submit_len; |
@@ -6052,7 +6096,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
6052 | } | 6096 | } |
6053 | dip->csums = NULL; | 6097 | dip->csums = NULL; |
6054 | 6098 | ||
6055 | if (!skip_sum) { | 6099 | /* Write's use the ordered csum stuff, so we don't need dip->csums */ |
6100 | if (!write && !skip_sum) { | ||
6056 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | 6101 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); |
6057 | if (!dip->csums) { | 6102 | if (!dip->csums) { |
6058 | kfree(dip); | 6103 | kfree(dip); |
@@ -6474,28 +6519,42 @@ out: | |||
6474 | return ret; | 6519 | return ret; |
6475 | } | 6520 | } |
6476 | 6521 | ||
6477 | static void btrfs_truncate(struct inode *inode) | 6522 | static int btrfs_truncate(struct inode *inode) |
6478 | { | 6523 | { |
6479 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6524 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6480 | int ret; | 6525 | int ret; |
6526 | int err = 0; | ||
6481 | struct btrfs_trans_handle *trans; | 6527 | struct btrfs_trans_handle *trans; |
6482 | unsigned long nr; | 6528 | unsigned long nr; |
6483 | u64 mask = root->sectorsize - 1; | 6529 | u64 mask = root->sectorsize - 1; |
6484 | 6530 | ||
6485 | if (!S_ISREG(inode->i_mode)) { | ||
6486 | WARN_ON(1); | ||
6487 | return; | ||
6488 | } | ||
6489 | |||
6490 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6531 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6491 | if (ret) | 6532 | if (ret) |
6492 | return; | 6533 | return ret; |
6493 | 6534 | ||
6494 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6535 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6495 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6536 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6496 | 6537 | ||
6538 | trans = btrfs_start_transaction(root, 5); | ||
6539 | if (IS_ERR(trans)) | ||
6540 | return PTR_ERR(trans); | ||
6541 | |||
6542 | btrfs_set_trans_block_group(trans, inode); | ||
6543 | |||
6544 | ret = btrfs_orphan_add(trans, inode); | ||
6545 | if (ret) { | ||
6546 | btrfs_end_transaction(trans, root); | ||
6547 | return ret; | ||
6548 | } | ||
6549 | |||
6550 | nr = trans->blocks_used; | ||
6551 | btrfs_end_transaction(trans, root); | ||
6552 | btrfs_btree_balance_dirty(root, nr); | ||
6553 | |||
6554 | /* Now start a transaction for the truncate */ | ||
6497 | trans = btrfs_start_transaction(root, 0); | 6555 | trans = btrfs_start_transaction(root, 0); |
6498 | BUG_ON(IS_ERR(trans)); | 6556 | if (IS_ERR(trans)) |
6557 | return PTR_ERR(trans); | ||
6499 | btrfs_set_trans_block_group(trans, inode); | 6558 | btrfs_set_trans_block_group(trans, inode); |
6500 | trans->block_rsv = root->orphan_block_rsv; | 6559 | trans->block_rsv = root->orphan_block_rsv; |
6501 | 6560 | ||
@@ -6522,29 +6581,38 @@ static void btrfs_truncate(struct inode *inode) | |||
6522 | while (1) { | 6581 | while (1) { |
6523 | if (!trans) { | 6582 | if (!trans) { |
6524 | trans = btrfs_start_transaction(root, 0); | 6583 | trans = btrfs_start_transaction(root, 0); |
6525 | BUG_ON(IS_ERR(trans)); | 6584 | if (IS_ERR(trans)) |
6585 | return PTR_ERR(trans); | ||
6526 | btrfs_set_trans_block_group(trans, inode); | 6586 | btrfs_set_trans_block_group(trans, inode); |
6527 | trans->block_rsv = root->orphan_block_rsv; | 6587 | trans->block_rsv = root->orphan_block_rsv; |
6528 | } | 6588 | } |
6529 | 6589 | ||
6530 | ret = btrfs_block_rsv_check(trans, root, | 6590 | ret = btrfs_block_rsv_check(trans, root, |
6531 | root->orphan_block_rsv, 0, 5); | 6591 | root->orphan_block_rsv, 0, 5); |
6532 | if (ret) { | 6592 | if (ret == -EAGAIN) { |
6533 | BUG_ON(ret != -EAGAIN); | ||
6534 | ret = btrfs_commit_transaction(trans, root); | 6593 | ret = btrfs_commit_transaction(trans, root); |
6535 | BUG_ON(ret); | 6594 | if (ret) |
6595 | return ret; | ||
6536 | trans = NULL; | 6596 | trans = NULL; |
6537 | continue; | 6597 | continue; |
6598 | } else if (ret) { | ||
6599 | err = ret; | ||
6600 | break; | ||
6538 | } | 6601 | } |
6539 | 6602 | ||
6540 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6603 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6541 | inode->i_size, | 6604 | inode->i_size, |
6542 | BTRFS_EXTENT_DATA_KEY); | 6605 | BTRFS_EXTENT_DATA_KEY); |
6543 | if (ret != -EAGAIN) | 6606 | if (ret != -EAGAIN) { |
6607 | err = ret; | ||
6544 | break; | 6608 | break; |
6609 | } | ||
6545 | 6610 | ||
6546 | ret = btrfs_update_inode(trans, root, inode); | 6611 | ret = btrfs_update_inode(trans, root, inode); |
6547 | BUG_ON(ret); | 6612 | if (ret) { |
6613 | err = ret; | ||
6614 | break; | ||
6615 | } | ||
6548 | 6616 | ||
6549 | nr = trans->blocks_used; | 6617 | nr = trans->blocks_used; |
6550 | btrfs_end_transaction(trans, root); | 6618 | btrfs_end_transaction(trans, root); |
@@ -6554,16 +6622,27 @@ static void btrfs_truncate(struct inode *inode) | |||
6554 | 6622 | ||
6555 | if (ret == 0 && inode->i_nlink > 0) { | 6623 | if (ret == 0 && inode->i_nlink > 0) { |
6556 | ret = btrfs_orphan_del(trans, inode); | 6624 | ret = btrfs_orphan_del(trans, inode); |
6557 | BUG_ON(ret); | 6625 | if (ret) |
6626 | err = ret; | ||
6627 | } else if (ret && inode->i_nlink > 0) { | ||
6628 | /* | ||
6629 | * Failed to do the truncate, remove us from the in memory | ||
6630 | * orphan list. | ||
6631 | */ | ||
6632 | ret = btrfs_orphan_del(NULL, inode); | ||
6558 | } | 6633 | } |
6559 | 6634 | ||
6560 | ret = btrfs_update_inode(trans, root, inode); | 6635 | ret = btrfs_update_inode(trans, root, inode); |
6561 | BUG_ON(ret); | 6636 | if (ret && !err) |
6637 | err = ret; | ||
6562 | 6638 | ||
6563 | nr = trans->blocks_used; | 6639 | nr = trans->blocks_used; |
6564 | ret = btrfs_end_transaction_throttle(trans, root); | 6640 | ret = btrfs_end_transaction_throttle(trans, root); |
6565 | BUG_ON(ret); | 6641 | if (ret && !err) |
6642 | err = ret; | ||
6566 | btrfs_btree_balance_dirty(root, nr); | 6643 | btrfs_btree_balance_dirty(root, nr); |
6644 | |||
6645 | return err; | ||
6567 | } | 6646 | } |
6568 | 6647 | ||
6569 | /* | 6648 | /* |
@@ -6630,9 +6709,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6630 | ei->index_cnt = (u64)-1; | 6709 | ei->index_cnt = (u64)-1; |
6631 | ei->last_unlink_trans = 0; | 6710 | ei->last_unlink_trans = 0; |
6632 | 6711 | ||
6633 | spin_lock_init(&ei->accounting_lock); | ||
6634 | atomic_set(&ei->outstanding_extents, 0); | 6712 | atomic_set(&ei->outstanding_extents, 0); |
6635 | ei->reserved_extents = 0; | 6713 | atomic_set(&ei->reserved_extents, 0); |
6636 | 6714 | ||
6637 | ei->ordered_data_close = 0; | 6715 | ei->ordered_data_close = 0; |
6638 | ei->orphan_meta_reserved = 0; | 6716 | ei->orphan_meta_reserved = 0; |
@@ -6668,7 +6746,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6668 | WARN_ON(!list_empty(&inode->i_dentry)); | 6746 | WARN_ON(!list_empty(&inode->i_dentry)); |
6669 | WARN_ON(inode->i_data.nrpages); | 6747 | WARN_ON(inode->i_data.nrpages); |
6670 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6748 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); |
6671 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6749 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); |
6672 | 6750 | ||
6673 | /* | 6751 | /* |
6674 | * This can happen where we create an inode, but somebody else also | 6752 | * This can happen where we create an inode, but somebody else also |
@@ -6760,6 +6838,8 @@ void btrfs_destroy_cachep(void) | |||
6760 | kmem_cache_destroy(btrfs_transaction_cachep); | 6838 | kmem_cache_destroy(btrfs_transaction_cachep); |
6761 | if (btrfs_path_cachep) | 6839 | if (btrfs_path_cachep) |
6762 | kmem_cache_destroy(btrfs_path_cachep); | 6840 | kmem_cache_destroy(btrfs_path_cachep); |
6841 | if (btrfs_free_space_cachep) | ||
6842 | kmem_cache_destroy(btrfs_free_space_cachep); | ||
6763 | } | 6843 | } |
6764 | 6844 | ||
6765 | int btrfs_init_cachep(void) | 6845 | int btrfs_init_cachep(void) |
@@ -6788,6 +6868,12 @@ int btrfs_init_cachep(void) | |||
6788 | if (!btrfs_path_cachep) | 6868 | if (!btrfs_path_cachep) |
6789 | goto fail; | 6869 | goto fail; |
6790 | 6870 | ||
6871 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | ||
6872 | sizeof(struct btrfs_free_space), 0, | ||
6873 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
6874 | if (!btrfs_free_space_cachep) | ||
6875 | goto fail; | ||
6876 | |||
6791 | return 0; | 6877 | return 0; |
6792 | fail: | 6878 | fail: |
6793 | btrfs_destroy_cachep(); | 6879 | btrfs_destroy_cachep(); |
@@ -6806,6 +6892,26 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6806 | return 0; | 6892 | return 0; |
6807 | } | 6893 | } |
6808 | 6894 | ||
6895 | /* | ||
6896 | * If a file is moved, it will inherit the cow and compression flags of the new | ||
6897 | * directory. | ||
6898 | */ | ||
6899 | static void fixup_inode_flags(struct inode *dir, struct inode *inode) | ||
6900 | { | ||
6901 | struct btrfs_inode *b_dir = BTRFS_I(dir); | ||
6902 | struct btrfs_inode *b_inode = BTRFS_I(inode); | ||
6903 | |||
6904 | if (b_dir->flags & BTRFS_INODE_NODATACOW) | ||
6905 | b_inode->flags |= BTRFS_INODE_NODATACOW; | ||
6906 | else | ||
6907 | b_inode->flags &= ~BTRFS_INODE_NODATACOW; | ||
6908 | |||
6909 | if (b_dir->flags & BTRFS_INODE_COMPRESS) | ||
6910 | b_inode->flags |= BTRFS_INODE_COMPRESS; | ||
6911 | else | ||
6912 | b_inode->flags &= ~BTRFS_INODE_COMPRESS; | ||
6913 | } | ||
6914 | |||
6809 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 6915 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
6810 | struct inode *new_dir, struct dentry *new_dentry) | 6916 | struct inode *new_dir, struct dentry *new_dentry) |
6811 | { | 6917 | { |
@@ -6908,11 +7014,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6908 | old_dentry->d_name.name, | 7014 | old_dentry->d_name.name, |
6909 | old_dentry->d_name.len); | 7015 | old_dentry->d_name.len); |
6910 | } else { | 7016 | } else { |
6911 | btrfs_inc_nlink(old_dentry->d_inode); | 7017 | ret = __btrfs_unlink_inode(trans, root, old_dir, |
6912 | ret = btrfs_unlink_inode(trans, root, old_dir, | 7018 | old_dentry->d_inode, |
6913 | old_dentry->d_inode, | 7019 | old_dentry->d_name.name, |
6914 | old_dentry->d_name.name, | 7020 | old_dentry->d_name.len); |
6915 | old_dentry->d_name.len); | 7021 | if (!ret) |
7022 | ret = btrfs_update_inode(trans, root, old_inode); | ||
6916 | } | 7023 | } |
6917 | BUG_ON(ret); | 7024 | BUG_ON(ret); |
6918 | 7025 | ||
@@ -6939,6 +7046,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6939 | } | 7046 | } |
6940 | } | 7047 | } |
6941 | 7048 | ||
7049 | fixup_inode_flags(new_dir, old_inode); | ||
7050 | |||
6942 | ret = btrfs_add_link(trans, new_dir, old_inode, | 7051 | ret = btrfs_add_link(trans, new_dir, old_inode, |
6943 | new_dentry->d_name.name, | 7052 | new_dentry->d_name.name, |
6944 | new_dentry->d_name.len, 0, index); | 7053 | new_dentry->d_name.len, 0, index); |
@@ -7355,7 +7464,6 @@ static const struct address_space_operations btrfs_symlink_aops = { | |||
7355 | }; | 7464 | }; |
7356 | 7465 | ||
7357 | static const struct inode_operations btrfs_file_inode_operations = { | 7466 | static const struct inode_operations btrfs_file_inode_operations = { |
7358 | .truncate = btrfs_truncate, | ||
7359 | .getattr = btrfs_getattr, | 7467 | .getattr = btrfs_getattr, |
7360 | .setattr = btrfs_setattr, | 7468 | .setattr = btrfs_setattr, |
7361 | .setxattr = btrfs_setxattr, | 7469 | .setxattr = btrfs_setxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d1bace3df9b6..7c07fe26b7cf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | ||
43 | #include "compat.h" | 44 | #include "compat.h" |
44 | #include "ctree.h" | 45 | #include "ctree.h" |
45 | #include "disk-io.h" | 46 | #include "disk-io.h" |
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) | |||
138 | return 0; | 139 | return 0; |
139 | } | 140 | } |
140 | 141 | ||
142 | static int check_flags(unsigned int flags) | ||
143 | { | ||
144 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | ||
145 | FS_NOATIME_FL | FS_NODUMP_FL | \ | ||
146 | FS_SYNC_FL | FS_DIRSYNC_FL | \ | ||
147 | FS_NOCOMP_FL | FS_COMPR_FL | \ | ||
148 | FS_NOCOW_FL | FS_COW_FL)) | ||
149 | return -EOPNOTSUPP; | ||
150 | |||
151 | if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) | ||
152 | return -EINVAL; | ||
153 | |||
154 | if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) | ||
155 | return -EINVAL; | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
141 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | 160 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) |
142 | { | 161 | { |
143 | struct inode *inode = file->f_path.dentry->d_inode; | 162 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -153,10 +172,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
153 | if (copy_from_user(&flags, arg, sizeof(flags))) | 172 | if (copy_from_user(&flags, arg, sizeof(flags))) |
154 | return -EFAULT; | 173 | return -EFAULT; |
155 | 174 | ||
156 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 175 | ret = check_flags(flags); |
157 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 176 | if (ret) |
158 | FS_SYNC_FL | FS_DIRSYNC_FL)) | 177 | return ret; |
159 | return -EOPNOTSUPP; | ||
160 | 178 | ||
161 | if (!inode_owner_or_capable(inode)) | 179 | if (!inode_owner_or_capable(inode)) |
162 | return -EACCES; | 180 | return -EACCES; |
@@ -201,6 +219,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
201 | else | 219 | else |
202 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 220 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
203 | 221 | ||
222 | /* | ||
223 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | ||
224 | * flag may be changed automatically if compression code won't make | ||
225 | * things smaller. | ||
226 | */ | ||
227 | if (flags & FS_NOCOMP_FL) { | ||
228 | ip->flags &= ~BTRFS_INODE_COMPRESS; | ||
229 | ip->flags |= BTRFS_INODE_NOCOMPRESS; | ||
230 | } else if (flags & FS_COMPR_FL) { | ||
231 | ip->flags |= BTRFS_INODE_COMPRESS; | ||
232 | ip->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
233 | } | ||
234 | if (flags & FS_NOCOW_FL) | ||
235 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
236 | else if (flags & FS_COW_FL) | ||
237 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
204 | 238 | ||
205 | trans = btrfs_join_transaction(root, 1); | 239 | trans = btrfs_join_transaction(root, 1); |
206 | BUG_ON(IS_ERR(trans)); | 240 | BUG_ON(IS_ERR(trans)); |
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
213 | btrfs_end_transaction(trans, root); | 247 | btrfs_end_transaction(trans, root); |
214 | 248 | ||
215 | mnt_drop_write(file->f_path.mnt); | 249 | mnt_drop_write(file->f_path.mnt); |
250 | |||
251 | ret = 0; | ||
216 | out_unlock: | 252 | out_unlock: |
217 | mutex_unlock(&inode->i_mutex); | 253 | mutex_unlock(&inode->i_mutex); |
218 | return 0; | 254 | return ret; |
219 | } | 255 | } |
220 | 256 | ||
221 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | 257 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) |
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
225 | return put_user(inode->i_generation, arg); | 261 | return put_user(inode->i_generation, arg); |
226 | } | 262 | } |
227 | 263 | ||
264 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | ||
265 | { | ||
266 | struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; | ||
267 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
268 | struct btrfs_device *device; | ||
269 | struct request_queue *q; | ||
270 | struct fstrim_range range; | ||
271 | u64 minlen = ULLONG_MAX; | ||
272 | u64 num_devices = 0; | ||
273 | int ret; | ||
274 | |||
275 | if (!capable(CAP_SYS_ADMIN)) | ||
276 | return -EPERM; | ||
277 | |||
278 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
279 | list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { | ||
280 | if (!device->bdev) | ||
281 | continue; | ||
282 | q = bdev_get_queue(device->bdev); | ||
283 | if (blk_queue_discard(q)) { | ||
284 | num_devices++; | ||
285 | minlen = min((u64)q->limits.discard_granularity, | ||
286 | minlen); | ||
287 | } | ||
288 | } | ||
289 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
290 | if (!num_devices) | ||
291 | return -EOPNOTSUPP; | ||
292 | |||
293 | if (copy_from_user(&range, arg, sizeof(range))) | ||
294 | return -EFAULT; | ||
295 | |||
296 | range.minlen = max(range.minlen, minlen); | ||
297 | ret = btrfs_trim_fs(root, &range); | ||
298 | if (ret < 0) | ||
299 | return ret; | ||
300 | |||
301 | if (copy_to_user(arg, &range, sizeof(range))) | ||
302 | return -EFAULT; | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
228 | static noinline int create_subvol(struct btrfs_root *root, | 307 | static noinline int create_subvol(struct btrfs_root *root, |
229 | struct dentry *dentry, | 308 | struct dentry *dentry, |
230 | char *name, int namelen, | 309 | char *name, int namelen, |
@@ -409,7 +488,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
409 | if (ret) | 488 | if (ret) |
410 | goto fail; | 489 | goto fail; |
411 | 490 | ||
412 | btrfs_orphan_cleanup(pending_snapshot->snap); | 491 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); |
492 | if (ret) | ||
493 | goto fail; | ||
413 | 494 | ||
414 | parent = dget_parent(dentry); | 495 | parent = dget_parent(dentry); |
415 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | 496 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); |
@@ -2348,12 +2429,15 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp | |||
2348 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | 2429 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; |
2349 | struct btrfs_trans_handle *trans; | 2430 | struct btrfs_trans_handle *trans; |
2350 | u64 transid; | 2431 | u64 transid; |
2432 | int ret; | ||
2351 | 2433 | ||
2352 | trans = btrfs_start_transaction(root, 0); | 2434 | trans = btrfs_start_transaction(root, 0); |
2353 | if (IS_ERR(trans)) | 2435 | if (IS_ERR(trans)) |
2354 | return PTR_ERR(trans); | 2436 | return PTR_ERR(trans); |
2355 | transid = trans->transid; | 2437 | transid = trans->transid; |
2356 | btrfs_commit_transaction_async(trans, root, 0); | 2438 | ret = btrfs_commit_transaction_async(trans, root, 0); |
2439 | if (ret) | ||
2440 | return ret; | ||
2357 | 2441 | ||
2358 | if (argp) | 2442 | if (argp) |
2359 | if (copy_to_user(argp, &transid, sizeof(transid))) | 2443 | if (copy_to_user(argp, &transid, sizeof(transid))) |
@@ -2388,6 +2472,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2388 | return btrfs_ioctl_setflags(file, argp); | 2472 | return btrfs_ioctl_setflags(file, argp); |
2389 | case FS_IOC_GETVERSION: | 2473 | case FS_IOC_GETVERSION: |
2390 | return btrfs_ioctl_getversion(file, argp); | 2474 | return btrfs_ioctl_getversion(file, argp); |
2475 | case FITRIM: | ||
2476 | return btrfs_ioctl_fitrim(file, argp); | ||
2391 | case BTRFS_IOC_SNAP_CREATE: | 2477 | case BTRFS_IOC_SNAP_CREATE: |
2392 | return btrfs_ioctl_snap_create(file, argp, 0); | 2478 | return btrfs_ioctl_snap_create(file, argp, 0); |
2393 | case BTRFS_IOC_SNAP_CREATE_V2: | 2479 | case BTRFS_IOC_SNAP_CREATE_V2: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 083a55477375..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
202 | INIT_LIST_HEAD(&entry->list); | 202 | INIT_LIST_HEAD(&entry->list); |
203 | INIT_LIST_HEAD(&entry->root_extent_list); | 203 | INIT_LIST_HEAD(&entry->root_extent_list); |
204 | 204 | ||
205 | trace_btrfs_ordered_extent_add(inode, entry); | ||
206 | |||
205 | spin_lock(&tree->lock); | 207 | spin_lock(&tree->lock); |
206 | node = tree_insert(&tree->tree, file_offset, | 208 | node = tree_insert(&tree->tree, file_offset, |
207 | &entry->rb_node); | 209 | &entry->rb_node); |
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
387 | struct list_head *cur; | 389 | struct list_head *cur; |
388 | struct btrfs_ordered_sum *sum; | 390 | struct btrfs_ordered_sum *sum; |
389 | 391 | ||
392 | trace_btrfs_ordered_extent_put(entry->inode, entry); | ||
393 | |||
390 | if (atomic_dec_and_test(&entry->refs)) { | 394 | if (atomic_dec_and_test(&entry->refs)) { |
391 | while (!list_empty(&entry->list)) { | 395 | while (!list_empty(&entry->list)) { |
392 | cur = entry->list.next; | 396 | cur = entry->list.next; |
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
420 | spin_lock(&root->fs_info->ordered_extent_lock); | 424 | spin_lock(&root->fs_info->ordered_extent_lock); |
421 | list_del_init(&entry->root_extent_list); | 425 | list_del_init(&entry->root_extent_list); |
422 | 426 | ||
427 | trace_btrfs_ordered_extent_remove(inode, entry); | ||
428 | |||
423 | /* | 429 | /* |
424 | * we have no more ordered extents for this inode and | 430 | * we have no more ordered extents for this inode and |
425 | * no dirty pages. We can safely remove it from the | 431 | * no dirty pages. We can safely remove it from the |
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
585 | u64 start = entry->file_offset; | 591 | u64 start = entry->file_offset; |
586 | u64 end = start + entry->len - 1; | 592 | u64 end = start + entry->len - 1; |
587 | 593 | ||
594 | trace_btrfs_ordered_extent_start(inode, entry); | ||
595 | |||
588 | /* | 596 | /* |
589 | * pages in the range can be dirty, clean or writeback. We | 597 | * pages in the range can be dirty, clean or writeback. We |
590 | * start IO on any dirty ones so the wait doesn't stall waiting | 598 | * start IO on any dirty ones so the wait doesn't stall waiting |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 31ade5802ae8..58250e09eb05 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1724,6 +1724,7 @@ again: | |||
1724 | 1724 | ||
1725 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1725 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1726 | old_ptr_gen); | 1726 | old_ptr_gen); |
1727 | BUG_ON(!eb); | ||
1727 | btrfs_tree_lock(eb); | 1728 | btrfs_tree_lock(eb); |
1728 | if (cow) { | 1729 | if (cow) { |
1729 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1730 | ret = btrfs_cow_block(trans, dest, eb, parent, |
@@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2513 | blocksize = btrfs_level_size(root, node->level); | 2514 | blocksize = btrfs_level_size(root, node->level); |
2514 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2515 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2515 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2516 | eb = read_tree_block(root, bytenr, blocksize, generation); |
2517 | if (!eb) { | ||
2518 | err = -EIO; | ||
2519 | goto next; | ||
2520 | } | ||
2516 | btrfs_tree_lock(eb); | 2521 | btrfs_tree_lock(eb); |
2517 | btrfs_set_lock_blocking(eb); | 2522 | btrfs_set_lock_blocking(eb); |
2518 | 2523 | ||
@@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2670 | BUG_ON(block->key_ready); | 2675 | BUG_ON(block->key_ready); |
2671 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2676 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2672 | block->key.objectid, block->key.offset); | 2677 | block->key.objectid, block->key.offset); |
2678 | BUG_ON(!eb); | ||
2673 | WARN_ON(btrfs_header_level(eb) != block->level); | 2679 | WARN_ON(btrfs_header_level(eb) != block->level); |
2674 | if (block->level == 0) | 2680 | if (block->level == 0) |
2675 | btrfs_item_key_to_cpu(eb, &block->key, 0); | 2681 | btrfs_item_key_to_cpu(eb, &block->key, 0); |
@@ -4209,7 +4215,7 @@ out: | |||
4209 | if (IS_ERR(fs_root)) | 4215 | if (IS_ERR(fs_root)) |
4210 | err = PTR_ERR(fs_root); | 4216 | err = PTR_ERR(fs_root); |
4211 | else | 4217 | else |
4212 | btrfs_orphan_cleanup(fs_root); | 4218 | err = btrfs_orphan_cleanup(fs_root); |
4213 | } | 4219 | } |
4214 | return err; | 4220 | return err; |
4215 | } | 4221 | } |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6a1086e83ffc..29b2d7c930eb 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
88 | search_key.offset = (u64)-1; | 88 | search_key.offset = (u64)-1; |
89 | 89 | ||
90 | path = btrfs_alloc_path(); | 90 | path = btrfs_alloc_path(); |
91 | BUG_ON(!path); | 91 | if (!path) |
92 | return -ENOMEM; | ||
92 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | 93 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
93 | if (ret < 0) | 94 | if (ret < 0) |
94 | goto out; | 95 | goto out; |
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
332 | struct extent_buffer *leaf; | 333 | struct extent_buffer *leaf; |
333 | 334 | ||
334 | path = btrfs_alloc_path(); | 335 | path = btrfs_alloc_path(); |
335 | BUG_ON(!path); | 336 | if (!path) |
337 | return -ENOMEM; | ||
336 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); | 338 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); |
337 | if (ret < 0) | 339 | if (ret < 0) |
338 | goto out; | 340 | goto out; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d39a9895d932..2edfc039f098 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -52,6 +52,9 @@ | |||
52 | #include "export.h" | 52 | #include "export.h" |
53 | #include "compression.h" | 53 | #include "compression.h" |
54 | 54 | ||
55 | #define CREATE_TRACE_POINTS | ||
56 | #include <trace/events/btrfs.h> | ||
57 | |||
55 | static const struct super_operations btrfs_super_ops; | 58 | static const struct super_operations btrfs_super_ops; |
56 | 59 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | 60 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, |
@@ -620,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
620 | struct btrfs_root *root = btrfs_sb(sb); | 623 | struct btrfs_root *root = btrfs_sb(sb); |
621 | int ret; | 624 | int ret; |
622 | 625 | ||
626 | trace_btrfs_sync_fs(wait); | ||
627 | |||
623 | if (!wait) { | 628 | if (!wait) { |
624 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 629 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
625 | return 0; | 630 | return 0; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3d73c8d93bbb..ce48eb59d615 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -57,7 +57,8 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
57 | if (!cur_trans) { | 57 | if (!cur_trans) { |
58 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 58 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, |
59 | GFP_NOFS); | 59 | GFP_NOFS); |
60 | BUG_ON(!cur_trans); | 60 | if (!cur_trans) |
61 | return -ENOMEM; | ||
61 | root->fs_info->generation++; | 62 | root->fs_info->generation++; |
62 | cur_trans->num_writers = 1; | 63 | cur_trans->num_writers = 1; |
63 | cur_trans->num_joined = 0; | 64 | cur_trans->num_joined = 0; |
@@ -195,7 +196,11 @@ again: | |||
195 | wait_current_trans(root); | 196 | wait_current_trans(root); |
196 | 197 | ||
197 | ret = join_transaction(root); | 198 | ret = join_transaction(root); |
198 | BUG_ON(ret); | 199 | if (ret < 0) { |
200 | if (type != TRANS_JOIN_NOLOCK) | ||
201 | mutex_unlock(&root->fs_info->trans_mutex); | ||
202 | return ERR_PTR(ret); | ||
203 | } | ||
199 | 204 | ||
200 | cur_trans = root->fs_info->running_transaction; | 205 | cur_trans = root->fs_info->running_transaction; |
201 | cur_trans->use_count++; | 206 | cur_trans->use_count++; |
@@ -1156,7 +1161,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1156 | struct btrfs_transaction *cur_trans; | 1161 | struct btrfs_transaction *cur_trans; |
1157 | 1162 | ||
1158 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | 1163 | ac = kmalloc(sizeof(*ac), GFP_NOFS); |
1159 | BUG_ON(!ac); | 1164 | if (!ac) |
1165 | return -ENOMEM; | ||
1160 | 1166 | ||
1161 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1167 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1162 | ac->root = root; | 1168 | ac->root = root; |
@@ -1389,6 +1395,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1389 | put_transaction(cur_trans); | 1395 | put_transaction(cur_trans); |
1390 | put_transaction(cur_trans); | 1396 | put_transaction(cur_trans); |
1391 | 1397 | ||
1398 | trace_btrfs_transaction_commit(root); | ||
1399 | |||
1392 | mutex_unlock(&root->fs_info->trans_mutex); | 1400 | mutex_unlock(&root->fs_info->trans_mutex); |
1393 | 1401 | ||
1394 | if (current->journal_info == trans) | 1402 | if (current->journal_info == trans) |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a4bbb854dfd2..c50271ad3157 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct inode *dir; | 799 | struct inode *dir; |
800 | int ret; | 800 | int ret; |
801 | struct btrfs_inode_ref *ref; | 801 | struct btrfs_inode_ref *ref; |
802 | struct btrfs_dir_item *di; | ||
803 | struct inode *inode; | 802 | struct inode *inode; |
804 | char *name; | 803 | char *name; |
805 | int namelen; | 804 | int namelen; |
806 | unsigned long ref_ptr; | 805 | unsigned long ref_ptr; |
807 | unsigned long ref_end; | 806 | unsigned long ref_end; |
807 | int search_done = 0; | ||
808 | 808 | ||
809 | /* | 809 | /* |
810 | * it is possible that we didn't log all the parent directories | 810 | * it is possible that we didn't log all the parent directories |
@@ -845,7 +845,10 @@ again: | |||
845 | * existing back reference, and we don't want to create | 845 | * existing back reference, and we don't want to create |
846 | * dangling pointers in the directory. | 846 | * dangling pointers in the directory. |
847 | */ | 847 | */ |
848 | conflict_again: | 848 | |
849 | if (search_done) | ||
850 | goto insert; | ||
851 | |||
849 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | 852 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); |
850 | if (ret == 0) { | 853 | if (ret == 0) { |
851 | char *victim_name; | 854 | char *victim_name; |
@@ -886,37 +889,21 @@ conflict_again: | |||
886 | ret = btrfs_unlink_inode(trans, root, dir, | 889 | ret = btrfs_unlink_inode(trans, root, dir, |
887 | inode, victim_name, | 890 | inode, victim_name, |
888 | victim_name_len); | 891 | victim_name_len); |
889 | kfree(victim_name); | ||
890 | btrfs_release_path(root, path); | ||
891 | goto conflict_again; | ||
892 | } | 892 | } |
893 | kfree(victim_name); | 893 | kfree(victim_name); |
894 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 894 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
895 | } | 895 | } |
896 | BUG_ON(ret); | 896 | BUG_ON(ret); |
897 | } | ||
898 | btrfs_release_path(root, path); | ||
899 | |||
900 | /* look for a conflicting sequence number */ | ||
901 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
902 | btrfs_inode_ref_index(eb, ref), | ||
903 | name, namelen, 0); | ||
904 | if (di && !IS_ERR(di)) { | ||
905 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
906 | BUG_ON(ret); | ||
907 | } | ||
908 | btrfs_release_path(root, path); | ||
909 | 897 | ||
910 | 898 | /* | |
911 | /* look for a conflicting name */ | 899 | * NOTE: we have searched root tree and checked the |
912 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 900 | * coresponding ref, it does not need to check again. |
913 | name, namelen, 0); | 901 | */ |
914 | if (di && !IS_ERR(di)) { | 902 | search_done = 1; |
915 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
916 | BUG_ON(ret); | ||
917 | } | 903 | } |
918 | btrfs_release_path(root, path); | 904 | btrfs_release_path(root, path); |
919 | 905 | ||
906 | insert: | ||
920 | /* insert our name */ | 907 | /* insert our name */ |
921 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 908 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
922 | btrfs_inode_ref_index(eb, ref)); | 909 | btrfs_inode_ref_index(eb, ref)); |
@@ -1286,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1286 | ptr_end = ptr + item_size; | 1273 | ptr_end = ptr + item_size; |
1287 | while (ptr < ptr_end) { | 1274 | while (ptr < ptr_end) { |
1288 | di = (struct btrfs_dir_item *)ptr; | 1275 | di = (struct btrfs_dir_item *)ptr; |
1276 | if (verify_dir_item(root, eb, di)) | ||
1277 | return -EIO; | ||
1289 | name_len = btrfs_dir_name_len(eb, di); | 1278 | name_len = btrfs_dir_name_len(eb, di); |
1290 | ret = replay_one_name(trans, root, path, eb, di, key); | 1279 | ret = replay_one_name(trans, root, path, eb, di, key); |
1291 | BUG_ON(ret); | 1280 | BUG_ON(ret); |
@@ -1412,6 +1401,11 @@ again: | |||
1412 | ptr_end = ptr + item_size; | 1401 | ptr_end = ptr + item_size; |
1413 | while (ptr < ptr_end) { | 1402 | while (ptr < ptr_end) { |
1414 | di = (struct btrfs_dir_item *)ptr; | 1403 | di = (struct btrfs_dir_item *)ptr; |
1404 | if (verify_dir_item(root, eb, di)) { | ||
1405 | ret = -EIO; | ||
1406 | goto out; | ||
1407 | } | ||
1408 | |||
1415 | name_len = btrfs_dir_name_len(eb, di); | 1409 | name_len = btrfs_dir_name_len(eb, di); |
1416 | name = kmalloc(name_len, GFP_NOFS); | 1410 | name = kmalloc(name_len, GFP_NOFS); |
1417 | if (!name) { | 1411 | if (!name) { |
@@ -1821,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1821 | int orig_level; | 1815 | int orig_level; |
1822 | 1816 | ||
1823 | path = btrfs_alloc_path(); | 1817 | path = btrfs_alloc_path(); |
1824 | BUG_ON(!path); | 1818 | if (!path) |
1819 | return -ENOMEM; | ||
1825 | 1820 | ||
1826 | level = btrfs_header_level(log->node); | 1821 | level = btrfs_header_level(log->node); |
1827 | orig_level = level; | 1822 | orig_level = level; |
@@ -3107,9 +3102,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3107 | .stage = 0, | 3102 | .stage = 0, |
3108 | }; | 3103 | }; |
3109 | 3104 | ||
3110 | fs_info->log_root_recovering = 1; | ||
3111 | path = btrfs_alloc_path(); | 3105 | path = btrfs_alloc_path(); |
3112 | BUG_ON(!path); | 3106 | if (!path) |
3107 | return -ENOMEM; | ||
3108 | |||
3109 | fs_info->log_root_recovering = 1; | ||
3113 | 3110 | ||
3114 | trans = btrfs_start_transaction(fs_info->tree_root, 0); | 3111 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3115 | BUG_ON(IS_ERR(trans)); | 3112 | BUG_ON(IS_ERR(trans)); |
@@ -3117,7 +3114,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3117 | wc.trans = trans; | 3114 | wc.trans = trans; |
3118 | wc.pin = 1; | 3115 | wc.pin = 1; |
3119 | 3116 | ||
3120 | walk_log_tree(trans, log_root_tree, &wc); | 3117 | ret = walk_log_tree(trans, log_root_tree, &wc); |
3118 | BUG_ON(ret); | ||
3121 | 3119 | ||
3122 | again: | 3120 | again: |
3123 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 3121 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
@@ -3141,8 +3139,7 @@ again: | |||
3141 | 3139 | ||
3142 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 3140 | log = btrfs_read_fs_root_no_radix(log_root_tree, |
3143 | &found_key); | 3141 | &found_key); |
3144 | BUG_ON(!log); | 3142 | BUG_ON(IS_ERR(log)); |
3145 | |||
3146 | 3143 | ||
3147 | tmp_key.objectid = found_key.offset; | 3144 | tmp_key.objectid = found_key.offset; |
3148 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; | 3145 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9d554e8e6583..309a57b9fc85 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -33,17 +33,6 @@ | |||
33 | #include "volumes.h" | 33 | #include "volumes.h" |
34 | #include "async-thread.h" | 34 | #include "async-thread.h" |
35 | 35 | ||
36 | struct map_lookup { | ||
37 | u64 type; | ||
38 | int io_align; | ||
39 | int io_width; | ||
40 | int stripe_len; | ||
41 | int sector_size; | ||
42 | int num_stripes; | ||
43 | int sub_stripes; | ||
44 | struct btrfs_bio_stripe stripes[]; | ||
45 | }; | ||
46 | |||
47 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
48 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
49 | struct btrfs_device *device); | 38 | struct btrfs_device *device); |
@@ -1879,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1879 | 1868 | ||
1880 | BUG_ON(ret); | 1869 | BUG_ON(ret); |
1881 | 1870 | ||
1871 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | ||
1872 | |||
1882 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1873 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1883 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 1874 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
1884 | BUG_ON(ret); | 1875 | BUG_ON(ret); |
@@ -2606,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2606 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2597 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2607 | map->num_stripes, sub_stripes); | 2598 | map->num_stripes, sub_stripes); |
2608 | 2599 | ||
2600 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); | ||
2601 | |||
2609 | em = alloc_extent_map(GFP_NOFS); | 2602 | em = alloc_extent_map(GFP_NOFS); |
2610 | if (!em) { | 2603 | if (!em) { |
2611 | ret = -ENOMEM; | 2604 | ret = -ENOMEM; |
@@ -2714,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2714 | item_size); | 2707 | item_size); |
2715 | BUG_ON(ret); | 2708 | BUG_ON(ret); |
2716 | } | 2709 | } |
2710 | |||
2717 | kfree(chunk); | 2711 | kfree(chunk); |
2718 | return 0; | 2712 | return 0; |
2719 | } | 2713 | } |
@@ -2918,7 +2912,10 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2918 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2912 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2919 | u64 offset; | 2913 | u64 offset; |
2920 | u64 stripe_offset; | 2914 | u64 stripe_offset; |
2915 | u64 stripe_end_offset; | ||
2921 | u64 stripe_nr; | 2916 | u64 stripe_nr; |
2917 | u64 stripe_nr_orig; | ||
2918 | u64 stripe_nr_end; | ||
2922 | int stripes_allocated = 8; | 2919 | int stripes_allocated = 8; |
2923 | int stripes_required = 1; | 2920 | int stripes_required = 1; |
2924 | int stripe_index; | 2921 | int stripe_index; |
@@ -2927,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2927 | int max_errors = 0; | 2924 | int max_errors = 0; |
2928 | struct btrfs_multi_bio *multi = NULL; | 2925 | struct btrfs_multi_bio *multi = NULL; |
2929 | 2926 | ||
2930 | if (multi_ret && !(rw & REQ_WRITE)) | 2927 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
2931 | stripes_allocated = 1; | 2928 | stripes_allocated = 1; |
2932 | again: | 2929 | again: |
2933 | if (multi_ret) { | 2930 | if (multi_ret) { |
@@ -2968,7 +2965,15 @@ again: | |||
2968 | max_errors = 1; | 2965 | max_errors = 1; |
2969 | } | 2966 | } |
2970 | } | 2967 | } |
2971 | if (multi_ret && (rw & REQ_WRITE) && | 2968 | if (rw & REQ_DISCARD) { |
2969 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2970 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2971 | BTRFS_BLOCK_GROUP_DUP | | ||
2972 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2973 | stripes_required = map->num_stripes; | ||
2974 | } | ||
2975 | } | ||
2976 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
2972 | stripes_allocated < stripes_required) { | 2977 | stripes_allocated < stripes_required) { |
2973 | stripes_allocated = map->num_stripes; | 2978 | stripes_allocated = map->num_stripes; |
2974 | free_extent_map(em); | 2979 | free_extent_map(em); |
@@ -2988,12 +2993,15 @@ again: | |||
2988 | /* stripe_offset is the offset of this block in its stripe*/ | 2993 | /* stripe_offset is the offset of this block in its stripe*/ |
2989 | stripe_offset = offset - stripe_offset; | 2994 | stripe_offset = offset - stripe_offset; |
2990 | 2995 | ||
2991 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 2996 | if (rw & REQ_DISCARD) |
2992 | BTRFS_BLOCK_GROUP_RAID10 | | 2997 | *length = min_t(u64, em->len - offset, *length); |
2993 | BTRFS_BLOCK_GROUP_DUP)) { | 2998 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
2999 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3000 | BTRFS_BLOCK_GROUP_RAID10 | | ||
3001 | BTRFS_BLOCK_GROUP_DUP)) { | ||
2994 | /* we limit the length of each bio to what fits in a stripe */ | 3002 | /* we limit the length of each bio to what fits in a stripe */ |
2995 | *length = min_t(u64, em->len - offset, | 3003 | *length = min_t(u64, em->len - offset, |
2996 | map->stripe_len - stripe_offset); | 3004 | map->stripe_len - stripe_offset); |
2997 | } else { | 3005 | } else { |
2998 | *length = em->len - offset; | 3006 | *length = em->len - offset; |
2999 | } | 3007 | } |
@@ -3003,8 +3011,19 @@ again: | |||
3003 | 3011 | ||
3004 | num_stripes = 1; | 3012 | num_stripes = 1; |
3005 | stripe_index = 0; | 3013 | stripe_index = 0; |
3006 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 3014 | stripe_nr_orig = stripe_nr; |
3007 | if (rw & REQ_WRITE) | 3015 | stripe_nr_end = (offset + *length + map->stripe_len - 1) & |
3016 | (~(map->stripe_len - 1)); | ||
3017 | do_div(stripe_nr_end, map->stripe_len); | ||
3018 | stripe_end_offset = stripe_nr_end * map->stripe_len - | ||
3019 | (offset + *length); | ||
3020 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3021 | if (rw & REQ_DISCARD) | ||
3022 | num_stripes = min_t(u64, map->num_stripes, | ||
3023 | stripe_nr_end - stripe_nr_orig); | ||
3024 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
3025 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
3026 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
3008 | num_stripes = map->num_stripes; | 3027 | num_stripes = map->num_stripes; |
3009 | else if (mirror_num) | 3028 | else if (mirror_num) |
3010 | stripe_index = mirror_num - 1; | 3029 | stripe_index = mirror_num - 1; |
@@ -3015,7 +3034,7 @@ again: | |||
3015 | } | 3034 | } |
3016 | 3035 | ||
3017 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3036 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
3018 | if (rw & REQ_WRITE) | 3037 | if (rw & (REQ_WRITE | REQ_DISCARD)) |
3019 | num_stripes = map->num_stripes; | 3038 | num_stripes = map->num_stripes; |
3020 | else if (mirror_num) | 3039 | else if (mirror_num) |
3021 | stripe_index = mirror_num - 1; | 3040 | stripe_index = mirror_num - 1; |
@@ -3028,6 +3047,10 @@ again: | |||
3028 | 3047 | ||
3029 | if (rw & REQ_WRITE) | 3048 | if (rw & REQ_WRITE) |
3030 | num_stripes = map->sub_stripes; | 3049 | num_stripes = map->sub_stripes; |
3050 | else if (rw & REQ_DISCARD) | ||
3051 | num_stripes = min_t(u64, map->sub_stripes * | ||
3052 | (stripe_nr_end - stripe_nr_orig), | ||
3053 | map->num_stripes); | ||
3031 | else if (mirror_num) | 3054 | else if (mirror_num) |
3032 | stripe_index += mirror_num - 1; | 3055 | stripe_index += mirror_num - 1; |
3033 | else { | 3056 | else { |
@@ -3045,12 +3068,101 @@ again: | |||
3045 | } | 3068 | } |
3046 | BUG_ON(stripe_index >= map->num_stripes); | 3069 | BUG_ON(stripe_index >= map->num_stripes); |
3047 | 3070 | ||
3048 | for (i = 0; i < num_stripes; i++) { | 3071 | if (rw & REQ_DISCARD) { |
3049 | multi->stripes[i].physical = | 3072 | for (i = 0; i < num_stripes; i++) { |
3050 | map->stripes[stripe_index].physical + | 3073 | multi->stripes[i].physical = |
3051 | stripe_offset + stripe_nr * map->stripe_len; | 3074 | map->stripes[stripe_index].physical + |
3052 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 3075 | stripe_offset + stripe_nr * map->stripe_len; |
3053 | stripe_index++; | 3076 | multi->stripes[i].dev = map->stripes[stripe_index].dev; |
3077 | |||
3078 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3079 | u64 stripes; | ||
3080 | u32 last_stripe = 0; | ||
3081 | int j; | ||
3082 | |||
3083 | div_u64_rem(stripe_nr_end - 1, | ||
3084 | map->num_stripes, | ||
3085 | &last_stripe); | ||
3086 | |||
3087 | for (j = 0; j < map->num_stripes; j++) { | ||
3088 | u32 test; | ||
3089 | |||
3090 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3091 | map->num_stripes, &test); | ||
3092 | if (test == stripe_index) | ||
3093 | break; | ||
3094 | } | ||
3095 | stripes = stripe_nr_end - 1 - j; | ||
3096 | do_div(stripes, map->num_stripes); | ||
3097 | multi->stripes[i].length = map->stripe_len * | ||
3098 | (stripes - stripe_nr + 1); | ||
3099 | |||
3100 | if (i == 0) { | ||
3101 | multi->stripes[i].length -= | ||
3102 | stripe_offset; | ||
3103 | stripe_offset = 0; | ||
3104 | } | ||
3105 | if (stripe_index == last_stripe) | ||
3106 | multi->stripes[i].length -= | ||
3107 | stripe_end_offset; | ||
3108 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3109 | u64 stripes; | ||
3110 | int j; | ||
3111 | int factor = map->num_stripes / | ||
3112 | map->sub_stripes; | ||
3113 | u32 last_stripe = 0; | ||
3114 | |||
3115 | div_u64_rem(stripe_nr_end - 1, | ||
3116 | factor, &last_stripe); | ||
3117 | last_stripe *= map->sub_stripes; | ||
3118 | |||
3119 | for (j = 0; j < factor; j++) { | ||
3120 | u32 test; | ||
3121 | |||
3122 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3123 | factor, &test); | ||
3124 | |||
3125 | if (test == | ||
3126 | stripe_index / map->sub_stripes) | ||
3127 | break; | ||
3128 | } | ||
3129 | stripes = stripe_nr_end - 1 - j; | ||
3130 | do_div(stripes, factor); | ||
3131 | multi->stripes[i].length = map->stripe_len * | ||
3132 | (stripes - stripe_nr + 1); | ||
3133 | |||
3134 | if (i < map->sub_stripes) { | ||
3135 | multi->stripes[i].length -= | ||
3136 | stripe_offset; | ||
3137 | if (i == map->sub_stripes - 1) | ||
3138 | stripe_offset = 0; | ||
3139 | } | ||
3140 | if (stripe_index >= last_stripe && | ||
3141 | stripe_index <= (last_stripe + | ||
3142 | map->sub_stripes - 1)) { | ||
3143 | multi->stripes[i].length -= | ||
3144 | stripe_end_offset; | ||
3145 | } | ||
3146 | } else | ||
3147 | multi->stripes[i].length = *length; | ||
3148 | |||
3149 | stripe_index++; | ||
3150 | if (stripe_index == map->num_stripes) { | ||
3151 | /* This could only happen for RAID0/10 */ | ||
3152 | stripe_index = 0; | ||
3153 | stripe_nr++; | ||
3154 | } | ||
3155 | } | ||
3156 | } else { | ||
3157 | for (i = 0; i < num_stripes; i++) { | ||
3158 | multi->stripes[i].physical = | ||
3159 | map->stripes[stripe_index].physical + | ||
3160 | stripe_offset + | ||
3161 | stripe_nr * map->stripe_len; | ||
3162 | multi->stripes[i].dev = | ||
3163 | map->stripes[stripe_index].dev; | ||
3164 | stripe_index++; | ||
3165 | } | ||
3054 | } | 3166 | } |
3055 | if (multi_ret) { | 3167 | if (multi_ret) { |
3056 | *multi_ret = multi; | 3168 | *multi_ret = multi; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7fb59d45fe8c..cc2eadaf7a27 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -126,6 +126,7 @@ struct btrfs_fs_devices { | |||
126 | struct btrfs_bio_stripe { | 126 | struct btrfs_bio_stripe { |
127 | struct btrfs_device *dev; | 127 | struct btrfs_device *dev; |
128 | u64 physical; | 128 | u64 physical; |
129 | u64 length; /* only used for discard mappings */ | ||
129 | }; | 130 | }; |
130 | 131 | ||
131 | struct btrfs_multi_bio { | 132 | struct btrfs_multi_bio { |
@@ -145,6 +146,17 @@ struct btrfs_device_info { | |||
145 | u64 max_avail; | 146 | u64 max_avail; |
146 | }; | 147 | }; |
147 | 148 | ||
149 | struct map_lookup { | ||
150 | u64 type; | ||
151 | int io_align; | ||
152 | int io_width; | ||
153 | int stripe_len; | ||
154 | int sector_size; | ||
155 | int num_stripes; | ||
156 | int sub_stripes; | ||
157 | struct btrfs_bio_stripe stripes[]; | ||
158 | }; | ||
159 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | 160 | /* Used to sort the devices by max_avail(descending sort) */ |
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | 161 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); |
150 | 162 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d779cefcfd7d..a5303b871b13 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -242,6 +242,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
242 | break; | 242 | break; |
243 | 243 | ||
244 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 244 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
245 | if (verify_dir_item(root, leaf, di)) | ||
246 | continue; | ||
245 | 247 | ||
246 | name_len = btrfs_dir_name_len(leaf, di); | 248 | name_len = btrfs_dir_name_len(leaf, di); |
247 | total_size += name_len + 1; | 249 | total_size += name_len + 1; |