Diffstat (limited to 'fs/btrfs')
 -rw-r--r--  fs/btrfs/acl.c              |  11
 -rw-r--r--  fs/btrfs/btrfs_inode.h      |   3
 -rw-r--r--  fs/btrfs/compression.c      |  17
 -rw-r--r--  fs/btrfs/ctree.c            | 159
 -rw-r--r--  fs/btrfs/ctree.h            |  32
 -rw-r--r--  fs/btrfs/delayed-ref.c      |   6
 -rw-r--r--  fs/btrfs/dir-item.c         |  45
 -rw-r--r--  fs/btrfs/disk-io.c          | 217
 -rw-r--r--  fs/btrfs/extent-tree.c      | 354
 -rw-r--r--  fs/btrfs/extent_io.c        |  87
 -rw-r--r--  fs/btrfs/extent_io.h        |   3
 -rw-r--r--  fs/btrfs/extent_map.c       |   2
 -rw-r--r--  fs/btrfs/file-item.c        |   5
 -rw-r--r--  fs/btrfs/file.c             | 391
 -rw-r--r--  fs/btrfs/free-space-cache.c | 713
 -rw-r--r--  fs/btrfs/free-space-cache.h |   2
 -rw-r--r--  fs/btrfs/inode-map.c        |   3
 -rw-r--r--  fs/btrfs/inode.c            | 557
 -rw-r--r--  fs/btrfs/ioctl.c            | 112
 -rw-r--r--  fs/btrfs/ordered-data.c     |   8
 -rw-r--r--  fs/btrfs/relocation.c       |  10
 -rw-r--r--  fs/btrfs/root-tree.c        |  24
 -rw-r--r--  fs/btrfs/super.c            |  66
 -rw-r--r--  fs/btrfs/transaction.c      |  64
 -rw-r--r--  fs/btrfs/transaction.h      |   4
 -rw-r--r--  fs/btrfs/tree-log.c         |  57
 -rw-r--r--  fs/btrfs/volumes.c          | 235
 -rw-r--r--  fs/btrfs/volumes.h          |  12
 -rw-r--r--  fs/btrfs/xattr.c            |  35
 -rw-r--r--  fs/btrfs/zlib.c             |   3
 30 files changed, 2031 insertions(+), 1206 deletions(-)
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9c949348510b..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;
 
-	if (!is_owner_or_cap(dentry->d_inode))
+	if (!inode_owner_or_capable(dentry->d_inode))
 		return -EPERM;
 
 	if (!IS_POSIXACL(dentry->d_inode))
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		} else if (IS_ERR(acl)) {
 			return PTR_ERR(acl);
 		}
 	}
 
 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);
 
 	return ret;
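
Read together, the two acl.c hunks change the set-ACL path from "silently drop a bad ACL" to validate-before-apply: a non-NULL result from posix_acl_from_xattr() is now run through posix_acl_valid() before btrfs_set_acl() is called, and the new out: label makes sure posix_acl_release() runs on the early-exit path too. One subtlety worth keeping in mind when reading the new control flow: an ERR_PTR() value is non-NULL, so it satisfies if (acl) and reaches posix_acl_valid() before the else if (IS_ERR(acl)) arm can catch it. A defensive ordering that avoids that hazard looks like this (a sketch of the pattern, not the code this patch ships):

	acl = posix_acl_from_xattr(value, size);
	if (IS_ERR(acl))			/* test the error pointer first */
		return PTR_ERR(acl);
	if (acl) {				/* NULL just means "remove the ACL" */
		ret = posix_acl_valid(acl);	/* reject malformed entries */
		if (ret)
			goto out;		/* still drop the reference below */
	}
	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
out:
	posix_acl_release(acl);			/* safe to call with NULL */
	return ret;
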
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ccc991c542df..57c3bb2884ce 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -136,9 +136,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	spinlock_t accounting_lock;
 	atomic_t outstanding_extents;
-	int reserved_extents;
+	atomic_t reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
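
Dropping accounting_lock is possible because reserved_extents becomes an atomic_t alongside outstanding_extents: every adjustment is a single atomic read-modify-write, so no spinlock is needed to keep the plain int consistent. A sketch of what a caller's update looks like before and after (the callers themselves are outside this hunk):

	/* before: the int had to be protected by accounting_lock */
	spin_lock(&BTRFS_I(inode)->accounting_lock);
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	/* after: one lock-free atomic operation */
	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);

The trade-off is that the two counters can no longer be updated as a single unit, which is acceptable only when no invariant ties them together inside one critical section.
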
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4d2110eafe29..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		return -ENOMEM;
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+	if (!bio) {
+		kfree(cb);
+		return -ENOMEM;
+	}
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 	atomic_inc(&cb->pending_bios);
@@ -657,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		atomic_inc(&cb->pending_bios);
 
 		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-			btrfs_lookup_bio_sums(root, inode, comp_bio,
-					      sums);
+			ret = btrfs_lookup_bio_sums(root, inode,
+						    comp_bio, sums);
+			BUG_ON(ret);
 		}
 		sums += (comp_bio->bi_size + root->sectorsize - 1) /
 			root->sectorsize;
@@ -683,8 +690,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+		BUG_ON(ret);
+	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
 	BUG_ON(ret);
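
The two new error paths in btrfs_submit_compressed_write() follow the standard kernel unwind pattern: check each allocation as it happens, and on failure free everything allocated so far, in reverse order, before returning -ENOMEM. GFP_NOFS matters here because these allocations sit on the writeback path; it keeps direct reclaim from re-entering the filesystem and deadlocking. The skeleton of the pattern, condensed from the hunks above:

	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
	if (!cb)
		return -ENOMEM;			/* nothing to undo yet */
	/* ... initialize cb fields ... */
	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
	if (!bio) {
		kfree(cb);			/* undo the earlier allocation */
		return -ENOMEM;
	}
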
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b5baff0dccfe..84d7ca1fe0ba 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
-	spin_lock(&root->node_lock);
-	eb = root->node;
+
+	rcu_read_lock();
+	eb = rcu_dereference(root->node);
 	extent_buffer_get(eb);
-	spin_unlock(&root->node_lock);
+	rcu_read_unlock();
 	return eb;
 }
 
@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	while (1) {
 		eb = btrfs_root_node(root);
 		btrfs_tree_lock(eb);
-
-		spin_lock(&root->node_lock);
-		if (eb == root->node) {
-			spin_unlock(&root->node_lock);
-			break;
-		}
-		spin_unlock(&root->node_lock);
-
+		if (eb == root->node)
+			break;
 		btrfs_tree_unlock(eb);
 		free_extent_buffer(eb);
 	}
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	else
 		parent_start = 0;
 
-	spin_lock(&root->node_lock);
-	root->node = cow;
 	extent_buffer_get(cow);
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, cow);
 
 	btrfs_free_tree_block(trans, root, buf, parent_start,
 			      last_ref);
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 
 	ret = __btrfs_cow_block(trans, root, buf, parent,
 				 parent_slot, cow_ret, search_start, 0);
+
+	trace_btrfs_cow_block(root, buf, *cow_ret);
+
 	return ret;
 }
 
@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		if (!cur) {
 			cur = read_tree_block(root, blocknr,
 					      blocksize, gen);
+			if (!cur)
+				return -EIO;
 		} else if (!uptodate) {
 			btrfs_read_buffer(cur, gen);
 		}
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
 	return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-/*
- * extra debugging checks to make sure all the items in a key are
- * well formed and in the proper order
- */
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *parent = NULL;
-	struct extent_buffer *node = path->nodes[level];
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key node_key;
-	int parent_slot;
-	int slot;
-	struct btrfs_key cpukey;
-	u32 nritems = btrfs_header_nritems(node);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	slot = path->slots[level];
-	BUG_ON(nritems == 0);
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_node_key(node, &node_key, 0);
-		BUG_ON(memcmp(&parent_key, &node_key,
-			      sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(node));
-	}
-	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-	if (slot != 0) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
-	}
-	if (slot < nritems - 1) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
-	}
-	return 0;
-}
-
-/*
- * extra checking to make sure all the items in a leaf are
- * well formed and in the proper order
- */
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
-{
-	struct extent_buffer *leaf = path->nodes[level];
-	struct extent_buffer *parent = NULL;
-	int parent_slot;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key leaf_key;
-	int slot = path->slots[0];
-
-	u32 nritems = btrfs_header_nritems(leaf);
-
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
-
-	if (nritems == 0)
-		return 0;
-
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_item_key(leaf, &leaf_key, 0);
-
-		BUG_ON(memcmp(&parent_key, &leaf_key,
-		       sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(leaf));
-	}
-	if (slot != 0 && slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-		if (comp_keys(&leaf_key, &cpukey) <= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad key\n", slot);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, slot - 1) !=
-		    btrfs_item_end_nr(leaf, slot)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	if (slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
-		if (btrfs_item_offset_nr(leaf, slot) !=
-		    btrfs_item_end_nr(leaf, slot + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk(KERN_CRIT "slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
-	return 0;
-}
-
-static noinline int check_block(struct btrfs_root *root,
-				struct btrfs_path *path, int level)
-{
-	return 0;
-	if (level == 0)
-		return check_leaf(root, path, level);
-	return check_node(root, path, level);
-}
 
 /*
  * search for key in the extent_buffer.  The items start at offset p,
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		goto enospc;
 	}
 
-	spin_lock(&root->node_lock);
-	root->node = child;
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, child);
 
 	add_root_to_dirty_list(root);
 	btrfs_tree_unlock(child);
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		}
 	}
 	/* double check we haven't messed things up */
-	check_block(root, path, level);
 	if (orig_ptr !=
 	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
 		BUG();
@@ -1798,12 +1677,6 @@ cow_done:
 		if (!cow)
 			btrfs_unlock_up_safe(p, level + 1);
 
-		ret = check_block(root, p, level);
-		if (ret) {
-			ret = -1;
-			goto done;
-		}
-
 		ret = bin_search(b, key, level, &slot);
 
 		if (level != 0) {
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
 	btrfs_mark_buffer_dirty(c);
 
-	spin_lock(&root->node_lock);
 	old = root->node;
-	root->node = c;
-	spin_unlock(&root->node_lock);
+	rcu_assign_pointer(root->node, c);
 
 	/* the super has an extra ref to root->node */
 	free_extent_buffer(old);
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	unsigned long ptr;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
 	if (!ret) {
 		leaf = path->nodes[0];
@@ -4217,6 +4089,7 @@ find_next_key:
 		}
 		btrfs_set_path_blocking(path);
 		cur = read_node_slot(root, cur, slot);
+		BUG_ON(!cur);
 
 		btrfs_tree_lock(cur);
 
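
The thread running through these ctree.c hunks is the conversion of root->node from a spinlock-protected pointer to an RCU-published one. The writer side fully initializes the replacement node, takes its reference, and only then publishes it with rcu_assign_pointer(), whose barrier orders the initialization before the pointer store; the reader side brackets the load with rcu_read_lock()/rcu_read_unlock() and pins the buffer with a refcount before leaving the read-side section. Condensed from the hunks above:

	/* reader (btrfs_root_node): pin the buffer, then leave RCU */
	rcu_read_lock();
	eb = rcu_dereference(root->node);	/* ordered load of the pointer */
	extent_buffer_get(eb);			/* refcount keeps eb alive afterwards */
	rcu_read_unlock();

	/* writer (__btrfs_cow_block and friends): publish when ready */
	extent_buffer_get(cow);
	rcu_assign_pointer(root->node, cow);	/* barrier, then pointer store */

This is also why btrfs_lock_root_node() still re-checks eb == root->node after taking the tree lock: the root can be swapped between the RCU read and the lock acquisition, and on a mismatch the reader drops the lock and reference and retries.
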
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7f78cc78fdd0..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/kobject.h>
+#include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
 #include "extent_io.h"
 #include "extent_map.h"
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
+extern struct kmem_cache *btrfs_free_space_cachep;
 struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
@@ -738,8 +740,10 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;
 
-	int full;		/* indicates that we cannot allocate any more
-				   chunks for this space */
+	int full:1;		/* indicates that we cannot allocate any more
+				   chunks for this space */
+	int chunk_alloc:1;	/* set if we are allocating a chunk */
+
 	int force_alloc;	/* set if we need to force a chunk alloc for
 				   this space */
 
@@ -782,9 +786,6 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;
 
-	/* if this cluster simply points at a bitmap in the block group */
-	bool points_to_bitmap;
-
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -1283,6 +1284,9 @@ struct btrfs_root {
 #define BTRFS_INODE_NODUMP		(1 << 8)
 #define BTRFS_INODE_NOATIME		(1 << 9)
 #define BTRFS_INODE_DIRSYNC		(1 << 10)
+#define BTRFS_INODE_COMPRESS		(1 << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)
 
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2157,6 +2161,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+				u64 num_bytes, int reserve, int sinfo);
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2227,10 +2233,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
 				   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes);
+			       u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type);
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
 
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -2355,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
 			struct extent_buffer *node);
+void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
+
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
@@ -2392,6 +2402,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 			    struct btrfs_path *path, u64 dir,
 			    const char *name, u16 name_len,
 			    int mod);
+int verify_dir_item(struct btrfs_root *root,
+		    struct extent_buffer *leaf,
+		    struct btrfs_dir_item *dir_item);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -2528,7 +2541,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 		       struct inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
-void btrfs_orphan_cleanup(struct btrfs_root *root);
+int btrfs_orphan_cleanup(struct btrfs_root *root);
 void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending,
 			       u64 *bytes_to_reserve);
@@ -2536,7 +2549,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
 				struct btrfs_pending_snapshot *pending);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 int btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_root *root);
@@ -2565,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
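
A small C detail behind the full:1 / chunk_alloc:1 change in btrfs_space_info: a signed one-bit bitfield can only hold 0 and -1, so with gcc a field assigned 1 reads back as -1. Truth-value tests still behave, but comparisons against 1 quietly fail, which is why unsigned int flag:1 is the more defensive idiom. A hypothetical userspace illustration (not code from the patch):

	#include <stdio.h>

	struct flags {			/* mirrors the two fields above */
		int full:1;
		int chunk_alloc:1;
	};

	int main(void)
	{
		struct flags f = { 0, 0 };

		f.full = 1;
		if (f.full)		/* fine: nonzero tests true */
			puts("full");
		if (f.full == 1)	/* never taken: reads back as -1 with gcc */
			puts("unreachable");
		return 0;
	}
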
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e807b143b857..bce28f653899 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
 	INIT_LIST_HEAD(&head_ref->cluster);
 	mutex_init(&head_ref->mutex);
 
+	trace_btrfs_delayed_ref_head(ref, head_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	}
 	full_ref->level = level;
 
+	trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
 	full_ref->objectid = owner;
 	full_ref->offset = offset;
 
+	trace_btrfs_delayed_data_ref(ref, full_ref, action);
+
 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f0cad5ae5be7..c62f02f6ae69 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		ret = PTR_ERR(dir_item);
 		if (ret == -EEXIST)
 			goto second_insert;
-		goto out;
+		goto out_free;
 	}
 
 	leaf = path->nodes[0];
@@ -170,7 +170,7 @@ second_insert:
 	/* FIXME, use some real flag for selecting the extra index */
 	if (root == root->fs_info->tree_root) {
 		ret = 0;
-		goto out;
+		goto out_free;
 	}
 	btrfs_release_path(root, path);
 
@@ -180,7 +180,7 @@ second_insert:
 					      name, name_len);
 	if (IS_ERR(dir_item)) {
 		ret2 = PTR_ERR(dir_item);
-		goto out;
+		goto out_free;
 	}
 	leaf = path->nodes[0];
 	btrfs_cpu_key_to_disk(&disk_key, location);
@@ -192,7 +192,9 @@ second_insert:
 	name_ptr = (unsigned long)(dir_item + 1);
 	write_extent_buffer(leaf, name, name_ptr, name_len);
 	btrfs_mark_buffer_dirty(leaf);
-out:
+
+out_free:
+
 	btrfs_free_path(path);
 	if (ret)
 		return ret;
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
 
 	leaf = path->nodes[0];
 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	if (verify_dir_item(root, leaf, dir_item))
+		return NULL;
+
 	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
 	while (cur < total_len) {
 		this_len = sizeof(*dir_item) +
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 	}
 	return ret;
 }
+
+int verify_dir_item(struct btrfs_root *root,
+		    struct extent_buffer *leaf,
+		    struct btrfs_dir_item *dir_item)
+{
+	u16 namelen = BTRFS_NAME_LEN;
+	u8 type = btrfs_dir_type(leaf, dir_item);
+
+	if (type >= BTRFS_FT_MAX) {
+		printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+		       (int)type);
+		return 1;
+	}
+
+	if (type == BTRFS_FT_XATTR)
+		namelen = XATTR_NAME_MAX;
+
+	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+		printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
+		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		return 1;
+	}
+
+	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+	if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
+		printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		return 1;
+	}
+
+	return 0;
+}
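
verify_dir_item() gives every reader of on-disk directory items one place to bounds-check the type, name length, and data length before the variable-length payload is trusted, and the btrfs_match_dir_item_name() hunk above shows the intended call-site pattern: a corrupt item is treated as a lookup miss instead of being parsed. Condensed from that hunk:

	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
	if (verify_dir_item(root, leaf, dir_item))
		return NULL;	/* corrupt on-disk item: behave as "not found" */

One detail worth flagging for a follow-up: the name-length branch prints btrfs_dir_data_len() in its error message even though the check it reports on uses btrfs_dir_name_len(), which looks like a copy-paste slip (the check itself is correct).
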
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
 #include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
+#include <asm/unaligned.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
 
 void btrfs_csum_final(u32 crc, char *result)
 {
-	*(__le32 *)result = ~cpu_to_le32(crc);
+	put_unaligned_le32(~crc, result);
 }
 
 /*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 	int num_copies = 0;
 	int mirror_num = 0;
 
+	clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 	while (1) {
 		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@
 		    !verify_parent_transid(io_tree, eb, parent_transid))
 			return ret;
 
+		/*
+		 * This buffer's crc is fine, but its contents are corrupted, so
+		 * there is no reason to read the other copies, they won't be
+		 * any less wrong.
+		 */
+		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+			return ret;
+
 		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
 					      eb->start, eb->len);
 		if (num_copies == 1)
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 	return ret;
 }
 
+#define CORRUPT(reason, eb, root, slot)				\
+	printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu,"	\
+	       "root=%llu, slot=%d\n", reason,			\
+	       (unsigned long long)btrfs_header_bytenr(eb),	\
+	       (unsigned long long)root->objectid, slot)
+
+static noinline int check_leaf(struct btrfs_root *root,
+			       struct extent_buffer *leaf)
+{
+	struct btrfs_key key;
+	struct btrfs_key leaf_key;
+	u32 nritems = btrfs_header_nritems(leaf);
+	int slot;
+
+	if (nritems == 0)
+		return 0;
+
+	/* Check the 0 item */
+	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+	    BTRFS_LEAF_DATA_SIZE(root)) {
+		CORRUPT("invalid item offset size pair", leaf, root, 0);
+		return -EIO;
+	}
+
+	/*
+	 * Check to make sure each item's keys are in the correct order and
+	 * their offsets make sense.  We only have to loop through nritems-1
+	 * because we check the current slot against the next slot, which
+	 * verifies the next slot's offset+size makes sense and that the
+	 * current slot's offset is correct.
+	 */
+	for (slot = 0; slot < nritems - 1; slot++) {
+		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+
+		/* Make sure the keys are in the right order */
+		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+			CORRUPT("bad key order", leaf, root, slot);
+			return -EIO;
+		}
+
+		/*
+		 * Make sure the offset and ends are right, remember that the
+		 * item data starts at the end of the leaf and grows towards
+		 * the front.
+		 */
+		if (btrfs_item_offset_nr(leaf, slot) !=
+			btrfs_item_end_nr(leaf, slot + 1)) {
+			CORRUPT("slot offset bad", leaf, root, slot);
+			return -EIO;
+		}
+
+		/*
+		 * Check to make sure that we don't point outside of the leaf,
+		 * just in case all the items are consistent with each other
+		 * but all point outside of the leaf.
+		 */
+		if (btrfs_item_end_nr(leaf, slot) >
+		    BTRFS_LEAF_DATA_SIZE(root)) {
+			CORRUPT("slot end outside of leaf", leaf, root, slot);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
 {
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	btrfs_set_buffer_lockdep_class(eb, found_level);
 
 	ret = csum_tree_block(root, eb, 1);
-	if (ret)
+	if (ret) {
+		ret = -EIO;
+		goto err;
+	}
+
+	/*
+	 * If this is a leaf block and it is corrupt, set the corrupt bit so
+	 * that we don't try and read the other copies of this block, just
+	 * return -EIO.
+	 */
+	if (found_level == 0 && check_leaf(root, eb)) {
+		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 		ret = -EIO;
+	}
 
 	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
 	end = eb->start + end - 1;
@@ -847,7 +936,6 @@ static const struct address_space_operations btree_aops = {
 	.writepages	= btree_writepages,
 	.releasepage	= btree_releasepage,
 	.invalidatepage = btree_invalidatepage,
-	.sync_page	= block_sync_page,
 #ifdef CONFIG_MIGRATION
 	.migratepage	= btree_migratepage,
 #endif
@@ -1160,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 			     root, fs_info, location->objectid);
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path) {
+		kfree(root);
+		return ERR_PTR(-ENOMEM);
+	}
 	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
 	if (ret == 0) {
 		l = path->nodes[0];
@@ -1184,8 +1275,10 @@
 	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 out:
-	if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
+	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
 		root->ref_cows = 1;
+		btrfs_check_and_init_root_item(&root->root_item);
+	}
 
 	return root;
 }
@@ -1331,82 +1424,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 }
 
 /*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct btrfs_device *device;
-	struct btrfs_fs_info *info;
-
-	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
-		if (!device->bdev)
-			continue;
-
-		bdi = blk_get_backing_dev_info(device->bdev);
-		if (bdi->unplug_io_fn)
-			bdi->unplug_io_fn(bdi, page);
-	}
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct inode *inode;
-	struct extent_map_tree *em_tree;
-	struct extent_map *em;
-	struct address_space *mapping;
-	u64 offset;
-
-	/* the generic O_DIRECT read code does this */
-	if (1 || !page) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	/*
-	 * page->mapping may change at any time. Get a consistent copy
-	 * and use that for everything below
-	 */
-	smp_mb();
-	mapping = page->mapping;
-	if (!mapping)
-		return;
-
-	inode = mapping->host;
-
-	/*
-	 * don't do the expensive searching for a small number of
-	 * devices
-	 */
-	if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	offset = page_offset(page);
-
-	em_tree = &BTRFS_I(inode)->extent_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-	read_unlock(&em_tree->lock);
-	if (!em) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		free_extent_map(em);
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-	offset = offset - em->start;
-	btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
-			  em->block_start + offset, page);
-	free_extent_map(em);
-}
-
-/*
  * If this fails, caller must call bdi_destroy() to get rid of the
  * bdi again.
  */
@@ -1420,8 +1437,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 		return err;
 
 	bdi->ra_pages = default_backing_dev_info.ra_pages;
-	bdi->unplug_io_fn = btrfs_unplug_io_fn;
-	bdi->unplug_io_data = info;
 	bdi->congested_fn = btrfs_congested_fn;
 	bdi->congested_data = info;
 	return 0;
@@ -1632,6 +1647,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_bdi;
 	}
 
+	fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1762,6 +1779,12 @@
 
 	btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
 
+	/*
+	 * In the long term, we'll store the compression type in the super
+	 * block, and it'll be used for per file compression control.
+	 */
+	fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+
 	ret = btrfs_parse_options(tree_root, options);
 	if (ret) {
 		err = ret;
@@ -1967,6 +1990,12 @@
 	fs_info->metadata_alloc_profile = (u64)-1;
 	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
 
+	ret = btrfs_init_space_info(fs_info);
+	if (ret) {
+		printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+		goto fail_block_groups;
+	}
+
 	ret = btrfs_read_block_groups(extent_root);
 	if (ret) {
 		printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2058,9 +2087,14 @@
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		down_read(&fs_info->cleanup_work_sem);
-		btrfs_orphan_cleanup(fs_info->fs_root);
-		btrfs_orphan_cleanup(fs_info->tree_root);
+		err = btrfs_orphan_cleanup(fs_info->fs_root);
+		if (!err)
+			err = btrfs_orphan_cleanup(fs_info->tree_root);
 		up_read(&fs_info->cleanup_work_sem);
+		if (err) {
+			close_ctree(tree_root);
+			return ERR_PTR(err);
+		}
 	}
 
 	return tree_root;
@@ -2435,8 +2469,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 
 		root_objectid = gang[ret - 1]->root_key.objectid + 1;
 		for (i = 0; i < ret; i++) {
+			int err;
+
 			root_objectid = gang[i]->root_key.objectid;
-			btrfs_orphan_cleanup(gang[i]);
+			err = btrfs_orphan_cleanup(gang[i]);
+			if (err)
+				return err;
 		}
 		root_objectid++;
 	}
@@ -2947,7 +2985,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 			break;
 
 		/* opt_discard */
-		ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+		if (btrfs_test_opt(root, DISCARD))
+			ret = btrfs_error_discard_extent(root, start,
+							 end + 1 - start,
+							 NULL);
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
@@ -3016,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 		btrfs_destroy_pinned_extent(root,
 					    root->fs_info->pinned_extents);
 
-		t->use_count = 0;
+		atomic_set(&t->use_count, 0);
 		list_del_init(&t->list);
 		memset(t, 0, sizeof(*t));
 		kmem_cache_free(btrfs_transaction_cachep, t);
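
The btrfs_csum_final() change is about alignment rather than byte order: result points into a caller-supplied buffer with no alignment guarantee, so the old *(__le32 *)result store is undefined behaviour, and a real fault on strict-alignment architectures, whenever the address is not 4-byte aligned. put_unaligned_le32() stores the same little-endian value safely, compiling to a plain 32-bit store where the CPU allows it and to byte stores elsewhere. For illustration, the byte-wise equivalent of put_unaligned_le32(~crc, result):

	u32 v = ~crc;			/* the value being stored */
	result[0] = v & 0xff;		/* least-significant byte first */
	result[1] = (v >> 8) & 0xff;
	result[2] = (v >> 16) & 0xff;
	result[3] = (v >> 24) & 0xff;
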
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b3089b5c2df..31f33ba56fe8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -33,11 +33,28 @@ | |||
33 | #include "locking.h" | 33 | #include "locking.h" |
34 | #include "free-space-cache.h" | 34 | #include "free-space-cache.h" |
35 | 35 | ||
36 | /* control flags for do_chunk_alloc's force field | ||
37 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | ||
38 | * if we really need one. | ||
39 | * | ||
40 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
41 | * | ||
42 | * CHUNK_ALLOC_LIMITED means to only try and allocate one | ||
43 | * if we have very few chunks already allocated. This is | ||
44 | * used as part of the clustering code to help make sure | ||
45 | * we have a good pool of storage to cluster in, without | ||
46 | * filling the FS with empty chunks | ||
47 | * | ||
48 | */ | ||
49 | enum { | ||
50 | CHUNK_ALLOC_NO_FORCE = 0, | ||
51 | CHUNK_ALLOC_FORCE = 1, | ||
52 | CHUNK_ALLOC_LIMITED = 2, | ||
53 | }; | ||
54 | |||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 55 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 56 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc); | 57 | u64 bytenr, u64 num_bytes, int alloc); |
39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve, int sinfo); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 58 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 59 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 60 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -442,7 +459,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
442 | * allocate blocks for the tree root we can't do the fast caching since | 459 | * allocate blocks for the tree root we can't do the fast caching since |
443 | * we likely hold important locks. | 460 | * we likely hold important locks. |
444 | */ | 461 | */ |
445 | if (!trans->transaction->in_commit && | 462 | if (trans && (!trans->transaction->in_commit) && |
446 | (root && root != root->fs_info->tree_root)) { | 463 | (root && root != root->fs_info->tree_root)) { |
447 | spin_lock(&cache->lock); | 464 | spin_lock(&cache->lock); |
448 | if (cache->cached != BTRFS_CACHE_NO) { | 465 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -471,7 +488,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
471 | if (load_cache_only) | 488 | if (load_cache_only) |
472 | return 0; | 489 | return 0; |
473 | 490 | ||
474 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 491 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
475 | BUG_ON(!caching_ctl); | 492 | BUG_ON(!caching_ctl); |
476 | 493 | ||
477 | INIT_LIST_HEAD(&caching_ctl->list); | 494 | INIT_LIST_HEAD(&caching_ctl->list); |
@@ -1740,39 +1757,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1740 | return ret; | 1757 | return ret; |
1741 | } | 1758 | } |
1742 | 1759 | ||
1743 | static void btrfs_issue_discard(struct block_device *bdev, | 1760 | static int btrfs_issue_discard(struct block_device *bdev, |
1744 | u64 start, u64 len) | 1761 | u64 start, u64 len) |
1745 | { | 1762 | { |
1746 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); | 1763 | return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); |
1747 | } | 1764 | } |
1748 | 1765 | ||
1749 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1766 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1750 | u64 num_bytes) | 1767 | u64 num_bytes, u64 *actual_bytes) |
1751 | { | 1768 | { |
1752 | int ret; | 1769 | int ret; |
1753 | u64 map_length = num_bytes; | 1770 | u64 discarded_bytes = 0; |
1754 | struct btrfs_multi_bio *multi = NULL; | 1771 | struct btrfs_multi_bio *multi = NULL; |
1755 | 1772 | ||
1756 | if (!btrfs_test_opt(root, DISCARD)) | ||
1757 | return 0; | ||
1758 | 1773 | ||
1759 | /* Tell the block device(s) that the sectors can be discarded */ | 1774 | /* Tell the block device(s) that the sectors can be discarded */ |
1760 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1775 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
1761 | bytenr, &map_length, &multi, 0); | 1776 | bytenr, &num_bytes, &multi, 0); |
1762 | if (!ret) { | 1777 | if (!ret) { |
1763 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1778 | struct btrfs_bio_stripe *stripe = multi->stripes; |
1764 | int i; | 1779 | int i; |
1765 | 1780 | ||
1766 | if (map_length > num_bytes) | ||
1767 | map_length = num_bytes; | ||
1768 | 1781 | ||
1769 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1782 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1770 | btrfs_issue_discard(stripe->dev->bdev, | 1783 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1771 | stripe->physical, | 1784 | stripe->physical, |
1772 | map_length); | 1785 | stripe->length); |
1786 | if (!ret) | ||
1787 | discarded_bytes += stripe->length; | ||
1788 | else if (ret != -EOPNOTSUPP) | ||
1789 | break; | ||
1773 | } | 1790 | } |
1774 | kfree(multi); | 1791 | kfree(multi); |
1775 | } | 1792 | } |
1793 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1794 | ret = 0; | ||
1795 | |||
1796 | if (actual_bytes) | ||
1797 | *actual_bytes = discarded_bytes; | ||
1798 | |||
1776 | 1799 | ||
1777 | return ret; | 1800 | return ret; |
1778 | } | 1801 | } |
@@ -3015,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3015 | found->bytes_readonly = 0; | 3038 | found->bytes_readonly = 0; |
3016 | found->bytes_may_use = 0; | 3039 | found->bytes_may_use = 0; |
3017 | found->full = 0; | 3040 | found->full = 0; |
3018 | found->force_alloc = 0; | 3041 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3042 | found->chunk_alloc = 0; | ||
3019 | *space_info = found; | 3043 | *space_info = found; |
3020 | list_add_rcu(&found->list, &info->space_info); | 3044 | list_add_rcu(&found->list, &info->space_info); |
3021 | atomic_set(&found->caching_threads, 0); | 3045 | atomic_set(&found->caching_threads, 0); |
@@ -3146,7 +3170,7 @@ again: | |||
3146 | if (!data_sinfo->full && alloc_chunk) { | 3170 | if (!data_sinfo->full && alloc_chunk) { |
3147 | u64 alloc_target; | 3171 | u64 alloc_target; |
3148 | 3172 | ||
3149 | data_sinfo->force_alloc = 1; | 3173 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
3150 | spin_unlock(&data_sinfo->lock); | 3174 | spin_unlock(&data_sinfo->lock); |
3151 | alloc: | 3175 | alloc: |
3152 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3176 | alloc_target = btrfs_get_alloc_profile(root, 1); |
@@ -3156,7 +3180,8 @@ alloc: | |||
3156 | 3180 | ||
3157 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3181 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3158 | bytes + 2 * 1024 * 1024, | 3182 | bytes + 2 * 1024 * 1024, |
3159 | alloc_target, 0); | 3183 | alloc_target, |
3184 | CHUNK_ALLOC_NO_FORCE); | ||
3160 | btrfs_end_transaction(trans, root); | 3185 | btrfs_end_transaction(trans, root); |
3161 | if (ret < 0) { | 3186 | if (ret < 0) { |
3162 | if (ret != -ENOSPC) | 3187 | if (ret != -ENOSPC) |
@@ -3235,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3235 | rcu_read_lock(); | 3260 | rcu_read_lock(); |
3236 | list_for_each_entry_rcu(found, head, list) { | 3261 | list_for_each_entry_rcu(found, head, list) { |
3237 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 3262 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) |
3238 | found->force_alloc = 1; | 3263 | found->force_alloc = CHUNK_ALLOC_FORCE; |
3239 | } | 3264 | } |
3240 | rcu_read_unlock(); | 3265 | rcu_read_unlock(); |
3241 | } | 3266 | } |
3242 | 3267 | ||
3243 | static int should_alloc_chunk(struct btrfs_root *root, | 3268 | static int should_alloc_chunk(struct btrfs_root *root, |
3244 | struct btrfs_space_info *sinfo, u64 alloc_bytes) | 3269 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
3270 | int force) | ||
3245 | { | 3271 | { |
3246 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3272 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
3273 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | ||
3247 | u64 thresh; | 3274 | u64 thresh; |
3248 | 3275 | ||
3249 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3276 | if (force == CHUNK_ALLOC_FORCE) |
3250 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | 3277 | return 1; |
3278 | |||
3279 | /* | ||
3280 | * in limited mode, we want to have some free space up to | ||
3281 | * about 1% of the FS size. | ||
3282 | */ | ||
3283 | if (force == CHUNK_ALLOC_LIMITED) { | ||
3284 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | ||
3285 | thresh = max_t(u64, 64 * 1024 * 1024, | ||
3286 | div_factor_fine(thresh, 1)); | ||
3287 | |||
3288 | if (num_bytes - num_allocated < thresh) | ||
3289 | return 1; | ||
3290 | } | ||
3291 | |||
3292 | /* | ||
3293 | * we have two similar checks here, one based on a percentage | ||
3294 | * and one based on a hard number of 256MB. The idea | ||
3295 | * is that if we have a good amount of free | ||
3296 | * room, don't allocate a chunk. A good mount has | ||
3297 | * less than 80% of its allocated chunks in use, | ||
3298 | * or more than 256MB free | ||
3299 | */ | ||
3300 | if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3251 | return 0; | 3301 | return 0; |
3252 | 3302 | ||
3253 | if (sinfo->bytes_used + sinfo->bytes_reserved + | 3303 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
3254 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3255 | return 0; | 3304 | return 0; |
3256 | 3305 | ||
3257 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3306 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); |
3307 | |||
3308 | /* 256MB or 5% of the FS */ | ||
3258 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | 3309 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); |
3259 | 3310 | ||
3260 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) | 3311 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) |
3261 | return 0; | 3312 | return 0; |
3262 | |||
3263 | return 1; | 3313 | return 1; |
3264 | } | 3314 | } |
3265 | 3315 | ||
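should_alloc_chunk now distinguishes three caller intents instead of a boolean: CHUNK_ALLOC_NO_FORCE applies only the utilization heuristics, CHUNK_ALLOC_LIMITED additionally keeps roughly 1% of the filesystem (at least 64MB) unallocated, and CHUNK_ALLOC_FORCE always allocates. A standalone model of the decision ladder, assuming btrfs's div_factor helpers mean num * factor / 10 and num * factor / 100 respectively:

    enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE };

    typedef unsigned long long u64;

    static u64 div_factor(u64 num, int f)      { return num * f / 10; }
    static u64 div_factor_fine(u64 num, int f) { return num * f / 100; }
    static u64 umax(u64 a, u64 b)              { return a > b ? a : b; }

    static int should_alloc_chunk(u64 total, u64 readonly, u64 used,
                                  u64 reserved, u64 fs_total,
                                  u64 alloc_bytes, int force)
    {
        u64 num_bytes = total - readonly;
        u64 num_allocated = used + reserved;
        u64 thresh;

        if (force == CHUNK_ALLOC_FORCE)
            return 1;

        /* limited mode: keep ~1% of the FS (min 64MB) unallocated */
        if (force == CHUNK_ALLOC_LIMITED) {
            thresh = umax(64ULL << 20, div_factor_fine(fs_total, 1));
            if (num_bytes - num_allocated < thresh)
                return 1;
        }

        /* plenty of room: >256MB would still be free after this */
        if (num_allocated + alloc_bytes + (256ULL << 20) < num_bytes)
            return 0;

        /* under 80% of the existing chunks in use */
        if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
            return 0;

        /* 256MB or 5% of the FS, whichever is larger */
        thresh = umax(256ULL << 20, div_factor_fine(fs_total, 5));
        if (num_bytes > thresh && used < div_factor(num_bytes, 3))
            return 0;

        return 1;
    }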
@@ -3269,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3269 | { | 3319 | { |
3270 | struct btrfs_space_info *space_info; | 3320 | struct btrfs_space_info *space_info; |
3271 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3321 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3322 | int wait_for_alloc = 0; | ||
3272 | int ret = 0; | 3323 | int ret = 0; |
3273 | 3324 | ||
3274 | mutex_lock(&fs_info->chunk_mutex); | ||
3275 | |||
3276 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3325 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
3277 | 3326 | ||
3278 | space_info = __find_space_info(extent_root->fs_info, flags); | 3327 | space_info = __find_space_info(extent_root->fs_info, flags); |
@@ -3283,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3283 | } | 3332 | } |
3284 | BUG_ON(!space_info); | 3333 | BUG_ON(!space_info); |
3285 | 3334 | ||
3335 | again: | ||
3286 | spin_lock(&space_info->lock); | 3336 | spin_lock(&space_info->lock); |
3287 | if (space_info->force_alloc) | 3337 | if (space_info->force_alloc) |
3288 | force = 1; | 3338 | force = space_info->force_alloc; |
3289 | if (space_info->full) { | 3339 | if (space_info->full) { |
3290 | spin_unlock(&space_info->lock); | 3340 | spin_unlock(&space_info->lock); |
3291 | goto out; | 3341 | return 0; |
3292 | } | 3342 | } |
3293 | 3343 | ||
3294 | if (!force && !should_alloc_chunk(extent_root, space_info, | 3344 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { |
3295 | alloc_bytes)) { | ||
3296 | spin_unlock(&space_info->lock); | 3345 | spin_unlock(&space_info->lock); |
3297 | goto out; | 3346 | return 0; |
3347 | } else if (space_info->chunk_alloc) { | ||
3348 | wait_for_alloc = 1; | ||
3349 | } else { | ||
3350 | space_info->chunk_alloc = 1; | ||
3298 | } | 3351 | } |
3352 | |||
3299 | spin_unlock(&space_info->lock); | 3353 | spin_unlock(&space_info->lock); |
3300 | 3354 | ||
3355 | mutex_lock(&fs_info->chunk_mutex); | ||
3356 | |||
3357 | /* | ||
3358 | * The chunk_mutex is held throughout the entirety of a chunk | ||
3359 | * allocation, so once we've acquired the chunk_mutex we know that the | ||
3360 | * other guy is done and we need to recheck and see if we should | ||
3361 | * allocate. | ||
3362 | */ | ||
3363 | if (wait_for_alloc) { | ||
3364 | mutex_unlock(&fs_info->chunk_mutex); | ||
3365 | wait_for_alloc = 0; | ||
3366 | goto again; | ||
3367 | } | ||
3368 | |||
3301 | /* | 3369 | /* |
3302 | * If we have mixed data/metadata chunks we want to make sure we keep | 3370 | * If we have mixed data/metadata chunks we want to make sure we keep |
3303 | * allocating mixed chunks instead of individual chunks. | 3371 | * allocating mixed chunks instead of individual chunks. |
@@ -3323,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3323 | space_info->full = 1; | 3391 | space_info->full = 1; |
3324 | else | 3392 | else |
3325 | ret = 1; | 3393 | ret = 1; |
3326 | space_info->force_alloc = 0; | 3394 | |
3395 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3396 | space_info->chunk_alloc = 0; | ||
3327 | spin_unlock(&space_info->lock); | 3397 | spin_unlock(&space_info->lock); |
3328 | out: | ||
3329 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3398 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3330 | return ret; | 3399 | return ret; |
3331 | } | 3400 | } |
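The locking change in do_chunk_alloc is worth spelling out: chunk_mutex is no longer taken before the cheap checks; instead a chunk_alloc flag set under the space_info spinlock marks an allocation in flight, and a second caller touches the mutex only to wait for the holder, then drops it and re-evaluates from the top. A pthread sketch of that flag-then-wait-then-recheck shape, with illustrative names and the spinlock assumed to have been initialized with pthread_spin_init elsewhere:

    #include <pthread.h>
    #include <stdbool.h>

    struct space_info {
        pthread_spinlock_t lock;
        bool full;
        bool chunk_alloc;      /* an allocation is in flight */
    };

    static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* returns 0 if no chunk is needed, 1 after allocating one */
    static int do_chunk_alloc(struct space_info *s,
                              bool (*should_alloc)(struct space_info *))
    {
        bool wait_for_alloc = false;
    again:
        pthread_spin_lock(&s->lock);
        if (s->full || !should_alloc(s)) {
            pthread_spin_unlock(&s->lock);
            return 0;
        }
        if (s->chunk_alloc)
            wait_for_alloc = true;   /* someone else is allocating */
        else
            s->chunk_alloc = true;   /* we own the allocation */
        pthread_spin_unlock(&s->lock);

        pthread_mutex_lock(&chunk_mutex);
        if (wait_for_alloc) {
            /* the holder finished; recheck whether we still need one */
            pthread_mutex_unlock(&chunk_mutex);
            wait_for_alloc = false;
            goto again;
        }

        /* ... allocate the chunk here, mutex held throughout ... */

        pthread_spin_lock(&s->lock);
        s->chunk_alloc = false;
        pthread_spin_unlock(&s->lock);
        pthread_mutex_unlock(&chunk_mutex);
        return 1;
    }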
@@ -3996,6 +4065,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3996 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4065 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3997 | u64 to_reserve; | 4066 | u64 to_reserve; |
3998 | int nr_extents; | 4067 | int nr_extents; |
4068 | int reserved_extents; | ||
3999 | int ret; | 4069 | int ret; |
4000 | 4070 | ||
4001 | if (btrfs_transaction_in_commit(root->fs_info)) | 4071 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -4003,25 +4073,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4003 | 4073 | ||
4004 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4074 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4005 | 4075 | ||
4006 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
4007 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4076 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; |
4008 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | 4077 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
4009 | nr_extents -= BTRFS_I(inode)->reserved_extents; | 4078 | |
4079 | if (nr_extents > reserved_extents) { | ||
4080 | nr_extents -= reserved_extents; | ||
4010 | to_reserve = calc_trans_metadata_size(root, nr_extents); | 4081 | to_reserve = calc_trans_metadata_size(root, nr_extents); |
4011 | } else { | 4082 | } else { |
4012 | nr_extents = 0; | 4083 | nr_extents = 0; |
4013 | to_reserve = 0; | 4084 | to_reserve = 0; |
4014 | } | 4085 | } |
4015 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4086 | |
4016 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4087 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
4017 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4088 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
4018 | if (ret) | 4089 | if (ret) |
4019 | return ret; | 4090 | return ret; |
4020 | 4091 | ||
4021 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4092 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); |
4022 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4023 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 4093 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
4024 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
4025 | 4094 | ||
4026 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4095 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4027 | 4096 | ||
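With accounting_lock removed, the reserve side works from plain atomic snapshots: read the outstanding and reserved extent counts, reserve metadata for the difference, and bump the counters only after the reservation succeeded, so a failure leaves the accounting untouched. A C11-atomics sketch of that ordering, where calc_metadata() and reserve_bytes() are stand-ins for the calc_*_metadata_size helpers and reserve_metadata_bytes:

    #include <stdatomic.h>

    struct inode_acct {
        atomic_int outstanding_extents;
        atomic_int reserved_extents;
    };

    /* stand-ins: metadata cost of nr extents, and the real reservation */
    static unsigned long long calc_metadata(int nr) { return nr * 4096ULL; }
    static int reserve_bytes(unsigned long long n)  { (void)n; return 0; }

    static int delalloc_reserve(struct inode_acct *a)
    {
        int nr = atomic_load(&a->outstanding_extents) + 1;
        int reserved = atomic_load(&a->reserved_extents);

        nr = (nr > reserved) ? nr - reserved : 0;  /* not-yet-reserved */

        if (reserve_bytes(calc_metadata(nr)))
            return -1;       /* failed: counters untouched */

        /* publish only after the reservation is safely made */
        atomic_fetch_add(&a->reserved_extents, nr);
        atomic_fetch_add(&a->outstanding_extents, 1);
        return 0;
    }

Two racing writers can both snapshot stale counts and over-reserve; that is the accepted trade-off for dropping the lock, and the release path below compensates.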
@@ -4036,20 +4105,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4036 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4105 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4037 | u64 to_free; | 4106 | u64 to_free; |
4038 | int nr_extents; | 4107 | int nr_extents; |
4108 | int reserved_extents; | ||
4039 | 4109 | ||
4040 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4110 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4041 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4111 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4042 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | 4112 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); |
4043 | 4113 | ||
4044 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4114 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); |
4045 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4115 | do { |
4046 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | 4116 | int old, new; |
4047 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | 4117 | |
4048 | BTRFS_I(inode)->reserved_extents -= nr_extents; | 4118 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
4049 | } else { | 4119 | if (nr_extents >= reserved_extents) { |
4050 | nr_extents = 0; | 4120 | nr_extents = 0; |
4051 | } | 4121 | break; |
4052 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 4122 | } |
4123 | old = reserved_extents; | ||
4124 | nr_extents = reserved_extents - nr_extents; | ||
4125 | new = reserved_extents - nr_extents; | ||
4126 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4127 | reserved_extents, new); | ||
4128 | if (likely(old == reserved_extents)) | ||
4129 | break; | ||
4130 | reserved_extents = old; | ||
4131 | } while (1); | ||
4053 | 4132 | ||
4054 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4133 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4055 | if (nr_extents > 0) | 4134 | if (nr_extents > 0) |
@@ -4223,8 +4302,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4223 | * update size of reserved extents. this function may return -EAGAIN | 4302 | * update size of reserved extents. this function may return -EAGAIN |
4224 | * if 'reserve' is true or 'sinfo' is false. | 4303 | * if 'reserve' is true or 'sinfo' is false. |
4225 | */ | 4304 | */ |
4226 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4305 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
4227 | u64 num_bytes, int reserve, int sinfo) | 4306 | u64 num_bytes, int reserve, int sinfo) |
4228 | { | 4307 | { |
4229 | int ret = 0; | 4308 | int ret = 0; |
4230 | if (sinfo) { | 4309 | if (sinfo) { |
@@ -4363,7 +4442,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4363 | if (ret) | 4442 | if (ret) |
4364 | break; | 4443 | break; |
4365 | 4444 | ||
4366 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 4445 | if (btrfs_test_opt(root, DISCARD)) |
4446 | ret = btrfs_discard_extent(root, start, | ||
4447 | end + 1 - start, NULL); | ||
4367 | 4448 | ||
4368 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 4449 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
4369 | unpin_extent_range(root, start, end); | 4450 | unpin_extent_range(root, start, end); |
@@ -4704,10 +4785,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4704 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4785 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4705 | 4786 | ||
4706 | btrfs_add_free_space(cache, buf->start, buf->len); | 4787 | btrfs_add_free_space(cache, buf->start, buf->len); |
4707 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | 4788 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); |
4708 | if (ret == -EAGAIN) { | 4789 | if (ret == -EAGAIN) { |
4709 | /* block group became read-only */ | 4790 | /* block group became read-only */ |
4710 | update_reserved_bytes(cache, buf->len, 0, 1); | 4791 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); |
4711 | goto out; | 4792 | goto out; |
4712 | } | 4793 | } |
4713 | 4794 | ||
@@ -4744,6 +4825,11 @@ pin: | |||
4744 | } | 4825 | } |
4745 | } | 4826 | } |
4746 | out: | 4827 | out: |
4828 | /* | ||
4829 | * We are deleting the buffer, so clear the corrupt flag since it no | ||
4830 | * longer matters. | ||
4831 | */ | ||
4832 | clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); | ||
4747 | btrfs_put_block_group(cache); | 4833 | btrfs_put_block_group(cache); |
4748 | } | 4834 | } |
4749 | 4835 | ||
@@ -5191,7 +5277,7 @@ checks: | |||
5191 | search_start - offset); | 5277 | search_start - offset); |
5192 | BUG_ON(offset > search_start); | 5278 | BUG_ON(offset > search_start); |
5193 | 5279 | ||
5194 | ret = update_reserved_bytes(block_group, num_bytes, 1, | 5280 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, |
5195 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5281 | (data & BTRFS_BLOCK_GROUP_DATA)); |
5196 | if (ret == -EAGAIN) { | 5282 | if (ret == -EAGAIN) { |
5197 | btrfs_add_free_space(block_group, offset, num_bytes); | 5283 | btrfs_add_free_space(block_group, offset, num_bytes); |
@@ -5282,11 +5368,13 @@ loop: | |||
5282 | 5368 | ||
5283 | if (allowed_chunk_alloc) { | 5369 | if (allowed_chunk_alloc) { |
5284 | ret = do_chunk_alloc(trans, root, num_bytes + | 5370 | ret = do_chunk_alloc(trans, root, num_bytes + |
5285 | 2 * 1024 * 1024, data, 1); | 5371 | 2 * 1024 * 1024, data, |
5372 | CHUNK_ALLOC_LIMITED); | ||
5286 | allowed_chunk_alloc = 0; | 5373 | allowed_chunk_alloc = 0; |
5287 | done_chunk_alloc = 1; | 5374 | done_chunk_alloc = 1; |
5288 | } else if (!done_chunk_alloc) { | 5375 | } else if (!done_chunk_alloc && |
5289 | space_info->force_alloc = 1; | 5376 | space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { |
5377 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5290 | } | 5378 | } |
5291 | 5379 | ||
5292 | if (loop < LOOP_NO_EMPTY_SIZE) { | 5380 | if (loop < LOOP_NO_EMPTY_SIZE) { |
@@ -5372,7 +5460,8 @@ again: | |||
5372 | */ | 5460 | */ |
5373 | if (empty_size || root->ref_cows) | 5461 | if (empty_size || root->ref_cows) |
5374 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 5462 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
5375 | num_bytes + 2 * 1024 * 1024, data, 0); | 5463 | num_bytes + 2 * 1024 * 1024, data, |
5464 | CHUNK_ALLOC_NO_FORCE); | ||
5376 | 5465 | ||
5377 | WARN_ON(num_bytes < root->sectorsize); | 5466 | WARN_ON(num_bytes < root->sectorsize); |
5378 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5467 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
@@ -5384,7 +5473,7 @@ again: | |||
5384 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5473 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5385 | num_bytes = max(num_bytes, min_alloc_size); | 5474 | num_bytes = max(num_bytes, min_alloc_size); |
5386 | do_chunk_alloc(trans, root->fs_info->extent_root, | 5475 | do_chunk_alloc(trans, root->fs_info->extent_root, |
5387 | num_bytes, data, 1); | 5476 | num_bytes, data, CHUNK_ALLOC_FORCE); |
5388 | goto again; | 5477 | goto again; |
5389 | } | 5478 | } |
5390 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { | 5479 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { |
@@ -5397,6 +5486,8 @@ again: | |||
5397 | dump_space_info(sinfo, num_bytes, 1); | 5486 | dump_space_info(sinfo, num_bytes, 1); |
5398 | } | 5487 | } |
5399 | 5488 | ||
5489 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | ||
5490 | |||
5400 | return ret; | 5491 | return ret; |
5401 | } | 5492 | } |
5402 | 5493 | ||
@@ -5412,12 +5503,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5412 | return -ENOSPC; | 5503 | return -ENOSPC; |
5413 | } | 5504 | } |
5414 | 5505 | ||
5415 | ret = btrfs_discard_extent(root, start, len); | 5506 | if (btrfs_test_opt(root, DISCARD)) |
5507 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
5416 | 5508 | ||
5417 | btrfs_add_free_space(cache, start, len); | 5509 | btrfs_add_free_space(cache, start, len); |
5418 | update_reserved_bytes(cache, len, 0, 1); | 5510 | btrfs_update_reserved_bytes(cache, len, 0, 1); |
5419 | btrfs_put_block_group(cache); | 5511 | btrfs_put_block_group(cache); |
5420 | 5512 | ||
5513 | trace_btrfs_reserved_extent_free(root, start, len); | ||
5514 | |||
5421 | return ret; | 5515 | return ret; |
5422 | } | 5516 | } |
5423 | 5517 | ||
@@ -5444,7 +5538,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5444 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); | 5538 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
5445 | 5539 | ||
5446 | path = btrfs_alloc_path(); | 5540 | path = btrfs_alloc_path(); |
5447 | BUG_ON(!path); | 5541 | if (!path) |
5542 | return -ENOMEM; | ||
5448 | 5543 | ||
5449 | path->leave_spinning = 1; | 5544 | path->leave_spinning = 1; |
5450 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5545 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5614,7 +5709,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5614 | put_caching_control(caching_ctl); | 5709 | put_caching_control(caching_ctl); |
5615 | } | 5710 | } |
5616 | 5711 | ||
5617 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); | 5712 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); |
5618 | BUG_ON(ret); | 5713 | BUG_ON(ret); |
5619 | btrfs_put_block_group(block_group); | 5714 | btrfs_put_block_group(block_group); |
5620 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5715 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -6047,6 +6142,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | if (reada && level == 1) | 6142 | if (reada && level == 1) |
6048 | reada_walk_down(trans, root, wc, path); | 6143 | reada_walk_down(trans, root, wc, path); |
6049 | next = read_tree_block(root, bytenr, blocksize, generation); | 6144 | next = read_tree_block(root, bytenr, blocksize, generation); |
6145 | if (!next) | ||
6146 | return -EIO; | ||
6050 | btrfs_tree_lock(next); | 6147 | btrfs_tree_lock(next); |
6051 | btrfs_set_lock_blocking(next); | 6148 | btrfs_set_lock_blocking(next); |
6052 | } | 6149 | } |
@@ -6438,10 +6535,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6438 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 6535 | BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
6439 | 6536 | ||
6440 | path = btrfs_alloc_path(); | 6537 | path = btrfs_alloc_path(); |
6441 | BUG_ON(!path); | 6538 | if (!path) |
6539 | return -ENOMEM; | ||
6442 | 6540 | ||
6443 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6541 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6444 | BUG_ON(!wc); | 6542 | if (!wc) { |
6543 | btrfs_free_path(path); | ||
6544 | return -ENOMEM; | ||
6545 | } | ||
6445 | 6546 | ||
6446 | btrfs_assert_tree_locked(parent); | 6547 | btrfs_assert_tree_locked(parent); |
6447 | parent_level = btrfs_header_level(parent); | 6548 | parent_level = btrfs_header_level(parent); |
@@ -6899,7 +7000,11 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6899 | } | 7000 | } |
6900 | 7001 | ||
6901 | path = btrfs_alloc_path(); | 7002 | path = btrfs_alloc_path(); |
6902 | BUG_ON(!path); | 7003 | if (!path) { |
7004 | if (exts != *extents) | ||
7005 | kfree(exts); | ||
7006 | return -ENOMEM; | ||
7007 | } | ||
6903 | 7008 | ||
6904 | cur_pos = extent_key->objectid - offset; | 7009 | cur_pos = extent_key->objectid - offset; |
6905 | last_byte = extent_key->objectid + extent_key->offset; | 7010 | last_byte = extent_key->objectid + extent_key->offset; |
@@ -6941,6 +7046,10 @@ static noinline int get_new_locations(struct inode *reloc_inode, | |||
6941 | struct disk_extent *old = exts; | 7046 | struct disk_extent *old = exts; |
6942 | max *= 2; | 7047 | max *= 2; |
6943 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); | 7048 | exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); |
7049 | if (!exts) { | ||
7050 | ret = -ENOMEM; | ||
7051 | goto out; | ||
7052 | } | ||
6944 | memcpy(exts, old, sizeof(*exts) * nr); | 7053 | memcpy(exts, old, sizeof(*exts) * nr); |
6945 | if (old != *extents) | 7054 | if (old != *extents) |
6946 | kfree(old); | 7055 | kfree(old); |
@@ -7423,7 +7532,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
7423 | int ret; | 7532 | int ret; |
7424 | 7533 | ||
7425 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); | 7534 | new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); |
7426 | BUG_ON(!new_extent); | 7535 | if (!new_extent) |
7536 | return -ENOMEM; | ||
7427 | 7537 | ||
7428 | ref = btrfs_lookup_leaf_ref(root, leaf->start); | 7538 | ref = btrfs_lookup_leaf_ref(root, leaf->start); |
7429 | BUG_ON(!ref); | 7539 | BUG_ON(!ref); |
@@ -7609,7 +7719,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | |||
7609 | 7719 | ||
7610 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 7720 | reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
7611 | BUG_ON(!reloc_root); | 7721 | BUG_ON(!reloc_root); |
7612 | btrfs_orphan_cleanup(reloc_root); | 7722 | ret = btrfs_orphan_cleanup(reloc_root); |
7723 | BUG_ON(ret); | ||
7613 | return 0; | 7724 | return 0; |
7614 | } | 7725 | } |
7615 | 7726 | ||
@@ -7627,7 +7738,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7627 | return 0; | 7738 | return 0; |
7628 | 7739 | ||
7629 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 7740 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
7630 | BUG_ON(!root_item); | 7741 | if (!root_item) |
7742 | return -ENOMEM; | ||
7631 | 7743 | ||
7632 | ret = btrfs_copy_root(trans, root, root->commit_root, | 7744 | ret = btrfs_copy_root(trans, root, root->commit_root, |
7633 | &eb, BTRFS_TREE_RELOC_OBJECTID); | 7745 | &eb, BTRFS_TREE_RELOC_OBJECTID); |
@@ -7653,7 +7765,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, | |||
7653 | 7765 | ||
7654 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 7766 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, |
7655 | &root_key); | 7767 | &root_key); |
7656 | BUG_ON(!reloc_root); | 7768 | BUG_ON(IS_ERR(reloc_root)); |
7657 | reloc_root->last_trans = trans->transid; | 7769 | reloc_root->last_trans = trans->transid; |
7658 | reloc_root->commit_root = NULL; | 7770 | reloc_root->commit_root = NULL; |
7659 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; | 7771 | reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; |
@@ -7906,6 +8018,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
7906 | 8018 | ||
7907 | eb = read_tree_block(found_root, block_start, | 8019 | eb = read_tree_block(found_root, block_start, |
7908 | block_size, 0); | 8020 | block_size, 0); |
8021 | if (!eb) { | ||
8022 | ret = -EIO; | ||
8023 | goto out; | ||
8024 | } | ||
7909 | btrfs_tree_lock(eb); | 8025 | btrfs_tree_lock(eb); |
7910 | BUG_ON(level != btrfs_header_level(eb)); | 8026 | BUG_ON(level != btrfs_header_level(eb)); |
7911 | 8027 | ||
@@ -8061,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
8061 | 8177 | ||
8062 | alloc_flags = update_block_group_flags(root, cache->flags); | 8178 | alloc_flags = update_block_group_flags(root, cache->flags); |
8063 | if (alloc_flags != cache->flags) | 8179 | if (alloc_flags != cache->flags) |
8064 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 8180 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
8181 | CHUNK_ALLOC_FORCE); | ||
8065 | 8182 | ||
8066 | ret = set_block_group_ro(cache); | 8183 | ret = set_block_group_ro(cache); |
8067 | if (!ret) | 8184 | if (!ret) |
8068 | goto out; | 8185 | goto out; |
8069 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 8186 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
8070 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 8187 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
8188 | CHUNK_ALLOC_FORCE); | ||
8071 | if (ret < 0) | 8189 | if (ret < 0) |
8072 | goto out; | 8190 | goto out; |
8073 | ret = set_block_group_ro(cache); | 8191 | ret = set_block_group_ro(cache); |
@@ -8080,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
8080 | struct btrfs_root *root, u64 type) | 8198 | struct btrfs_root *root, u64 type) |
8081 | { | 8199 | { |
8082 | u64 alloc_flags = get_alloc_profile(root, type); | 8200 | u64 alloc_flags = get_alloc_profile(root, type); |
8083 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | 8201 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
8202 | CHUNK_ALLOC_FORCE); | ||
8084 | } | 8203 | } |
8085 | 8204 | ||
8086 | /* | 8205 | /* |
@@ -8621,6 +8740,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8621 | BUG_ON(!block_group); | 8740 | BUG_ON(!block_group); |
8622 | BUG_ON(!block_group->ro); | 8741 | BUG_ON(!block_group->ro); |
8623 | 8742 | ||
8743 | /* | ||
8744 | * Free the reserved super bytes from this block group before | ||
8745 | * removing it. | ||
8746 | */ | ||
8747 | free_excluded_extents(root, block_group); | ||
8748 | |||
8624 | memcpy(&key, &block_group->key, sizeof(key)); | 8749 | memcpy(&key, &block_group->key, sizeof(key)); |
8625 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | 8750 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | |
8626 | BTRFS_BLOCK_GROUP_RAID1 | | 8751 | BTRFS_BLOCK_GROUP_RAID1 | |
@@ -8724,13 +8849,84 @@ out: | |||
8724 | return ret; | 8849 | return ret; |
8725 | } | 8850 | } |
8726 | 8851 | ||
8852 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
8853 | { | ||
8854 | struct btrfs_space_info *space_info; | ||
8855 | int ret; | ||
8856 | |||
8857 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, | ||
8858 | &space_info); | ||
8859 | if (ret) | ||
8860 | return ret; | ||
8861 | |||
8862 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, | ||
8863 | &space_info); | ||
8864 | if (ret) | ||
8865 | return ret; | ||
8866 | |||
8867 | ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, | ||
8868 | &space_info); | ||
8869 | if (ret) | ||
8870 | return ret; | ||
8871 | |||
8872 | return ret; | ||
8873 | } | ||
8874 | |||
8727 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | 8875 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
8728 | { | 8876 | { |
8729 | return unpin_extent_range(root, start, end); | 8877 | return unpin_extent_range(root, start, end); |
8730 | } | 8878 | } |
8731 | 8879 | ||
8732 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | 8880 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, |
8733 | u64 num_bytes) | 8881 | u64 num_bytes, u64 *actual_bytes) |
8882 | { | ||
8883 | return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); | ||
8884 | } | ||
8885 | |||
8886 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | ||
8734 | { | 8887 | { |
8735 | return btrfs_discard_extent(root, bytenr, num_bytes); | 8888 | struct btrfs_fs_info *fs_info = root->fs_info; |
8889 | struct btrfs_block_group_cache *cache = NULL; | ||
8890 | u64 group_trimmed; | ||
8891 | u64 start; | ||
8892 | u64 end; | ||
8893 | u64 trimmed = 0; | ||
8894 | int ret = 0; | ||
8895 | |||
8896 | cache = btrfs_lookup_block_group(fs_info, range->start); | ||
8897 | |||
8898 | while (cache) { | ||
8899 | if (cache->key.objectid >= (range->start + range->len)) { | ||
8900 | btrfs_put_block_group(cache); | ||
8901 | break; | ||
8902 | } | ||
8903 | |||
8904 | start = max(range->start, cache->key.objectid); | ||
8905 | end = min(range->start + range->len, | ||
8906 | cache->key.objectid + cache->key.offset); | ||
8907 | |||
8908 | if (end - start >= range->minlen) { | ||
8909 | if (!block_group_cache_done(cache)) { | ||
8910 | ret = cache_block_group(cache, NULL, root, 0); | ||
8911 | if (!ret) | ||
8912 | wait_block_group_cache_done(cache); | ||
8913 | } | ||
8914 | ret = btrfs_trim_block_group(cache, | ||
8915 | &group_trimmed, | ||
8916 | start, | ||
8917 | end, | ||
8918 | range->minlen); | ||
8919 | |||
8920 | trimmed += group_trimmed; | ||
8921 | if (ret) { | ||
8922 | btrfs_put_block_group(cache); | ||
8923 | break; | ||
8924 | } | ||
8925 | } | ||
8926 | |||
8927 | cache = next_block_group(fs_info->tree_root, cache); | ||
8928 | } | ||
8929 | |||
8930 | range->len = trimmed; | ||
8931 | return ret; | ||
8736 | } | 8932 | } |
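btrfs_trim_fs visits every block group that intersects [range->start, range->start + range->len), makes sure the group's free-space cache is loaded, clamps the trim window to the group's extent, skips windows shorter than range->minlen, and sums the per-group trimmed bytes back into range->len. The iteration skeleton, sketched over a plain array with a callback standing in for btrfs_trim_block_group:

    typedef unsigned long long u64;

    struct group { u64 start; u64 len; };    /* one block group */

    typedef int (*trim_fn)(const struct group *g, u64 start, u64 end,
                           u64 minlen, u64 *trimmed);

    static u64 umax(u64 a, u64 b) { return a > b ? a : b; }
    static u64 umin(u64 a, u64 b) { return a < b ? a : b; }

    static int trim_fs(const struct group *groups, int n, trim_fn trim,
                       u64 range_start, u64 range_len, u64 minlen,
                       u64 *total)
    {
        u64 trimmed = 0;
        int i, ret = 0;

        for (i = 0; i < n; i++) {
            const struct group *g = &groups[i];

            if (g->start >= range_start + range_len)
                break;                        /* group lies past the range */

            /* clamp the requested range to this group */
            u64 start = umax(range_start, g->start);
            u64 end = umin(range_start + range_len, g->start + g->len);

            if (end - start >= minlen) {
                u64 part = 0;
                ret = trim(g, start, end, minlen, &part);
                trimmed += part;
                if (ret)
                    break;                    /* still report what we did */
            }
        }
        *total = trimmed;    /* handed back via range->len in the patch */
        return ret;
    }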
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 714adc4ac4c2..315138605088 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state, | |||
690 | } | 690 | } |
691 | } | 691 | } |
692 | 692 | ||
693 | static void uncache_state(struct extent_state **cached_ptr) | ||
694 | { | ||
695 | if (cached_ptr && (*cached_ptr)) { | ||
696 | struct extent_state *state = *cached_ptr; | ||
697 | *cached_ptr = NULL; | ||
698 | free_extent_state(state); | ||
699 | } | ||
700 | } | ||
701 | |||
693 | /* | 702 | /* |
694 | * set some bits on a range in the tree. This may require allocations or | 703 | * set some bits on a range in the tree. This may require allocations or |
695 | * sleeping, so the gfp mask is used to indicate what is allowed. | 704 | * sleeping, so the gfp mask is used to indicate what is allowed. |
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | |||
940 | } | 949 | } |
941 | 950 | ||
942 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 951 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
943 | gfp_t mask) | 952 | struct extent_state **cached_state, gfp_t mask) |
944 | { | 953 | { |
945 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 954 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, |
946 | NULL, mask); | 955 | NULL, cached_state, mask); |
947 | } | 956 | } |
948 | 957 | ||
949 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 958 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | |||
1012 | mask); | 1021 | mask); |
1013 | } | 1022 | } |
1014 | 1023 | ||
1015 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1024 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) |
1016 | gfp_t mask) | ||
1017 | { | 1025 | { |
1018 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, | 1026 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
1019 | mask); | 1027 | mask); |
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1735 | 1743 | ||
1736 | do { | 1744 | do { |
1737 | struct page *page = bvec->bv_page; | 1745 | struct page *page = bvec->bv_page; |
1746 | struct extent_state *cached = NULL; | ||
1747 | struct extent_state *state; | ||
1748 | |||
1738 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1749 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
1739 | 1750 | ||
1740 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1751 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1749 | if (++bvec <= bvec_end) | 1760 | if (++bvec <= bvec_end) |
1750 | prefetchw(&bvec->bv_page->flags); | 1761 | prefetchw(&bvec->bv_page->flags); |
1751 | 1762 | ||
1763 | spin_lock(&tree->lock); | ||
1764 | state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); | ||
1765 | if (state && state->start == start) { | ||
1766 | /* | ||
1767 | * take a reference on the state, unlock will drop | ||
1768 | * the ref | ||
1769 | */ | ||
1770 | cache_state(state, &cached); | ||
1771 | } | ||
1772 | spin_unlock(&tree->lock); | ||
1773 | |||
1752 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 1774 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
1753 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 1775 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
1754 | NULL); | 1776 | state); |
1755 | if (ret) | 1777 | if (ret) |
1756 | uptodate = 0; | 1778 | uptodate = 0; |
1757 | } | 1779 | } |
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
1764 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 1786 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
1765 | if (err) | 1787 | if (err) |
1766 | uptodate = 0; | 1788 | uptodate = 0; |
1789 | uncache_state(&cached); | ||
1767 | continue; | 1790 | continue; |
1768 | } | 1791 | } |
1769 | } | 1792 | } |
1770 | 1793 | ||
1771 | if (uptodate) { | 1794 | if (uptodate) { |
1772 | set_extent_uptodate(tree, start, end, | 1795 | set_extent_uptodate(tree, start, end, &cached, |
1773 | GFP_ATOMIC); | 1796 | GFP_ATOMIC); |
1774 | } | 1797 | } |
1775 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1798 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
1776 | 1799 | ||
1777 | if (whole_page) { | 1800 | if (whole_page) { |
1778 | if (uptodate) { | 1801 | if (uptodate) { |
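The read end-io path now resolves the extent state once: it searches the tree under tree->lock, pins the state with cache_state, hands it to the readpage_end_io_hook, and lets unlock_extent_cached consume the reference instead of searching again; uncache_state covers the early-retry exit where the reference must still be dropped. A generic refcount sketch of that pin-and-consume pattern, in plain C11 rather than the kernel's extent_state:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct state {
        atomic_int refs;
        /* ... range and bits would live here ... */
    };

    static void cache_state(struct state *s, struct state **cached)
    {
        if (cached && !*cached) {
            atomic_fetch_add(&s->refs, 1);  /* pin while we hold the pointer */
            *cached = s;
        }
    }

    static void free_state(struct state *s)
    {
        /* drop one reference; the last one frees */
        if (atomic_fetch_sub(&s->refs, 1) == 1)
            free(s);
    }

    static void uncache_state(struct state **cached)
    {
        if (cached && *cached) {
            struct state *s = *cached;
            *cached = NULL;      /* clear before dropping the pin */
            free_state(s);
        }
    }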
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1811 | 1834 | ||
1812 | do { | 1835 | do { |
1813 | struct page *page = bvec->bv_page; | 1836 | struct page *page = bvec->bv_page; |
1837 | struct extent_state *cached = NULL; | ||
1814 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1838 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
1815 | 1839 | ||
1816 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 1840 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1821 | prefetchw(&bvec->bv_page->flags); | 1845 | prefetchw(&bvec->bv_page->flags); |
1822 | 1846 | ||
1823 | if (uptodate) { | 1847 | if (uptodate) { |
1824 | set_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1848 | set_extent_uptodate(tree, start, end, &cached, |
1849 | GFP_ATOMIC); | ||
1825 | } else { | 1850 | } else { |
1826 | ClearPageUptodate(page); | 1851 | ClearPageUptodate(page); |
1827 | SetPageError(page); | 1852 | SetPageError(page); |
1828 | } | 1853 | } |
1829 | 1854 | ||
1830 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1855 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
1831 | 1856 | ||
1832 | } while (bvec >= bio->bi_io_vec); | 1857 | } while (bvec >= bio->bi_io_vec); |
1833 | 1858 | ||
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2016 | while (cur <= end) { | 2041 | while (cur <= end) { |
2017 | if (cur >= last_byte) { | 2042 | if (cur >= last_byte) { |
2018 | char *userpage; | 2043 | char *userpage; |
2044 | struct extent_state *cached = NULL; | ||
2045 | |||
2019 | iosize = PAGE_CACHE_SIZE - page_offset; | 2046 | iosize = PAGE_CACHE_SIZE - page_offset; |
2020 | userpage = kmap_atomic(page, KM_USER0); | 2047 | userpage = kmap_atomic(page, KM_USER0); |
2021 | memset(userpage + page_offset, 0, iosize); | 2048 | memset(userpage + page_offset, 0, iosize); |
2022 | flush_dcache_page(page); | 2049 | flush_dcache_page(page); |
2023 | kunmap_atomic(userpage, KM_USER0); | 2050 | kunmap_atomic(userpage, KM_USER0); |
2024 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2051 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2025 | GFP_NOFS); | 2052 | &cached, GFP_NOFS); |
2026 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2053 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2054 | &cached, GFP_NOFS); | ||
2027 | break; | 2055 | break; |
2028 | } | 2056 | } |
2029 | em = get_extent(inode, page, page_offset, cur, | 2057 | em = get_extent(inode, page, page_offset, cur, |
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2063 | /* we've found a hole, just zero and go on */ | 2091 | /* we've found a hole, just zero and go on */ |
2064 | if (block_start == EXTENT_MAP_HOLE) { | 2092 | if (block_start == EXTENT_MAP_HOLE) { |
2065 | char *userpage; | 2093 | char *userpage; |
2094 | struct extent_state *cached = NULL; | ||
2095 | |||
2066 | userpage = kmap_atomic(page, KM_USER0); | 2096 | userpage = kmap_atomic(page, KM_USER0); |
2067 | memset(userpage + page_offset, 0, iosize); | 2097 | memset(userpage + page_offset, 0, iosize); |
2068 | flush_dcache_page(page); | 2098 | flush_dcache_page(page); |
2069 | kunmap_atomic(userpage, KM_USER0); | 2099 | kunmap_atomic(userpage, KM_USER0); |
2070 | 2100 | ||
2071 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2101 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2072 | GFP_NOFS); | 2102 | &cached, GFP_NOFS); |
2073 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2103 | unlock_extent_cached(tree, cur, cur + iosize - 1, |
2104 | &cached, GFP_NOFS); | ||
2074 | cur = cur + iosize; | 2105 | cur = cur + iosize; |
2075 | page_offset += iosize; | 2106 | page_offset += iosize; |
2076 | continue; | 2107 | continue; |
@@ -2188,10 +2219,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2188 | unsigned long nr_written = 0; | 2219 | unsigned long nr_written = 0; |
2189 | 2220 | ||
2190 | if (wbc->sync_mode == WB_SYNC_ALL) | 2221 | if (wbc->sync_mode == WB_SYNC_ALL) |
2191 | write_flags = WRITE_SYNC_PLUG; | 2222 | write_flags = WRITE_SYNC; |
2192 | else | 2223 | else |
2193 | write_flags = WRITE; | 2224 | write_flags = WRITE; |
2194 | 2225 | ||
2226 | trace___extent_writepage(page, inode, wbc); | ||
2227 | |||
2195 | WARN_ON(!PageLocked(page)); | 2228 | WARN_ON(!PageLocked(page)); |
2196 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2229 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2197 | if (page->index > end_index || | 2230 | if (page->index > end_index || |
@@ -2787,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2787 | iocount++; | 2820 | iocount++; |
2788 | block_start = block_start + iosize; | 2821 | block_start = block_start + iosize; |
2789 | } else { | 2822 | } else { |
2790 | set_extent_uptodate(tree, block_start, cur_end, | 2823 | struct extent_state *cached = NULL; |
2824 | |||
2825 | set_extent_uptodate(tree, block_start, cur_end, &cached, | ||
2791 | GFP_NOFS); | 2826 | GFP_NOFS); |
2792 | unlock_extent(tree, block_start, cur_end, GFP_NOFS); | 2827 | unlock_extent_cached(tree, block_start, cur_end, |
2828 | &cached, GFP_NOFS); | ||
2793 | block_start = cur_end + 1; | 2829 | block_start = cur_end + 1; |
2794 | } | 2830 | } |
2795 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); | 2831 | page_offset = block_start & (PAGE_CACHE_SIZE - 1); |
@@ -3455,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3455 | num_pages = num_extent_pages(eb->start, eb->len); | 3491 | num_pages = num_extent_pages(eb->start, eb->len); |
3456 | 3492 | ||
3457 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3493 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3458 | GFP_NOFS); | 3494 | NULL, GFP_NOFS); |
3459 | for (i = 0; i < num_pages; i++) { | 3495 | for (i = 0; i < num_pages; i++) { |
3460 | page = extent_buffer_page(eb, i); | 3496 | page = extent_buffer_page(eb, i); |
3461 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3497 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3690,6 +3726,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3690 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 3726 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
3691 | eb->len, start, min_len); | 3727 | eb->len, start, min_len); |
3692 | WARN_ON(1); | 3728 | WARN_ON(1); |
3729 | return -EINVAL; | ||
3693 | } | 3730 | } |
3694 | 3731 | ||
3695 | p = extent_buffer_page(eb, i); | 3732 | p = extent_buffer_page(eb, i); |
@@ -3882,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3882 | kunmap_atomic(dst_kaddr, KM_USER0); | 3919 | kunmap_atomic(dst_kaddr, KM_USER0); |
3883 | } | 3920 | } |
3884 | 3921 | ||
3922 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | ||
3923 | { | ||
3924 | unsigned long distance = (src > dst) ? src - dst : dst - src; | ||
3925 | return distance < len; | ||
3926 | } | ||
3927 | |||
3885 | static void copy_pages(struct page *dst_page, struct page *src_page, | 3928 | static void copy_pages(struct page *dst_page, struct page *src_page, |
3886 | unsigned long dst_off, unsigned long src_off, | 3929 | unsigned long dst_off, unsigned long src_off, |
3887 | unsigned long len) | 3930 | unsigned long len) |
@@ -3889,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3889 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3932 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); |
3890 | char *src_kaddr; | 3933 | char *src_kaddr; |
3891 | 3934 | ||
3892 | if (dst_page != src_page) | 3935 | if (dst_page != src_page) { |
3893 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3936 | src_kaddr = kmap_atomic(src_page, KM_USER1); |
3894 | else | 3937 | } else { |
3895 | src_kaddr = dst_kaddr; | 3938 | src_kaddr = dst_kaddr; |
3939 | BUG_ON(areas_overlap(src_off, dst_off, len)); | ||
3940 | } | ||
3896 | 3941 | ||
3897 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3942 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3898 | kunmap_atomic(dst_kaddr, KM_USER0); | 3943 | kunmap_atomic(dst_kaddr, KM_USER0); |
@@ -3967,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3967 | "len %lu len %lu\n", dst_offset, len, dst->len); | 4012 | "len %lu len %lu\n", dst_offset, len, dst->len); |
3968 | BUG_ON(1); | 4013 | BUG_ON(1); |
3969 | } | 4014 | } |
3970 | if (dst_offset < src_offset) { | 4015 | if (!areas_overlap(src_offset, dst_offset, len)) { |
3971 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); | 4016 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); |
3972 | return; | 4017 | return; |
3973 | } | 4018 | } |
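memmove_extent_buffer previously fell back to the slow backwards copy whenever dst_offset was not below src_offset, even when the ranges were disjoint; areas_overlap reduces the real question to a distance test, and copy_pages can now assert that an intra-page copy never overlaps. A self-contained illustration of the test and the dispatch it enables:

    #include <stdbool.h>
    #include <string.h>

    static bool areas_overlap(unsigned long src, unsigned long dst,
                              unsigned long len)
    {
        unsigned long distance = (src > dst) ? src - dst : dst - src;
        return distance < len;   /* closer than len bytes: they intersect */
    }

    static void move_bytes(char *buf, unsigned long dst, unsigned long src,
                           unsigned long len)
    {
        if (!areas_overlap(src, dst, len))
            memcpy(buf + dst, buf + src, len);   /* fast path, disjoint */
        else
            memmove(buf + dst, buf + src, len);  /* overlap-safe copy */
    }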
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9318dfefd59c..af2d7179c372 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
32 | #define EXTENT_BUFFER_BLOCKING 1 | 32 | #define EXTENT_BUFFER_BLOCKING 1 |
33 | #define EXTENT_BUFFER_DIRTY 2 | 33 | #define EXTENT_BUFFER_DIRTY 2 |
34 | #define EXTENT_BUFFER_CORRUPT 3 | ||
34 | 35 | ||
35 | /* these are flags for extent_clear_unlock_delalloc */ | 36 | /* these are flags for extent_clear_unlock_delalloc */ |
36 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 37 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -207,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
207 | int bits, int exclusive_bits, u64 *failed_start, | 208 | int bits, int exclusive_bits, u64 *failed_start, |
208 | struct extent_state **cached_state, gfp_t mask); | 209 | struct extent_state **cached_state, gfp_t mask); |
209 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 210 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
210 | gfp_t mask); | 211 | struct extent_state **cached_state, gfp_t mask); |
211 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 212 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
212 | gfp_t mask); | 213 | gfp_t mask); |
213 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 214 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2b6c12e983b3..a24a3f2fa13e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -243,7 +243,7 @@ out: | |||
243 | * Insert @em into @tree or perform a simple forward/backward merge with | 243 | * Insert @em into @tree or perform a simple forward/backward merge with |
244 | * existing mappings. The extent_map struct passed in will be inserted | 244 | * existing mappings. The extent_map struct passed in will be inserted |
245 | * into the tree directly, with an additional reference taken, or a | 245 | * into the tree directly, with an additional reference taken, or a |
246 | * reference dropped if the merge attempt was successfull. | 246 | * reference dropped if the merge attempt was successful. |
247 | */ | 247 | */ |
248 | int add_extent_mapping(struct extent_map_tree *tree, | 248 | int add_extent_mapping(struct extent_map_tree *tree, |
249 | struct extent_map *em) | 249 | struct extent_map *em) |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4f19a3e1bf32..a6a9d4e8b491 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
48 | struct extent_buffer *leaf; | 48 | struct extent_buffer *leaf; |
49 | 49 | ||
50 | path = btrfs_alloc_path(); | 50 | path = btrfs_alloc_path(); |
51 | BUG_ON(!path); | 51 | if (!path) |
52 | return -ENOMEM; | ||
52 | file_key.objectid = objectid; | 53 | file_key.objectid = objectid; |
53 | file_key.offset = pos; | 54 | file_key.offset = pos; |
54 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 55 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
169 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 170 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
170 | 171 | ||
171 | path = btrfs_alloc_path(); | 172 | path = btrfs_alloc_path(); |
173 | if (!path) | ||
174 | return -ENOMEM; | ||
172 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) | 175 | if (bio->bi_size > PAGE_CACHE_SIZE * 8) |
173 | path->reada = 2; | 176 | path->reada = 2; |
174 | 177 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f447b783bb84..75899a01dded 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -45,14 +45,14 @@ | |||
45 | * and be replaced with calls into generic code. | 45 | * and be replaced with calls into generic code. |
46 | */ | 46 | */ |
47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 47 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
48 | int write_bytes, | 48 | size_t write_bytes, |
49 | struct page **prepared_pages, | 49 | struct page **prepared_pages, |
50 | struct iov_iter *i) | 50 | struct iov_iter *i) |
51 | { | 51 | { |
52 | size_t copied = 0; | 52 | size_t copied = 0; |
53 | size_t total_copied = 0; | ||
53 | int pg = 0; | 54 | int pg = 0; |
54 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 55 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
55 | int total_copied = 0; | ||
56 | 56 | ||
57 | while (write_bytes > 0) { | 57 | while (write_bytes > 0) { |
58 | size_t count = min_t(size_t, | 58 | size_t count = min_t(size_t, |
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
88 | total_copied += copied; | 88 | total_copied += copied; |
89 | 89 | ||
90 | /* Return to btrfs_file_aio_write to fault page */ | 90 | /* Return to btrfs_file_aio_write to fault page */ |
91 | if (unlikely(copied == 0)) { | 91 | if (unlikely(copied == 0)) |
92 | break; | 92 | break; |
93 | } | ||
94 | 93 | ||
95 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 94 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
96 | offset += copied; | 95 | offset += copied; |
@@ -105,12 +104,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
105 | /* | 104 | /* |
106 | * unlocks pages after btrfs_file_write is done with them | 105 | * unlocks pages after btrfs_file_write is done with them |
107 | */ | 106 | */ |
108 | static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | 107 | void btrfs_drop_pages(struct page **pages, size_t num_pages) |
109 | { | 108 | { |
110 | size_t i; | 109 | size_t i; |
111 | for (i = 0; i < num_pages; i++) { | 110 | for (i = 0; i < num_pages; i++) { |
112 | if (!pages[i]) | ||
113 | break; | ||
114 | /* page checked is some magic around finding pages that | 111 | /* page checked is some magic around finding pages that |
115 | * have been modified without going through btrfs_set_page_dirty | 112 | * have been modified without going through btrfs_set_page_dirty |
116 | * clear it here | 113 | * clear it here |
@@ -130,17 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
130 | * this also makes the decision about creating an inline extent vs | 127 | * this also makes the decision about creating an inline extent vs |
131 | * doing real data extents, marking pages dirty and delalloc as required. | 128 | * doing real data extents, marking pages dirty and delalloc as required. |
132 | */ | 129 | */ |
133 | static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | 130 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, |
134 | struct btrfs_root *root, | 131 | struct page **pages, size_t num_pages, |
135 | struct file *file, | 132 | loff_t pos, size_t write_bytes, |
136 | struct page **pages, | 133 | struct extent_state **cached) |
137 | size_t num_pages, | ||
138 | loff_t pos, | ||
139 | size_t write_bytes) | ||
140 | { | 134 | { |
141 | int err = 0; | 135 | int err = 0; |
142 | int i; | 136 | int i; |
143 | struct inode *inode = fdentry(file)->d_inode; | ||
144 | u64 num_bytes; | 137 | u64 num_bytes; |
145 | u64 start_pos; | 138 | u64 start_pos; |
146 | u64 end_of_last_block; | 139 | u64 end_of_last_block; |
@@ -153,8 +146,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
153 | 146 | ||
154 | end_of_last_block = start_pos + num_bytes - 1; | 147 | end_of_last_block = start_pos + num_bytes - 1; |
155 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 148 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
156 | NULL); | 149 | cached); |
157 | BUG_ON(err); | 150 | if (err) |
151 | return err; | ||
158 | 152 | ||
159 | for (i = 0; i < num_pages; i++) { | 153 | for (i = 0; i < num_pages; i++) { |
160 | struct page *p = pages[i]; | 154 | struct page *p = pages[i]; |
@@ -162,13 +156,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
162 | ClearPageChecked(p); | 156 | ClearPageChecked(p); |
163 | set_page_dirty(p); | 157 | set_page_dirty(p); |
164 | } | 158 | } |
165 | if (end_pos > isize) { | 159 | |
160 | /* | ||
161 | * we've only changed i_size in ram, and we haven't updated | ||
162 | * the disk i_size. There is no need to log the inode | ||
163 | * at this time. | ||
164 | */ | ||
165 | if (end_pos > isize) | ||
166 | i_size_write(inode, end_pos); | 166 | i_size_write(inode, end_pos); |
167 | /* we've only changed i_size in ram, and we haven't updated | ||
168 | * the disk i_size. There is no need to log the inode | ||
169 | * at this time. | ||
170 | */ | ||
171 | } | ||
172 | return 0; | 167 | return 0; |
173 | } | 168 | } |
174 | 169 | ||
@@ -610,6 +605,8 @@ again: | |||
610 | key.offset = split; | 605 | key.offset = split; |
611 | 606 | ||
612 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 607 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
608 | if (ret < 0) | ||
609 | goto out; | ||
613 | if (ret > 0 && path->slots[0] > 0) | 610 | if (ret > 0 && path->slots[0] > 0) |
614 | path->slots[0]--; | 611 | path->slots[0]--; |
615 | 612 | ||
@@ -819,12 +816,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
819 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 816 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
820 | 817 | ||
821 | if (start_pos > inode->i_size) { | 818 | if (start_pos > inode->i_size) { |
822 | err = btrfs_cont_expand(inode, start_pos); | 819 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); |
823 | if (err) | 820 | if (err) |
824 | return err; | 821 | return err; |
825 | } | 822 | } |
826 | 823 | ||
827 | memset(pages, 0, num_pages * sizeof(struct page *)); | ||
828 | again: | 824 | again: |
829 | for (i = 0; i < num_pages; i++) { | 825 | for (i = 0; i < num_pages; i++) { |
830 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 826 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
@@ -896,156 +892,71 @@ fail: | |||
896 | 892 | ||
897 | } | 893 | } |
898 | 894 | ||
899 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 895 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
900 | const struct iovec *iov, | 896 | struct iov_iter *i, |
901 | unsigned long nr_segs, loff_t pos) | 897 | loff_t pos) |
902 | { | 898 | { |
903 | struct file *file = iocb->ki_filp; | ||
904 | struct inode *inode = fdentry(file)->d_inode; | 899 | struct inode *inode = fdentry(file)->d_inode; |
905 | struct btrfs_root *root = BTRFS_I(inode)->root; | 900 | struct btrfs_root *root = BTRFS_I(inode)->root; |
906 | struct page **pages = NULL; | 901 | struct page **pages = NULL; |
907 | struct iov_iter i; | ||
908 | loff_t *ppos = &iocb->ki_pos; | ||
909 | loff_t start_pos; | ||
910 | ssize_t num_written = 0; | ||
911 | ssize_t err = 0; | ||
912 | size_t count; | ||
913 | size_t ocount; | ||
914 | int ret = 0; | ||
915 | int nrptrs; | ||
916 | unsigned long first_index; | 902 | unsigned long first_index; |
917 | unsigned long last_index; | 903 | unsigned long last_index; |
918 | int will_write; | 904 | size_t num_written = 0; |
919 | int buffered = 0; | 905 | int nrptrs; |
920 | int copied = 0; | 906 | int ret = 0; |
921 | int dirty_pages = 0; | ||
922 | |||
923 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | ||
924 | (file->f_flags & O_DIRECT)); | ||
925 | |||
926 | start_pos = pos; | ||
927 | |||
928 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
929 | |||
930 | mutex_lock(&inode->i_mutex); | ||
931 | |||
932 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
933 | if (err) | ||
934 | goto out; | ||
935 | count = ocount; | ||
936 | |||
937 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
938 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
939 | if (err) | ||
940 | goto out; | ||
941 | |||
942 | if (count == 0) | ||
943 | goto out; | ||
944 | |||
945 | err = file_remove_suid(file); | ||
946 | if (err) | ||
947 | goto out; | ||
948 | |||
949 | /* | ||
950 | * If BTRFS flips readonly due to some impossible error | ||
951 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
952 | * although we have opened a file as writable, we have | ||
953 | * to stop this write operation to ensure FS consistency. | ||
954 | */ | ||
955 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
956 | err = -EROFS; | ||
957 | goto out; | ||
958 | } | ||
959 | |||
960 | file_update_time(file); | ||
961 | BTRFS_I(inode)->sequence++; | ||
962 | |||
963 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
964 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
965 | pos, ppos, count, | ||
966 | ocount); | ||
967 | /* | ||
968 | * the generic O_DIRECT will update in-memory i_size after the | ||
969 | * DIOs are done. But our endio handlers that update the on | ||
970 | * disk i_size never update past the in memory i_size. So we | ||
971 | * need one more update here to catch any additions to the | ||
972 | * file | ||
973 | */ | ||
974 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
975 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
976 | mark_inode_dirty(inode); | ||
977 | } | ||
978 | |||
979 | if (num_written < 0) { | ||
980 | ret = num_written; | ||
981 | num_written = 0; | ||
982 | goto out; | ||
983 | } else if (num_written == count) { | ||
984 | /* pick up pos changes done by the generic code */ | ||
985 | pos = *ppos; | ||
986 | goto out; | ||
987 | } | ||
988 | /* | ||
989 | * We are going to do buffered for the rest of the range, so we | ||
990 | * need to make sure to invalidate the buffered pages when we're | ||
991 | * done. | ||
992 | */ | ||
993 | buffered = 1; | ||
994 | pos += num_written; | ||
995 | } | ||
996 | 907 | ||
997 | iov_iter_init(&i, iov, nr_segs, count, num_written); | 908 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
998 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
999 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 909 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
1000 | (sizeof(struct page *))); | 910 | (sizeof(struct page *))); |
1001 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 911 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
1002 | if (!pages) { | 912 | if (!pages) |
1003 | ret = -ENOMEM; | 913 | return -ENOMEM; |
1004 | goto out; | ||
1005 | } | ||
1006 | |||
1007 | /* generic_write_checks can change our pos */ | ||
1008 | start_pos = pos; | ||
1009 | 914 | ||
1010 | first_index = pos >> PAGE_CACHE_SHIFT; | 915 | first_index = pos >> PAGE_CACHE_SHIFT; |
1011 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; | 916 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; |
1012 | 917 | ||
1013 | while (iov_iter_count(&i) > 0) { | 918 | while (iov_iter_count(i) > 0) { |
1014 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 919 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
1015 | size_t write_bytes = min(iov_iter_count(&i), | 920 | size_t write_bytes = min(iov_iter_count(i), |
1016 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 921 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
1017 | offset); | 922 | offset); |
1018 | size_t num_pages = (write_bytes + offset + | 923 | size_t num_pages = (write_bytes + offset + |
1019 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 924 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
925 | size_t dirty_pages; | ||
926 | size_t copied; | ||
1020 | 927 | ||
1021 | WARN_ON(num_pages > nrptrs); | 928 | WARN_ON(num_pages > nrptrs); |
1022 | memset(pages, 0, sizeof(struct page *) * nrptrs); | ||
1023 | 929 | ||
1024 | /* | 930 | /* |
1025 | * Fault pages before locking them in prepare_pages | 931 | * Fault pages before locking them in prepare_pages |
1026 | * to avoid recursive lock | 932 | * to avoid recursive lock |
1027 | */ | 933 | */ |
1028 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | 934 | if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) { |
1029 | ret = -EFAULT; | 935 | ret = -EFAULT; |
1030 | goto out; | 936 | break; |
1031 | } | 937 | } |
1032 | 938 | ||
1033 | ret = btrfs_delalloc_reserve_space(inode, | 939 | ret = btrfs_delalloc_reserve_space(inode, |
1034 | num_pages << PAGE_CACHE_SHIFT); | 940 | num_pages << PAGE_CACHE_SHIFT); |
1035 | if (ret) | 941 | if (ret) |
1036 | goto out; | 942 | break; |
1037 | 943 | ||
944 | /* | ||
945 | * This is going to set up the pages array with the number of | ||
946 | * pages we want, so we don't really need to worry about the | ||
947 | * contents of pages from loop to loop | ||
948 | */ | ||
1038 | ret = prepare_pages(root, file, pages, num_pages, | 949 | ret = prepare_pages(root, file, pages, num_pages, |
1039 | pos, first_index, last_index, | 950 | pos, first_index, last_index, |
1040 | write_bytes); | 951 | write_bytes); |
1041 | if (ret) { | 952 | if (ret) { |
1042 | btrfs_delalloc_release_space(inode, | 953 | btrfs_delalloc_release_space(inode, |
1043 | num_pages << PAGE_CACHE_SHIFT); | 954 | num_pages << PAGE_CACHE_SHIFT); |
1044 | goto out; | 955 | break; |
1045 | } | 956 | } |
1046 | 957 | ||
1047 | copied = btrfs_copy_from_user(pos, num_pages, | 958 | copied = btrfs_copy_from_user(pos, num_pages, |
1048 | write_bytes, pages, &i); | 959 | write_bytes, pages, i); |
1049 | 960 | ||
1050 | /* | 961 | /* |
1051 | * if we have trouble faulting in the pages, fall | 962 | * if we have trouble faulting in the pages, fall |
@@ -1061,6 +972,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1061 | PAGE_CACHE_SIZE - 1) >> | 972 | PAGE_CACHE_SIZE - 1) >> |
1062 | PAGE_CACHE_SHIFT; | 973 | PAGE_CACHE_SHIFT; |
1063 | 974 | ||
975 | /* | ||
976 | * If we had a short copy we need to release the excess delalloc | ||
977 | * bytes we reserved. We need to increment outstanding_extents | ||
978 | * because btrfs_delalloc_release_space will decrement it, but | ||
979 | * we still have an outstanding extent for the chunk we actually | ||
980 | * managed to copy. | ||
981 | */ | ||
1064 | if (num_pages > dirty_pages) { | 982 | if (num_pages > dirty_pages) { |
1065 | if (copied > 0) | 983 | if (copied > 0) |
1066 | atomic_inc( | 984 | atomic_inc( |
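The comment above describes the short-copy bookkeeping; here is a condensed sketch of it, pulled out of the loop for readability. All identifiers come from this hunk, and the fragment assumes the surrounding kernel context rather than being standalone code:

/* keep the reservation only for the pages that actually received data */
size_t dirty_pages = 0;
if (copied > 0)
        dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
                        PAGE_CACHE_SHIFT;

if (num_pages > dirty_pages) {
        if (copied > 0)
                /* we still own one extent for the copied chunk, and
                 * btrfs_delalloc_release_space() will decrement it */
                atomic_inc(&BTRFS_I(inode)->outstanding_extents);
        btrfs_delalloc_release_space(inode,
                        (num_pages - dirty_pages) << PAGE_CACHE_SHIFT);
}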
@@ -1071,39 +989,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1071 | } | 989 | } |
1072 | 990 | ||
1073 | if (copied > 0) { | 991 | if (copied > 0) { |
1074 | dirty_and_release_pages(NULL, root, file, pages, | 992 | ret = btrfs_dirty_pages(root, inode, pages, |
1075 | dirty_pages, pos, copied); | 993 | dirty_pages, pos, copied, |
994 | NULL); | ||
995 | if (ret) { | ||
996 | btrfs_delalloc_release_space(inode, | ||
997 | dirty_pages << PAGE_CACHE_SHIFT); | ||
998 | btrfs_drop_pages(pages, num_pages); | ||
999 | break; | ||
1000 | } | ||
1076 | } | 1001 | } |
1077 | 1002 | ||
1078 | btrfs_drop_pages(pages, num_pages); | 1003 | btrfs_drop_pages(pages, num_pages); |
1079 | 1004 | ||
1080 | if (copied > 0) { | 1005 | cond_resched(); |
1081 | if (will_write) { | 1006 | |
1082 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1007 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1083 | pos + copied - 1); | 1008 | dirty_pages); |
1084 | } else { | 1009 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1085 | balance_dirty_pages_ratelimited_nr( | 1010 | btrfs_btree_balance_dirty(root, 1); |
1086 | inode->i_mapping, | 1011 | btrfs_throttle(root); |
1087 | dirty_pages); | ||
1088 | if (dirty_pages < | ||
1089 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | ||
1090 | btrfs_btree_balance_dirty(root, 1); | ||
1091 | btrfs_throttle(root); | ||
1092 | } | ||
1093 | } | ||
1094 | 1012 | ||
1095 | pos += copied; | 1013 | pos += copied; |
1096 | num_written += copied; | 1014 | num_written += copied; |
1015 | } | ||
1097 | 1016 | ||
1098 | cond_resched(); | 1017 | kfree(pages); |
1018 | |||
1019 | return num_written ? num_written : ret; | ||
1020 | } | ||
1021 | |||
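A hedged usage sketch for the new helper: callers wrap their iovec array in an iov_iter and advance the file position themselves, mirroring the buffered branch of btrfs_file_aio_write further down (iov, nr_segs, count and pos are caller state; the helper does not touch ki_pos):

struct iov_iter i;
ssize_t written;

iov_iter_init(&i, iov, nr_segs, count, 0);      /* nothing written yet */
written = __btrfs_buffered_write(file, &i, pos);
if (written > 0)
        iocb->ki_pos = pos + written;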
1022 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | ||
1023 | const struct iovec *iov, | ||
1024 | unsigned long nr_segs, loff_t pos, | ||
1025 | loff_t *ppos, size_t count, size_t ocount) | ||
1026 | { | ||
1027 | struct file *file = iocb->ki_filp; | ||
1028 | struct inode *inode = fdentry(file)->d_inode; | ||
1029 | struct iov_iter i; | ||
1030 | ssize_t written; | ||
1031 | ssize_t written_buffered; | ||
1032 | loff_t endbyte; | ||
1033 | int err; | ||
1034 | |||
1035 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, | ||
1036 | count, ocount); | ||
1037 | |||
1038 | /* | ||
1039 | * the generic O_DIRECT will update in-memory i_size after the | ||
1040 | * DIOs are done. But our endio handlers that update the on | ||
1041 | * disk i_size never update past the in memory i_size. So we | ||
1042 | * need one more update here to catch any additions to the | ||
1043 | * file | ||
1044 | */ | ||
1045 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
1046 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
1047 | mark_inode_dirty(inode); | ||
1099 | } | 1048 | } |
1049 | |||
1050 | if (written < 0 || written == count) | ||
1051 | return written; | ||
1052 | |||
1053 | pos += written; | ||
1054 | count -= written; | ||
1055 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
1056 | written_buffered = __btrfs_buffered_write(file, &i, pos); | ||
1057 | if (written_buffered < 0) { | ||
1058 | err = written_buffered; | ||
1059 | goto out; | ||
1060 | } | ||
1061 | endbyte = pos + written_buffered - 1; | ||
1062 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | ||
1063 | if (err) | ||
1064 | goto out; | ||
1065 | written += written_buffered; | ||
1066 | *ppos = pos + written_buffered; | ||
1067 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | ||
1068 | endbyte >> PAGE_CACHE_SHIFT); | ||
1100 | out: | 1069 | out: |
1101 | mutex_unlock(&inode->i_mutex); | 1070 | return written ? written : err; |
1102 | if (ret) | 1071 | } |
1103 | err = ret; | ||
1104 | 1072 | ||
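The flush-and-invalidate tail of __btrfs_direct_write is what preserves O_DIRECT semantics when part of a request falls back to the buffered path: the buffered tail must reach disk and leave the page cache before the write returns. Just that step, restated with the same range math as the hunk:

/* push the buffered tail to disk, then drop those pages so a later
 * O_DIRECT read cannot be served stale data out of the page cache */
endbyte = pos + written_buffered - 1;
err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
if (!err)
        invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
                                 endbyte >> PAGE_CACHE_SHIFT);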
1105 | kfree(pages); | 1073 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
1106 | *ppos = pos; | 1074 | const struct iovec *iov, |
1075 | unsigned long nr_segs, loff_t pos) | ||
1076 | { | ||
1077 | struct file *file = iocb->ki_filp; | ||
1078 | struct inode *inode = fdentry(file)->d_inode; | ||
1079 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1080 | loff_t *ppos = &iocb->ki_pos; | ||
1081 | ssize_t num_written = 0; | ||
1082 | ssize_t err = 0; | ||
1083 | size_t count, ocount; | ||
1084 | |||
1085 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | ||
1086 | |||
1087 | mutex_lock(&inode->i_mutex); | ||
1088 | |||
1089 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
1090 | if (err) { | ||
1091 | mutex_unlock(&inode->i_mutex); | ||
1092 | goto out; | ||
1093 | } | ||
1094 | count = ocount; | ||
1095 | |||
1096 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | ||
1097 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | ||
1098 | if (err) { | ||
1099 | mutex_unlock(&inode->i_mutex); | ||
1100 | goto out; | ||
1101 | } | ||
1102 | |||
1103 | if (count == 0) { | ||
1104 | mutex_unlock(&inode->i_mutex); | ||
1105 | goto out; | ||
1106 | } | ||
1107 | |||
1108 | err = file_remove_suid(file); | ||
1109 | if (err) { | ||
1110 | mutex_unlock(&inode->i_mutex); | ||
1111 | goto out; | ||
1112 | } | ||
1113 | |||
1114 | /* | ||
1115 | * If BTRFS flips readonly due to some impossible error | ||
1116 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
1117 | * although we have opened a file as writable, we have | ||
1118 | * to stop this write operation to ensure FS consistency. | ||
1119 | */ | ||
1120 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
1121 | mutex_unlock(&inode->i_mutex); | ||
1122 | err = -EROFS; | ||
1123 | goto out; | ||
1124 | } | ||
1125 | |||
1126 | file_update_time(file); | ||
1127 | BTRFS_I(inode)->sequence++; | ||
1128 | |||
1129 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
1130 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | ||
1131 | pos, ppos, count, ocount); | ||
1132 | } else { | ||
1133 | struct iov_iter i; | ||
1134 | |||
1135 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
1136 | |||
1137 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
1138 | if (num_written > 0) | ||
1139 | *ppos = pos + num_written; | ||
1140 | } | ||
1141 | |||
1142 | mutex_unlock(&inode->i_mutex); | ||
1107 | 1143 | ||
1108 | /* | 1144 | /* |
1109 | * we want to make sure fsync finds this change | 1145 | * we want to make sure fsync finds this change |
@@ -1118,43 +1154,12 @@ out: | |||
1118 | * one running right now. | 1154 | * one running right now. |
1119 | */ | 1155 | */ |
1120 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1156 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
1121 | 1157 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | |
1122 | if (num_written > 0 && will_write) { | 1158 | err = generic_write_sync(file, pos, num_written); |
1123 | struct btrfs_trans_handle *trans; | 1159 | if (err < 0 && num_written > 0) |
1124 | |||
1125 | err = btrfs_wait_ordered_range(inode, start_pos, num_written); | ||
1126 | if (err) | ||
1127 | num_written = err; | 1160 | num_written = err; |
1128 | |||
1129 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
1130 | trans = btrfs_start_transaction(root, 0); | ||
1131 | if (IS_ERR(trans)) { | ||
1132 | num_written = PTR_ERR(trans); | ||
1133 | goto done; | ||
1134 | } | ||
1135 | mutex_lock(&inode->i_mutex); | ||
1136 | ret = btrfs_log_dentry_safe(trans, root, | ||
1137 | file->f_dentry); | ||
1138 | mutex_unlock(&inode->i_mutex); | ||
1139 | if (ret == 0) { | ||
1140 | ret = btrfs_sync_log(trans, root); | ||
1141 | if (ret == 0) | ||
1142 | btrfs_end_transaction(trans, root); | ||
1143 | else | ||
1144 | btrfs_commit_transaction(trans, root); | ||
1145 | } else if (ret != BTRFS_NO_LOG_SYNC) { | ||
1146 | btrfs_commit_transaction(trans, root); | ||
1147 | } else { | ||
1148 | btrfs_end_transaction(trans, root); | ||
1149 | } | ||
1150 | } | ||
1151 | if (file->f_flags & O_DIRECT && buffered) { | ||
1152 | invalidate_mapping_pages(inode->i_mapping, | ||
1153 | start_pos >> PAGE_CACHE_SHIFT, | ||
1154 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | ||
1155 | } | ||
1156 | } | 1161 | } |
1157 | done: | 1162 | out: |
1158 | current->backing_dev_info = NULL; | 1163 | current->backing_dev_info = NULL; |
1159 | return num_written ? num_written : err; | 1164 | return num_written ? num_written : err; |
1160 | } | 1165 | } |
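The hand-rolled tree-log commit above is replaced by generic_write_sync(), which costs nothing unless the write actually demands synchronous semantics. Roughly its contract in kernels of this vintage (a from-memory sketch, not the mainline body):

/* sketch: only O_SYNC/O_DSYNC files or sync-mounted inodes pay the cost */
static int write_sync_sketch(struct file *file, loff_t pos, loff_t count)
{
        if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
                return 0;
        /* datasync unless full O_SYNC was requested */
        return vfs_fsync_range(file, pos, pos + count - 1,
                               (file->f_flags & __O_SYNC) ? 0 : 1);
}

Since vfs_fsync_range() lands in ->fsync, btrfs still reaches its tree log through btrfs_sync_file instead of duplicating that logic in the write path.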
@@ -1197,6 +1202,7 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1197 | int ret = 0; | 1202 | int ret = 0; |
1198 | struct btrfs_trans_handle *trans; | 1203 | struct btrfs_trans_handle *trans; |
1199 | 1204 | ||
1205 | trace_btrfs_sync_file(file, datasync); | ||
1200 | 1206 | ||
1201 | /* we wait first, since the writeback may change the inode */ | 1207 | /* we wait first, since the writeback may change the inode */ |
1202 | root->log_batch++; | 1208 | root->log_batch++; |
@@ -1324,7 +1330,8 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1324 | goto out; | 1330 | goto out; |
1325 | 1331 | ||
1326 | if (alloc_start > inode->i_size) { | 1332 | if (alloc_start > inode->i_size) { |
1327 | ret = btrfs_cont_expand(inode, alloc_start); | 1333 | ret = btrfs_cont_expand(inode, i_size_read(inode), |
1334 | alloc_start); | ||
1328 | if (ret) | 1335 | if (ret) |
1329 | goto out; | 1336 | goto out; |
1330 | } | 1337 | } |
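Both call sites in this section now pass the current in-memory size next to the target offset, which implies btrfs_cont_expand() grew an explicit oldsize parameter. The prototype below is reconstructed from the call sites, not quoted from ctree.h:

/* reconstructed: zero/hole-fill [oldsize, size) when a write or fallocate
 * begins past the current EOF (parameter names are assumptions) */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);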
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a0390657451b..11d2e9cea09e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | 26 | #include "disk-io.h" |
27 | #include "extent_io.h" | ||
27 | 28 | ||
28 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 29 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
29 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 30 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
81 | return ERR_PTR(-ENOENT); | 82 | return ERR_PTR(-ENOENT); |
82 | } | 83 | } |
83 | 84 | ||
85 | inode->i_mapping->flags &= ~__GFP_FS; | ||
86 | |||
84 | spin_lock(&block_group->lock); | 87 | spin_lock(&block_group->lock); |
85 | if (!root->fs_info->closing) { | 88 | if (!root->fs_info->closing) { |
86 | block_group->inode = igrab(inode); | 89 | block_group->inode = igrab(inode); |
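Clearing __GFP_FS on the free-space-cache inode's mapping keeps page-cache allocations for it from recursing back into the filesystem during reclaim, which matters because this inode is filled while commit-critical locks are held. The open-coded flag twiddle relies on the gfp mask living in the low bits of mapping->flags; the usual pagemap helpers express the same thing:

/* equivalent form of the hunk above, via the pagemap.h helpers */
mapping_set_gfp_mask(inode->i_mapping,
                     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);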
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
222 | u64 num_entries; | 225 | u64 num_entries; |
223 | u64 num_bitmaps; | 226 | u64 num_bitmaps; |
224 | u64 generation; | 227 | u64 generation; |
228 | u64 used = btrfs_block_group_used(&block_group->item); | ||
225 | u32 cur_crc = ~(u32)0; | 229 | u32 cur_crc = ~(u32)0; |
226 | pgoff_t index = 0; | 230 | pgoff_t index = 0; |
227 | unsigned long first_page_offset; | 231 | unsigned long first_page_offset; |
@@ -393,7 +397,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
393 | break; | 397 | break; |
394 | 398 | ||
395 | need_loop = 1; | 399 | need_loop = 1; |
396 | e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 400 | e = kmem_cache_zalloc(btrfs_free_space_cachep, |
401 | GFP_NOFS); | ||
397 | if (!e) { | 402 | if (!e) { |
398 | kunmap(page); | 403 | kunmap(page); |
399 | unlock_page(page); | 404 | unlock_page(page); |
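From here on, struct btrfs_free_space allocations switch from plain kzalloc() to a dedicated slab cache, btrfs_free_space_cachep. Its kmem_cache_create() call is outside this section; a plausible setup, with the flags as assumptions, would look like:

/* sketch only: the real create call lives elsewhere in this series */
btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
                        sizeof(struct btrfs_free_space), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_cachep)
        return -ENOMEM;

A dedicated cache avoids kmalloc size-class rounding for these small, high-churn objects and makes their footprint visible in slabinfo.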
@@ -405,7 +410,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
405 | e->bytes = le64_to_cpu(entry->bytes); | 410 | e->bytes = le64_to_cpu(entry->bytes); |
406 | if (!e->bytes) { | 411 | if (!e->bytes) { |
407 | kunmap(page); | 412 | kunmap(page); |
408 | kfree(e); | 413 | kmem_cache_free(btrfs_free_space_cachep, e); |
409 | unlock_page(page); | 414 | unlock_page(page); |
410 | page_cache_release(page); | 415 | page_cache_release(page); |
411 | goto free_cache; | 416 | goto free_cache; |
@@ -420,7 +425,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
420 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 425 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
421 | if (!e->bitmap) { | 426 | if (!e->bitmap) { |
422 | kunmap(page); | 427 | kunmap(page); |
423 | kfree(e); | 428 | kmem_cache_free( |
429 | btrfs_free_space_cachep, e); | ||
424 | unlock_page(page); | 430 | unlock_page(page); |
425 | page_cache_release(page); | 431 | page_cache_release(page); |
426 | goto free_cache; | 432 | goto free_cache; |
@@ -465,6 +471,17 @@ next: | |||
465 | index++; | 471 | index++; |
466 | } | 472 | } |
467 | 473 | ||
474 | spin_lock(&block_group->tree_lock); | ||
475 | if (block_group->free_space != (block_group->key.offset - used - | ||
476 | block_group->bytes_super)) { | ||
477 | spin_unlock(&block_group->tree_lock); | ||
478 | printk(KERN_ERR "block group %llu has the wrong amount of free " | ||
479 | "space\n", block_group->key.objectid); | ||
480 | ret = 0; | ||
481 | goto free_cache; | ||
482 | } | ||
483 | spin_unlock(&block_group->tree_lock); | ||
484 | |||
468 | ret = 1; | 485 | ret = 1; |
469 | out: | 486 | out: |
470 | kfree(checksums); | 487 | kfree(checksums); |
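The new sanity check recomputes what the cached free space must add up to before trusting the on-disk cache, and discards the cache for a full rescan when the identity fails. Spelled out:

/* expected free space in a block group:
 *     key.offset               length of the block group
 *   - used                     bytes allocated to extents
 *   - bytes_super              bytes reserved for superblock mirrors */
u64 expected = block_group->key.offset - used - block_group->bytes_super;

if (block_group->free_space != expected)
        goto free_cache;        /* stale or corrupt: rebuild from extent tree */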
@@ -491,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
491 | struct inode *inode; | 508 | struct inode *inode; |
492 | struct rb_node *node; | 509 | struct rb_node *node; |
493 | struct list_head *pos, *n; | 510 | struct list_head *pos, *n; |
511 | struct page **pages; | ||
494 | struct page *page; | 512 | struct page *page; |
495 | struct extent_state *cached_state = NULL; | 513 | struct extent_state *cached_state = NULL; |
514 | struct btrfs_free_cluster *cluster = NULL; | ||
515 | struct extent_io_tree *unpin = NULL; | ||
496 | struct list_head bitmap_list; | 516 | struct list_head bitmap_list; |
497 | struct btrfs_key key; | 517 | struct btrfs_key key; |
518 | u64 start, end, len; | ||
498 | u64 bytes = 0; | 519 | u64 bytes = 0; |
499 | u32 *crc, *checksums; | 520 | u32 *crc, *checksums; |
500 | pgoff_t index = 0, last_index = 0; | ||
501 | unsigned long first_page_offset; | 521 | unsigned long first_page_offset; |
502 | int num_checksums; | 522 | int index = 0, num_pages = 0; |
503 | int entries = 0; | 523 | int entries = 0; |
504 | int bitmaps = 0; | 524 | int bitmaps = 0; |
505 | int ret = 0; | 525 | int ret = 0; |
526 | bool next_page = false; | ||
527 | bool out_of_space = false; | ||
506 | 528 | ||
507 | root = root->fs_info->tree_root; | 529 | root = root->fs_info->tree_root; |
508 | 530 | ||
@@ -530,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
530 | return 0; | 552 | return 0; |
531 | } | 553 | } |
532 | 554 | ||
533 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 555 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
556 | PAGE_CACHE_SHIFT; | ||
534 | filemap_write_and_wait(inode->i_mapping); | 557 | filemap_write_and_wait(inode->i_mapping); |
535 | btrfs_wait_ordered_range(inode, inode->i_size & | 558 | btrfs_wait_ordered_range(inode, inode->i_size & |
536 | ~(root->sectorsize - 1), (u64)-1); | 559 | ~(root->sectorsize - 1), (u64)-1); |
537 | 560 | ||
538 | /* We need a checksum per page. */ | 561 | /* We need a checksum per page. */ |
539 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | 562 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
540 | crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
541 | if (!crc) { | 563 | if (!crc) { |
542 | iput(inode); | 564 | iput(inode); |
543 | return 0; | 565 | return 0; |
544 | } | 566 | } |
545 | 567 | ||
568 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
569 | if (!pages) { | ||
570 | kfree(crc); | ||
571 | iput(inode); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
546 | /* Since the first page has all of our checksums and our generation we | 575 | /* Since the first page has all of our checksums and our generation we |
547 | * need to calculate the offset into the page that we can start writing | 576 | * need to calculate the offset into the page that we can start writing |
548 | * our entries. | 577 | * our entries. |
549 | */ | 578 | */ |
550 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 579 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); |
580 | |||
581 | /* Get the cluster for this block_group if it exists */ | ||
582 | if (!list_empty(&block_group->cluster_list)) | ||
583 | cluster = list_entry(block_group->cluster_list.next, | ||
584 | struct btrfs_free_cluster, | ||
585 | block_group_list); | ||
586 | |||
587 | /* | ||
588 | * We shouldn't have switched the pinned extents yet so this is the | ||
589 | * right one | ||
590 | */ | ||
591 | unpin = root->fs_info->pinned_extents; | ||
551 | 592 | ||
552 | /* | 593 | /* |
553 | * Lock all pages first so we can lock the extent safely. | 594 | * Lock all pages first so we can lock the extent safely. |
@@ -557,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
557 | * after find_get_page at this point. Just putting this here so people | 598 | * after find_get_page at this point. Just putting this here so people |
558 | * know and don't freak out. | 599 | * know and don't freak out. |
559 | */ | 600 | */ |
560 | while (index <= last_index) { | 601 | while (index < num_pages) { |
561 | page = grab_cache_page(inode->i_mapping, index); | 602 | page = grab_cache_page(inode->i_mapping, index); |
562 | if (!page) { | 603 | if (!page) { |
563 | pgoff_t i = 0; | 604 | int i; |
564 | 605 | ||
565 | while (i < index) { | 606 | for (i = 0; i < num_pages; i++) { |
566 | page = find_get_page(inode->i_mapping, i); | 607 | unlock_page(pages[i]); |
567 | unlock_page(page); | 608 | page_cache_release(pages[i]); |
568 | page_cache_release(page); | ||
569 | page_cache_release(page); | ||
570 | i++; | ||
571 | } | 609 | } |
572 | goto out_free; | 610 | goto out_free; |
573 | } | 611 | } |
612 | pages[index] = page; | ||
574 | index++; | 613 | index++; |
575 | } | 614 | } |
576 | 615 | ||
@@ -578,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
578 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 617 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
579 | 0, &cached_state, GFP_NOFS); | 618 | 0, &cached_state, GFP_NOFS); |
580 | 619 | ||
620 | /* | ||
621 | * When searching for pinned extents, we need to start at our start | ||
622 | * offset. | ||
623 | */ | ||
624 | start = block_group->key.objectid; | ||
625 | |||
581 | /* Write out the extent entries */ | 626 | /* Write out the extent entries */ |
582 | do { | 627 | do { |
583 | struct btrfs_free_space_entry *entry; | 628 | struct btrfs_free_space_entry *entry; |
@@ -585,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
585 | unsigned long offset = 0; | 630 | unsigned long offset = 0; |
586 | unsigned long start_offset = 0; | 631 | unsigned long start_offset = 0; |
587 | 632 | ||
633 | next_page = false; | ||
634 | |||
588 | if (index == 0) { | 635 | if (index == 0) { |
589 | start_offset = first_page_offset; | 636 | start_offset = first_page_offset; |
590 | offset = start_offset; | 637 | offset = start_offset; |
591 | } | 638 | } |
592 | 639 | ||
593 | page = find_get_page(inode->i_mapping, index); | 640 | if (index >= num_pages) { |
641 | out_of_space = true; | ||
642 | break; | ||
643 | } | ||
644 | |||
645 | page = pages[index]; | ||
594 | 646 | ||
595 | addr = kmap(page); | 647 | addr = kmap(page); |
596 | entry = addr + start_offset; | 648 | entry = addr + start_offset; |
597 | 649 | ||
598 | memset(addr, 0, PAGE_CACHE_SIZE); | 650 | memset(addr, 0, PAGE_CACHE_SIZE); |
599 | while (1) { | 651 | while (node && !next_page) { |
600 | struct btrfs_free_space *e; | 652 | struct btrfs_free_space *e; |
601 | 653 | ||
602 | e = rb_entry(node, struct btrfs_free_space, offset_index); | 654 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
@@ -612,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
612 | entry->type = BTRFS_FREE_SPACE_EXTENT; | 664 | entry->type = BTRFS_FREE_SPACE_EXTENT; |
613 | } | 665 | } |
614 | node = rb_next(node); | 666 | node = rb_next(node); |
615 | if (!node) | 667 | if (!node && cluster) { |
616 | break; | 668 | node = rb_first(&cluster->root); |
669 | cluster = NULL; | ||
670 | } | ||
617 | offset += sizeof(struct btrfs_free_space_entry); | 671 | offset += sizeof(struct btrfs_free_space_entry); |
618 | if (offset + sizeof(struct btrfs_free_space_entry) >= | 672 | if (offset + sizeof(struct btrfs_free_space_entry) >= |
619 | PAGE_CACHE_SIZE) | 673 | PAGE_CACHE_SIZE) |
674 | next_page = true; | ||
675 | entry++; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * We want to add any pinned extents to our free space cache | ||
680 | * so we don't leak the space | ||
681 | */ | ||
682 | while (!next_page && (start < block_group->key.objectid + | ||
683 | block_group->key.offset)) { | ||
684 | ret = find_first_extent_bit(unpin, start, &start, &end, | ||
685 | EXTENT_DIRTY); | ||
686 | if (ret) { | ||
687 | ret = 0; | ||
688 | break; | ||
689 | } | ||
690 | |||
691 | /* This pinned extent is out of our range */ | ||
692 | if (start >= block_group->key.objectid + | ||
693 | block_group->key.offset) | ||
620 | break; | 694 | break; |
695 | |||
696 | len = block_group->key.objectid + | ||
697 | block_group->key.offset - start; | ||
698 | len = min(len, end + 1 - start); | ||
699 | |||
700 | entries++; | ||
701 | entry->offset = cpu_to_le64(start); | ||
702 | entry->bytes = cpu_to_le64(len); | ||
703 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
704 | |||
705 | start = end + 1; | ||
706 | offset += sizeof(struct btrfs_free_space_entry); | ||
707 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
708 | PAGE_CACHE_SIZE) | ||
709 | next_page = true; | ||
621 | entry++; | 710 | entry++; |
622 | } | 711 | } |
623 | *crc = ~(u32)0; | 712 | *crc = ~(u32)0; |
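Pinned extents are blocks freed earlier in the running transaction; they become allocatable at commit, and the cache written here describes that post-commit state, so they are emitted as free-space entries too. The clamping of each pinned range to the block group, restated:

/* clamp a pinned range [start, end] to this block group's extent */
u64 bg_end = block_group->key.objectid + block_group->key.offset;
len = min(bg_end - start, end + 1 - start);     /* as in the hunk above */
start = end + 1;                                /* resume the search after it */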
@@ -630,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
630 | 719 | ||
631 | bytes += PAGE_CACHE_SIZE; | 720 | bytes += PAGE_CACHE_SIZE; |
632 | 721 | ||
633 | ClearPageChecked(page); | ||
634 | set_page_extent_mapped(page); | ||
635 | SetPageUptodate(page); | ||
636 | set_page_dirty(page); | ||
637 | |||
638 | /* | ||
639 | * We need to release our reference we got for grab_cache_page, | ||
640 | * except for the first page which will hold our checksums, we | ||
641 | * do that below. | ||
642 | */ | ||
643 | if (index != 0) { | ||
644 | unlock_page(page); | ||
645 | page_cache_release(page); | ||
646 | } | ||
647 | |||
648 | page_cache_release(page); | ||
649 | |||
650 | index++; | 722 | index++; |
651 | } while (node); | 723 | } while (node || next_page); |
652 | 724 | ||
653 | /* Write out the bitmaps */ | 725 | /* Write out the bitmaps */ |
654 | list_for_each_safe(pos, n, &bitmap_list) { | 726 | list_for_each_safe(pos, n, &bitmap_list) { |
@@ -656,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
656 | struct btrfs_free_space *entry = | 728 | struct btrfs_free_space *entry = |
657 | list_entry(pos, struct btrfs_free_space, list); | 729 | list_entry(pos, struct btrfs_free_space, list); |
658 | 730 | ||
659 | page = find_get_page(inode->i_mapping, index); | 731 | if (index >= num_pages) { |
732 | out_of_space = true; | ||
733 | break; | ||
734 | } | ||
735 | page = pages[index]; | ||
660 | 736 | ||
661 | addr = kmap(page); | 737 | addr = kmap(page); |
662 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 738 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
@@ -667,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
667 | crc++; | 743 | crc++; |
668 | bytes += PAGE_CACHE_SIZE; | 744 | bytes += PAGE_CACHE_SIZE; |
669 | 745 | ||
670 | ClearPageChecked(page); | ||
671 | set_page_extent_mapped(page); | ||
672 | SetPageUptodate(page); | ||
673 | set_page_dirty(page); | ||
674 | unlock_page(page); | ||
675 | page_cache_release(page); | ||
676 | page_cache_release(page); | ||
677 | list_del_init(&entry->list); | 746 | list_del_init(&entry->list); |
678 | index++; | 747 | index++; |
679 | } | 748 | } |
680 | 749 | ||
750 | if (out_of_space) { | ||
751 | btrfs_drop_pages(pages, num_pages); | ||
752 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
753 | i_size_read(inode) - 1, &cached_state, | ||
754 | GFP_NOFS); | ||
755 | ret = 0; | ||
756 | goto out_free; | ||
757 | } | ||
758 | |||
681 | /* Zero out the rest of the pages just to make sure */ | 759 | /* Zero out the rest of the pages just to make sure */ |
682 | while (index <= last_index) { | 760 | while (index < num_pages) { |
683 | void *addr; | 761 | void *addr; |
684 | 762 | ||
685 | page = find_get_page(inode->i_mapping, index); | 763 | page = pages[index]; |
686 | |||
687 | addr = kmap(page); | 764 | addr = kmap(page); |
688 | memset(addr, 0, PAGE_CACHE_SIZE); | 765 | memset(addr, 0, PAGE_CACHE_SIZE); |
689 | kunmap(page); | 766 | kunmap(page); |
690 | ClearPageChecked(page); | ||
691 | set_page_extent_mapped(page); | ||
692 | SetPageUptodate(page); | ||
693 | set_page_dirty(page); | ||
694 | unlock_page(page); | ||
695 | page_cache_release(page); | ||
696 | page_cache_release(page); | ||
697 | bytes += PAGE_CACHE_SIZE; | 767 | bytes += PAGE_CACHE_SIZE; |
698 | index++; | 768 | index++; |
699 | } | 769 | } |
700 | 770 | ||
701 | btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); | ||
702 | |||
703 | /* Write the checksums and trans id to the first page */ | 771 | /* Write the checksums and trans id to the first page */ |
704 | { | 772 | { |
705 | void *addr; | 773 | void *addr; |
706 | u64 *gen; | 774 | u64 *gen; |
707 | 775 | ||
708 | page = find_get_page(inode->i_mapping, 0); | 776 | page = pages[0]; |
709 | 777 | ||
710 | addr = kmap(page); | 778 | addr = kmap(page); |
711 | memcpy(addr, checksums, sizeof(u32) * num_checksums); | 779 | memcpy(addr, checksums, sizeof(u32) * num_pages); |
712 | gen = addr + (sizeof(u32) * num_checksums); | 780 | gen = addr + (sizeof(u32) * num_pages); |
713 | *gen = trans->transid; | 781 | *gen = trans->transid; |
714 | kunmap(page); | 782 | kunmap(page); |
715 | ClearPageChecked(page); | ||
716 | set_page_extent_mapped(page); | ||
717 | SetPageUptodate(page); | ||
718 | set_page_dirty(page); | ||
719 | unlock_page(page); | ||
720 | page_cache_release(page); | ||
721 | page_cache_release(page); | ||
722 | } | 783 | } |
723 | BTRFS_I(inode)->generation = trans->transid; | ||
724 | 784 | ||
785 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | ||
786 | bytes, &cached_state); | ||
787 | btrfs_drop_pages(pages, num_pages); | ||
725 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 788 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
726 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 789 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
727 | 790 | ||
791 | if (ret) { | ||
792 | ret = 0; | ||
793 | goto out_free; | ||
794 | } | ||
795 | |||
796 | BTRFS_I(inode)->generation = trans->transid; | ||
797 | |||
728 | filemap_write_and_wait(inode->i_mapping); | 798 | filemap_write_and_wait(inode->i_mapping); |
729 | 799 | ||
730 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 800 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -775,6 +845,7 @@ out_free: | |||
775 | BTRFS_I(inode)->generation = 0; | 845 | BTRFS_I(inode)->generation = 0; |
776 | } | 846 | } |
777 | kfree(checksums); | 847 | kfree(checksums); |
848 | kfree(pages); | ||
778 | btrfs_update_inode(trans, root, inode); | 849 | btrfs_update_inode(trans, root, inode); |
779 | iput(inode); | 850 | iput(inode); |
780 | return ret; | 851 | return ret; |
@@ -1187,7 +1258,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group, | |||
1187 | { | 1258 | { |
1188 | unlink_free_space(block_group, bitmap_info); | 1259 | unlink_free_space(block_group, bitmap_info); |
1189 | kfree(bitmap_info->bitmap); | 1260 | kfree(bitmap_info->bitmap); |
1190 | kfree(bitmap_info); | 1261 | kmem_cache_free(btrfs_free_space_cachep, bitmap_info); |
1191 | block_group->total_bitmaps--; | 1262 | block_group->total_bitmaps--; |
1192 | recalculate_thresholds(block_group); | 1263 | recalculate_thresholds(block_group); |
1193 | } | 1264 | } |
@@ -1285,9 +1356,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, | |||
1285 | * If we are below the extents threshold then we can add this as an | 1356 | * If we are below the extents threshold then we can add this as an |
1286 | * extent, and don't have to deal with the bitmap | 1357 | * extent, and don't have to deal with the bitmap |
1287 | */ | 1358 | */ |
1288 | if (block_group->free_extents < block_group->extents_thresh && | 1359 | if (block_group->free_extents < block_group->extents_thresh) { |
1289 | info->bytes > block_group->sectorsize * 4) | 1360 | /* |
1290 | return 0; | 1361 | * If this block group has some small extents we don't want to |
1362 | * use up all of our free slots in the cache with them, we want | ||
1363 | * to reserve them for larger extents, however if we have plenty | ||
1364 | * of cache left then go ahead and add them, no sense in adding | ||
1365 | * the overhead of a bitmap if we don't have to. | ||
1366 | */ | ||
1367 | if (info->bytes <= block_group->sectorsize * 4) { | ||
1368 | if (block_group->free_extents * 2 <= | ||
1369 | block_group->extents_thresh) | ||
1370 | return 0; | ||
1371 | } else { | ||
1372 | return 0; | ||
1373 | } | ||
1374 | } | ||
1291 | 1375 | ||
1292 | /* | 1376 | /* |
1293 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1377 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
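The nested conditions may read more easily as a single predicate; this is a restatement of the logic above, not separate code from the patch (the function name is invented):

/* true => track this free range in a bitmap, false => plain extent entry */
static bool should_use_bitmap(struct btrfs_block_group_cache *bg,
                              struct btrfs_free_space *info)
{
        if (bg->free_extents >= bg->extents_thresh)
                return true;            /* extent slots exhausted */
        if (info->bytes > bg->sectorsize * 4)
                return false;           /* large extent: always worth a slot */
        /* small extent: spend a slot only while under half the threshold */
        return bg->free_extents * 2 > bg->extents_thresh;
}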
@@ -1342,8 +1426,8 @@ new_bitmap: | |||
1342 | 1426 | ||
1343 | /* no pre-allocated info, allocate a new one */ | 1427 | /* no pre-allocated info, allocate a new one */ |
1344 | if (!info) { | 1428 | if (!info) { |
1345 | info = kzalloc(sizeof(struct btrfs_free_space), | 1429 | info = kmem_cache_zalloc(btrfs_free_space_cachep, |
1346 | GFP_NOFS); | 1430 | GFP_NOFS); |
1347 | if (!info) { | 1431 | if (!info) { |
1348 | spin_lock(&block_group->tree_lock); | 1432 | spin_lock(&block_group->tree_lock); |
1349 | ret = -ENOMEM; | 1433 | ret = -ENOMEM; |
@@ -1365,7 +1449,7 @@ out: | |||
1365 | if (info) { | 1449 | if (info) { |
1366 | if (info->bitmap) | 1450 | if (info->bitmap) |
1367 | kfree(info->bitmap); | 1451 | kfree(info->bitmap); |
1368 | kfree(info); | 1452 | kmem_cache_free(btrfs_free_space_cachep, info); |
1369 | } | 1453 | } |
1370 | 1454 | ||
1371 | return ret; | 1455 | return ret; |
@@ -1398,7 +1482,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, | |||
1398 | else | 1482 | else |
1399 | __unlink_free_space(block_group, right_info); | 1483 | __unlink_free_space(block_group, right_info); |
1400 | info->bytes += right_info->bytes; | 1484 | info->bytes += right_info->bytes; |
1401 | kfree(right_info); | 1485 | kmem_cache_free(btrfs_free_space_cachep, right_info); |
1402 | merged = true; | 1486 | merged = true; |
1403 | } | 1487 | } |
1404 | 1488 | ||
@@ -1410,7 +1494,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, | |||
1410 | __unlink_free_space(block_group, left_info); | 1494 | __unlink_free_space(block_group, left_info); |
1411 | info->offset = left_info->offset; | 1495 | info->offset = left_info->offset; |
1412 | info->bytes += left_info->bytes; | 1496 | info->bytes += left_info->bytes; |
1413 | kfree(left_info); | 1497 | kmem_cache_free(btrfs_free_space_cachep, left_info); |
1414 | merged = true; | 1498 | merged = true; |
1415 | } | 1499 | } |
1416 | 1500 | ||
@@ -1423,7 +1507,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1423 | struct btrfs_free_space *info; | 1507 | struct btrfs_free_space *info; |
1424 | int ret = 0; | 1508 | int ret = 0; |
1425 | 1509 | ||
1426 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | 1510 | info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); |
1427 | if (!info) | 1511 | if (!info) |
1428 | return -ENOMEM; | 1512 | return -ENOMEM; |
1429 | 1513 | ||
@@ -1450,7 +1534,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1450 | link: | 1534 | link: |
1451 | ret = link_free_space(block_group, info); | 1535 | ret = link_free_space(block_group, info); |
1452 | if (ret) | 1536 | if (ret) |
1453 | kfree(info); | 1537 | kmem_cache_free(btrfs_free_space_cachep, info); |
1454 | out: | 1538 | out: |
1455 | spin_unlock(&block_group->tree_lock); | 1539 | spin_unlock(&block_group->tree_lock); |
1456 | 1540 | ||
@@ -1520,7 +1604,7 @@ again: | |||
1520 | kfree(info->bitmap); | 1604 | kfree(info->bitmap); |
1521 | block_group->total_bitmaps--; | 1605 | block_group->total_bitmaps--; |
1522 | } | 1606 | } |
1523 | kfree(info); | 1607 | kmem_cache_free(btrfs_free_space_cachep, info); |
1524 | goto out_lock; | 1608 | goto out_lock; |
1525 | } | 1609 | } |
1526 | 1610 | ||
@@ -1556,7 +1640,7 @@ again: | |||
1556 | /* the hole we're creating ends at the end | 1640 | /* the hole we're creating ends at the end |
1557 | * of the info struct, just free the info | 1641 | * of the info struct, just free the info |
1558 | */ | 1642 | */ |
1559 | kfree(info); | 1643 | kmem_cache_free(btrfs_free_space_cachep, info); |
1560 | } | 1644 | } |
1561 | spin_unlock(&block_group->tree_lock); | 1645 | spin_unlock(&block_group->tree_lock); |
1562 | 1646 | ||
@@ -1629,30 +1713,28 @@ __btrfs_return_cluster_to_free_space( | |||
1629 | { | 1713 | { |
1630 | struct btrfs_free_space *entry; | 1714 | struct btrfs_free_space *entry; |
1631 | struct rb_node *node; | 1715 | struct rb_node *node; |
1632 | bool bitmap; | ||
1633 | 1716 | ||
1634 | spin_lock(&cluster->lock); | 1717 | spin_lock(&cluster->lock); |
1635 | if (cluster->block_group != block_group) | 1718 | if (cluster->block_group != block_group) |
1636 | goto out; | 1719 | goto out; |
1637 | 1720 | ||
1638 | bitmap = cluster->points_to_bitmap; | ||
1639 | cluster->block_group = NULL; | 1721 | cluster->block_group = NULL; |
1640 | cluster->window_start = 0; | 1722 | cluster->window_start = 0; |
1641 | list_del_init(&cluster->block_group_list); | 1723 | list_del_init(&cluster->block_group_list); |
1642 | cluster->points_to_bitmap = false; | ||
1643 | |||
1644 | if (bitmap) | ||
1645 | goto out; | ||
1646 | 1724 | ||
1647 | node = rb_first(&cluster->root); | 1725 | node = rb_first(&cluster->root); |
1648 | while (node) { | 1726 | while (node) { |
1727 | bool bitmap; | ||
1728 | |||
1649 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1729 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1650 | node = rb_next(&entry->offset_index); | 1730 | node = rb_next(&entry->offset_index); |
1651 | rb_erase(&entry->offset_index, &cluster->root); | 1731 | rb_erase(&entry->offset_index, &cluster->root); |
1652 | BUG_ON(entry->bitmap); | 1732 | |
1653 | try_merge_free_space(block_group, entry, false); | 1733 | bitmap = (entry->bitmap != NULL); |
1734 | if (!bitmap) | ||
1735 | try_merge_free_space(block_group, entry, false); | ||
1654 | tree_insert_offset(&block_group->free_space_offset, | 1736 | tree_insert_offset(&block_group->free_space_offset, |
1655 | entry->offset, &entry->offset_index, 0); | 1737 | entry->offset, &entry->offset_index, bitmap); |
1656 | } | 1738 | } |
1657 | cluster->root = RB_ROOT; | 1739 | cluster->root = RB_ROOT; |
1658 | 1740 | ||
@@ -1689,7 +1771,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
1689 | unlink_free_space(block_group, info); | 1771 | unlink_free_space(block_group, info); |
1690 | if (info->bitmap) | 1772 | if (info->bitmap) |
1691 | kfree(info->bitmap); | 1773 | kfree(info->bitmap); |
1692 | kfree(info); | 1774 | kmem_cache_free(btrfs_free_space_cachep, info); |
1693 | if (need_resched()) { | 1775 | if (need_resched()) { |
1694 | spin_unlock(&block_group->tree_lock); | 1776 | spin_unlock(&block_group->tree_lock); |
1695 | cond_resched(); | 1777 | cond_resched(); |
@@ -1722,7 +1804,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
1722 | entry->offset += bytes; | 1804 | entry->offset += bytes; |
1723 | entry->bytes -= bytes; | 1805 | entry->bytes -= bytes; |
1724 | if (!entry->bytes) | 1806 | if (!entry->bytes) |
1725 | kfree(entry); | 1807 | kmem_cache_free(btrfs_free_space_cachep, entry); |
1726 | else | 1808 | else |
1727 | link_free_space(block_group, entry); | 1809 | link_free_space(block_group, entry); |
1728 | } | 1810 | } |
@@ -1775,50 +1857,24 @@ int btrfs_return_cluster_to_free_space( | |||
1775 | 1857 | ||
1776 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | 1858 | static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, |
1777 | struct btrfs_free_cluster *cluster, | 1859 | struct btrfs_free_cluster *cluster, |
1860 | struct btrfs_free_space *entry, | ||
1778 | u64 bytes, u64 min_start) | 1861 | u64 bytes, u64 min_start) |
1779 | { | 1862 | { |
1780 | struct btrfs_free_space *entry; | ||
1781 | int err; | 1863 | int err; |
1782 | u64 search_start = cluster->window_start; | 1864 | u64 search_start = cluster->window_start; |
1783 | u64 search_bytes = bytes; | 1865 | u64 search_bytes = bytes; |
1784 | u64 ret = 0; | 1866 | u64 ret = 0; |
1785 | 1867 | ||
1786 | spin_lock(&block_group->tree_lock); | ||
1787 | spin_lock(&cluster->lock); | ||
1788 | |||
1789 | if (!cluster->points_to_bitmap) | ||
1790 | goto out; | ||
1791 | |||
1792 | if (cluster->block_group != block_group) | ||
1793 | goto out; | ||
1794 | |||
1795 | /* | ||
1796 | * search_start is the beginning of the bitmap, but at some point it may | ||
1797 | * be a good idea to point to the actual start of the free area in the | ||
1798 | * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only | ||
1799 | * to 1 to make sure we get the bitmap entry | ||
1800 | */ | ||
1801 | entry = tree_search_offset(block_group, | ||
1802 | offset_to_bitmap(block_group, search_start), | ||
1803 | 1, 0); | ||
1804 | if (!entry || !entry->bitmap) | ||
1805 | goto out; | ||
1806 | |||
1807 | search_start = min_start; | 1868 | search_start = min_start; |
1808 | search_bytes = bytes; | 1869 | search_bytes = bytes; |
1809 | 1870 | ||
1810 | err = search_bitmap(block_group, entry, &search_start, | 1871 | err = search_bitmap(block_group, entry, &search_start, |
1811 | &search_bytes); | 1872 | &search_bytes); |
1812 | if (err) | 1873 | if (err) |
1813 | goto out; | 1874 | return 0; |
1814 | 1875 | ||
1815 | ret = search_start; | 1876 | ret = search_start; |
1816 | bitmap_clear_bits(block_group, entry, ret, bytes); | 1877 | bitmap_clear_bits(block_group, entry, ret, bytes); |
1817 | if (entry->bytes == 0) | ||
1818 | free_bitmap(block_group, entry); | ||
1819 | out: | ||
1820 | spin_unlock(&cluster->lock); | ||
1821 | spin_unlock(&block_group->tree_lock); | ||
1822 | 1878 | ||
1823 | return ret; | 1879 | return ret; |
1824 | } | 1880 | } |
@@ -1836,10 +1892,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1836 | struct rb_node *node; | 1892 | struct rb_node *node; |
1837 | u64 ret = 0; | 1893 | u64 ret = 0; |
1838 | 1894 | ||
1839 | if (cluster->points_to_bitmap) | ||
1840 | return btrfs_alloc_from_bitmap(block_group, cluster, bytes, | ||
1841 | min_start); | ||
1842 | |||
1843 | spin_lock(&cluster->lock); | 1895 | spin_lock(&cluster->lock); |
1844 | if (bytes > cluster->max_size) | 1896 | if (bytes > cluster->max_size) |
1845 | goto out; | 1897 | goto out; |
@@ -1852,9 +1904,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1852 | goto out; | 1904 | goto out; |
1853 | 1905 | ||
1854 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 1906 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
1855 | |||
1856 | while(1) { | 1907 | while(1) { |
1857 | if (entry->bytes < bytes || entry->offset < min_start) { | 1908 | if (entry->bytes < bytes || |
1909 | (!entry->bitmap && entry->offset < min_start)) { | ||
1858 | struct rb_node *node; | 1910 | struct rb_node *node; |
1859 | 1911 | ||
1860 | node = rb_next(&entry->offset_index); | 1912 | node = rb_next(&entry->offset_index); |
@@ -1864,10 +1916,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1864 | offset_index); | 1916 | offset_index); |
1865 | continue; | 1917 | continue; |
1866 | } | 1918 | } |
1867 | ret = entry->offset; | ||
1868 | 1919 | ||
1869 | entry->offset += bytes; | 1920 | if (entry->bitmap) { |
1870 | entry->bytes -= bytes; | 1921 | ret = btrfs_alloc_from_bitmap(block_group, |
1922 | cluster, entry, bytes, | ||
1923 | min_start); | ||
1924 | if (ret == 0) { | ||
1925 | struct rb_node *node; | ||
1926 | node = rb_next(&entry->offset_index); | ||
1927 | if (!node) | ||
1928 | break; | ||
1929 | entry = rb_entry(node, struct btrfs_free_space, | ||
1930 | offset_index); | ||
1931 | continue; | ||
1932 | } | ||
1933 | } else { | ||
1934 | |||
1935 | ret = entry->offset; | ||
1936 | |||
1937 | entry->offset += bytes; | ||
1938 | entry->bytes -= bytes; | ||
1939 | } | ||
1871 | 1940 | ||
1872 | if (entry->bytes == 0) | 1941 | if (entry->bytes == 0) |
1873 | rb_erase(&entry->offset_index, &cluster->root); | 1942 | rb_erase(&entry->offset_index, &cluster->root); |
@@ -1884,7 +1953,12 @@ out: | |||
1884 | block_group->free_space -= bytes; | 1953 | block_group->free_space -= bytes; |
1885 | if (entry->bytes == 0) { | 1954 | if (entry->bytes == 0) { |
1886 | block_group->free_extents--; | 1955 | block_group->free_extents--; |
1887 | kfree(entry); | 1956 | if (entry->bitmap) { |
1957 | kfree(entry->bitmap); | ||
1958 | block_group->total_bitmaps--; | ||
1959 | recalculate_thresholds(block_group); | ||
1960 | } | ||
1961 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
1888 | } | 1962 | } |
1889 | 1963 | ||
1890 | spin_unlock(&block_group->tree_lock); | 1964 | spin_unlock(&block_group->tree_lock); |
@@ -1904,12 +1978,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
1904 | unsigned long found_bits; | 1978 | unsigned long found_bits; |
1905 | unsigned long start = 0; | 1979 | unsigned long start = 0; |
1906 | unsigned long total_found = 0; | 1980 | unsigned long total_found = 0; |
1981 | int ret; | ||
1907 | bool found = false; | 1982 | bool found = false; |
1908 | 1983 | ||
1909 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 1984 | i = offset_to_bit(entry->offset, block_group->sectorsize, |
1910 | max_t(u64, offset, entry->offset)); | 1985 | max_t(u64, offset, entry->offset)); |
1911 | search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 1986 | search_bits = bytes_to_bits(bytes, block_group->sectorsize); |
1912 | total_bits = bytes_to_bits(bytes, block_group->sectorsize); | 1987 | total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); |
1913 | 1988 | ||
1914 | again: | 1989 | again: |
1915 | found_bits = 0; | 1990 | found_bits = 0; |
@@ -1926,7 +2001,7 @@ again: | |||
1926 | } | 2001 | } |
1927 | 2002 | ||
1928 | if (!found_bits) | 2003 | if (!found_bits) |
1929 | return -1; | 2004 | return -ENOSPC; |
1930 | 2005 | ||
1931 | if (!found) { | 2006 | if (!found) { |
1932 | start = i; | 2007 | start = i; |
@@ -1950,189 +2025,208 @@ again: | |||
1950 | 2025 | ||
1951 | cluster->window_start = start * block_group->sectorsize + | 2026 | cluster->window_start = start * block_group->sectorsize + |
1952 | entry->offset; | 2027 | entry->offset; |
1953 | cluster->points_to_bitmap = true; | 2028 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
2029 | ret = tree_insert_offset(&cluster->root, entry->offset, | ||
2030 | &entry->offset_index, 1); | ||
2031 | BUG_ON(ret); | ||
1954 | 2032 | ||
1955 | return 0; | 2033 | return 0; |
1956 | } | 2034 | } |
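With points_to_bitmap gone, a bitmap that wins the cluster search is physically moved into the cluster's rbtree, so allocation walks one tree for both entry kinds. The two lines doing the move, annotated:

/* detach from the block group's tree, attach to the cluster's; the
 * trailing 1 tells tree_insert_offset this node is a bitmap entry */
rb_erase(&entry->offset_index, &block_group->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
                         &entry->offset_index, 1);
BUG_ON(ret);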
1957 | 2035 | ||
1958 | /* | 2036 | /* |
1959 | * here we try to find a cluster of blocks in a block group. The goal | 2037 | * This searches the block group for just extents to fill the cluster with. |
1960 | * is to find at least bytes free and up to empty_size + bytes free. | ||
1961 | * We might not find them all in one contiguous area. | ||
1962 | * | ||
1963 | * returns zero and sets up cluster if things worked out, otherwise | ||
1964 | * it returns -enospc | ||
1965 | */ | 2038 | */ |
1966 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 2039 | static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
1967 | struct btrfs_root *root, | 2040 | struct btrfs_free_cluster *cluster, |
1968 | struct btrfs_block_group_cache *block_group, | 2041 | u64 offset, u64 bytes, u64 min_bytes) |
1969 | struct btrfs_free_cluster *cluster, | ||
1970 | u64 offset, u64 bytes, u64 empty_size) | ||
1971 | { | 2042 | { |
2043 | struct btrfs_free_space *first = NULL; | ||
1972 | struct btrfs_free_space *entry = NULL; | 2044 | struct btrfs_free_space *entry = NULL; |
2045 | struct btrfs_free_space *prev = NULL; | ||
2046 | struct btrfs_free_space *last; | ||
1973 | struct rb_node *node; | 2047 | struct rb_node *node; |
1974 | struct btrfs_free_space *next; | ||
1975 | struct btrfs_free_space *last = NULL; | ||
1976 | u64 min_bytes; | ||
1977 | u64 window_start; | 2048 | u64 window_start; |
1978 | u64 window_free; | 2049 | u64 window_free; |
1979 | u64 max_extent = 0; | 2050 | u64 max_extent; |
1980 | bool found_bitmap = false; | 2051 | u64 max_gap = 128 * 1024; |
1981 | int ret; | ||
1982 | 2052 | ||
1983 | /* for metadata, allow allocates with more holes */ | 2053 | entry = tree_search_offset(block_group, offset, 0, 1); |
1984 | if (btrfs_test_opt(root, SSD_SPREAD)) { | 2054 | if (!entry) |
1985 | min_bytes = bytes + empty_size; | 2055 | return -ENOSPC; |
1986 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
1987 | /* | ||
1988 | * we want to do larger allocations when we are | ||
1989 | * flushing out the delayed refs, it helps prevent | ||
1990 | * making more work as we go along. | ||
1991 | */ | ||
1992 | if (trans->transaction->delayed_refs.flushing) | ||
1993 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
1994 | else | ||
1995 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
1996 | } else | ||
1997 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
1998 | |||
1999 | spin_lock(&block_group->tree_lock); | ||
2000 | spin_lock(&cluster->lock); | ||
2001 | |||
2002 | /* someone already found a cluster, hooray */ | ||
2003 | if (cluster->block_group) { | ||
2004 | ret = 0; | ||
2005 | goto out; | ||
2006 | } | ||
2007 | again: | ||
2008 | entry = tree_search_offset(block_group, offset, found_bitmap, 1); | ||
2009 | if (!entry) { | ||
2010 | ret = -ENOSPC; | ||
2011 | goto out; | ||
2012 | } | ||
2013 | 2056 | ||
2014 | /* | 2057 | /* |
2015 | * If found_bitmap is true, we exhausted our search for extent entries, | 2058 | * We don't want bitmaps, so just move along until we find a normal |
2016 | * and we just want to search all of the bitmaps that we can find, and | 2059 | * extent entry. |
2017 | * ignore any extent entries we find. | ||
2018 | */ | 2060 | */ |
2019 | while (entry->bitmap || found_bitmap || | 2061 | while (entry->bitmap) { |
2020 | (!entry->bitmap && entry->bytes < min_bytes)) { | 2062 | node = rb_next(&entry->offset_index); |
2021 | struct rb_node *node = rb_next(&entry->offset_index); | 2063 | if (!node) |
2022 | 2064 | return -ENOSPC; | |
2023 | if (entry->bitmap && entry->bytes > bytes + empty_size) { | ||
2024 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, | ||
2025 | offset, bytes + empty_size, | ||
2026 | min_bytes); | ||
2027 | if (!ret) | ||
2028 | goto got_it; | ||
2029 | } | ||
2030 | |||
2031 | if (!node) { | ||
2032 | ret = -ENOSPC; | ||
2033 | goto out; | ||
2034 | } | ||
2035 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2065 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2036 | } | 2066 | } |
2037 | 2067 | ||
2038 | /* | ||
2039 | * We already searched all the extent entries from the passed in offset | ||
2040 | * to the end and didn't find enough space for the cluster, and we also | ||
2041 | * didn't find any bitmaps that met our criteria, just go ahead and exit | ||
2042 | */ | ||
2043 | if (found_bitmap) { | ||
2044 | ret = -ENOSPC; | ||
2045 | goto out; | ||
2046 | } | ||
2047 | |||
2048 | cluster->points_to_bitmap = false; | ||
2049 | window_start = entry->offset; | 2068 | window_start = entry->offset; |
2050 | window_free = entry->bytes; | 2069 | window_free = entry->bytes; |
2051 | last = entry; | ||
2052 | max_extent = entry->bytes; | 2070 | max_extent = entry->bytes; |
2071 | first = entry; | ||
2072 | last = entry; | ||
2073 | prev = entry; | ||
2053 | 2074 | ||
2054 | while (1) { | 2075 | while (window_free <= min_bytes) { |
2055 | /* our window is just right, let's fill it */ | 2076 | node = rb_next(&entry->offset_index); |
2056 | if (window_free >= bytes + empty_size) | 2077 | if (!node) |
2057 | break; | 2078 | return -ENOSPC; |
2058 | 2079 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | |
2059 | node = rb_next(&last->offset_index); | ||
2060 | if (!node) { | ||
2061 | if (found_bitmap) | ||
2062 | goto again; | ||
2063 | ret = -ENOSPC; | ||
2064 | goto out; | ||
2065 | } | ||
2066 | next = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2067 | 2080 | ||
2068 | /* | 2081 | if (entry->bitmap) |
2069 | * we found a bitmap, so if this search doesn't result in a | ||
2070 | * cluster, we know to go and search again for the bitmaps and | ||
2071 | * start looking for space there | ||
2072 | */ | ||
2073 | if (next->bitmap) { | ||
2074 | if (!found_bitmap) | ||
2075 | offset = next->offset; | ||
2076 | found_bitmap = true; | ||
2077 | last = next; | ||
2078 | continue; | 2082 | continue; |
2079 | } | ||
2080 | |||
2081 | /* | 2083 | /* |
2082 | * we haven't filled the empty size and the window is | 2084 | * we haven't filled the empty size and the window is |
2083 | * very large. reset and try again | 2085 | * very large. reset and try again |
2084 | */ | 2086 | */ |
2085 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || | 2087 | if (entry->offset - (prev->offset + prev->bytes) > max_gap || |
2086 | next->offset - window_start > (bytes + empty_size) * 2) { | 2088 | entry->offset - window_start > (min_bytes * 2)) { |
2087 | entry = next; | 2089 | first = entry; |
2088 | window_start = entry->offset; | 2090 | window_start = entry->offset; |
2089 | window_free = entry->bytes; | 2091 | window_free = entry->bytes; |
2090 | last = entry; | 2092 | last = entry; |
2091 | max_extent = entry->bytes; | 2093 | max_extent = entry->bytes; |
2092 | } else { | 2094 | } else { |
2093 | last = next; | 2095 | last = entry; |
2094 | window_free += next->bytes; | 2096 | window_free += entry->bytes; |
2095 | if (entry->bytes > max_extent) | 2097 | if (entry->bytes > max_extent) |
2096 | max_extent = entry->bytes; | 2098 | max_extent = entry->bytes; |
2097 | } | 2099 | } |
2100 | prev = entry; | ||
2098 | } | 2101 | } |
2099 | 2102 | ||
2100 | cluster->window_start = entry->offset; | 2103 | cluster->window_start = first->offset; |
2104 | |||
2105 | node = &first->offset_index; | ||
2101 | 2106 | ||
2102 | /* | 2107 | /* |
2103 | * now we've found our entries, pull them out of the free space | 2108 | * now we've found our entries, pull them out of the free space |
2104 | * cache and put them into the cluster rbtree | 2109 | * cache and put them into the cluster rbtree |
2105 | * | ||
2106 | * The cluster includes an rbtree, but only uses the offset index | ||
2107 | * of each free space cache entry. | ||
2108 | */ | 2110 | */ |
2109 | while (1) { | 2111 | do { |
2112 | int ret; | ||
2113 | |||
2114 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2110 | node = rb_next(&entry->offset_index); | 2115 | node = rb_next(&entry->offset_index); |
2111 | if (entry->bitmap && node) { | 2116 | if (entry->bitmap) |
2112 | entry = rb_entry(node, struct btrfs_free_space, | ||
2113 | offset_index); | ||
2114 | continue; | 2117 | continue; |
2115 | } else if (entry->bitmap && !node) { | ||
2116 | break; | ||
2117 | } | ||
2118 | 2118 | ||
2119 | rb_erase(&entry->offset_index, &block_group->free_space_offset); | 2119 | rb_erase(&entry->offset_index, &block_group->free_space_offset); |
2120 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2120 | ret = tree_insert_offset(&cluster->root, entry->offset, |
2121 | &entry->offset_index, 0); | 2121 | &entry->offset_index, 0); |
2122 | BUG_ON(ret); | 2122 | BUG_ON(ret); |
2123 | } while (node && entry != last); | ||
2123 | 2124 | ||
2124 | if (!node || entry == last) | 2125 | cluster->max_size = max_extent; |
2125 | break; | ||
2126 | 2126 | ||
2127 | return 0; | ||
2128 | } | ||
2129 | |||
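
The loop above is a sliding window over the sorted extent entries: keep absorbing the next entry until the window holds more than min_bytes, and restart the window at the current entry whenever the gap to the previous extent exceeds max_gap or the window has grown past twice min_bytes. A minimal user-space sketch of that policy, assuming a sorted array of extents (the struct and function here are illustrations, not the kernel API; bitmap entries, which the kernel loop skips, are omitted):

#include <stdbool.h>
#include <stddef.h>

struct fse { unsigned long long offset, bytes; };

/* Sliding-window sketch of setup_cluster_no_bitmap()'s scan: returns
 * true and sets [*first, *last] once a window of free extents holding
 * more than min_bytes is found, resetting whenever entries are too far
 * apart (gap > max_gap) or the window has become too wide. */
static bool find_cluster_window(const struct fse *e, size_t n,
                                unsigned long long min_bytes,
                                unsigned long long max_gap,
                                size_t *first, size_t *last)
{
    size_t f = 0;
    unsigned long long window_free = 0;

    for (size_t i = 0; i < n; i++) {
        if (i > f &&
            (e[i].offset - (e[i - 1].offset + e[i - 1].bytes) > max_gap ||
             e[i].offset - e[f].offset > min_bytes * 2)) {
            f = i;                      /* reset the window here */
            window_free = e[i].bytes;
        } else {
            window_free += e[i].bytes;  /* extend the window */
        }
        if (window_free > min_bytes) {
            *first = f;
            *last = i;
            return true;
        }
    }
    return false;                       /* caller maps this to -ENOSPC */
}
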
2130 | /* | ||
2131 | * This specifically looks for bitmaps that may work in the cluster; we assume | ||
2132 | * that we have already failed to find extents that will work. | ||
2133 | */ | ||
2134 | static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | ||
2135 | struct btrfs_free_cluster *cluster, | ||
2136 | u64 offset, u64 bytes, u64 min_bytes) | ||
2137 | { | ||
2138 | struct btrfs_free_space *entry; | ||
2139 | struct rb_node *node; | ||
2140 | int ret = -ENOSPC; | ||
2141 | |||
2142 | if (block_group->total_bitmaps == 0) | ||
2143 | return -ENOSPC; | ||
2144 | |||
2145 | entry = tree_search_offset(block_group, | ||
2146 | offset_to_bitmap(block_group, offset), | ||
2147 | 0, 1); | ||
2148 | if (!entry) | ||
2149 | return -ENOSPC; | ||
2150 | |||
2151 | node = &entry->offset_index; | ||
2152 | do { | ||
2127 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2153 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2154 | node = rb_next(&entry->offset_index); | ||
2155 | if (!entry->bitmap) | ||
2156 | continue; | ||
2157 | if (entry->bytes < min_bytes) | ||
2158 | continue; | ||
2159 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2160 | bytes, min_bytes); | ||
2161 | } while (ret && node); | ||
2162 | |||
2163 | return ret; | ||
2164 | } | ||
2165 | |||
2166 | /* | ||
2167 | * here we try to find a cluster of blocks in a block group. The goal | ||
2168 | * is to find at least 'bytes' of free space, and up to 'empty_size + bytes'. | ||
2169 | * We might not find them all in one contiguous area. | ||
2170 | * | ||
2171 | * returns zero and sets up cluster if things worked out, otherwise | ||
2172 | * it returns -ENOSPC | ||
2173 | */ | ||
2174 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | ||
2175 | struct btrfs_root *root, | ||
2176 | struct btrfs_block_group_cache *block_group, | ||
2177 | struct btrfs_free_cluster *cluster, | ||
2178 | u64 offset, u64 bytes, u64 empty_size) | ||
2179 | { | ||
2180 | u64 min_bytes; | ||
2181 | int ret; | ||
2182 | |||
2183 | /* for metadata, allow allocations with more holes */ | ||
2184 | if (btrfs_test_opt(root, SSD_SPREAD)) { | ||
2185 | min_bytes = bytes + empty_size; | ||
2186 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2187 | /* | ||
2188 | * we want to do larger allocations when we are | ||
2189 | * flushing out the delayed refs, it helps prevent | ||
2190 | * making more work as we go along. | ||
2191 | */ | ||
2192 | if (trans->transaction->delayed_refs.flushing) | ||
2193 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
2194 | else | ||
2195 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
2196 | } else | ||
2197 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
2198 | |||
2199 | spin_lock(&block_group->tree_lock); | ||
2200 | |||
2201 | /* | ||
2202 | * If we know we don't have enough space to make a cluster don't even | ||
2203 | * bother doing all the work to try and find one. | ||
2204 | */ | ||
2205 | if (block_group->free_space < min_bytes) { | ||
2206 | spin_unlock(&block_group->tree_lock); | ||
2207 | return -ENOSPC; | ||
2128 | } | 2208 | } |
2129 | 2209 | ||
2130 | cluster->max_size = max_extent; | 2210 | spin_lock(&cluster->lock); |
2131 | got_it: | 2211 | |
2132 | ret = 0; | 2212 | /* someone already found a cluster, hooray */ |
2133 | atomic_inc(&block_group->count); | 2213 | if (cluster->block_group) { |
2134 | list_add_tail(&cluster->block_group_list, &block_group->cluster_list); | 2214 | ret = 0; |
2135 | cluster->block_group = block_group; | 2215 | goto out; |
2216 | } | ||
2217 | |||
2218 | ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, | ||
2219 | min_bytes); | ||
2220 | if (ret) | ||
2221 | ret = setup_cluster_bitmap(block_group, cluster, offset, | ||
2222 | bytes, min_bytes); | ||
2223 | |||
2224 | if (!ret) { | ||
2225 | atomic_inc(&block_group->count); | ||
2226 | list_add_tail(&cluster->block_group_list, | ||
2227 | &block_group->cluster_list); | ||
2228 | cluster->block_group = block_group; | ||
2229 | } | ||
2136 | out: | 2230 | out: |
2137 | spin_unlock(&cluster->lock); | 2231 | spin_unlock(&cluster->lock); |
2138 | spin_unlock(&block_group->tree_lock); | 2232 | spin_unlock(&block_group->tree_lock); |
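
The min_bytes selection above controls how strict the cluster search is. As a worked example with bytes = 1 MiB and empty_size = 3 MiB (so bytes + empty_size = 4 MiB): ssd_spread demands the full 4 MiB in one window, metadata while delayed refs are flushing demands max(1 MiB, 2 MiB) = 2 MiB, ordinary metadata max(1 MiB, 256 KiB) = 1 MiB, and data max(1 MiB, 1 MiB) = 1 MiB. The same policy as a standalone helper (plain booleans stand in for the mount-option and block-group checks):

#include <stdbool.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Mirror of the min_bytes policy in btrfs_find_space_cluster() above. */
static unsigned long long cluster_min_bytes(unsigned long long bytes,
                                            unsigned long long empty_size,
                                            bool ssd_spread, bool metadata,
                                            bool flushing_delayed_refs)
{
    unsigned long long want = bytes + empty_size;

    if (ssd_spread)
        return want;                    /* demand one tight window */
    if (metadata)                       /* be greedier while flushing */
        return flushing_delayed_refs ? MAX(bytes, want >> 1)
                                     : MAX(bytes, want >> 4);
    return MAX(bytes, want >> 2);       /* data allocations */
}
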
@@ -2149,8 +2243,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2149 | spin_lock_init(&cluster->refill_lock); | 2243 | spin_lock_init(&cluster->refill_lock); |
2150 | cluster->root = RB_ROOT; | 2244 | cluster->root = RB_ROOT; |
2151 | cluster->max_size = 0; | 2245 | cluster->max_size = 0; |
2152 | cluster->points_to_bitmap = false; | ||
2153 | INIT_LIST_HEAD(&cluster->block_group_list); | 2246 | INIT_LIST_HEAD(&cluster->block_group_list); |
2154 | cluster->block_group = NULL; | 2247 | cluster->block_group = NULL; |
2155 | } | 2248 | } |
2156 | 2249 | ||
2250 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2251 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2252 | { | ||
2253 | struct btrfs_free_space *entry = NULL; | ||
2254 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
2255 | u64 bytes = 0; | ||
2256 | u64 actually_trimmed; | ||
2257 | int ret = 0; | ||
2258 | |||
2259 | *trimmed = 0; | ||
2260 | |||
2261 | while (start < end) { | ||
2262 | spin_lock(&block_group->tree_lock); | ||
2263 | |||
2264 | if (block_group->free_space < minlen) { | ||
2265 | spin_unlock(&block_group->tree_lock); | ||
2266 | break; | ||
2267 | } | ||
2268 | |||
2269 | entry = tree_search_offset(block_group, start, 0, 1); | ||
2270 | if (!entry) | ||
2271 | entry = tree_search_offset(block_group, | ||
2272 | offset_to_bitmap(block_group, | ||
2273 | start), | ||
2274 | 1, 1); | ||
2275 | |||
2276 | if (!entry || entry->offset >= end) { | ||
2277 | spin_unlock(&block_group->tree_lock); | ||
2278 | break; | ||
2279 | } | ||
2280 | |||
2281 | if (entry->bitmap) { | ||
2282 | ret = search_bitmap(block_group, entry, &start, &bytes); | ||
2283 | if (!ret) { | ||
2284 | if (start >= end) { | ||
2285 | spin_unlock(&block_group->tree_lock); | ||
2286 | break; | ||
2287 | } | ||
2288 | bytes = min(bytes, end - start); | ||
2289 | bitmap_clear_bits(block_group, entry, | ||
2290 | start, bytes); | ||
2291 | if (entry->bytes == 0) | ||
2292 | free_bitmap(block_group, entry); | ||
2293 | } else { | ||
2294 | start = entry->offset + BITS_PER_BITMAP * | ||
2295 | block_group->sectorsize; | ||
2296 | spin_unlock(&block_group->tree_lock); | ||
2297 | ret = 0; | ||
2298 | continue; | ||
2299 | } | ||
2300 | } else { | ||
2301 | start = entry->offset; | ||
2302 | bytes = min(entry->bytes, end - start); | ||
2303 | unlink_free_space(block_group, entry); | ||
2304 | kfree(entry); | ||
2305 | } | ||
2306 | |||
2307 | spin_unlock(&block_group->tree_lock); | ||
2308 | |||
2309 | if (bytes >= minlen) { | ||
2310 | int update_ret; | ||
2311 | update_ret = btrfs_update_reserved_bytes(block_group, | ||
2312 | bytes, 1, 1); | ||
2313 | |||
2314 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2315 | start, | ||
2316 | bytes, | ||
2317 | &actually_trimmed); | ||
2318 | |||
2319 | btrfs_add_free_space(block_group, | ||
2320 | start, bytes); | ||
2321 | if (!update_ret) | ||
2322 | btrfs_update_reserved_bytes(block_group, | ||
2323 | bytes, 0, 1); | ||
2324 | |||
2325 | if (ret) | ||
2326 | break; | ||
2327 | *trimmed += actually_trimmed; | ||
2328 | } | ||
2329 | start += bytes; | ||
2330 | bytes = 0; | ||
2331 | |||
2332 | if (fatal_signal_pending(current)) { | ||
2333 | ret = -ERESTARTSYS; | ||
2334 | break; | ||
2335 | } | ||
2336 | |||
2337 | cond_resched(); | ||
2338 | } | ||
2339 | |||
2340 | return ret; | ||
2341 | } | ||
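
btrfs_trim_block_group() above is shaped to be driven from a FITRIM-style loop over block groups: clamp the requested range to each group, accumulate the per-group trimmed counts, and stop on the first hard error. A hedged sketch of such a caller; first_block_group() and next_block_group() are invented iteration stand-ins, not existing helpers:

/* Illustrative driver: trim [start, end) across all block groups,
 * summing the bytes each group actually discarded. */
static int trim_all_block_groups(struct btrfs_fs_info *fs_info,
                                 u64 start, u64 end, u64 minlen,
                                 u64 *total_trimmed)
{
    struct btrfs_block_group_cache *bg;
    int ret = 0;

    *total_trimmed = 0;
    for (bg = first_block_group(fs_info); bg; bg = next_block_group(bg)) {
        u64 bg_start = bg->key.objectid;            /* group start */
        u64 bg_end = bg_start + bg->key.offset;     /* group end */
        u64 trimmed;

        if (bg_end <= start || bg_start >= end)
            continue;                               /* outside range */

        ret = btrfs_trim_block_group(bg, &trimmed,
                                     max(start, bg_start),
                                     min(end, bg_end), minlen);
        *total_trimmed += trimmed;
        if (ret)
            break;                      /* includes -ERESTARTSYS */
    }
    return ret;
}
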
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
68 | int btrfs_return_cluster_to_free_space( | 68 | int btrfs_return_cluster_to_free_space( |
69 | struct btrfs_block_group_cache *block_group, | 69 | struct btrfs_block_group_cache *block_group, |
70 | struct btrfs_free_cluster *cluster); | 70 | struct btrfs_free_cluster *cluster); |
71 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
72 | u64 *trimmed, u64 start, u64 end, u64 minlen); | ||
71 | #endif | 73 | #endif |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..c05a08f4c411 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
30 | int slot; | 30 | int slot; |
31 | 31 | ||
32 | path = btrfs_alloc_path(); | 32 | path = btrfs_alloc_path(); |
33 | BUG_ON(!path); | 33 | if (!path) |
34 | return -ENOMEM; | ||
34 | 35 | ||
35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | 36 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
36 | search_key.type = -1; | 37 | search_key.type = -1; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 512c3d1da083..fcd66b6a8086 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "compression.h" | 51 | #include "compression.h" |
52 | #include "locking.h" | 52 | #include "locking.h" |
53 | #include "free-space-cache.h" | ||
53 | 54 | ||
54 | struct btrfs_iget_args { | 55 | struct btrfs_iget_args { |
55 | u64 ino; | 56 | u64 ino; |
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep; | |||
70 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
71 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
72 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
74 | struct kmem_cache *btrfs_free_space_cachep; | ||
73 | 75 | ||
74 | #define S_SHIFT 12 | 76 | #define S_SHIFT 12 |
75 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | 77 | static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
82 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | 84 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
83 | }; | 85 | }; |
84 | 86 | ||
85 | static void btrfs_truncate(struct inode *inode); | 87 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
88 | static int btrfs_truncate(struct inode *inode); | ||
86 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 89 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); |
87 | static noinline int cow_file_range(struct inode *inode, | 90 | static noinline int cow_file_range(struct inode *inode, |
88 | struct page *locked_page, | 91 | struct page *locked_page, |
@@ -109,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | |||
109 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | 112 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, |
110 | struct btrfs_root *root, struct inode *inode, | 113 | struct btrfs_root *root, struct inode *inode, |
111 | u64 start, size_t size, size_t compressed_size, | 114 | u64 start, size_t size, size_t compressed_size, |
115 | int compress_type, | ||
112 | struct page **compressed_pages) | 116 | struct page **compressed_pages) |
113 | { | 117 | { |
114 | struct btrfs_key key; | 118 | struct btrfs_key key; |
@@ -123,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
123 | size_t cur_size = size; | 127 | size_t cur_size = size; |
124 | size_t datasize; | 128 | size_t datasize; |
125 | unsigned long offset; | 129 | unsigned long offset; |
126 | int compress_type = BTRFS_COMPRESS_NONE; | ||
127 | 130 | ||
128 | if (compressed_size && compressed_pages) { | 131 | if (compressed_size && compressed_pages) |
129 | compress_type = root->fs_info->compress_type; | ||
130 | cur_size = compressed_size; | 132 | cur_size = compressed_size; |
131 | } | ||
132 | 133 | ||
133 | path = btrfs_alloc_path(); | 134 | path = btrfs_alloc_path(); |
134 | if (!path) | 135 | if (!path) |
@@ -218,7 +219,7 @@ fail: | |||
218 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | 219 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
219 | struct btrfs_root *root, | 220 | struct btrfs_root *root, |
220 | struct inode *inode, u64 start, u64 end, | 221 | struct inode *inode, u64 start, u64 end, |
221 | size_t compressed_size, | 222 | size_t compressed_size, int compress_type, |
222 | struct page **compressed_pages) | 223 | struct page **compressed_pages) |
223 | { | 224 | { |
224 | u64 isize = i_size_read(inode); | 225 | u64 isize = i_size_read(inode); |
@@ -251,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
251 | inline_len = min_t(u64, isize, actual_end); | 252 | inline_len = min_t(u64, isize, actual_end); |
252 | ret = insert_inline_extent(trans, root, inode, start, | 253 | ret = insert_inline_extent(trans, root, inode, start, |
253 | inline_len, compressed_size, | 254 | inline_len, compressed_size, |
254 | compressed_pages); | 255 | compress_type, compressed_pages); |
255 | BUG_ON(ret); | 256 | BUG_ON(ret); |
256 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 257 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
257 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 258 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
@@ -288,6 +289,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
288 | struct async_extent *async_extent; | 289 | struct async_extent *async_extent; |
289 | 290 | ||
290 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); | 291 | async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); |
292 | BUG_ON(!async_extent); | ||
291 | async_extent->start = start; | 293 | async_extent->start = start; |
292 | async_extent->ram_size = ram_size; | 294 | async_extent->ram_size = ram_size; |
293 | async_extent->compressed_size = compressed_size; | 295 | async_extent->compressed_size = compressed_size; |
@@ -382,9 +384,11 @@ again: | |||
382 | */ | 384 | */ |
383 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 385 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
384 | (btrfs_test_opt(root, COMPRESS) || | 386 | (btrfs_test_opt(root, COMPRESS) || |
385 | (BTRFS_I(inode)->force_compress))) { | 387 | (BTRFS_I(inode)->force_compress) || |
388 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
386 | WARN_ON(pages); | 389 | WARN_ON(pages); |
387 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 390 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
391 | BUG_ON(!pages); | ||
388 | 392 | ||
389 | if (BTRFS_I(inode)->force_compress) | 393 | if (BTRFS_I(inode)->force_compress) |
390 | compress_type = BTRFS_I(inode)->force_compress; | 394 | compress_type = BTRFS_I(inode)->force_compress; |
@@ -427,12 +431,13 @@ again: | |||
427 | * to make an uncompressed inline extent. | 431 | * to make an uncompressed inline extent. |
428 | */ | 432 | */ |
429 | ret = cow_file_range_inline(trans, root, inode, | 433 | ret = cow_file_range_inline(trans, root, inode, |
430 | start, end, 0, NULL); | 434 | start, end, 0, 0, NULL); |
431 | } else { | 435 | } else { |
432 | /* try making a compressed inline extent */ | 436 | /* try making a compressed inline extent */ |
433 | ret = cow_file_range_inline(trans, root, inode, | 437 | ret = cow_file_range_inline(trans, root, inode, |
434 | start, end, | 438 | start, end, |
435 | total_compressed, pages); | 439 | total_compressed, |
440 | compress_type, pages); | ||
436 | } | 441 | } |
437 | if (ret == 0) { | 442 | if (ret == 0) { |
438 | /* | 443 | /* |
@@ -786,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
786 | if (start == 0) { | 791 | if (start == 0) { |
787 | /* let's try to make an inline extent */ | 792 | /* let's try to make an inline extent */ |
788 | ret = cow_file_range_inline(trans, root, inode, | 793 | ret = cow_file_range_inline(trans, root, inode, |
789 | start, end, 0, NULL); | 794 | start, end, 0, 0, NULL); |
790 | if (ret == 0) { | 795 | if (ret == 0) { |
791 | extent_clear_unlock_delalloc(inode, | 796 | extent_clear_unlock_delalloc(inode, |
792 | &BTRFS_I(inode)->io_tree, | 797 | &BTRFS_I(inode)->io_tree, |
@@ -1254,7 +1259,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1254 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1259 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1255 | page_started, 0, nr_written); | 1260 | page_started, 0, nr_written); |
1256 | else if (!btrfs_test_opt(root, COMPRESS) && | 1261 | else if (!btrfs_test_opt(root, COMPRESS) && |
1257 | !(BTRFS_I(inode)->force_compress)) | 1262 | !(BTRFS_I(inode)->force_compress) && |
1263 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) | ||
1258 | ret = cow_file_range(inode, locked_page, start, end, | 1264 | ret = cow_file_range(inode, locked_page, start, end, |
1259 | page_started, nr_written, 1); | 1265 | page_started, nr_written, 1); |
1260 | else | 1266 | else |
@@ -1461,8 +1467,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1461 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1467 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1462 | return btrfs_submit_compressed_read(inode, bio, | 1468 | return btrfs_submit_compressed_read(inode, bio, |
1463 | mirror_num, bio_flags); | 1469 | mirror_num, bio_flags); |
1464 | } else if (!skip_sum) | 1470 | } else if (!skip_sum) { |
1465 | btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1471 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
1472 | if (ret) | ||
1473 | return ret; | ||
1474 | } | ||
1466 | goto mapit; | 1475 | goto mapit; |
1467 | } else if (!skip_sum) { | 1476 | } else if (!skip_sum) { |
1468 | /* csum items have already been cloned */ | 1477 | /* csum items have already been cloned */ |
@@ -1761,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1761 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1770 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1762 | &ordered_extent->list); | 1771 | &ordered_extent->list); |
1763 | 1772 | ||
1764 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1773 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1765 | ret = btrfs_update_inode(trans, root, inode); | 1774 | if (!ret) { |
1766 | BUG_ON(ret); | 1775 | ret = btrfs_update_inode(trans, root, inode); |
1776 | BUG_ON(ret); | ||
1777 | } | ||
1778 | ret = 0; | ||
1767 | out: | 1779 | out: |
1768 | if (nolock) { | 1780 | if (nolock) { |
1769 | if (trans) | 1781 | if (trans) |
@@ -1785,6 +1797,8 @@ out: | |||
1785 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1797 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1786 | struct extent_state *state, int uptodate) | 1798 | struct extent_state *state, int uptodate) |
1787 | { | 1799 | { |
1800 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | ||
1801 | |||
1788 | ClearPagePrivate2(page); | 1802 | ClearPagePrivate2(page); |
1789 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1803 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1790 | } | 1804 | } |
@@ -1895,10 +1909,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1895 | else | 1909 | else |
1896 | rw = READ; | 1910 | rw = READ; |
1897 | 1911 | ||
1898 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1912 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1899 | failrec->last_mirror, | 1913 | failrec->last_mirror, |
1900 | failrec->bio_flags, 0); | 1914 | failrec->bio_flags, 0); |
1901 | return 0; | 1915 | return ret; |
1902 | } | 1916 | } |
1903 | 1917 | ||
1904 | /* | 1918 | /* |
@@ -2210,8 +2224,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2210 | insert = 1; | 2224 | insert = 1; |
2211 | #endif | 2225 | #endif |
2212 | insert = 1; | 2226 | insert = 1; |
2213 | } else { | ||
2214 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2215 | } | 2227 | } |
2216 | 2228 | ||
2217 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2229 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
@@ -2282,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2282 | * this cleans up any orphans that may be left on the list from the last use | 2294 | * this cleans up any orphans that may be left on the list from the last use |
2283 | * of this root. | 2295 | * of this root. |
2284 | */ | 2296 | */ |
2285 | void btrfs_orphan_cleanup(struct btrfs_root *root) | 2297 | int btrfs_orphan_cleanup(struct btrfs_root *root) |
2286 | { | 2298 | { |
2287 | struct btrfs_path *path; | 2299 | struct btrfs_path *path; |
2288 | struct extent_buffer *leaf; | 2300 | struct extent_buffer *leaf; |
@@ -2292,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2292 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2304 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2293 | 2305 | ||
2294 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2306 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2295 | return; | 2307 | return 0; |
2296 | 2308 | ||
2297 | path = btrfs_alloc_path(); | 2309 | path = btrfs_alloc_path(); |
2298 | BUG_ON(!path); | 2310 | if (!path) { |
2311 | ret = -ENOMEM; | ||
2312 | goto out; | ||
2313 | } | ||
2299 | path->reada = -1; | 2314 | path->reada = -1; |
2300 | 2315 | ||
2301 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 2316 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
@@ -2304,18 +2319,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2304 | 2319 | ||
2305 | while (1) { | 2320 | while (1) { |
2306 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2321 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
2307 | if (ret < 0) { | 2322 | if (ret < 0) |
2308 | printk(KERN_ERR "Error searching slot for orphan: %d" | 2323 | goto out; |
2309 | "\n", ret); | ||
2310 | break; | ||
2311 | } | ||
2312 | 2324 | ||
2313 | /* | 2325 | /* |
2314 | * if ret == 0 means we found what we were searching for, which | 2326 | * if ret == 0 means we found what we were searching for, which |
2315 | * is weird, but possible, so only screw with path if we didnt | 2327 | * is weird, but possible, so only screw with path if we didn't |
2316 | * find the key and see if we have stuff that matches | 2328 | * find the key and see if we have stuff that matches |
2317 | */ | 2329 | */ |
2318 | if (ret > 0) { | 2330 | if (ret > 0) { |
2331 | ret = 0; | ||
2319 | if (path->slots[0] == 0) | 2332 | if (path->slots[0] == 0) |
2320 | break; | 2333 | break; |
2321 | path->slots[0]--; | 2334 | path->slots[0]--; |
@@ -2343,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2343 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2356 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2344 | found_key.offset = 0; | 2357 | found_key.offset = 0; |
2345 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2358 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2346 | BUG_ON(IS_ERR(inode)); | 2359 | if (IS_ERR(inode)) { |
2360 | ret = PTR_ERR(inode); | ||
2361 | goto out; | ||
2362 | } | ||
2347 | 2363 | ||
2348 | /* | 2364 | /* |
2349 | * add this inode to the orphan list so btrfs_orphan_del does | 2365 | * add this inode to the orphan list so btrfs_orphan_del does |
@@ -2361,7 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2361 | */ | 2377 | */ |
2362 | if (is_bad_inode(inode)) { | 2378 | if (is_bad_inode(inode)) { |
2363 | trans = btrfs_start_transaction(root, 0); | 2379 | trans = btrfs_start_transaction(root, 0); |
2364 | BUG_ON(IS_ERR(trans)); | 2380 | if (IS_ERR(trans)) { |
2381 | ret = PTR_ERR(trans); | ||
2382 | goto out; | ||
2383 | } | ||
2365 | btrfs_orphan_del(trans, inode); | 2384 | btrfs_orphan_del(trans, inode); |
2366 | btrfs_end_transaction(trans, root); | 2385 | btrfs_end_transaction(trans, root); |
2367 | iput(inode); | 2386 | iput(inode); |
@@ -2370,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2370 | 2389 | ||
2371 | /* if we have links, this was a truncate, let's do that */ | 2390 | /* if we have links, this was a truncate, let's do that */ |
2372 | if (inode->i_nlink) { | 2391 | if (inode->i_nlink) { |
2392 | if (!S_ISREG(inode->i_mode)) { | ||
2393 | WARN_ON(1); | ||
2394 | iput(inode); | ||
2395 | continue; | ||
2396 | } | ||
2373 | nr_truncate++; | 2397 | nr_truncate++; |
2374 | btrfs_truncate(inode); | 2398 | ret = btrfs_truncate(inode); |
2375 | } else { | 2399 | } else { |
2376 | nr_unlink++; | 2400 | nr_unlink++; |
2377 | } | 2401 | } |
2378 | 2402 | ||
2379 | /* this will do delete_inode and everything for us */ | 2403 | /* this will do delete_inode and everything for us */ |
2380 | iput(inode); | 2404 | iput(inode); |
2405 | if (ret) | ||
2406 | goto out; | ||
2381 | } | 2407 | } |
2382 | btrfs_free_path(path); | ||
2383 | |||
2384 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | 2408 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; |
2385 | 2409 | ||
2386 | if (root->orphan_block_rsv) | 2410 | if (root->orphan_block_rsv) |
@@ -2389,14 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2389 | 2413 | ||
2390 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2414 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2391 | trans = btrfs_join_transaction(root, 1); | 2415 | trans = btrfs_join_transaction(root, 1); |
2392 | BUG_ON(IS_ERR(trans)); | 2416 | if (!IS_ERR(trans)) |
2393 | btrfs_end_transaction(trans, root); | 2417 | btrfs_end_transaction(trans, root); |
2394 | } | 2418 | } |
2395 | 2419 | ||
2396 | if (nr_unlink) | 2420 | if (nr_unlink) |
2397 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2421 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2398 | if (nr_truncate) | 2422 | if (nr_truncate) |
2399 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2423 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2424 | |||
2425 | out: | ||
2426 | if (ret) | ||
2427 | printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); | ||
2428 | btrfs_free_path(path); | ||
2429 | return ret; | ||
2400 | } | 2430 | } |
2401 | 2431 | ||
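
Condensed, the per-inode handling in the cleanup loop above is a three-way decision, now with errors returned instead of BUG()s. Same logic as the hunks above, trimmed and commented (the goto-out error path is simplified to direct returns):

/* bad inode: eviction won't run for it, so drop the orphan item here */
if (is_bad_inode(inode)) {
    trans = btrfs_start_transaction(root, 0);
    if (IS_ERR(trans))
        return PTR_ERR(trans);
    btrfs_orphan_del(trans, inode);
    btrfs_end_transaction(trans, root);
    iput(inode);
    continue;
}

if (inode->i_nlink) {                   /* links remain: interrupted truncate */
    if (!S_ISREG(inode->i_mode)) {      /* only regular files are expected */
        WARN_ON(1);
        iput(inode);
        continue;
    }
    ret = btrfs_truncate(inode);        /* finish the truncate */
} /* else: no links left, the final iput runs the delete path */

iput(inode);
if (ret)
    return ret;
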
2402 | /* | 2432 | /* |
@@ -2563,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2563 | struct btrfs_inode_item *item, | 2593 | struct btrfs_inode_item *item, |
2564 | struct inode *inode) | 2594 | struct inode *inode) |
2565 | { | 2595 | { |
2596 | if (!leaf->map_token) | ||
2597 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2598 | sizeof(struct btrfs_inode_item), | ||
2599 | &leaf->map_token, &leaf->kaddr, | ||
2600 | &leaf->map_start, &leaf->map_len, | ||
2601 | KM_USER1); | ||
2602 | |||
2566 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2603 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2567 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2604 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2568 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2605 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2591,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2591 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2628 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2592 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2629 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2593 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); | 2630 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); |
2631 | |||
2632 | if (leaf->map_token) { | ||
2633 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2634 | leaf->map_token = NULL; | ||
2635 | } | ||
2594 | } | 2636 | } |
2595 | 2637 | ||
2596 | /* | 2638 | /* |
@@ -2635,10 +2677,10 @@ failed: | |||
2635 | * recovery code. It remove a link in a directory with a given name, and | 2677 | * recovery code. It remove a link in a directory with a given name, and |
2636 | * also drops the back refs in the inode to the directory | 2678 | * also drops the back refs in the inode to the directory |
2637 | */ | 2679 | */ |
2638 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2680 | static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
2639 | struct btrfs_root *root, | 2681 | struct btrfs_root *root, |
2640 | struct inode *dir, struct inode *inode, | 2682 | struct inode *dir, struct inode *inode, |
2641 | const char *name, int name_len) | 2683 | const char *name, int name_len) |
2642 | { | 2684 | { |
2643 | struct btrfs_path *path; | 2685 | struct btrfs_path *path; |
2644 | int ret = 0; | 2686 | int ret = 0; |
@@ -2710,12 +2752,25 @@ err: | |||
2710 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2752 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2711 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2753 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2712 | btrfs_update_inode(trans, root, dir); | 2754 | btrfs_update_inode(trans, root, dir); |
2713 | btrfs_drop_nlink(inode); | ||
2714 | ret = btrfs_update_inode(trans, root, inode); | ||
2715 | out: | 2755 | out: |
2716 | return ret; | 2756 | return ret; |
2717 | } | 2757 | } |
2718 | 2758 | ||
2759 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | ||
2760 | struct btrfs_root *root, | ||
2761 | struct inode *dir, struct inode *inode, | ||
2762 | const char *name, int name_len) | ||
2763 | { | ||
2764 | int ret; | ||
2765 | ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | ||
2766 | if (!ret) { | ||
2767 | btrfs_drop_nlink(inode); | ||
2768 | ret = btrfs_update_inode(trans, root, inode); | ||
2769 | } | ||
2770 | return ret; | ||
2771 | } | ||
2772 | |||
2773 | |||
2719 | /* helper to check if there is any shared block in the path */ | 2774 | /* helper to check if there is any shared block in the path */ |
2720 | static int check_path_shared(struct btrfs_root *root, | 2775 | static int check_path_shared(struct btrfs_root *root, |
2721 | struct btrfs_path *path) | 2776 | struct btrfs_path *path) |
@@ -3537,7 +3592,13 @@ out: | |||
3537 | return ret; | 3592 | return ret; |
3538 | } | 3593 | } |
3539 | 3594 | ||
3540 | int btrfs_cont_expand(struct inode *inode, loff_t size) | 3595 | /* |
3596 | * This function puts in dummy file extents for the area we're creating a hole | ||
3597 | * for. So if we are truncating this file to a larger size we need to insert | ||
3598 | * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE for | ||
3599 | * the range between oldsize and size | ||
3600 | */ | ||
3601 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | ||
3541 | { | 3602 | { |
3542 | struct btrfs_trans_handle *trans; | 3603 | struct btrfs_trans_handle *trans; |
3543 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3604 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3545,7 +3606,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3545 | struct extent_map *em = NULL; | 3606 | struct extent_map *em = NULL; |
3546 | struct extent_state *cached_state = NULL; | 3607 | struct extent_state *cached_state = NULL; |
3547 | u64 mask = root->sectorsize - 1; | 3608 | u64 mask = root->sectorsize - 1; |
3548 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3609 | u64 hole_start = (oldsize + mask) & ~mask; |
3549 | u64 block_end = (size + mask) & ~mask; | 3610 | u64 block_end = (size + mask) & ~mask; |
3550 | u64 last_byte; | 3611 | u64 last_byte; |
3551 | u64 cur_offset; | 3612 | u64 cur_offset; |
@@ -3590,13 +3651,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3590 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3651 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3591 | cur_offset + hole_size, | 3652 | cur_offset + hole_size, |
3592 | &hint_byte, 1); | 3653 | &hint_byte, 1); |
3593 | BUG_ON(err); | 3654 | if (err) |
3655 | break; | ||
3594 | 3656 | ||
3595 | err = btrfs_insert_file_extent(trans, root, | 3657 | err = btrfs_insert_file_extent(trans, root, |
3596 | inode->i_ino, cur_offset, 0, | 3658 | inode->i_ino, cur_offset, 0, |
3597 | 0, hole_size, 0, hole_size, | 3659 | 0, hole_size, 0, hole_size, |
3598 | 0, 0, 0); | 3660 | 0, 0, 0); |
3599 | BUG_ON(err); | 3661 | if (err) |
3662 | break; | ||
3600 | 3663 | ||
3601 | btrfs_drop_extent_cache(inode, hole_start, | 3664 | btrfs_drop_extent_cache(inode, hole_start, |
3602 | last_byte - 1, 0); | 3665 | last_byte - 1, 0); |
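
Both ends of the hole are rounded up to a sector boundary with the usual mask arithmetic, so the dummy extents inserted above always cover whole sectors. A self-contained illustration (the 4 KiB sectorsize and the file sizes are example values only):

#include <stdio.h>

int main(void)
{
    unsigned long long sectorsize = 4096;       /* example value */
    unsigned long long mask = sectorsize - 1;
    unsigned long long oldsize = 10000, size = 20000;

    /* round both ends up to a sector boundary, as in btrfs_cont_expand() */
    unsigned long long hole_start = (oldsize + mask) & ~mask;  /* 12288 */
    unsigned long long block_end  = (size + mask) & ~mask;     /* 20480 */

    printf("hole: [%llu, %llu)\n", hole_start, block_end);
    return 0;
}
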
@@ -3616,81 +3679,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3616 | return err; | 3679 | return err; |
3617 | } | 3680 | } |
3618 | 3681 | ||
3619 | static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | 3682 | static int btrfs_setsize(struct inode *inode, loff_t newsize) |
3620 | { | 3683 | { |
3621 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3684 | loff_t oldsize = i_size_read(inode); |
3622 | struct btrfs_trans_handle *trans; | ||
3623 | unsigned long nr; | ||
3624 | int ret; | 3685 | int ret; |
3625 | 3686 | ||
3626 | if (attr->ia_size == inode->i_size) | 3687 | if (newsize == oldsize) |
3627 | return 0; | 3688 | return 0; |
3628 | 3689 | ||
3629 | if (attr->ia_size > inode->i_size) { | 3690 | if (newsize > oldsize) { |
3630 | unsigned long limit; | 3691 | i_size_write(inode, newsize); |
3631 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 3692 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
3632 | if (attr->ia_size > inode->i_sb->s_maxbytes) | 3693 | truncate_pagecache(inode, oldsize, newsize); |
3633 | return -EFBIG; | 3694 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
3634 | if (limit != RLIM_INFINITY && attr->ia_size > limit) { | ||
3635 | send_sig(SIGXFSZ, current, 0); | ||
3636 | return -EFBIG; | ||
3637 | } | ||
3638 | } | ||
3639 | |||
3640 | trans = btrfs_start_transaction(root, 5); | ||
3641 | if (IS_ERR(trans)) | ||
3642 | return PTR_ERR(trans); | ||
3643 | |||
3644 | btrfs_set_trans_block_group(trans, inode); | ||
3645 | |||
3646 | ret = btrfs_orphan_add(trans, inode); | ||
3647 | BUG_ON(ret); | ||
3648 | |||
3649 | nr = trans->blocks_used; | ||
3650 | btrfs_end_transaction(trans, root); | ||
3651 | btrfs_btree_balance_dirty(root, nr); | ||
3652 | |||
3653 | if (attr->ia_size > inode->i_size) { | ||
3654 | ret = btrfs_cont_expand(inode, attr->ia_size); | ||
3655 | if (ret) { | 3695 | if (ret) { |
3656 | btrfs_truncate(inode); | 3696 | btrfs_setsize(inode, oldsize); |
3657 | return ret; | 3697 | return ret; |
3658 | } | 3698 | } |
3659 | 3699 | ||
3660 | i_size_write(inode, attr->ia_size); | 3700 | mark_inode_dirty(inode); |
3661 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3701 | } else { |
3662 | 3702 | ||
3663 | trans = btrfs_start_transaction(root, 0); | 3703 | /* |
3664 | BUG_ON(IS_ERR(trans)); | 3704 | * We're truncating a file that used to have good data down to |
3665 | btrfs_set_trans_block_group(trans, inode); | 3705 | * zero. Make sure it gets into the ordered flush list so that |
3666 | trans->block_rsv = root->orphan_block_rsv; | 3706 | * any new writes get down to disk quickly. |
3667 | BUG_ON(!trans->block_rsv); | 3707 | */ |
3708 | if (newsize == 0) | ||
3709 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3668 | 3710 | ||
3669 | ret = btrfs_update_inode(trans, root, inode); | 3711 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3670 | BUG_ON(ret); | 3712 | truncate_setsize(inode, newsize); |
3671 | if (inode->i_nlink > 0) { | 3713 | ret = btrfs_truncate(inode); |
3672 | ret = btrfs_orphan_del(trans, inode); | ||
3673 | BUG_ON(ret); | ||
3674 | } | ||
3675 | nr = trans->blocks_used; | ||
3676 | btrfs_end_transaction(trans, root); | ||
3677 | btrfs_btree_balance_dirty(root, nr); | ||
3678 | return 0; | ||
3679 | } | 3714 | } |
3680 | 3715 | ||
3681 | /* | 3716 | return ret; |
3682 | * We're truncating a file that used to have good data down to | ||
3683 | * zero. Make sure it gets into the ordered flush list so that | ||
3684 | * any new writes get down to disk quickly. | ||
3685 | */ | ||
3686 | if (attr->ia_size == 0) | ||
3687 | BTRFS_I(inode)->ordered_data_close = 1; | ||
3688 | |||
3689 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | ||
3690 | ret = vmtruncate(inode, attr->ia_size); | ||
3691 | BUG_ON(ret); | ||
3692 | |||
3693 | return 0; | ||
3694 | } | 3717 | } |
3695 | 3718 | ||
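
One subtlety in the grow path of btrfs_setsize() above: the new i_size is published and the page cache extended before btrfs_cont_expand() fills the hole, so on failure the function calls itself with oldsize to roll the visible size back. Condensed to its branch structure (same logic as the hunk above, comments added):

if (newsize > oldsize) {                /* grow: publish size, then fill hole */
    i_size_write(inode, newsize);
    btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
    truncate_pagecache(inode, oldsize, newsize);
    ret = btrfs_cont_expand(inode, oldsize, newsize);
    if (ret) {
        btrfs_setsize(inode, oldsize);  /* undo the size bump */
        return ret;
    }
    mark_inode_dirty(inode);
} else {                                /* shrink: drop pages, then truncate */
    if (newsize == 0)                   /* flush soon if emptying the file */
        BTRFS_I(inode)->ordered_data_close = 1;
    truncate_setsize(inode, newsize);
    ret = btrfs_truncate(inode);
}
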
3696 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3719 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
@@ -3707,7 +3730,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3707 | return err; | 3730 | return err; |
3708 | 3731 | ||
3709 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 3732 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
3710 | err = btrfs_setattr_size(inode, attr); | 3733 | err = btrfs_setsize(inode, attr->ia_size); |
3711 | if (err) | 3734 | if (err) |
3712 | return err; | 3735 | return err; |
3713 | } | 3736 | } |
@@ -3730,6 +3753,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3730 | unsigned long nr; | 3753 | unsigned long nr; |
3731 | int ret; | 3754 | int ret; |
3732 | 3755 | ||
3756 | trace_btrfs_inode_evict(inode); | ||
3757 | |||
3733 | truncate_inode_pages(&inode->i_data, 0); | 3758 | truncate_inode_pages(&inode->i_data, 0); |
3734 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3759 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3735 | root == root->fs_info->tree_root)) | 3760 | root == root->fs_info->tree_root)) |
@@ -4072,7 +4097,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
4072 | BTRFS_I(inode)->root = root; | 4097 | BTRFS_I(inode)->root = root; |
4073 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 4098 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
4074 | btrfs_read_locked_inode(inode); | 4099 | btrfs_read_locked_inode(inode); |
4075 | |||
4076 | inode_tree_add(inode); | 4100 | inode_tree_add(inode); |
4077 | unlock_new_inode(inode); | 4101 | unlock_new_inode(inode); |
4078 | if (new) | 4102 | if (new) |
@@ -4147,8 +4171,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4147 | if (!IS_ERR(inode) && root != sub_root) { | 4171 | if (!IS_ERR(inode) && root != sub_root) { |
4148 | down_read(&root->fs_info->cleanup_work_sem); | 4172 | down_read(&root->fs_info->cleanup_work_sem); |
4149 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4173 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
4150 | btrfs_orphan_cleanup(sub_root); | 4174 | ret = btrfs_orphan_cleanup(sub_root); |
4151 | up_read(&root->fs_info->cleanup_work_sem); | 4175 | up_read(&root->fs_info->cleanup_work_sem); |
4176 | if (ret) | ||
4177 | inode = ERR_PTR(ret); | ||
4152 | } | 4178 | } |
4153 | 4179 | ||
4154 | return inode; | 4180 | return inode; |
@@ -4196,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4196 | struct btrfs_key found_key; | 4222 | struct btrfs_key found_key; |
4197 | struct btrfs_path *path; | 4223 | struct btrfs_path *path; |
4198 | int ret; | 4224 | int ret; |
4199 | u32 nritems; | ||
4200 | struct extent_buffer *leaf; | 4225 | struct extent_buffer *leaf; |
4201 | int slot; | 4226 | int slot; |
4202 | int advance; | ||
4203 | unsigned char d_type; | 4227 | unsigned char d_type; |
4204 | int over = 0; | 4228 | int over = 0; |
4205 | u32 di_cur; | 4229 | u32 di_cur; |
@@ -4242,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4242 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4266 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4243 | if (ret < 0) | 4267 | if (ret < 0) |
4244 | goto err; | 4268 | goto err; |
4245 | advance = 0; | ||
4246 | 4269 | ||
4247 | while (1) { | 4270 | while (1) { |
4248 | leaf = path->nodes[0]; | 4271 | leaf = path->nodes[0]; |
4249 | nritems = btrfs_header_nritems(leaf); | ||
4250 | slot = path->slots[0]; | 4272 | slot = path->slots[0]; |
4251 | if (advance || slot >= nritems) { | 4273 | if (slot >= btrfs_header_nritems(leaf)) { |
4252 | if (slot >= nritems - 1) { | 4274 | ret = btrfs_next_leaf(root, path); |
4253 | ret = btrfs_next_leaf(root, path); | 4275 | if (ret < 0) |
4254 | if (ret) | 4276 | goto err; |
4255 | break; | 4277 | else if (ret > 0) |
4256 | leaf = path->nodes[0]; | 4278 | break; |
4257 | nritems = btrfs_header_nritems(leaf); | 4279 | continue; |
4258 | slot = path->slots[0]; | ||
4259 | } else { | ||
4260 | slot++; | ||
4261 | path->slots[0]++; | ||
4262 | } | ||
4263 | } | 4280 | } |
4264 | 4281 | ||
4265 | advance = 1; | ||
4266 | item = btrfs_item_nr(leaf, slot); | 4282 | item = btrfs_item_nr(leaf, slot); |
4267 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 4283 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
4268 | 4284 | ||
@@ -4271,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4271 | if (btrfs_key_type(&found_key) != key_type) | 4287 | if (btrfs_key_type(&found_key) != key_type) |
4272 | break; | 4288 | break; |
4273 | if (found_key.offset < filp->f_pos) | 4289 | if (found_key.offset < filp->f_pos) |
4274 | continue; | 4290 | goto next; |
4275 | 4291 | ||
4276 | filp->f_pos = found_key.offset; | 4292 | filp->f_pos = found_key.offset; |
4277 | 4293 | ||
@@ -4282,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4282 | while (di_cur < di_total) { | 4298 | while (di_cur < di_total) { |
4283 | struct btrfs_key location; | 4299 | struct btrfs_key location; |
4284 | 4300 | ||
4301 | if (verify_dir_item(root, leaf, di)) | ||
4302 | break; | ||
4303 | |||
4285 | name_len = btrfs_dir_name_len(leaf, di); | 4304 | name_len = btrfs_dir_name_len(leaf, di); |
4286 | if (name_len <= sizeof(tmp_name)) { | 4305 | if (name_len <= sizeof(tmp_name)) { |
4287 | name_ptr = tmp_name; | 4306 | name_ptr = tmp_name; |
@@ -4321,6 +4340,8 @@ skip: | |||
4321 | di_cur += di_len; | 4340 | di_cur += di_len; |
4322 | di = (struct btrfs_dir_item *)((char *)di + di_len); | 4341 | di = (struct btrfs_dir_item *)((char *)di + di_len); |
4323 | } | 4342 | } |
4343 | next: | ||
4344 | path->slots[0]++; | ||
4324 | } | 4345 | } |
4325 | 4346 | ||
4326 | /* Reached end of directory/root. Bump pos past the last item. */ | 4347 | /* Reached end of directory/root. Bump pos past the last item. */ |
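
The rewritten readdir loop uses the standard btrfs leaf-walk idiom in place of the old advance/nritems bookkeeping: when the slot runs off the current leaf, hop to the next leaf and retry; otherwise process the item and bump the slot. The bare shape of that idiom, as a sketch (process_item() is a stand-in for the dirent emission above):

ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
    goto err;

while (1) {
    struct extent_buffer *leaf = path->nodes[0];
    int slot = path->slots[0];

    if (slot >= btrfs_header_nritems(leaf)) {
        ret = btrfs_next_leaf(root, path);  /* hop to the next leaf */
        if (ret < 0)
            goto err;
        if (ret > 0)
            break;                          /* ran out of leaves */
        continue;                           /* re-check the new slot */
    }

    btrfs_item_key_to_cpu(leaf, &found_key, slot);
    if (found_key.objectid != key.objectid ||
        btrfs_key_type(&found_key) != key_type)
        break;                              /* left our key range */

    process_item(leaf, slot);               /* stand-in */
    path->slots[0]++;                       /* next item in this leaf */
}
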
@@ -4513,12 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4513 | BUG_ON(!path); | 4534 | BUG_ON(!path); |
4514 | 4535 | ||
4515 | inode = new_inode(root->fs_info->sb); | 4536 | inode = new_inode(root->fs_info->sb); |
4516 | if (!inode) | 4537 | if (!inode) { |
4538 | btrfs_free_path(path); | ||
4517 | return ERR_PTR(-ENOMEM); | 4539 | return ERR_PTR(-ENOMEM); |
4540 | } | ||
4518 | 4541 | ||
4519 | if (dir) { | 4542 | if (dir) { |
4543 | trace_btrfs_inode_request(dir); | ||
4544 | |||
4520 | ret = btrfs_set_inode_index(dir, index); | 4545 | ret = btrfs_set_inode_index(dir, index); |
4521 | if (ret) { | 4546 | if (ret) { |
4547 | btrfs_free_path(path); | ||
4522 | iput(inode); | 4548 | iput(inode); |
4523 | return ERR_PTR(ret); | 4549 | return ERR_PTR(ret); |
4524 | } | 4550 | } |
@@ -4585,12 +4611,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4585 | if ((mode & S_IFREG)) { | 4611 | if ((mode & S_IFREG)) { |
4586 | if (btrfs_test_opt(root, NODATASUM)) | 4612 | if (btrfs_test_opt(root, NODATASUM)) |
4587 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4613 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4588 | if (btrfs_test_opt(root, NODATACOW)) | 4614 | if (btrfs_test_opt(root, NODATACOW) || |
4615 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
4589 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4616 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
4590 | } | 4617 | } |
4591 | 4618 | ||
4592 | insert_inode_hash(inode); | 4619 | insert_inode_hash(inode); |
4593 | inode_tree_add(inode); | 4620 | inode_tree_add(inode); |
4621 | |||
4622 | trace_btrfs_inode_new(inode); | ||
4623 | |||
4594 | return inode; | 4624 | return inode; |
4595 | fail: | 4625 | fail: |
4596 | if (dir) | 4626 | if (dir) |
@@ -4809,10 +4839,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4809 | 4839 | ||
4810 | /* do not allow sys_link's with other subvols of the same device */ | 4840 | /* do not allow sys_link's with other subvols of the same device */ |
4811 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4841 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4812 | return -EPERM; | 4842 | return -EXDEV; |
4813 | 4843 | ||
4814 | btrfs_inc_nlink(inode); | 4844 | if (inode->i_nlink == ~0U) |
4815 | inode->i_ctime = CURRENT_TIME; | 4845 | return -EMLINK; |
4816 | 4846 | ||
4817 | err = btrfs_set_inode_index(dir, &index); | 4847 | err = btrfs_set_inode_index(dir, &index); |
4818 | if (err) | 4848 | if (err) |
@@ -4829,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4829 | goto fail; | 4859 | goto fail; |
4830 | } | 4860 | } |
4831 | 4861 | ||
4862 | btrfs_inc_nlink(inode); | ||
4863 | inode->i_ctime = CURRENT_TIME; | ||
4864 | |||
4832 | btrfs_set_trans_block_group(trans, dir); | 4865 | btrfs_set_trans_block_group(trans, dir); |
4833 | ihold(inode); | 4866 | ihold(inode); |
4834 | 4867 | ||
@@ -5198,7 +5231,7 @@ again: | |||
5198 | btrfs_mark_buffer_dirty(leaf); | 5231 | btrfs_mark_buffer_dirty(leaf); |
5199 | } | 5232 | } |
5200 | set_extent_uptodate(io_tree, em->start, | 5233 | set_extent_uptodate(io_tree, em->start, |
5201 | extent_map_end(em) - 1, GFP_NOFS); | 5234 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
5202 | goto insert; | 5235 | goto insert; |
5203 | } else { | 5236 | } else { |
5204 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); | 5237 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); |
@@ -5265,6 +5298,9 @@ insert: | |||
5265 | } | 5298 | } |
5266 | write_unlock(&em_tree->lock); | 5299 | write_unlock(&em_tree->lock); |
5267 | out: | 5300 | out: |
5301 | |||
5302 | trace_btrfs_get_extent(root, em); | ||
5303 | |||
5268 | if (path) | 5304 | if (path) |
5269 | btrfs_free_path(path); | 5305 | btrfs_free_path(path); |
5270 | if (trans) { | 5306 | if (trans) { |
@@ -5402,17 +5438,30 @@ out: | |||
5402 | } | 5438 | } |
5403 | 5439 | ||
5404 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 5440 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
5441 | struct extent_map *em, | ||
5405 | u64 start, u64 len) | 5442 | u64 start, u64 len) |
5406 | { | 5443 | { |
5407 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5444 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5408 | struct btrfs_trans_handle *trans; | 5445 | struct btrfs_trans_handle *trans; |
5409 | struct extent_map *em; | ||
5410 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5446 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
5411 | struct btrfs_key ins; | 5447 | struct btrfs_key ins; |
5412 | u64 alloc_hint; | 5448 | u64 alloc_hint; |
5413 | int ret; | 5449 | int ret; |
5450 | bool insert = false; | ||
5414 | 5451 | ||
5415 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5452 | /* |
5453 | * OK, if the extent map we looked up is a hole covering exactly the | ||
5454 | * range we want, there is no reason to allocate a new one. However, if | ||
5455 | * it is not right, we need to free this one and drop the cache for | ||
5456 | * our range. | ||
5457 | */ | ||
5458 | if (em->block_start != EXTENT_MAP_HOLE || em->start != start || | ||
5459 | em->len != len) { | ||
5460 | free_extent_map(em); | ||
5461 | em = NULL; | ||
5462 | insert = true; | ||
5463 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5464 | } | ||
5416 | 5465 | ||
5417 | trans = btrfs_join_transaction(root, 0); | 5466 | trans = btrfs_join_transaction(root, 0); |
5418 | if (IS_ERR(trans)) | 5467 | if (IS_ERR(trans)) |
@@ -5428,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5428 | goto out; | 5477 | goto out; |
5429 | } | 5478 | } |
5430 | 5479 | ||
5431 | em = alloc_extent_map(GFP_NOFS); | ||
5432 | if (!em) { | 5480 | if (!em) { |
5433 | em = ERR_PTR(-ENOMEM); | 5481 | em = alloc_extent_map(GFP_NOFS); |
5434 | goto out; | 5482 | if (!em) { |
5483 | em = ERR_PTR(-ENOMEM); | ||
5484 | goto out; | ||
5485 | } | ||
5435 | } | 5486 | } |
5436 | 5487 | ||
5437 | em->start = start; | 5488 | em->start = start; |
@@ -5441,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5441 | em->block_start = ins.objectid; | 5492 | em->block_start = ins.objectid; |
5442 | em->block_len = ins.offset; | 5493 | em->block_len = ins.offset; |
5443 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 5494 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
5495 | |||
5496 | /* | ||
5497 | * We need to do this because if we're using the original em we searched | ||
5498 | * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. | ||
5499 | */ | ||
5500 | em->flags = 0; | ||
5444 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 5501 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
5445 | 5502 | ||
5446 | while (1) { | 5503 | while (insert) { |
5447 | write_lock(&em_tree->lock); | 5504 | write_lock(&em_tree->lock); |
5448 | ret = add_extent_mapping(em_tree, em); | 5505 | ret = add_extent_mapping(em_tree, em); |
5449 | write_unlock(&em_tree->lock); | 5506 | write_unlock(&em_tree->lock); |
@@ -5661,8 +5718,7 @@ must_cow: | |||
5661 | * it above | 5718 | * it above |
5662 | */ | 5719 | */ |
5663 | len = bh_result->b_size; | 5720 | len = bh_result->b_size; |
5664 | free_extent_map(em); | 5721 | em = btrfs_new_extent_direct(inode, em, start, len); |
5665 | em = btrfs_new_extent_direct(inode, start, len); | ||
5666 | if (IS_ERR(em)) | 5722 | if (IS_ERR(em)) |
5667 | return PTR_ERR(em); | 5723 | return PTR_ERR(em); |
5668 | len = min(len, em->len - (start - em->start)); | 5724 | len = min(len, em->len - (start - em->start)); |
@@ -5748,6 +5804,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
5748 | 5804 | ||
5749 | kfree(dip->csums); | 5805 | kfree(dip->csums); |
5750 | kfree(dip); | 5806 | kfree(dip); |
5807 | |||
5808 | /* If we had a csum failure make sure to clear the uptodate flag */ | ||
5809 | if (err) | ||
5810 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5751 | dio_end_io(bio, err); | 5811 | dio_end_io(bio, err); |
5752 | } | 5812 | } |
5753 | 5813 | ||
@@ -5821,8 +5881,10 @@ again: | |||
5821 | } | 5881 | } |
5822 | 5882 | ||
5823 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5883 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5824 | btrfs_ordered_update_i_size(inode, 0, ordered); | 5884 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5825 | btrfs_update_inode(trans, root, inode); | 5885 | if (!ret) |
5886 | btrfs_update_inode(trans, root, inode); | ||
5887 | ret = 0; | ||
5826 | out_unlock: | 5888 | out_unlock: |
5827 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | 5889 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, |
5828 | ordered->file_offset + ordered->len - 1, | 5890 | ordered->file_offset + ordered->len - 1, |
@@ -5849,6 +5911,10 @@ out_done: | |||
5849 | 5911 | ||
5850 | kfree(dip->csums); | 5912 | kfree(dip->csums); |
5851 | kfree(dip); | 5913 | kfree(dip); |
5914 | |||
5915 | /* If we had an error make sure to clear the uptodate flag */ | ||
5916 | if (err) | ||
5917 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
5852 | dio_end_io(bio, err); | 5918 | dio_end_io(bio, err); |
5853 | } | 5919 | } |
5854 | 5920 | ||
@@ -5904,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | |||
5904 | 5970 | ||
5905 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | 5971 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, |
5906 | int rw, u64 file_offset, int skip_sum, | 5972 | int rw, u64 file_offset, int skip_sum, |
5907 | u32 *csums) | 5973 | u32 *csums, int async_submit) |
5908 | { | 5974 | { |
5909 | int write = rw & REQ_WRITE; | 5975 | int write = rw & REQ_WRITE; |
5910 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5976 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -5915,18 +5981,33 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
5915 | if (ret) | 5981 | if (ret) |
5916 | goto err; | 5982 | goto err; |
5917 | 5983 | ||
5918 | if (write && !skip_sum) { | 5984 | if (skip_sum) |
5985 | goto map; | ||
5986 | |||
5987 | if (write && async_submit) { | ||
5919 | ret = btrfs_wq_submit_bio(root->fs_info, | 5988 | ret = btrfs_wq_submit_bio(root->fs_info, |
5920 | inode, rw, bio, 0, 0, | 5989 | inode, rw, bio, 0, 0, |
5921 | file_offset, | 5990 | file_offset, |
5922 | __btrfs_submit_bio_start_direct_io, | 5991 | __btrfs_submit_bio_start_direct_io, |
5923 | __btrfs_submit_bio_done); | 5992 | __btrfs_submit_bio_done); |
5924 | goto err; | 5993 | goto err; |
5925 | } else if (!skip_sum) | 5994 | } else if (write) { |
5926 | btrfs_lookup_bio_sums_dio(root, inode, bio, | 5995 | /* |
5996 | * If we aren't doing async submit, calculate the csum of the | ||
5997 | * bio now. | ||
5998 | */ | ||
5999 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); | ||
6000 | if (ret) | ||
6001 | goto err; | ||
6002 | } else if (!skip_sum) { | ||
6003 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5927 | file_offset, csums); | 6004 | file_offset, csums); |
6005 | if (ret) | ||
6006 | goto err; | ||
6007 | } | ||
5928 | 6008 | ||
5929 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | 6009 | map: |
6010 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); | ||
5930 | err: | 6011 | err: |
5931 | bio_put(bio); | 6012 | bio_put(bio); |
5932 | return ret; | 6013 | return ret; |
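
After this change __btrfs_submit_dio_bio() has four outcomes, and the ordering of the checks is the point: skip_sum bios go straight to mapping, async writes hand checksumming to the worker threads, synchronous writes checksum inline, and reads look up the stored csums. Trimmed to the branch structure (arguments of the worker call elided; same calls as the hunk above):

if (skip_sum)
    goto map;                           /* no checksumming at all */

if (write && async_submit) {
    ret = btrfs_wq_submit_bio(...);     /* csum in worker, map later */
    goto err;
}

if (write)                              /* sync write: csum inline now */
    ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
else                                    /* read: fetch the stored csums */
    ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset, csums);
if (ret)
    goto err;
map:
ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
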
@@ -5948,13 +6029,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5948 | int nr_pages = 0; | 6029 | int nr_pages = 0; |
5949 | u32 *csums = dip->csums; | 6030 | u32 *csums = dip->csums; |
5950 | int ret = 0; | 6031 | int ret = 0; |
5951 | 6032 | int async_submit = 0; | |
5952 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | 6033 | int write = rw & REQ_WRITE; |
5953 | if (!bio) | ||
5954 | return -ENOMEM; | ||
5955 | bio->bi_private = dip; | ||
5956 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5957 | atomic_inc(&dip->pending_bios); | ||
5958 | 6034 | ||
5959 | map_length = orig_bio->bi_size; | 6035 | map_length = orig_bio->bi_size; |
5960 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6036 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, |
@@ -5964,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5964 | return -EIO; | 6040 | return -EIO; |
5965 | } | 6041 | } |
5966 | 6042 | ||
6043 | if (map_length >= orig_bio->bi_size) { | ||
6044 | bio = orig_bio; | ||
6045 | goto submit; | ||
6046 | } | ||
6047 | |||
6048 | async_submit = 1; | ||
6049 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
6050 | if (!bio) | ||
6051 | return -ENOMEM; | ||
6052 | bio->bi_private = dip; | ||
6053 | bio->bi_end_io = btrfs_end_dio_bio; | ||
6054 | atomic_inc(&dip->pending_bios); | ||
6055 | |||
5967 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | 6056 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { |
5968 | if (unlikely(map_length < submit_len + bvec->bv_len || | 6057 | if (unlikely(map_length < submit_len + bvec->bv_len || |
5969 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | 6058 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, |
@@ -5977,14 +6066,15 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
5977 | atomic_inc(&dip->pending_bios); | 6066 | atomic_inc(&dip->pending_bios); |
5978 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | 6067 | ret = __btrfs_submit_dio_bio(bio, inode, rw, |
5979 | file_offset, skip_sum, | 6068 | file_offset, skip_sum, |
5980 | csums); | 6069 | csums, async_submit); |
5981 | if (ret) { | 6070 | if (ret) { |
5982 | bio_put(bio); | 6071 | bio_put(bio); |
5983 | atomic_dec(&dip->pending_bios); | 6072 | atomic_dec(&dip->pending_bios); |
5984 | goto out_err; | 6073 | goto out_err; |
5985 | } | 6074 | } |
5986 | 6075 | ||
5987 | if (!skip_sum) | 6076 | /* Writes use the ordered csums */ |
6077 | if (!write && !skip_sum) | ||
5988 | csums = csums + nr_pages; | 6078 | csums = csums + nr_pages; |
5989 | start_sector += submit_len >> 9; | 6079 | start_sector += submit_len >> 9; |
5990 | file_offset += submit_len; | 6080 | file_offset += submit_len; |
@@ -6013,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
6013 | } | 6103 | } |
6014 | } | 6104 | } |
6015 | 6105 | ||
6106 | submit: | ||
6016 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | 6107 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, |
6017 | csums); | 6108 | csums, async_submit); |
6018 | if (!ret) | 6109 | if (!ret) |
6019 | return 0; | 6110 | return 0; |
6020 | 6111 | ||
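
The hunk above short-circuits the common case: when the mapped stripe length covers the whole original bio, it is submitted unsplit and synchronously, and only bios that cross a stripe boundary are cut up and routed through the async-submit path. A rough userspace sketch of the same splitting rule — the names and the fixed stripe length are assumptions for illustration, not btrfs interfaces:

    #include <stdio.h>

    #define STRIPE_LEN 65536u   /* assumed device stripe length */

    static void submit(unsigned off, unsigned len, int async)
    {
            printf("submit off=%u len=%u async=%d\n", off, len, async);
    }

    /* Split [0, size) so that no piece crosses a STRIPE_LEN boundary;
     * a request that already fits in one stripe is submitted whole. */
    static void submit_split(unsigned size)
    {
            unsigned off = 0;

            if (STRIPE_LEN >= size) {
                    submit(0, size, 0);     /* no split, sync submit */
                    return;
            }
            while (off < size) {
                    unsigned len = STRIPE_LEN - (off % STRIPE_LEN);
                    if (len > size - off)
                            len = size - off;
                    submit(off, len, 1);    /* split pieces go async */
                    off += len;
            }
    }

    int main(void)
    {
            submit_split(200000);           /* forces a split */
            return 0;
    }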
@@ -6052,7 +6143,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
6052 | } | 6143 | } |
6053 | dip->csums = NULL; | 6144 | dip->csums = NULL; |
6054 | 6145 | ||
6055 | if (!skip_sum) { | 6146 | /* Writes use the ordered csum stuff, so we don't need dip->csums */ |
6147 | if (!write && !skip_sum) { | ||
6056 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | 6148 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); |
6057 | if (!dip->csums) { | 6149 | if (!dip->csums) { |
6058 | kfree(dip); | 6150 | kfree(dip); |
@@ -6108,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
6108 | unsigned long nr_segs) | 6200 | unsigned long nr_segs) |
6109 | { | 6201 | { |
6110 | int seg; | 6202 | int seg; |
6203 | int i; | ||
6111 | size_t size; | 6204 | size_t size; |
6112 | unsigned long addr; | 6205 | unsigned long addr; |
6113 | unsigned blocksize_mask = root->sectorsize - 1; | 6206 | unsigned blocksize_mask = root->sectorsize - 1; |
@@ -6122,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io | |||
6122 | addr = (unsigned long)iov[seg].iov_base; | 6215 | addr = (unsigned long)iov[seg].iov_base; |
6123 | size = iov[seg].iov_len; | 6216 | size = iov[seg].iov_len; |
6124 | end += size; | 6217 | end += size; |
6125 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | 6218 | if ((addr & blocksize_mask) || (size & blocksize_mask)) |
6126 | goto out; | 6219 | goto out; |
6220 | |||
6221 | /* If this is a write we don't need to check any further */ | ||
6222 | if (rw & WRITE) | ||
6223 | continue; | ||
6224 | |||
6225 | /* | ||
6226 | * Check to make sure we don't have duplicate iov_base's in this | ||
6227 | * iovec; if so return EINVAL, otherwise we'll get csum errors | ||
6228 | * when reading back. | ||
6229 | */ | ||
6230 | for (i = seg + 1; i < nr_segs; i++) { | ||
6231 | if (iov[seg].iov_base == iov[i].iov_base) | ||
6232 | goto out; | ||
6233 | } | ||
6127 | } | 6234 | } |
6128 | retval = 0; | 6235 | retval = 0; |
6129 | out: | 6236 | out: |
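
The alias check above exists because a direct-IO read that lands twice in the same user buffer will fail checksum verification against whichever copy arrived last. A self-contained sketch of the same validation — blocksize handling as in check_direct_IO, with the function name and return convention invented for the example:

    #include <assert.h>
    #include <stdlib.h>
    #include <sys/uio.h>

    /* Return 0 if every segment is block-aligned and, for reads, no two
     * segments share an iov_base; -1 otherwise (stands in for -EINVAL). */
    static int check_iovec(const struct iovec *iov, unsigned long nr_segs,
                           unsigned long blocksize, int is_write)
    {
            unsigned long mask = blocksize - 1;
            unsigned long seg, i;

            for (seg = 0; seg < nr_segs; seg++) {
                    if (((unsigned long)iov[seg].iov_base & mask) ||
                        (iov[seg].iov_len & mask))
                            return -1;
                    if (is_write)   /* writes skip the alias check */
                            continue;
                    for (i = seg + 1; i < nr_segs; i++)
                            if (iov[seg].iov_base == iov[i].iov_base)
                                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            char *buf = aligned_alloc(4096, 4096);
            struct iovec iov[2] = {
                    { .iov_base = buf, .iov_len = 4096 },
                    { .iov_base = buf, .iov_len = 4096 },  /* duplicate */
            };

            assert(check_iovec(iov, 2, 4096, 1) == 0);   /* write: ok      */
            assert(check_iovec(iov, 2, 4096, 0) == -1);  /* read: rejected */
            free(buf);
            return 0;
    }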
@@ -6474,28 +6581,42 @@ out: | |||
6474 | return ret; | 6581 | return ret; |
6475 | } | 6582 | } |
6476 | 6583 | ||
6477 | static void btrfs_truncate(struct inode *inode) | 6584 | static int btrfs_truncate(struct inode *inode) |
6478 | { | 6585 | { |
6479 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6586 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6480 | int ret; | 6587 | int ret; |
6588 | int err = 0; | ||
6481 | struct btrfs_trans_handle *trans; | 6589 | struct btrfs_trans_handle *trans; |
6482 | unsigned long nr; | 6590 | unsigned long nr; |
6483 | u64 mask = root->sectorsize - 1; | 6591 | u64 mask = root->sectorsize - 1; |
6484 | 6592 | ||
6485 | if (!S_ISREG(inode->i_mode)) { | ||
6486 | WARN_ON(1); | ||
6487 | return; | ||
6488 | } | ||
6489 | |||
6490 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6593 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6491 | if (ret) | 6594 | if (ret) |
6492 | return; | 6595 | return ret; |
6493 | 6596 | ||
6494 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6597 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6495 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6598 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6496 | 6599 | ||
6600 | trans = btrfs_start_transaction(root, 5); | ||
6601 | if (IS_ERR(trans)) | ||
6602 | return PTR_ERR(trans); | ||
6603 | |||
6604 | btrfs_set_trans_block_group(trans, inode); | ||
6605 | |||
6606 | ret = btrfs_orphan_add(trans, inode); | ||
6607 | if (ret) { | ||
6608 | btrfs_end_transaction(trans, root); | ||
6609 | return ret; | ||
6610 | } | ||
6611 | |||
6612 | nr = trans->blocks_used; | ||
6613 | btrfs_end_transaction(trans, root); | ||
6614 | btrfs_btree_balance_dirty(root, nr); | ||
6615 | |||
6616 | /* Now start a transaction for the truncate */ | ||
6497 | trans = btrfs_start_transaction(root, 0); | 6617 | trans = btrfs_start_transaction(root, 0); |
6498 | BUG_ON(IS_ERR(trans)); | 6618 | if (IS_ERR(trans)) |
6619 | return PTR_ERR(trans); | ||
6499 | btrfs_set_trans_block_group(trans, inode); | 6620 | btrfs_set_trans_block_group(trans, inode); |
6500 | trans->block_rsv = root->orphan_block_rsv; | 6621 | trans->block_rsv = root->orphan_block_rsv; |
6501 | 6622 | ||
@@ -6522,29 +6643,38 @@ static void btrfs_truncate(struct inode *inode) | |||
6522 | while (1) { | 6643 | while (1) { |
6523 | if (!trans) { | 6644 | if (!trans) { |
6524 | trans = btrfs_start_transaction(root, 0); | 6645 | trans = btrfs_start_transaction(root, 0); |
6525 | BUG_ON(IS_ERR(trans)); | 6646 | if (IS_ERR(trans)) |
6647 | return PTR_ERR(trans); | ||
6526 | btrfs_set_trans_block_group(trans, inode); | 6648 | btrfs_set_trans_block_group(trans, inode); |
6527 | trans->block_rsv = root->orphan_block_rsv; | 6649 | trans->block_rsv = root->orphan_block_rsv; |
6528 | } | 6650 | } |
6529 | 6651 | ||
6530 | ret = btrfs_block_rsv_check(trans, root, | 6652 | ret = btrfs_block_rsv_check(trans, root, |
6531 | root->orphan_block_rsv, 0, 5); | 6653 | root->orphan_block_rsv, 0, 5); |
6532 | if (ret) { | 6654 | if (ret == -EAGAIN) { |
6533 | BUG_ON(ret != -EAGAIN); | ||
6534 | ret = btrfs_commit_transaction(trans, root); | 6655 | ret = btrfs_commit_transaction(trans, root); |
6535 | BUG_ON(ret); | 6656 | if (ret) |
6657 | return ret; | ||
6536 | trans = NULL; | 6658 | trans = NULL; |
6537 | continue; | 6659 | continue; |
6660 | } else if (ret) { | ||
6661 | err = ret; | ||
6662 | break; | ||
6538 | } | 6663 | } |
6539 | 6664 | ||
6540 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6665 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6541 | inode->i_size, | 6666 | inode->i_size, |
6542 | BTRFS_EXTENT_DATA_KEY); | 6667 | BTRFS_EXTENT_DATA_KEY); |
6543 | if (ret != -EAGAIN) | 6668 | if (ret != -EAGAIN) { |
6669 | err = ret; | ||
6544 | break; | 6670 | break; |
6671 | } | ||
6545 | 6672 | ||
6546 | ret = btrfs_update_inode(trans, root, inode); | 6673 | ret = btrfs_update_inode(trans, root, inode); |
6547 | BUG_ON(ret); | 6674 | if (ret) { |
6675 | err = ret; | ||
6676 | break; | ||
6677 | } | ||
6548 | 6678 | ||
6549 | nr = trans->blocks_used; | 6679 | nr = trans->blocks_used; |
6550 | btrfs_end_transaction(trans, root); | 6680 | btrfs_end_transaction(trans, root); |
@@ -6554,16 +6684,27 @@ static void btrfs_truncate(struct inode *inode) | |||
6554 | 6684 | ||
6555 | if (ret == 0 && inode->i_nlink > 0) { | 6685 | if (ret == 0 && inode->i_nlink > 0) { |
6556 | ret = btrfs_orphan_del(trans, inode); | 6686 | ret = btrfs_orphan_del(trans, inode); |
6557 | BUG_ON(ret); | 6687 | if (ret) |
6688 | err = ret; | ||
6689 | } else if (ret && inode->i_nlink > 0) { | ||
6690 | /* | ||
6691 | * Failed to do the truncate; remove us from the in-memory | ||
6692 | * orphan list. | ||
6693 | */ | ||
6694 | ret = btrfs_orphan_del(NULL, inode); | ||
6558 | } | 6695 | } |
6559 | 6696 | ||
6560 | ret = btrfs_update_inode(trans, root, inode); | 6697 | ret = btrfs_update_inode(trans, root, inode); |
6561 | BUG_ON(ret); | 6698 | if (ret && !err) |
6699 | err = ret; | ||
6562 | 6700 | ||
6563 | nr = trans->blocks_used; | 6701 | nr = trans->blocks_used; |
6564 | ret = btrfs_end_transaction_throttle(trans, root); | 6702 | ret = btrfs_end_transaction_throttle(trans, root); |
6565 | BUG_ON(ret); | 6703 | if (ret && !err) |
6704 | err = ret; | ||
6566 | btrfs_btree_balance_dirty(root, nr); | 6705 | btrfs_btree_balance_dirty(root, nr); |
6706 | |||
6707 | return err; | ||
6567 | } | 6708 | } |
6568 | 6709 | ||
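
The reworked btrfs_truncate() above replaces a string of BUG_ON()s with the usual kernel idiom of latching the first failure in err while still running the remaining teardown steps. A tiny sketch of that idiom, with hypothetical step functions standing in for the transaction calls:

    #include <stdio.h>

    static int step_a(void) { return 0; }
    static int step_b(void) { return -5; }   /* pretend -EIO    */
    static int step_c(void) { return -28; }  /* pretend -ENOSPC */

    static int do_teardown(void)
    {
            int ret, err = 0;

            ret = step_a();
            if (ret && !err)        /* latch only the first error */
                    err = ret;
            ret = step_b();
            if (ret && !err)
                    err = ret;
            ret = step_c();         /* still runs, error discarded */
            if (ret && !err)
                    err = ret;
            return err;             /* -5: the first failure wins */
    }

    int main(void)
    {
            printf("%d\n", do_teardown());  /* prints -5 */
            return 0;
    }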
6569 | /* | 6710 | /* |
@@ -6630,9 +6771,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6630 | ei->index_cnt = (u64)-1; | 6771 | ei->index_cnt = (u64)-1; |
6631 | ei->last_unlink_trans = 0; | 6772 | ei->last_unlink_trans = 0; |
6632 | 6773 | ||
6633 | spin_lock_init(&ei->accounting_lock); | ||
6634 | atomic_set(&ei->outstanding_extents, 0); | 6774 | atomic_set(&ei->outstanding_extents, 0); |
6635 | ei->reserved_extents = 0; | 6775 | atomic_set(&ei->reserved_extents, 0); |
6636 | 6776 | ||
6637 | ei->ordered_data_close = 0; | 6777 | ei->ordered_data_close = 0; |
6638 | ei->orphan_meta_reserved = 0; | 6778 | ei->orphan_meta_reserved = 0; |
@@ -6668,7 +6808,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6668 | WARN_ON(!list_empty(&inode->i_dentry)); | 6808 | WARN_ON(!list_empty(&inode->i_dentry)); |
6669 | WARN_ON(inode->i_data.nrpages); | 6809 | WARN_ON(inode->i_data.nrpages); |
6670 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6810 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); |
6671 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6811 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); |
6672 | 6812 | ||
6673 | /* | 6813 | /* |
6674 | * This can happen where we create an inode, but somebody else also | 6814 | * This can happen where we create an inode, but somebody else also |
@@ -6760,6 +6900,8 @@ void btrfs_destroy_cachep(void) | |||
6760 | kmem_cache_destroy(btrfs_transaction_cachep); | 6900 | kmem_cache_destroy(btrfs_transaction_cachep); |
6761 | if (btrfs_path_cachep) | 6901 | if (btrfs_path_cachep) |
6762 | kmem_cache_destroy(btrfs_path_cachep); | 6902 | kmem_cache_destroy(btrfs_path_cachep); |
6903 | if (btrfs_free_space_cachep) | ||
6904 | kmem_cache_destroy(btrfs_free_space_cachep); | ||
6763 | } | 6905 | } |
6764 | 6906 | ||
6765 | int btrfs_init_cachep(void) | 6907 | int btrfs_init_cachep(void) |
@@ -6788,6 +6930,12 @@ int btrfs_init_cachep(void) | |||
6788 | if (!btrfs_path_cachep) | 6930 | if (!btrfs_path_cachep) |
6789 | goto fail; | 6931 | goto fail; |
6790 | 6932 | ||
6933 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | ||
6934 | sizeof(struct btrfs_free_space), 0, | ||
6935 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
6936 | if (!btrfs_free_space_cachep) | ||
6937 | goto fail; | ||
6938 | |||
6791 | return 0; | 6939 | return 0; |
6792 | fail: | 6940 | fail: |
6793 | btrfs_destroy_cachep(); | 6941 | btrfs_destroy_cachep(); |
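
The new btrfs_free_space_cachep follows the same lifecycle as the other slab caches: creation may fail and jump to the common fail path, and btrfs_destroy_cachep() guards each pointer so a partially initialized set tears down cleanly. A userspace analogue of that pattern — the toy cache type is invented, not kmem_cache:

    #include <stdlib.h>

    struct obj_cache { size_t objsize; };

    static struct obj_cache *path_cache, *free_space_cache;

    static struct obj_cache *cache_create(size_t objsize)
    {
            struct obj_cache *c = malloc(sizeof(*c));

            if (c)
                    c->objsize = objsize;
            return c;               /* NULL on allocation failure */
    }

    static void destroy_caches(void)
    {
            /* Safe to call no matter how far init_caches() got:
             * free(NULL) is a no-op, mirroring the explicit NULL
             * checks in the hunk above. */
            free(path_cache);
            free(free_space_cache);
    }

    static int init_caches(void)
    {
            path_cache = cache_create(64);
            if (!path_cache)
                    goto fail;
            free_space_cache = cache_create(128);
            if (!free_space_cache)
                    goto fail;
            return 0;
    fail:
            destroy_caches();
            return -1;
    }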
@@ -6806,6 +6954,26 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6806 | return 0; | 6954 | return 0; |
6807 | } | 6955 | } |
6808 | 6956 | ||
6957 | /* | ||
6958 | * If a file is moved, it will inherit the COW and compression flags of the new | ||
6959 | * directory. | ||
6960 | */ | ||
6961 | static void fixup_inode_flags(struct inode *dir, struct inode *inode) | ||
6962 | { | ||
6963 | struct btrfs_inode *b_dir = BTRFS_I(dir); | ||
6964 | struct btrfs_inode *b_inode = BTRFS_I(inode); | ||
6965 | |||
6966 | if (b_dir->flags & BTRFS_INODE_NODATACOW) | ||
6967 | b_inode->flags |= BTRFS_INODE_NODATACOW; | ||
6968 | else | ||
6969 | b_inode->flags &= ~BTRFS_INODE_NODATACOW; | ||
6970 | |||
6971 | if (b_dir->flags & BTRFS_INODE_COMPRESS) | ||
6972 | b_inode->flags |= BTRFS_INODE_COMPRESS; | ||
6973 | else | ||
6974 | b_inode->flags &= ~BTRFS_INODE_COMPRESS; | ||
6975 | } | ||
6976 | |||
6809 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 6977 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
6810 | struct inode *new_dir, struct dentry *new_dentry) | 6978 | struct inode *new_dir, struct dentry *new_dentry) |
6811 | { | 6979 | { |
@@ -6854,8 +7022,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6854 | * should cover the worst case number of items we'll modify. | 7022 | * should cover the worst case number of items we'll modify. |
6855 | */ | 7023 | */ |
6856 | trans = btrfs_start_transaction(root, 20); | 7024 | trans = btrfs_start_transaction(root, 20); |
6857 | if (IS_ERR(trans)) | 7025 | if (IS_ERR(trans)) { |
6858 | return PTR_ERR(trans); | 7026 | ret = PTR_ERR(trans); |
7027 | goto out_notrans; | ||
7028 | } | ||
6859 | 7029 | ||
6860 | btrfs_set_trans_block_group(trans, new_dir); | 7030 | btrfs_set_trans_block_group(trans, new_dir); |
6861 | 7031 | ||
@@ -6908,11 +7078,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6908 | old_dentry->d_name.name, | 7078 | old_dentry->d_name.name, |
6909 | old_dentry->d_name.len); | 7079 | old_dentry->d_name.len); |
6910 | } else { | 7080 | } else { |
6911 | btrfs_inc_nlink(old_dentry->d_inode); | 7081 | ret = __btrfs_unlink_inode(trans, root, old_dir, |
6912 | ret = btrfs_unlink_inode(trans, root, old_dir, | 7082 | old_dentry->d_inode, |
6913 | old_dentry->d_inode, | 7083 | old_dentry->d_name.name, |
6914 | old_dentry->d_name.name, | 7084 | old_dentry->d_name.len); |
6915 | old_dentry->d_name.len); | 7085 | if (!ret) |
7086 | ret = btrfs_update_inode(trans, root, old_inode); | ||
6916 | } | 7087 | } |
6917 | BUG_ON(ret); | 7088 | BUG_ON(ret); |
6918 | 7089 | ||
@@ -6939,6 +7110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6939 | } | 7110 | } |
6940 | } | 7111 | } |
6941 | 7112 | ||
7113 | fixup_inode_flags(new_dir, old_inode); | ||
7114 | |||
6942 | ret = btrfs_add_link(trans, new_dir, old_inode, | 7115 | ret = btrfs_add_link(trans, new_dir, old_inode, |
6943 | new_dentry->d_name.name, | 7116 | new_dentry->d_name.name, |
6944 | new_dentry->d_name.len, 0, index); | 7117 | new_dentry->d_name.len, 0, index); |
@@ -6952,7 +7125,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6952 | } | 7125 | } |
6953 | out_fail: | 7126 | out_fail: |
6954 | btrfs_end_transaction_throttle(trans, root); | 7127 | btrfs_end_transaction_throttle(trans, root); |
6955 | 7128 | out_notrans: | |
6956 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 7129 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
6957 | up_read(&root->fs_info->subvol_sem); | 7130 | up_read(&root->fs_info->subvol_sem); |
6958 | 7131 | ||
@@ -7340,7 +7513,6 @@ static const struct address_space_operations btrfs_aops = { | |||
7340 | .writepage = btrfs_writepage, | 7513 | .writepage = btrfs_writepage, |
7341 | .writepages = btrfs_writepages, | 7514 | .writepages = btrfs_writepages, |
7342 | .readpages = btrfs_readpages, | 7515 | .readpages = btrfs_readpages, |
7343 | .sync_page = block_sync_page, | ||
7344 | .direct_IO = btrfs_direct_IO, | 7516 | .direct_IO = btrfs_direct_IO, |
7345 | .invalidatepage = btrfs_invalidatepage, | 7517 | .invalidatepage = btrfs_invalidatepage, |
7346 | .releasepage = btrfs_releasepage, | 7518 | .releasepage = btrfs_releasepage, |
@@ -7356,7 +7528,6 @@ static const struct address_space_operations btrfs_symlink_aops = { | |||
7356 | }; | 7528 | }; |
7357 | 7529 | ||
7358 | static const struct inode_operations btrfs_file_inode_operations = { | 7530 | static const struct inode_operations btrfs_file_inode_operations = { |
7359 | .truncate = btrfs_truncate, | ||
7360 | .getattr = btrfs_getattr, | 7531 | .getattr = btrfs_getattr, |
7361 | .setattr = btrfs_setattr, | 7532 | .setattr = btrfs_setattr, |
7362 | .setxattr = btrfs_setxattr, | 7533 | .setxattr = btrfs_setxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5fdb2abc4fa7..ffb48d6c5433 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | ||
43 | #include "compat.h" | 44 | #include "compat.h" |
44 | #include "ctree.h" | 45 | #include "ctree.h" |
45 | #include "disk-io.h" | 46 | #include "disk-io.h" |
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) | |||
138 | return 0; | 139 | return 0; |
139 | } | 140 | } |
140 | 141 | ||
142 | static int check_flags(unsigned int flags) | ||
143 | { | ||
144 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | ||
145 | FS_NOATIME_FL | FS_NODUMP_FL | \ | ||
146 | FS_SYNC_FL | FS_DIRSYNC_FL | \ | ||
147 | FS_NOCOMP_FL | FS_COMPR_FL | \ | ||
148 | FS_NOCOW_FL | FS_COW_FL)) | ||
149 | return -EOPNOTSUPP; | ||
150 | |||
151 | if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) | ||
152 | return -EINVAL; | ||
153 | |||
154 | if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) | ||
155 | return -EINVAL; | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
141 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | 160 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) |
142 | { | 161 | { |
143 | struct inode *inode = file->f_path.dentry->d_inode; | 162 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -153,12 +172,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
153 | if (copy_from_user(&flags, arg, sizeof(flags))) | 172 | if (copy_from_user(&flags, arg, sizeof(flags))) |
154 | return -EFAULT; | 173 | return -EFAULT; |
155 | 174 | ||
156 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 175 | ret = check_flags(flags); |
157 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 176 | if (ret) |
158 | FS_SYNC_FL | FS_DIRSYNC_FL)) | 177 | return ret; |
159 | return -EOPNOTSUPP; | ||
160 | 178 | ||
161 | if (!is_owner_or_cap(inode)) | 179 | if (!inode_owner_or_capable(inode)) |
162 | return -EACCES; | 180 | return -EACCES; |
163 | 181 | ||
164 | mutex_lock(&inode->i_mutex); | 182 | mutex_lock(&inode->i_mutex); |
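
Factoring the whitelist into check_flags() keeps the validation in one place as new bits arrive, and lets contradictory pairs be rejected before any inode state is touched. A sketch of the same shape with made-up flag values — the real FS_*_FL constants come from linux/fs.h:

    #include <assert.h>
    #include <errno.h>

    #define FL_COMPR     (1u << 0)
    #define FL_NOCOMP    (1u << 1)
    #define FL_COW       (1u << 2)
    #define FL_NOCOW     (1u << 3)
    #define FL_SUPPORTED (FL_COMPR | FL_NOCOMP | FL_COW | FL_NOCOW)

    static int check_flags(unsigned int flags)
    {
            if (flags & ~FL_SUPPORTED)      /* unknown bit requested */
                    return -EOPNOTSUPP;
            if ((flags & FL_COMPR) && (flags & FL_NOCOMP))
                    return -EINVAL;         /* mutually exclusive */
            if ((flags & FL_COW) && (flags & FL_NOCOW))
                    return -EINVAL;
            return 0;
    }

    int main(void)
    {
            assert(check_flags(FL_COMPR) == 0);
            assert(check_flags(FL_COMPR | FL_NOCOMP) == -EINVAL);
            assert(check_flags(1u << 8) == -EOPNOTSUPP);
            return 0;
    }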
@@ -201,6 +219,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
201 | else | 219 | else |
202 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 220 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
203 | 221 | ||
222 | /* | ||
223 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | ||
224 | * flag may be changed automatically if the compression code won't make | ||
225 | * things smaller. | ||
226 | */ | ||
227 | if (flags & FS_NOCOMP_FL) { | ||
228 | ip->flags &= ~BTRFS_INODE_COMPRESS; | ||
229 | ip->flags |= BTRFS_INODE_NOCOMPRESS; | ||
230 | } else if (flags & FS_COMPR_FL) { | ||
231 | ip->flags |= BTRFS_INODE_COMPRESS; | ||
232 | ip->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
233 | } | ||
234 | if (flags & FS_NOCOW_FL) | ||
235 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
236 | else if (flags & FS_COW_FL) | ||
237 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
204 | 238 | ||
205 | trans = btrfs_join_transaction(root, 1); | 239 | trans = btrfs_join_transaction(root, 1); |
206 | BUG_ON(IS_ERR(trans)); | 240 | BUG_ON(IS_ERR(trans)); |
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
213 | btrfs_end_transaction(trans, root); | 247 | btrfs_end_transaction(trans, root); |
214 | 248 | ||
215 | mnt_drop_write(file->f_path.mnt); | 249 | mnt_drop_write(file->f_path.mnt); |
250 | |||
251 | ret = 0; | ||
216 | out_unlock: | 252 | out_unlock: |
217 | mutex_unlock(&inode->i_mutex); | 253 | mutex_unlock(&inode->i_mutex); |
218 | return 0; | 254 | return ret; |
219 | } | 255 | } |
220 | 256 | ||
221 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | 257 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) |
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
225 | return put_user(inode->i_generation, arg); | 261 | return put_user(inode->i_generation, arg); |
226 | } | 262 | } |
227 | 263 | ||
264 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | ||
265 | { | ||
266 | struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; | ||
267 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
268 | struct btrfs_device *device; | ||
269 | struct request_queue *q; | ||
270 | struct fstrim_range range; | ||
271 | u64 minlen = ULLONG_MAX; | ||
272 | u64 num_devices = 0; | ||
273 | int ret; | ||
274 | |||
275 | if (!capable(CAP_SYS_ADMIN)) | ||
276 | return -EPERM; | ||
277 | |||
278 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
279 | list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { | ||
280 | if (!device->bdev) | ||
281 | continue; | ||
282 | q = bdev_get_queue(device->bdev); | ||
283 | if (blk_queue_discard(q)) { | ||
284 | num_devices++; | ||
285 | minlen = min((u64)q->limits.discard_granularity, | ||
286 | minlen); | ||
287 | } | ||
288 | } | ||
289 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
290 | if (!num_devices) | ||
291 | return -EOPNOTSUPP; | ||
292 | |||
293 | if (copy_from_user(&range, arg, sizeof(range))) | ||
294 | return -EFAULT; | ||
295 | |||
296 | range.minlen = max(range.minlen, minlen); | ||
297 | ret = btrfs_trim_fs(root, &range); | ||
298 | if (ret < 0) | ||
299 | return ret; | ||
300 | |||
301 | if (copy_to_user(arg, &range, sizeof(range))) | ||
302 | return -EFAULT; | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
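
With this handler wired into btrfs_ioctl(), userspace can trim a mounted btrfs exactly as it would ext4, e.g. via fstrim. A minimal caller for reference — FITRIM and struct fstrim_range are the standard definitions from linux/fs.h, and the default mount point is just an example:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = (__u64)-1,    /* whole filesystem */
                    .minlen = 0,            /* kernel raises this to the
                                               discard granularity */
            };
            int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

            if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            close(fd);
            return 0;
    }

On success the kernel writes the trimmed byte count back into range.len, which is why the handler above ends with a copy_to_user().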
228 | static noinline int create_subvol(struct btrfs_root *root, | 307 | static noinline int create_subvol(struct btrfs_root *root, |
229 | struct dentry *dentry, | 308 | struct dentry *dentry, |
230 | char *name, int namelen, | 309 | char *name, int namelen, |
@@ -294,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
294 | inode_item->nbytes = cpu_to_le64(root->leafsize); | 373 | inode_item->nbytes = cpu_to_le64(root->leafsize); |
295 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | 374 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); |
296 | 375 | ||
376 | root_item.flags = 0; | ||
377 | root_item.byte_limit = 0; | ||
378 | inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); | ||
379 | |||
297 | btrfs_set_root_bytenr(&root_item, leaf->start); | 380 | btrfs_set_root_bytenr(&root_item, leaf->start); |
298 | btrfs_set_root_generation(&root_item, trans->transid); | 381 | btrfs_set_root_generation(&root_item, trans->transid); |
299 | btrfs_set_root_level(&root_item, 0); | 382 | btrfs_set_root_level(&root_item, 0); |
@@ -409,7 +492,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
409 | if (ret) | 492 | if (ret) |
410 | goto fail; | 493 | goto fail; |
411 | 494 | ||
412 | btrfs_orphan_cleanup(pending_snapshot->snap); | 495 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); |
496 | if (ret) | ||
497 | goto fail; | ||
413 | 498 | ||
414 | parent = dget_parent(dentry); | 499 | parent = dget_parent(dentry); |
415 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | 500 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); |
@@ -1077,7 +1162,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | |||
1077 | if (flags & ~BTRFS_SUBVOL_RDONLY) | 1162 | if (flags & ~BTRFS_SUBVOL_RDONLY) |
1078 | return -EOPNOTSUPP; | 1163 | return -EOPNOTSUPP; |
1079 | 1164 | ||
1080 | if (!is_owner_or_cap(inode)) | 1165 | if (!inode_owner_or_capable(inode)) |
1081 | return -EACCES; | 1166 | return -EACCES; |
1082 | 1167 | ||
1083 | down_write(&root->fs_info->subvol_sem); | 1168 | down_write(&root->fs_info->subvol_sem); |
@@ -2202,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2202 | struct btrfs_ioctl_space_info space; | 2287 | struct btrfs_ioctl_space_info space; |
2203 | struct btrfs_ioctl_space_info *dest; | 2288 | struct btrfs_ioctl_space_info *dest; |
2204 | struct btrfs_ioctl_space_info *dest_orig; | 2289 | struct btrfs_ioctl_space_info *dest_orig; |
2205 | struct btrfs_ioctl_space_info *user_dest; | 2290 | struct btrfs_ioctl_space_info __user *user_dest; |
2206 | struct btrfs_space_info *info; | 2291 | struct btrfs_space_info *info; |
2207 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | 2292 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, |
2208 | BTRFS_BLOCK_GROUP_SYSTEM, | 2293 | BTRFS_BLOCK_GROUP_SYSTEM, |
@@ -2348,12 +2433,17 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp | |||
2348 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | 2433 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; |
2349 | struct btrfs_trans_handle *trans; | 2434 | struct btrfs_trans_handle *trans; |
2350 | u64 transid; | 2435 | u64 transid; |
2436 | int ret; | ||
2351 | 2437 | ||
2352 | trans = btrfs_start_transaction(root, 0); | 2438 | trans = btrfs_start_transaction(root, 0); |
2353 | if (IS_ERR(trans)) | 2439 | if (IS_ERR(trans)) |
2354 | return PTR_ERR(trans); | 2440 | return PTR_ERR(trans); |
2355 | transid = trans->transid; | 2441 | transid = trans->transid; |
2356 | btrfs_commit_transaction_async(trans, root, 0); | 2442 | ret = btrfs_commit_transaction_async(trans, root, 0); |
2443 | if (ret) { | ||
2444 | btrfs_end_transaction(trans, root); | ||
2445 | return ret; | ||
2446 | } | ||
2357 | 2447 | ||
2358 | if (argp) | 2448 | if (argp) |
2359 | if (copy_to_user(argp, &transid, sizeof(transid))) | 2449 | if (copy_to_user(argp, &transid, sizeof(transid))) |
@@ -2388,6 +2478,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2388 | return btrfs_ioctl_setflags(file, argp); | 2478 | return btrfs_ioctl_setflags(file, argp); |
2389 | case FS_IOC_GETVERSION: | 2479 | case FS_IOC_GETVERSION: |
2390 | return btrfs_ioctl_getversion(file, argp); | 2480 | return btrfs_ioctl_getversion(file, argp); |
2481 | case FITRIM: | ||
2482 | return btrfs_ioctl_fitrim(file, argp); | ||
2391 | case BTRFS_IOC_SNAP_CREATE: | 2483 | case BTRFS_IOC_SNAP_CREATE: |
2392 | return btrfs_ioctl_snap_create(file, argp, 0); | 2484 | return btrfs_ioctl_snap_create(file, argp, 0); |
2393 | case BTRFS_IOC_SNAP_CREATE_V2: | 2485 | case BTRFS_IOC_SNAP_CREATE_V2: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 083a55477375..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
202 | INIT_LIST_HEAD(&entry->list); | 202 | INIT_LIST_HEAD(&entry->list); |
203 | INIT_LIST_HEAD(&entry->root_extent_list); | 203 | INIT_LIST_HEAD(&entry->root_extent_list); |
204 | 204 | ||
205 | trace_btrfs_ordered_extent_add(inode, entry); | ||
206 | |||
205 | spin_lock(&tree->lock); | 207 | spin_lock(&tree->lock); |
206 | node = tree_insert(&tree->tree, file_offset, | 208 | node = tree_insert(&tree->tree, file_offset, |
207 | &entry->rb_node); | 209 | &entry->rb_node); |
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
387 | struct list_head *cur; | 389 | struct list_head *cur; |
388 | struct btrfs_ordered_sum *sum; | 390 | struct btrfs_ordered_sum *sum; |
389 | 391 | ||
392 | trace_btrfs_ordered_extent_put(entry->inode, entry); | ||
393 | |||
390 | if (atomic_dec_and_test(&entry->refs)) { | 394 | if (atomic_dec_and_test(&entry->refs)) { |
391 | while (!list_empty(&entry->list)) { | 395 | while (!list_empty(&entry->list)) { |
392 | cur = entry->list.next; | 396 | cur = entry->list.next; |
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
420 | spin_lock(&root->fs_info->ordered_extent_lock); | 424 | spin_lock(&root->fs_info->ordered_extent_lock); |
421 | list_del_init(&entry->root_extent_list); | 425 | list_del_init(&entry->root_extent_list); |
422 | 426 | ||
427 | trace_btrfs_ordered_extent_remove(inode, entry); | ||
428 | |||
423 | /* | 429 | /* |
424 | * we have no more ordered extents for this inode and | 430 | * we have no more ordered extents for this inode and |
425 | * no dirty pages. We can safely remove it from the | 431 | * no dirty pages. We can safely remove it from the |
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
585 | u64 start = entry->file_offset; | 591 | u64 start = entry->file_offset; |
586 | u64 end = start + entry->len - 1; | 592 | u64 end = start + entry->len - 1; |
587 | 593 | ||
594 | trace_btrfs_ordered_extent_start(inode, entry); | ||
595 | |||
588 | /* | 596 | /* |
589 | * pages in the range can be dirty, clean or writeback. We | 597 | * pages in the range can be dirty, clean or writeback. We |
590 | * start IO on any dirty ones so the wait doesn't stall waiting | 598 | * start IO on any dirty ones so the wait doesn't stall waiting |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 31ade5802ae8..199a80134312 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1724,6 +1724,7 @@ again: | |||
1724 | 1724 | ||
1725 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1725 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1726 | old_ptr_gen); | 1726 | old_ptr_gen); |
1727 | BUG_ON(!eb); | ||
1727 | btrfs_tree_lock(eb); | 1728 | btrfs_tree_lock(eb); |
1728 | if (cow) { | 1729 | if (cow) { |
1729 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1730 | ret = btrfs_cow_block(trans, dest, eb, parent, |
@@ -2345,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | |||
2345 | root = next->root; | 2346 | root = next->root; |
2346 | BUG_ON(!root); | 2347 | BUG_ON(!root); |
2347 | 2348 | ||
2348 | /* no other choice for non-refernce counted tree */ | 2349 | /* no other choice for non-reference-counted tree */ |
2349 | if (!root->ref_cows) | 2350 | if (!root->ref_cows) |
2350 | return root; | 2351 | return root; |
2351 | 2352 | ||
@@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2513 | blocksize = btrfs_level_size(root, node->level); | 2514 | blocksize = btrfs_level_size(root, node->level); |
2514 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2515 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2515 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2516 | eb = read_tree_block(root, bytenr, blocksize, generation); |
2517 | if (!eb) { | ||
2518 | err = -EIO; | ||
2519 | goto next; | ||
2520 | } | ||
2516 | btrfs_tree_lock(eb); | 2521 | btrfs_tree_lock(eb); |
2517 | btrfs_set_lock_blocking(eb); | 2522 | btrfs_set_lock_blocking(eb); |
2518 | 2523 | ||
@@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2670 | BUG_ON(block->key_ready); | 2675 | BUG_ON(block->key_ready); |
2671 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2676 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2672 | block->key.objectid, block->key.offset); | 2677 | block->key.objectid, block->key.offset); |
2678 | BUG_ON(!eb); | ||
2673 | WARN_ON(btrfs_header_level(eb) != block->level); | 2679 | WARN_ON(btrfs_header_level(eb) != block->level); |
2674 | if (block->level == 0) | 2680 | if (block->level == 0) |
2675 | btrfs_item_key_to_cpu(eb, &block->key, 0); | 2681 | btrfs_item_key_to_cpu(eb, &block->key, 0); |
@@ -4209,7 +4215,7 @@ out: | |||
4209 | if (IS_ERR(fs_root)) | 4215 | if (IS_ERR(fs_root)) |
4210 | err = PTR_ERR(fs_root); | 4216 | err = PTR_ERR(fs_root); |
4211 | else | 4217 | else |
4212 | btrfs_orphan_cleanup(fs_root); | 4218 | err = btrfs_orphan_cleanup(fs_root); |
4213 | } | 4219 | } |
4214 | return err; | 4220 | return err; |
4215 | } | 4221 | } |
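
The relocation changes above convert a missing tree block from a silent NULL dereference into either an explicit BUG_ON or a propagated -EIO, depending on whether the caller can recover. A sketch of the recoverable variant — the reader function and structs are placeholders:

    #include <stdio.h>
    #include <errno.h>

    struct eb { int level; };

    /* Placeholder block reader: returns NULL when the read fails. */
    static struct eb *read_tree_block(unsigned long bytenr)
    {
            (void)bytenr;
            return NULL;            /* simulate an I/O failure */
    }

    static int do_relocation(unsigned long bytenr)
    {
            struct eb *eb = read_tree_block(bytenr);

            if (!eb)                /* previously a NULL deref; now EIO */
                    return -EIO;
            /* ... lock the block, CoW it, drop the reference ... */
            return 0;
    }

    int main(void)
    {
            printf("%d\n", do_relocation(4096));    /* prints -5 */
            return 0;
    }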
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6a1086e83ffc..6928bff62daa 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
88 | search_key.offset = (u64)-1; | 88 | search_key.offset = (u64)-1; |
89 | 89 | ||
90 | path = btrfs_alloc_path(); | 90 | path = btrfs_alloc_path(); |
91 | BUG_ON(!path); | 91 | if (!path) |
92 | return -ENOMEM; | ||
92 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | 93 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
93 | if (ret < 0) | 94 | if (ret < 0) |
94 | goto out; | 95 | goto out; |
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
332 | struct extent_buffer *leaf; | 333 | struct extent_buffer *leaf; |
333 | 334 | ||
334 | path = btrfs_alloc_path(); | 335 | path = btrfs_alloc_path(); |
335 | BUG_ON(!path); | 336 | if (!path) |
337 | return -ENOMEM; | ||
336 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); | 338 | ret = btrfs_search_slot(trans, root, key, path, -1, 1); |
337 | if (ret < 0) | 339 | if (ret < 0) |
338 | goto out; | 340 | goto out; |
@@ -471,3 +473,21 @@ again: | |||
471 | btrfs_free_path(path); | 473 | btrfs_free_path(path); |
472 | return 0; | 474 | return 0; |
473 | } | 475 | } |
476 | |||
477 | /* | ||
478 | * Old btrfs forgets to init root_item->flags and root_item->byte_limit | ||
479 | * for subvolumes. To work around this problem, we steal a bit from | ||
480 | * root_item->inode.flags, and use it to indicate if those fields | ||
481 | * have been properly initialized. | ||
482 | */ | ||
483 | void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) | ||
484 | { | ||
485 | u64 inode_flags = le64_to_cpu(root_item->inode.flags); | ||
486 | |||
487 | if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { | ||
488 | inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; | ||
489 | root_item->inode.flags = cpu_to_le64(inode_flags); | ||
490 | root_item->flags = 0; | ||
491 | root_item->byte_limit = 0; | ||
492 | } | ||
493 | } | ||
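
The workaround generalizes to any on-disk format that grows new fields: a reserved bit in a field old code always wrote records whether the new fields were ever initialized, and they are repaired at most once on first use. A stripped-down sketch (endianness conversion omitted, names invented):

    #include <stdint.h>
    #include <assert.h>

    #define ITEM_INIT (1ull << 31)  /* stolen "fields are valid" bit */

    struct root_item {
            uint64_t inode_flags;   /* always written by old code  */
            uint64_t flags;         /* newer field, may be garbage */
            uint64_t byte_limit;    /* newer field, may be garbage */
    };

    static void check_and_init(struct root_item *it)
    {
            if (!(it->inode_flags & ITEM_INIT)) {
                    it->inode_flags |= ITEM_INIT;
                    it->flags = 0;          /* safe defaults */
                    it->byte_limit = 0;
            }
    }

    int main(void)
    {
            struct root_item old = { 0, 0xdeadbeef, 0xdeadbeef };

            check_and_init(&old);   /* repaired exactly once */
            assert(old.flags == 0 && old.byte_limit == 0);
            return 0;
    }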
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d39a9895d932..0ac712efcdf2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -52,6 +52,9 @@ | |||
52 | #include "export.h" | 52 | #include "export.h" |
53 | #include "compression.h" | 53 | #include "compression.h" |
54 | 54 | ||
55 | #define CREATE_TRACE_POINTS | ||
56 | #include <trace/events/btrfs.h> | ||
57 | |||
55 | static const struct super_operations btrfs_super_ops; | 58 | static const struct super_operations btrfs_super_ops; |
56 | 59 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | 60 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, |
@@ -156,7 +159,7 @@ enum { | |||
156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, | 159 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 160 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 161 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
159 | Opt_enospc_debug, Opt_err, | 162 | Opt_enospc_debug, Opt_subvolrootid, Opt_err, |
160 | }; | 163 | }; |
161 | 164 | ||
162 | static match_table_t tokens = { | 165 | static match_table_t tokens = { |
@@ -186,6 +189,7 @@ static match_table_t tokens = { | |||
186 | {Opt_clear_cache, "clear_cache"}, | 189 | {Opt_clear_cache, "clear_cache"}, |
187 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, | 190 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, |
188 | {Opt_enospc_debug, "enospc_debug"}, | 191 | {Opt_enospc_debug, "enospc_debug"}, |
192 | {Opt_subvolrootid, "subvolrootid=%d"}, | ||
189 | {Opt_err, NULL}, | 193 | {Opt_err, NULL}, |
190 | }; | 194 | }; |
191 | 195 | ||
@@ -229,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
229 | break; | 233 | break; |
230 | case Opt_subvol: | 234 | case Opt_subvol: |
231 | case Opt_subvolid: | 235 | case Opt_subvolid: |
236 | case Opt_subvolrootid: | ||
232 | case Opt_device: | 237 | case Opt_device: |
233 | /* | 238 | /* |
234 | * These are parsed by btrfs_parse_early_options | 239 | * These are parsed by btrfs_parse_early_options |
@@ -385,7 +390,7 @@ out: | |||
385 | */ | 390 | */ |
386 | static int btrfs_parse_early_options(const char *options, fmode_t flags, | 391 | static int btrfs_parse_early_options(const char *options, fmode_t flags, |
387 | void *holder, char **subvol_name, u64 *subvol_objectid, | 392 | void *holder, char **subvol_name, u64 *subvol_objectid, |
388 | struct btrfs_fs_devices **fs_devices) | 393 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) |
389 | { | 394 | { |
390 | substring_t args[MAX_OPT_ARGS]; | 395 | substring_t args[MAX_OPT_ARGS]; |
391 | char *opts, *orig, *p; | 396 | char *opts, *orig, *p; |
@@ -426,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
426 | *subvol_objectid = intarg; | 431 | *subvol_objectid = intarg; |
427 | } | 432 | } |
428 | break; | 433 | break; |
434 | case Opt_subvolrootid: | ||
435 | intarg = 0; | ||
436 | error = match_int(&args[0], &intarg); | ||
437 | if (!error) { | ||
438 | /* we want the original fs_tree */ | ||
439 | if (!intarg) | ||
440 | *subvol_rootid = | ||
441 | BTRFS_FS_TREE_OBJECTID; | ||
442 | else | ||
443 | *subvol_rootid = intarg; | ||
444 | } | ||
445 | break; | ||
429 | case Opt_device: | 446 | case Opt_device: |
430 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 447 | error = btrfs_scan_one_device(match_strdup(&args[0]), |
431 | flags, holder, fs_devices); | 448 | flags, holder, fs_devices); |
@@ -620,6 +637,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
620 | struct btrfs_root *root = btrfs_sb(sb); | 637 | struct btrfs_root *root = btrfs_sb(sb); |
621 | int ret; | 638 | int ret; |
622 | 639 | ||
640 | trace_btrfs_sync_fs(wait); | ||
641 | |||
623 | if (!wait) { | 642 | if (!wait) { |
624 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 643 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
625 | return 0; | 644 | return 0; |
@@ -639,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
639 | { | 658 | { |
640 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); | 659 | struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); |
641 | struct btrfs_fs_info *info = root->fs_info; | 660 | struct btrfs_fs_info *info = root->fs_info; |
661 | char *compress_type; | ||
642 | 662 | ||
643 | if (btrfs_test_opt(root, DEGRADED)) | 663 | if (btrfs_test_opt(root, DEGRADED)) |
644 | seq_puts(seq, ",degraded"); | 664 | seq_puts(seq, ",degraded"); |
@@ -657,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
657 | if (info->thread_pool_size != min_t(unsigned long, | 677 | if (info->thread_pool_size != min_t(unsigned long, |
658 | num_online_cpus() + 2, 8)) | 678 | num_online_cpus() + 2, 8)) |
659 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 679 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
660 | if (btrfs_test_opt(root, COMPRESS)) | 680 | if (btrfs_test_opt(root, COMPRESS)) { |
661 | seq_puts(seq, ",compress"); | 681 | if (info->compress_type == BTRFS_COMPRESS_ZLIB) |
682 | compress_type = "zlib"; | ||
683 | else | ||
684 | compress_type = "lzo"; | ||
685 | if (btrfs_test_opt(root, FORCE_COMPRESS)) | ||
686 | seq_printf(seq, ",compress-force=%s", compress_type); | ||
687 | else | ||
688 | seq_printf(seq, ",compress=%s", compress_type); | ||
689 | } | ||
662 | if (btrfs_test_opt(root, NOSSD)) | 690 | if (btrfs_test_opt(root, NOSSD)) |
663 | seq_puts(seq, ",nossd"); | 691 | seq_puts(seq, ",nossd"); |
664 | if (btrfs_test_opt(root, SSD_SPREAD)) | 692 | if (btrfs_test_opt(root, SSD_SPREAD)) |
@@ -673,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
673 | seq_puts(seq, ",discard"); | 701 | seq_puts(seq, ",discard"); |
674 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) | 702 | if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) |
675 | seq_puts(seq, ",noacl"); | 703 | seq_puts(seq, ",noacl"); |
704 | if (btrfs_test_opt(root, SPACE_CACHE)) | ||
705 | seq_puts(seq, ",space_cache"); | ||
706 | if (btrfs_test_opt(root, CLEAR_CACHE)) | ||
707 | seq_puts(seq, ",clear_cache"); | ||
708 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | ||
709 | seq_puts(seq, ",user_subvol_rm_allowed"); | ||
676 | return 0; | 710 | return 0; |
677 | } | 711 | } |
678 | 712 | ||
@@ -716,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
716 | fmode_t mode = FMODE_READ; | 750 | fmode_t mode = FMODE_READ; |
717 | char *subvol_name = NULL; | 751 | char *subvol_name = NULL; |
718 | u64 subvol_objectid = 0; | 752 | u64 subvol_objectid = 0; |
753 | u64 subvol_rootid = 0; | ||
719 | int error = 0; | 754 | int error = 0; |
720 | 755 | ||
721 | if (!(flags & MS_RDONLY)) | 756 | if (!(flags & MS_RDONLY)) |
@@ -723,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
723 | 758 | ||
724 | error = btrfs_parse_early_options(data, mode, fs_type, | 759 | error = btrfs_parse_early_options(data, mode, fs_type, |
725 | &subvol_name, &subvol_objectid, | 760 | &subvol_name, &subvol_objectid, |
726 | &fs_devices); | 761 | &subvol_rootid, &fs_devices); |
727 | if (error) | 762 | if (error) |
728 | return ERR_PTR(error); | 763 | return ERR_PTR(error); |
729 | 764 | ||
@@ -787,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
787 | s->s_flags |= MS_ACTIVE; | 822 | s->s_flags |= MS_ACTIVE; |
788 | } | 823 | } |
789 | 824 | ||
790 | root = get_default_root(s, subvol_objectid); | ||
791 | if (IS_ERR(root)) { | ||
792 | error = PTR_ERR(root); | ||
793 | deactivate_locked_super(s); | ||
794 | goto error_free_subvol_name; | ||
795 | } | ||
796 | /* if they gave us a subvolume name bind mount into that */ | 825 | /* if they gave us a subvolume name bind mount into that */ |
797 | if (strcmp(subvol_name, ".")) { | 826 | if (strcmp(subvol_name, ".")) { |
798 | struct dentry *new_root; | 827 | struct dentry *new_root; |
828 | |||
829 | root = get_default_root(s, subvol_rootid); | ||
830 | if (IS_ERR(root)) { | ||
831 | error = PTR_ERR(root); | ||
832 | deactivate_locked_super(s); | ||
833 | goto error_free_subvol_name; | ||
834 | } | ||
835 | |||
799 | mutex_lock(&root->d_inode->i_mutex); | 836 | mutex_lock(&root->d_inode->i_mutex); |
800 | new_root = lookup_one_len(subvol_name, root, | 837 | new_root = lookup_one_len(subvol_name, root, |
801 | strlen(subvol_name)); | 838 | strlen(subvol_name)); |
@@ -816,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
816 | } | 853 | } |
817 | dput(root); | 854 | dput(root); |
818 | root = new_root; | 855 | root = new_root; |
856 | } else { | ||
857 | root = get_default_root(s, subvol_objectid); | ||
858 | if (IS_ERR(root)) { | ||
859 | error = PTR_ERR(root); | ||
860 | deactivate_locked_super(s); | ||
861 | goto error_free_subvol_name; | ||
862 | } | ||
819 | } | 863 | } |
820 | 864 | ||
821 | kfree(subvol_name); | 865 | kfree(subvol_name); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3d73c8d93bbb..c571734d5e5a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -32,10 +32,8 @@ | |||
32 | 32 | ||
33 | static noinline void put_transaction(struct btrfs_transaction *transaction) | 33 | static noinline void put_transaction(struct btrfs_transaction *transaction) |
34 | { | 34 | { |
35 | WARN_ON(transaction->use_count == 0); | 35 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
36 | transaction->use_count--; | 36 | if (atomic_dec_and_test(&transaction->use_count)) { |
37 | if (transaction->use_count == 0) { | ||
38 | list_del_init(&transaction->list); | ||
39 | memset(transaction, 0, sizeof(*transaction)); | 37 | memset(transaction, 0, sizeof(*transaction)); |
40 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 38 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
41 | } | 39 | } |
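
Switching use_count to atomic_t is what lets the later hunks drop trans_mutex around the final put: the decrement and the zero test become one indivisible step. The equivalent pattern in portable C11, with a placeholder payload and free hook:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct transaction {
            atomic_int use_count;
            /* ... payload ... */
    };

    static void get_transaction(struct transaction *t)
    {
            atomic_fetch_add(&t->use_count, 1);
    }

    static void put_transaction(struct transaction *t)
    {
            /* fetch_sub returns the prior value, so exactly one caller
             * observes the 1 -> 0 transition and owns the free. */
            if (atomic_fetch_sub(&t->use_count, 1) == 1)
                    free(t);
    }

Note the patch also moves list_del_init() out of put_transaction() and into the commit path, since list manipulation still needs the locking that the refcount itself no longer takes.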
@@ -57,16 +55,17 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
57 | if (!cur_trans) { | 55 | if (!cur_trans) { |
58 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 56 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, |
59 | GFP_NOFS); | 57 | GFP_NOFS); |
60 | BUG_ON(!cur_trans); | 58 | if (!cur_trans) |
59 | return -ENOMEM; | ||
61 | root->fs_info->generation++; | 60 | root->fs_info->generation++; |
62 | cur_trans->num_writers = 1; | 61 | atomic_set(&cur_trans->num_writers, 1); |
63 | cur_trans->num_joined = 0; | 62 | cur_trans->num_joined = 0; |
64 | cur_trans->transid = root->fs_info->generation; | 63 | cur_trans->transid = root->fs_info->generation; |
65 | init_waitqueue_head(&cur_trans->writer_wait); | 64 | init_waitqueue_head(&cur_trans->writer_wait); |
66 | init_waitqueue_head(&cur_trans->commit_wait); | 65 | init_waitqueue_head(&cur_trans->commit_wait); |
67 | cur_trans->in_commit = 0; | 66 | cur_trans->in_commit = 0; |
68 | cur_trans->blocked = 0; | 67 | cur_trans->blocked = 0; |
69 | cur_trans->use_count = 1; | 68 | atomic_set(&cur_trans->use_count, 1); |
70 | cur_trans->commit_done = 0; | 69 | cur_trans->commit_done = 0; |
71 | cur_trans->start_time = get_seconds(); | 70 | cur_trans->start_time = get_seconds(); |
72 | 71 | ||
@@ -87,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
87 | root->fs_info->running_transaction = cur_trans; | 86 | root->fs_info->running_transaction = cur_trans; |
88 | spin_unlock(&root->fs_info->new_trans_lock); | 87 | spin_unlock(&root->fs_info->new_trans_lock); |
89 | } else { | 88 | } else { |
90 | cur_trans->num_writers++; | 89 | atomic_inc(&cur_trans->num_writers); |
91 | cur_trans->num_joined++; | 90 | cur_trans->num_joined++; |
92 | } | 91 | } |
93 | 92 | ||
@@ -144,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root) | |||
144 | cur_trans = root->fs_info->running_transaction; | 143 | cur_trans = root->fs_info->running_transaction; |
145 | if (cur_trans && cur_trans->blocked) { | 144 | if (cur_trans && cur_trans->blocked) { |
146 | DEFINE_WAIT(wait); | 145 | DEFINE_WAIT(wait); |
147 | cur_trans->use_count++; | 146 | atomic_inc(&cur_trans->use_count); |
148 | while (1) { | 147 | while (1) { |
149 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
150 | TASK_UNINTERRUPTIBLE); | 149 | TASK_UNINTERRUPTIBLE); |
@@ -180,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
180 | { | 179 | { |
181 | struct btrfs_trans_handle *h; | 180 | struct btrfs_trans_handle *h; |
182 | struct btrfs_transaction *cur_trans; | 181 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | ||
183 | int ret; | 183 | int ret; |
184 | 184 | ||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
@@ -195,10 +195,15 @@ again: | |||
195 | wait_current_trans(root); | 195 | wait_current_trans(root); |
196 | 196 | ||
197 | ret = join_transaction(root); | 197 | ret = join_transaction(root); |
198 | BUG_ON(ret); | 198 | if (ret < 0) { |
199 | kmem_cache_free(btrfs_trans_handle_cachep, h); | ||
200 | if (type != TRANS_JOIN_NOLOCK) | ||
201 | mutex_unlock(&root->fs_info->trans_mutex); | ||
202 | return ERR_PTR(ret); | ||
203 | } | ||
199 | 204 | ||
200 | cur_trans = root->fs_info->running_transaction; | 205 | cur_trans = root->fs_info->running_transaction; |
201 | cur_trans->use_count++; | 206 | atomic_inc(&cur_trans->use_count); |
202 | if (type != TRANS_JOIN_NOLOCK) | 207 | if (type != TRANS_JOIN_NOLOCK) |
203 | mutex_unlock(&root->fs_info->trans_mutex); | 208 | mutex_unlock(&root->fs_info->trans_mutex); |
204 | 209 | ||
@@ -218,10 +223,18 @@ again: | |||
218 | 223 | ||
219 | if (num_items > 0) { | 224 | if (num_items > 0) { |
220 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | 225 | ret = btrfs_trans_reserve_metadata(h, root, num_items); |
221 | if (ret == -EAGAIN) { | 226 | if (ret == -EAGAIN && !retries) { |
227 | retries++; | ||
222 | btrfs_commit_transaction(h, root); | 228 | btrfs_commit_transaction(h, root); |
223 | goto again; | 229 | goto again; |
230 | } else if (ret == -EAGAIN) { | ||
231 | /* | ||
232 | * We have already retried and got EAGAIN, so we really | ||
233 | * are out of space; set ret to -ENOSPC. | ||
234 | */ | ||
235 | ret = -ENOSPC; | ||
224 | } | 236 | } |
237 | |||
225 | if (ret < 0) { | 238 | if (ret < 0) { |
226 | btrfs_end_transaction(h, root); | 239 | btrfs_end_transaction(h, root); |
227 | return ERR_PTR(ret); | 240 | return ERR_PTR(ret); |
@@ -321,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
321 | goto out_unlock; /* nothing committing|committed */ | 334 | goto out_unlock; /* nothing committing|committed */ |
322 | } | 335 | } |
323 | 336 | ||
324 | cur_trans->use_count++; | 337 | atomic_inc(&cur_trans->use_count); |
325 | mutex_unlock(&root->fs_info->trans_mutex); | 338 | mutex_unlock(&root->fs_info->trans_mutex); |
326 | 339 | ||
327 | wait_for_commit(root, cur_trans); | 340 | wait_for_commit(root, cur_trans); |
@@ -451,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
451 | wake_up_process(info->transaction_kthread); | 464 | wake_up_process(info->transaction_kthread); |
452 | } | 465 | } |
453 | 466 | ||
454 | if (lock) | ||
455 | mutex_lock(&info->trans_mutex); | ||
456 | WARN_ON(cur_trans != info->running_transaction); | 467 | WARN_ON(cur_trans != info->running_transaction); |
457 | WARN_ON(cur_trans->num_writers < 1); | 468 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
458 | cur_trans->num_writers--; | 469 | atomic_dec(&cur_trans->num_writers); |
459 | 470 | ||
460 | smp_mb(); | 471 | smp_mb(); |
461 | if (waitqueue_active(&cur_trans->writer_wait)) | 472 | if (waitqueue_active(&cur_trans->writer_wait)) |
462 | wake_up(&cur_trans->writer_wait); | 473 | wake_up(&cur_trans->writer_wait); |
463 | put_transaction(cur_trans); | 474 | put_transaction(cur_trans); |
464 | if (lock) | ||
465 | mutex_unlock(&info->trans_mutex); | ||
466 | 475 | ||
467 | if (current->journal_info == trans) | 476 | if (current->journal_info == trans) |
468 | current->journal_info = NULL; | 477 | current->journal_info = NULL; |
@@ -970,6 +979,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
970 | record_root_in_trans(trans, root); | 979 | record_root_in_trans(trans, root); |
971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 980 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 981 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
982 | btrfs_check_and_init_root_item(new_root_item); | ||
973 | 983 | ||
974 | root_flags = btrfs_root_flags(new_root_item); | 984 | root_flags = btrfs_root_flags(new_root_item); |
975 | if (pending->readonly) | 985 | if (pending->readonly) |
@@ -1156,7 +1166,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1156 | struct btrfs_transaction *cur_trans; | 1166 | struct btrfs_transaction *cur_trans; |
1157 | 1167 | ||
1158 | ac = kmalloc(sizeof(*ac), GFP_NOFS); | 1168 | ac = kmalloc(sizeof(*ac), GFP_NOFS); |
1159 | BUG_ON(!ac); | 1169 | if (!ac) |
1170 | return -ENOMEM; | ||
1160 | 1171 | ||
1161 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1172 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1162 | ac->root = root; | 1173 | ac->root = root; |
@@ -1170,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1170 | /* take transaction reference */ | 1181 | /* take transaction reference */ |
1171 | mutex_lock(&root->fs_info->trans_mutex); | 1182 | mutex_lock(&root->fs_info->trans_mutex); |
1172 | cur_trans = trans->transaction; | 1183 | cur_trans = trans->transaction; |
1173 | cur_trans->use_count++; | 1184 | atomic_inc(&cur_trans->use_count); |
1174 | mutex_unlock(&root->fs_info->trans_mutex); | 1185 | mutex_unlock(&root->fs_info->trans_mutex); |
1175 | 1186 | ||
1176 | btrfs_end_transaction(trans, root); | 1187 | btrfs_end_transaction(trans, root); |
@@ -1229,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1229 | 1240 | ||
1230 | mutex_lock(&root->fs_info->trans_mutex); | 1241 | mutex_lock(&root->fs_info->trans_mutex); |
1231 | if (cur_trans->in_commit) { | 1242 | if (cur_trans->in_commit) { |
1232 | cur_trans->use_count++; | 1243 | atomic_inc(&cur_trans->use_count); |
1233 | mutex_unlock(&root->fs_info->trans_mutex); | 1244 | mutex_unlock(&root->fs_info->trans_mutex); |
1234 | btrfs_end_transaction(trans, root); | 1245 | btrfs_end_transaction(trans, root); |
1235 | 1246 | ||
@@ -1251,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1251 | prev_trans = list_entry(cur_trans->list.prev, | 1262 | prev_trans = list_entry(cur_trans->list.prev, |
1252 | struct btrfs_transaction, list); | 1263 | struct btrfs_transaction, list); |
1253 | if (!prev_trans->commit_done) { | 1264 | if (!prev_trans->commit_done) { |
1254 | prev_trans->use_count++; | 1265 | atomic_inc(&prev_trans->use_count); |
1255 | mutex_unlock(&root->fs_info->trans_mutex); | 1266 | mutex_unlock(&root->fs_info->trans_mutex); |
1256 | 1267 | ||
1257 | wait_for_commit(root, prev_trans); | 1268 | wait_for_commit(root, prev_trans); |
@@ -1292,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1292 | TASK_UNINTERRUPTIBLE); | 1303 | TASK_UNINTERRUPTIBLE); |
1293 | 1304 | ||
1294 | smp_mb(); | 1305 | smp_mb(); |
1295 | if (cur_trans->num_writers > 1) | 1306 | if (atomic_read(&cur_trans->num_writers) > 1) |
1296 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1307 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1297 | else if (should_grow) | 1308 | else if (should_grow) |
1298 | schedule_timeout(1); | 1309 | schedule_timeout(1); |
1299 | 1310 | ||
1300 | mutex_lock(&root->fs_info->trans_mutex); | 1311 | mutex_lock(&root->fs_info->trans_mutex); |
1301 | finish_wait(&cur_trans->writer_wait, &wait); | 1312 | finish_wait(&cur_trans->writer_wait, &wait); |
1302 | } while (cur_trans->num_writers > 1 || | 1313 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1303 | (should_grow && cur_trans->num_joined != joined)); | 1314 | (should_grow && cur_trans->num_joined != joined)); |
1304 | 1315 | ||
1305 | ret = create_pending_snapshots(trans, root->fs_info); | 1316 | ret = create_pending_snapshots(trans, root->fs_info); |
@@ -1386,9 +1397,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1386 | 1397 | ||
1387 | wake_up(&cur_trans->commit_wait); | 1398 | wake_up(&cur_trans->commit_wait); |
1388 | 1399 | ||
1400 | list_del_init(&cur_trans->list); | ||
1389 | put_transaction(cur_trans); | 1401 | put_transaction(cur_trans); |
1390 | put_transaction(cur_trans); | 1402 | put_transaction(cur_trans); |
1391 | 1403 | ||
1404 | trace_btrfs_transaction_commit(root); | ||
1405 | |||
1392 | mutex_unlock(&root->fs_info->trans_mutex); | 1406 | mutex_unlock(&root->fs_info->trans_mutex); |
1393 | 1407 | ||
1394 | if (current->journal_info == trans) | 1408 | if (current->journal_info == trans) |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 229a594cacd5..e441acc6c584 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -27,11 +27,11 @@ struct btrfs_transaction { | |||
27 | * total writers in this transaction; it must be zero before the | 27 | * total writers in this transaction; it must be zero before the
28 | * transaction can end | 28 | * transaction can end
29 | */ | 29 | */ |
30 | unsigned long num_writers; | 30 | atomic_t num_writers; |
31 | 31 | ||
32 | unsigned long num_joined; | 32 | unsigned long num_joined; |
33 | int in_commit; | 33 | int in_commit; |
34 | int use_count; | 34 | atomic_t use_count; |
35 | int commit_done; | 35 | int commit_done; |
36 | int blocked; | 36 | int blocked; |
37 | struct list_head list; | 37 | struct list_head list; |
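[Editor's note] The transaction.c and transaction.h hunks above convert num_writers and use_count from plain integers guarded by trans_mutex into atomic_t, so they can be incremented and decremented without holding the mutex. A minimal sketch of the resulting get/put pattern, using kernel atomic_t semantics; the names are illustrative, not the btrfs API:

	#include <linux/atomic.h>
	#include <linux/slab.h>

	struct example_trans {
		atomic_t use_count;	/* may be touched without trans_mutex held */
	};

	static void example_get(struct example_trans *t)
	{
		atomic_inc(&t->use_count);	/* replaces the racy t->use_count++ */
	}

	static void example_put(struct example_trans *t)
	{
		/* free on the transition to zero, as put_transaction() does */
		if (atomic_dec_and_test(&t->use_count))
			kfree(t);
	}

The atomic read in the writer wait loop (atomic_read(&cur_trans->num_writers)) follows the same reasoning: it is checked after finish_wait without the mutex held.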
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a4bbb854dfd2..c50271ad3157 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct inode *dir; | 799 | struct inode *dir; |
800 | int ret; | 800 | int ret; |
801 | struct btrfs_inode_ref *ref; | 801 | struct btrfs_inode_ref *ref; |
802 | struct btrfs_dir_item *di; | ||
803 | struct inode *inode; | 802 | struct inode *inode; |
804 | char *name; | 803 | char *name; |
805 | int namelen; | 804 | int namelen; |
806 | unsigned long ref_ptr; | 805 | unsigned long ref_ptr; |
807 | unsigned long ref_end; | 806 | unsigned long ref_end; |
807 | int search_done = 0; | ||
808 | 808 | ||
809 | /* | 809 | /* |
810 | * it is possible that we didn't log all the parent directories | 810 | * it is possible that we didn't log all the parent directories |
@@ -845,7 +845,10 @@ again: | |||
845 | * existing back reference, and we don't want to create | 845 | * existing back reference, and we don't want to create |
846 | * dangling pointers in the directory. | 846 | * dangling pointers in the directory. |
847 | */ | 847 | */ |
848 | conflict_again: | 848 | |
849 | if (search_done) | ||
850 | goto insert; | ||
851 | |||
849 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | 852 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); |
850 | if (ret == 0) { | 853 | if (ret == 0) { |
851 | char *victim_name; | 854 | char *victim_name; |
@@ -886,37 +889,21 @@ conflict_again: | |||
886 | ret = btrfs_unlink_inode(trans, root, dir, | 889 | ret = btrfs_unlink_inode(trans, root, dir, |
887 | inode, victim_name, | 890 | inode, victim_name, |
888 | victim_name_len); | 891 | victim_name_len); |
889 | kfree(victim_name); | ||
890 | btrfs_release_path(root, path); | ||
891 | goto conflict_again; | ||
892 | } | 892 | } |
893 | kfree(victim_name); | 893 | kfree(victim_name); |
894 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 894 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
895 | } | 895 | } |
896 | BUG_ON(ret); | 896 | BUG_ON(ret); |
897 | } | ||
898 | btrfs_release_path(root, path); | ||
899 | |||
900 | /* look for a conflicting sequence number */ | ||
901 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
902 | btrfs_inode_ref_index(eb, ref), | ||
903 | name, namelen, 0); | ||
904 | if (di && !IS_ERR(di)) { | ||
905 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
906 | BUG_ON(ret); | ||
907 | } | ||
908 | btrfs_release_path(root, path); | ||
909 | 897 | ||
910 | 898 | /* | |
911 | /* look for a conflicting name */ | 899 | * NOTE: we have searched the root tree and checked the
912 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 900 | * corresponding ref; it does not need to be checked again.
913 | name, namelen, 0); | 901 | */ |
914 | if (di && !IS_ERR(di)) { | 902 | search_done = 1; |
915 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
916 | BUG_ON(ret); | ||
917 | } | 903 | } |
918 | btrfs_release_path(root, path); | 904 | btrfs_release_path(root, path); |
919 | 905 | ||
906 | insert: | ||
920 | /* insert our name */ | 907 | /* insert our name */ |
921 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 908 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
922 | btrfs_inode_ref_index(eb, ref)); | 909 | btrfs_inode_ref_index(eb, ref)); |
@@ -1286,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1286 | ptr_end = ptr + item_size; | 1273 | ptr_end = ptr + item_size; |
1287 | while (ptr < ptr_end) { | 1274 | while (ptr < ptr_end) { |
1288 | di = (struct btrfs_dir_item *)ptr; | 1275 | di = (struct btrfs_dir_item *)ptr; |
1276 | if (verify_dir_item(root, eb, di)) | ||
1277 | return -EIO; | ||
1289 | name_len = btrfs_dir_name_len(eb, di); | 1278 | name_len = btrfs_dir_name_len(eb, di); |
1290 | ret = replay_one_name(trans, root, path, eb, di, key); | 1279 | ret = replay_one_name(trans, root, path, eb, di, key); |
1291 | BUG_ON(ret); | 1280 | BUG_ON(ret); |
@@ -1412,6 +1401,11 @@ again: | |||
1412 | ptr_end = ptr + item_size; | 1401 | ptr_end = ptr + item_size; |
1413 | while (ptr < ptr_end) { | 1402 | while (ptr < ptr_end) { |
1414 | di = (struct btrfs_dir_item *)ptr; | 1403 | di = (struct btrfs_dir_item *)ptr; |
1404 | if (verify_dir_item(root, eb, di)) { | ||
1405 | ret = -EIO; | ||
1406 | goto out; | ||
1407 | } | ||
1408 | |||
1415 | name_len = btrfs_dir_name_len(eb, di); | 1409 | name_len = btrfs_dir_name_len(eb, di); |
1416 | name = kmalloc(name_len, GFP_NOFS); | 1410 | name = kmalloc(name_len, GFP_NOFS); |
1417 | if (!name) { | 1411 | if (!name) { |
@@ -1821,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1821 | int orig_level; | 1815 | int orig_level; |
1822 | 1816 | ||
1823 | path = btrfs_alloc_path(); | 1817 | path = btrfs_alloc_path(); |
1824 | BUG_ON(!path); | 1818 | if (!path) |
1819 | return -ENOMEM; | ||
1825 | 1820 | ||
1826 | level = btrfs_header_level(log->node); | 1821 | level = btrfs_header_level(log->node); |
1827 | orig_level = level; | 1822 | orig_level = level; |
@@ -3107,9 +3102,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3107 | .stage = 0, | 3102 | .stage = 0, |
3108 | }; | 3103 | }; |
3109 | 3104 | ||
3110 | fs_info->log_root_recovering = 1; | ||
3111 | path = btrfs_alloc_path(); | 3105 | path = btrfs_alloc_path(); |
3112 | BUG_ON(!path); | 3106 | if (!path) |
3107 | return -ENOMEM; | ||
3108 | |||
3109 | fs_info->log_root_recovering = 1; | ||
3113 | 3110 | ||
3114 | trans = btrfs_start_transaction(fs_info->tree_root, 0); | 3111 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3115 | BUG_ON(IS_ERR(trans)); | 3112 | BUG_ON(IS_ERR(trans)); |
@@ -3117,7 +3114,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3117 | wc.trans = trans; | 3114 | wc.trans = trans; |
3118 | wc.pin = 1; | 3115 | wc.pin = 1; |
3119 | 3116 | ||
3120 | walk_log_tree(trans, log_root_tree, &wc); | 3117 | ret = walk_log_tree(trans, log_root_tree, &wc); |
3118 | BUG_ON(ret); | ||
3121 | 3119 | ||
3122 | again: | 3120 | again: |
3123 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 3121 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
@@ -3141,8 +3139,7 @@ again: | |||
3141 | 3139 | ||
3142 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 3140 | log = btrfs_read_fs_root_no_radix(log_root_tree, |
3143 | &found_key); | 3141 | &found_key); |
3144 | BUG_ON(!log); | 3142 | BUG_ON(IS_ERR(log)); |
3145 | |||
3146 | 3143 | ||
3147 | tmp_key.objectid = found_key.offset; | 3144 | tmp_key.objectid = found_key.offset; |
3148 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; | 3145 | tmp_key.type = BTRFS_ROOT_ITEM_KEY; |
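[Editor's note] The last few hunks replace crash-on-failure BUG_ON() calls with real error handling: btrfs_alloc_path() failures in walk_log_tree() and btrfs_recover_log_trees() now return -ENOMEM, the log_root_recovering flag is only set once the allocation has succeeded, and walk_log_tree()'s return value is finally checked. The general shape, as a sketch against the btrfs types used above:

	static int recover_sketch(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path)
			return -ENOMEM;		/* fail before touching fs_info */

		fs_info->log_root_recovering = 1;	/* set only once we can proceed */
		/* ... start transaction, walk the log tree, check returns ... */
		return 0;
	}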
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb81ee40..309a57b9fc85 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -33,17 +33,6 @@ | |||
33 | #include "volumes.h" | 33 | #include "volumes.h" |
34 | #include "async-thread.h" | 34 | #include "async-thread.h" |
35 | 35 | ||
36 | struct map_lookup { | ||
37 | u64 type; | ||
38 | int io_align; | ||
39 | int io_width; | ||
40 | int stripe_len; | ||
41 | int sector_size; | ||
42 | int num_stripes; | ||
43 | int sub_stripes; | ||
44 | struct btrfs_bio_stripe stripes[]; | ||
45 | }; | ||
46 | |||
47 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
48 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
49 | struct btrfs_device *device); | 38 | struct btrfs_device *device); |
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
162 | struct bio *cur; | 151 | struct bio *cur; |
163 | int again = 0; | 152 | int again = 0; |
164 | unsigned long num_run; | 153 | unsigned long num_run; |
165 | unsigned long num_sync_run; | ||
166 | unsigned long batch_run = 0; | 154 | unsigned long batch_run = 0; |
167 | unsigned long limit; | 155 | unsigned long limit; |
168 | unsigned long last_waited = 0; | 156 | unsigned long last_waited = 0; |
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
173 | limit = btrfs_async_submit_limit(fs_info); | 161 | limit = btrfs_async_submit_limit(fs_info); |
174 | limit = limit * 2 / 3; | 162 | limit = limit * 2 / 3; |
175 | 163 | ||
176 | /* we want to make sure that every time we switch from the sync | ||
177 | * list to the normal list, we unplug | ||
178 | */ | ||
179 | num_sync_run = 0; | ||
180 | |||
181 | loop: | 164 | loop: |
182 | spin_lock(&device->io_lock); | 165 | spin_lock(&device->io_lock); |
183 | 166 | ||
@@ -223,15 +206,6 @@ loop_lock: | |||
223 | 206 | ||
224 | spin_unlock(&device->io_lock); | 207 | spin_unlock(&device->io_lock); |
225 | 208 | ||
226 | /* | ||
227 | * if we're doing the regular priority list, make sure we unplug | ||
228 | * for any high prio bios we've sent down | ||
229 | */ | ||
230 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
231 | num_sync_run = 0; | ||
232 | blk_run_backing_dev(bdi, NULL); | ||
233 | } | ||
234 | |||
235 | while (pending) { | 209 | while (pending) { |
236 | 210 | ||
237 | rmb(); | 211 | rmb(); |
@@ -259,19 +233,11 @@ loop_lock: | |||
259 | 233 | ||
260 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 234 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
261 | 235 | ||
262 | if (cur->bi_rw & REQ_SYNC) | ||
263 | num_sync_run++; | ||
264 | |||
265 | submit_bio(cur->bi_rw, cur); | 236 | submit_bio(cur->bi_rw, cur); |
266 | num_run++; | 237 | num_run++; |
267 | batch_run++; | 238 | batch_run++; |
268 | if (need_resched()) { | 239 | if (need_resched()) |
269 | if (num_sync_run) { | ||
270 | blk_run_backing_dev(bdi, NULL); | ||
271 | num_sync_run = 0; | ||
272 | } | ||
273 | cond_resched(); | 240 | cond_resched(); |
274 | } | ||
275 | 241 | ||
276 | /* | 242 | /* |
277 | * we made progress, there is more work to do and the bdi | 243 | * we made progress, there is more work to do and the bdi |
@@ -304,13 +270,8 @@ loop_lock: | |||
304 | * against it before looping | 270 | * against it before looping |
305 | */ | 271 | */ |
306 | last_waited = ioc->last_waited; | 272 | last_waited = ioc->last_waited; |
307 | if (need_resched()) { | 273 | if (need_resched()) |
308 | if (num_sync_run) { | ||
309 | blk_run_backing_dev(bdi, NULL); | ||
310 | num_sync_run = 0; | ||
311 | } | ||
312 | cond_resched(); | 274 | cond_resched(); |
313 | } | ||
314 | continue; | 275 | continue; |
315 | } | 276 | } |
316 | spin_lock(&device->io_lock); | 277 | spin_lock(&device->io_lock); |
@@ -323,22 +284,6 @@ loop_lock: | |||
323 | } | 284 | } |
324 | } | 285 | } |
325 | 286 | ||
326 | if (num_sync_run) { | ||
327 | num_sync_run = 0; | ||
328 | blk_run_backing_dev(bdi, NULL); | ||
329 | } | ||
330 | /* | ||
331 | * IO has already been through a long path to get here. Checksumming, | ||
332 | * async helper threads, perhaps compression. We've done a pretty | ||
333 | * good job of collecting a batch of IO and should just unplug | ||
334 | * the device right away. | ||
335 | * | ||
336 | * This will help anyone who is waiting on the IO, they might have | ||
337 | * already unplugged, but managed to do so before the bio they | ||
338 | * cared about found its way down here. | ||
339 | */ | ||
340 | blk_run_backing_dev(bdi, NULL); | ||
341 | |||
342 | cond_resched(); | 287 | cond_resched(); |
343 | if (again) | 288 | if (again) |
344 | goto loop; | 289 | goto loop; |
@@ -1923,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1923 | 1868 | ||
1924 | BUG_ON(ret); | 1869 | BUG_ON(ret); |
1925 | 1870 | ||
1871 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | ||
1872 | |||
1926 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1873 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1927 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 1874 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
1928 | BUG_ON(ret); | 1875 | BUG_ON(ret); |
@@ -2650,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2650 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2597 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2651 | map->num_stripes, sub_stripes); | 2598 | map->num_stripes, sub_stripes); |
2652 | 2599 | ||
2600 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); | ||
2601 | |||
2653 | em = alloc_extent_map(GFP_NOFS); | 2602 | em = alloc_extent_map(GFP_NOFS); |
2654 | if (!em) { | 2603 | if (!em) { |
2655 | ret = -ENOMEM; | 2604 | ret = -ENOMEM; |
@@ -2758,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2758 | item_size); | 2707 | item_size); |
2759 | BUG_ON(ret); | 2708 | BUG_ON(ret); |
2760 | } | 2709 | } |
2710 | |||
2761 | kfree(chunk); | 2711 | kfree(chunk); |
2762 | return 0; | 2712 | return 0; |
2763 | } | 2713 | } |
@@ -2955,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, | |||
2955 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 2905 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
2956 | u64 logical, u64 *length, | 2906 | u64 logical, u64 *length, |
2957 | struct btrfs_multi_bio **multi_ret, | 2907 | struct btrfs_multi_bio **multi_ret, |
2958 | int mirror_num, struct page *unplug_page) | 2908 | int mirror_num) |
2959 | { | 2909 | { |
2960 | struct extent_map *em; | 2910 | struct extent_map *em; |
2961 | struct map_lookup *map; | 2911 | struct map_lookup *map; |
2962 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2912 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2963 | u64 offset; | 2913 | u64 offset; |
2964 | u64 stripe_offset; | 2914 | u64 stripe_offset; |
2915 | u64 stripe_end_offset; | ||
2965 | u64 stripe_nr; | 2916 | u64 stripe_nr; |
2917 | u64 stripe_nr_orig; | ||
2918 | u64 stripe_nr_end; | ||
2966 | int stripes_allocated = 8; | 2919 | int stripes_allocated = 8; |
2967 | int stripes_required = 1; | 2920 | int stripes_required = 1; |
2968 | int stripe_index; | 2921 | int stripe_index; |
@@ -2971,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2971 | int max_errors = 0; | 2924 | int max_errors = 0; |
2972 | struct btrfs_multi_bio *multi = NULL; | 2925 | struct btrfs_multi_bio *multi = NULL; |
2973 | 2926 | ||
2974 | if (multi_ret && !(rw & REQ_WRITE)) | 2927 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
2975 | stripes_allocated = 1; | 2928 | stripes_allocated = 1; |
2976 | again: | 2929 | again: |
2977 | if (multi_ret) { | 2930 | if (multi_ret) { |
@@ -2987,11 +2940,6 @@ again: | |||
2987 | em = lookup_extent_mapping(em_tree, logical, *length); | 2940 | em = lookup_extent_mapping(em_tree, logical, *length); |
2988 | read_unlock(&em_tree->lock); | 2941 | read_unlock(&em_tree->lock); |
2989 | 2942 | ||
2990 | if (!em && unplug_page) { | ||
2991 | kfree(multi); | ||
2992 | return 0; | ||
2993 | } | ||
2994 | |||
2995 | if (!em) { | 2943 | if (!em) { |
2996 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 2944 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", |
2997 | (unsigned long long)logical, | 2945 | (unsigned long long)logical, |
@@ -3017,7 +2965,15 @@ again: | |||
3017 | max_errors = 1; | 2965 | max_errors = 1; |
3018 | } | 2966 | } |
3019 | } | 2967 | } |
3020 | if (multi_ret && (rw & REQ_WRITE) && | 2968 | if (rw & REQ_DISCARD) { |
2969 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2970 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2971 | BTRFS_BLOCK_GROUP_DUP | | ||
2972 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2973 | stripes_required = map->num_stripes; | ||
2974 | } | ||
2975 | } | ||
2976 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
3021 | stripes_allocated < stripes_required) { | 2977 | stripes_allocated < stripes_required) { |
3022 | stripes_allocated = map->num_stripes; | 2978 | stripes_allocated = map->num_stripes; |
3023 | free_extent_map(em); | 2979 | free_extent_map(em); |
@@ -3037,23 +2993,37 @@ again: | |||
3037 | /* stripe_offset is the offset of this block in its stripe*/ | 2993 | /* stripe_offset is the offset of this block in its stripe*/ |
3038 | stripe_offset = offset - stripe_offset; | 2994 | stripe_offset = offset - stripe_offset; |
3039 | 2995 | ||
3040 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 2996 | if (rw & REQ_DISCARD) |
3041 | BTRFS_BLOCK_GROUP_RAID10 | | 2997 | *length = min_t(u64, em->len - offset, *length); |
3042 | BTRFS_BLOCK_GROUP_DUP)) { | 2998 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
2999 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3000 | BTRFS_BLOCK_GROUP_RAID10 | | ||
3001 | BTRFS_BLOCK_GROUP_DUP)) { | ||
3043 | /* we limit the length of each bio to what fits in a stripe */ | 3002 | /* we limit the length of each bio to what fits in a stripe */ |
3044 | *length = min_t(u64, em->len - offset, | 3003 | *length = min_t(u64, em->len - offset, |
3045 | map->stripe_len - stripe_offset); | 3004 | map->stripe_len - stripe_offset); |
3046 | } else { | 3005 | } else { |
3047 | *length = em->len - offset; | 3006 | *length = em->len - offset; |
3048 | } | 3007 | } |
3049 | 3008 | ||
3050 | if (!multi_ret && !unplug_page) | 3009 | if (!multi_ret) |
3051 | goto out; | 3010 | goto out; |
3052 | 3011 | ||
3053 | num_stripes = 1; | 3012 | num_stripes = 1; |
3054 | stripe_index = 0; | 3013 | stripe_index = 0; |
3055 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 3014 | stripe_nr_orig = stripe_nr; |
3056 | if (unplug_page || (rw & REQ_WRITE)) | 3015 | stripe_nr_end = (offset + *length + map->stripe_len - 1) & |
3016 | (~(map->stripe_len - 1)); | ||
3017 | do_div(stripe_nr_end, map->stripe_len); | ||
3018 | stripe_end_offset = stripe_nr_end * map->stripe_len - | ||
3019 | (offset + *length); | ||
3020 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3021 | if (rw & REQ_DISCARD) | ||
3022 | num_stripes = min_t(u64, map->num_stripes, | ||
3023 | stripe_nr_end - stripe_nr_orig); | ||
3024 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
3025 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
3026 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
3057 | num_stripes = map->num_stripes; | 3027 | num_stripes = map->num_stripes; |
3058 | else if (mirror_num) | 3028 | else if (mirror_num) |
3059 | stripe_index = mirror_num - 1; | 3029 | stripe_index = mirror_num - 1; |
@@ -3064,7 +3034,7 @@ again: | |||
3064 | } | 3034 | } |
3065 | 3035 | ||
3066 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3036 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
3067 | if (rw & REQ_WRITE) | 3037 | if (rw & (REQ_WRITE | REQ_DISCARD)) |
3068 | num_stripes = map->num_stripes; | 3038 | num_stripes = map->num_stripes; |
3069 | else if (mirror_num) | 3039 | else if (mirror_num) |
3070 | stripe_index = mirror_num - 1; | 3040 | stripe_index = mirror_num - 1; |
@@ -3075,8 +3045,12 @@ again: | |||
3075 | stripe_index = do_div(stripe_nr, factor); | 3045 | stripe_index = do_div(stripe_nr, factor); |
3076 | stripe_index *= map->sub_stripes; | 3046 | stripe_index *= map->sub_stripes; |
3077 | 3047 | ||
3078 | if (unplug_page || (rw & REQ_WRITE)) | 3048 | if (rw & REQ_WRITE) |
3079 | num_stripes = map->sub_stripes; | 3049 | num_stripes = map->sub_stripes; |
3050 | else if (rw & REQ_DISCARD) | ||
3051 | num_stripes = min_t(u64, map->sub_stripes * | ||
3052 | (stripe_nr_end - stripe_nr_orig), | ||
3053 | map->num_stripes); | ||
3080 | else if (mirror_num) | 3054 | else if (mirror_num) |
3081 | stripe_index += mirror_num - 1; | 3055 | stripe_index += mirror_num - 1; |
3082 | else { | 3056 | else { |
@@ -3094,24 +3068,101 @@ again: | |||
3094 | } | 3068 | } |
3095 | BUG_ON(stripe_index >= map->num_stripes); | 3069 | BUG_ON(stripe_index >= map->num_stripes); |
3096 | 3070 | ||
3097 | for (i = 0; i < num_stripes; i++) { | 3071 | if (rw & REQ_DISCARD) { |
3098 | if (unplug_page) { | 3072 | for (i = 0; i < num_stripes; i++) { |
3099 | struct btrfs_device *device; | ||
3100 | struct backing_dev_info *bdi; | ||
3101 | |||
3102 | device = map->stripes[stripe_index].dev; | ||
3103 | if (device->bdev) { | ||
3104 | bdi = blk_get_backing_dev_info(device->bdev); | ||
3105 | if (bdi->unplug_io_fn) | ||
3106 | bdi->unplug_io_fn(bdi, unplug_page); | ||
3107 | } | ||
3108 | } else { | ||
3109 | multi->stripes[i].physical = | 3073 | multi->stripes[i].physical = |
3110 | map->stripes[stripe_index].physical + | 3074 | map->stripes[stripe_index].physical + |
3111 | stripe_offset + stripe_nr * map->stripe_len; | 3075 | stripe_offset + stripe_nr * map->stripe_len; |
3112 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 3076 | multi->stripes[i].dev = map->stripes[stripe_index].dev; |
3077 | |||
3078 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3079 | u64 stripes; | ||
3080 | u32 last_stripe = 0; | ||
3081 | int j; | ||
3082 | |||
3083 | div_u64_rem(stripe_nr_end - 1, | ||
3084 | map->num_stripes, | ||
3085 | &last_stripe); | ||
3086 | |||
3087 | for (j = 0; j < map->num_stripes; j++) { | ||
3088 | u32 test; | ||
3089 | |||
3090 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3091 | map->num_stripes, &test); | ||
3092 | if (test == stripe_index) | ||
3093 | break; | ||
3094 | } | ||
3095 | stripes = stripe_nr_end - 1 - j; | ||
3096 | do_div(stripes, map->num_stripes); | ||
3097 | multi->stripes[i].length = map->stripe_len * | ||
3098 | (stripes - stripe_nr + 1); | ||
3099 | |||
3100 | if (i == 0) { | ||
3101 | multi->stripes[i].length -= | ||
3102 | stripe_offset; | ||
3103 | stripe_offset = 0; | ||
3104 | } | ||
3105 | if (stripe_index == last_stripe) | ||
3106 | multi->stripes[i].length -= | ||
3107 | stripe_end_offset; | ||
3108 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3109 | u64 stripes; | ||
3110 | int j; | ||
3111 | int factor = map->num_stripes / | ||
3112 | map->sub_stripes; | ||
3113 | u32 last_stripe = 0; | ||
3114 | |||
3115 | div_u64_rem(stripe_nr_end - 1, | ||
3116 | factor, &last_stripe); | ||
3117 | last_stripe *= map->sub_stripes; | ||
3118 | |||
3119 | for (j = 0; j < factor; j++) { | ||
3120 | u32 test; | ||
3121 | |||
3122 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3123 | factor, &test); | ||
3124 | |||
3125 | if (test == | ||
3126 | stripe_index / map->sub_stripes) | ||
3127 | break; | ||
3128 | } | ||
3129 | stripes = stripe_nr_end - 1 - j; | ||
3130 | do_div(stripes, factor); | ||
3131 | multi->stripes[i].length = map->stripe_len * | ||
3132 | (stripes - stripe_nr + 1); | ||
3133 | |||
3134 | if (i < map->sub_stripes) { | ||
3135 | multi->stripes[i].length -= | ||
3136 | stripe_offset; | ||
3137 | if (i == map->sub_stripes - 1) | ||
3138 | stripe_offset = 0; | ||
3139 | } | ||
3140 | if (stripe_index >= last_stripe && | ||
3141 | stripe_index <= (last_stripe + | ||
3142 | map->sub_stripes - 1)) { | ||
3143 | multi->stripes[i].length -= | ||
3144 | stripe_end_offset; | ||
3145 | } | ||
3146 | } else | ||
3147 | multi->stripes[i].length = *length; | ||
3148 | |||
3149 | stripe_index++; | ||
3150 | if (stripe_index == map->num_stripes) { | ||
3151 | /* This could only happen for RAID0/10 */ | ||
3152 | stripe_index = 0; | ||
3153 | stripe_nr++; | ||
3154 | } | ||
3155 | } | ||
3156 | } else { | ||
3157 | for (i = 0; i < num_stripes; i++) { | ||
3158 | multi->stripes[i].physical = | ||
3159 | map->stripes[stripe_index].physical + | ||
3160 | stripe_offset + | ||
3161 | stripe_nr * map->stripe_len; | ||
3162 | multi->stripes[i].dev = | ||
3163 | map->stripes[stripe_index].dev; | ||
3164 | stripe_index++; | ||
3113 | } | 3165 | } |
3114 | stripe_index++; | ||
3115 | } | 3166 | } |
3116 | if (multi_ret) { | 3167 | if (multi_ret) { |
3117 | *multi_ret = multi; | 3168 | *multi_ret = multi; |
@@ -3128,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
3128 | struct btrfs_multi_bio **multi_ret, int mirror_num) | 3179 | struct btrfs_multi_bio **multi_ret, int mirror_num) |
3129 | { | 3180 | { |
3130 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | 3181 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, |
3131 | mirror_num, NULL); | 3182 | mirror_num); |
3132 | } | 3183 | } |
3133 | 3184 | ||
3134 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 3185 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
@@ -3196,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
3196 | return 0; | 3247 | return 0; |
3197 | } | 3248 | } |
3198 | 3249 | ||
3199 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
3200 | u64 logical, struct page *page) | ||
3201 | { | ||
3202 | u64 length = PAGE_CACHE_SIZE; | ||
3203 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
3204 | NULL, 0, page); | ||
3205 | } | ||
3206 | |||
3207 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3250 | static void end_bio_multi_stripe(struct bio *bio, int err) |
3208 | { | 3251 | { |
3209 | struct btrfs_multi_bio *multi = bio->bi_private; | 3252 | struct btrfs_multi_bio *multi = bio->bi_private; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7fb59d45fe8c..cc2eadaf7a27 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -126,6 +126,7 @@ struct btrfs_fs_devices { | |||
126 | struct btrfs_bio_stripe { | 126 | struct btrfs_bio_stripe { |
127 | struct btrfs_device *dev; | 127 | struct btrfs_device *dev; |
128 | u64 physical; | 128 | u64 physical; |
129 | u64 length; /* only used for discard mappings */ | ||
129 | }; | 130 | }; |
130 | 131 | ||
131 | struct btrfs_multi_bio { | 132 | struct btrfs_multi_bio { |
@@ -145,6 +146,17 @@ struct btrfs_device_info { | |||
145 | u64 max_avail; | 146 | u64 max_avail; |
146 | }; | 147 | }; |
147 | 148 | ||
149 | struct map_lookup { | ||
150 | u64 type; | ||
151 | int io_align; | ||
152 | int io_width; | ||
153 | int stripe_len; | ||
154 | int sector_size; | ||
155 | int num_stripes; | ||
156 | int sub_stripes; | ||
157 | struct btrfs_bio_stripe stripes[]; | ||
158 | }; | ||
159 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | 160 | /* Used to sort the devices by max_avail(descending sort) */ |
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | 161 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); |
150 | 162 | ||
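[Editor's note] Moving struct map_lookup out of volumes.c and into volumes.h (together with the new per-stripe length field used for discard mappings) makes the layout visible to other compilation units, which the new trace_btrfs_chunk_alloc/free tracepoints rely on. A hypothetical helper showing that any file including volumes.h can now walk the stripes; this is an illustration, not code from the commit:

	#include <linux/kernel.h>
	#include "volumes.h"

	static void dump_map(const struct map_lookup *map)
	{
		int i;

		for (i = 0; i < map->num_stripes; i++)
			printk(KERN_INFO "stripe %d at physical %llu\n", i,
			       (unsigned long long)map->stripes[i].physical);
	}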
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d779cefcfd7d..cfd660550ded 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
180 | struct btrfs_path *path; | 180 | struct btrfs_path *path; |
181 | struct extent_buffer *leaf; | 181 | struct extent_buffer *leaf; |
182 | struct btrfs_dir_item *di; | 182 | struct btrfs_dir_item *di; |
183 | int ret = 0, slot, advance; | 183 | int ret = 0, slot; |
184 | size_t total_size = 0, size_left = size; | 184 | size_t total_size = 0, size_left = size; |
185 | unsigned long name_ptr; | 185 | unsigned long name_ptr; |
186 | size_t name_len; | 186 | size_t name_len; |
187 | u32 nritems; | ||
188 | 187 | ||
189 | /* | 188 | /* |
190 | * ok we want all objects associated with this id. | 189 | * ok we want all objects associated with this id. |
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
204 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 203 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
205 | if (ret < 0) | 204 | if (ret < 0) |
206 | goto err; | 205 | goto err; |
207 | advance = 0; | 206 | |
208 | while (1) { | 207 | while (1) { |
209 | leaf = path->nodes[0]; | 208 | leaf = path->nodes[0]; |
210 | nritems = btrfs_header_nritems(leaf); | ||
211 | slot = path->slots[0]; | 209 | slot = path->slots[0]; |
212 | 210 | ||
213 | /* this is where we start walking through the path */ | 211 | /* this is where we start walking through the path */ |
214 | if (advance || slot >= nritems) { | 212 | if (slot >= btrfs_header_nritems(leaf)) { |
215 | /* | 213 | /* |
216 | * if we've reached the last slot in this leaf we need | 214 | * if we've reached the last slot in this leaf we need |
217 | * to go to the next leaf and reset everything | 215 | * to go to the next leaf and reset everything |
218 | */ | 216 | */ |
219 | if (slot >= nritems-1) { | 217 | ret = btrfs_next_leaf(root, path); |
220 | ret = btrfs_next_leaf(root, path); | 218 | if (ret < 0) |
221 | if (ret) | 219 | goto err; |
222 | break; | 220 | else if (ret > 0) |
223 | leaf = path->nodes[0]; | 221 | break; |
224 | nritems = btrfs_header_nritems(leaf); | 222 | continue; |
225 | slot = path->slots[0]; | ||
226 | } else { | ||
227 | /* | ||
228 | * just walking through the slots on this leaf | ||
229 | */ | ||
230 | slot++; | ||
231 | path->slots[0]++; | ||
232 | } | ||
233 | } | 223 | } |
234 | advance = 1; | ||
235 | 224 | ||
236 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 225 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
237 | 226 | ||
@@ -242,13 +231,15 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
242 | break; | 231 | break; |
243 | 232 | ||
244 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 233 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
234 | if (verify_dir_item(root, leaf, di)) | ||
235 | continue; | ||
245 | 236 | ||
246 | name_len = btrfs_dir_name_len(leaf, di); | 237 | name_len = btrfs_dir_name_len(leaf, di); |
247 | total_size += name_len + 1; | 238 | total_size += name_len + 1; |
248 | 239 | ||
249 | /* we are just looking for how big our buffer needs to be */ | 240 | /* we are just looking for how big our buffer needs to be */ |
250 | if (!size) | 241 | if (!size) |
251 | continue; | 242 | goto next; |
252 | 243 | ||
253 | if (!buffer || (name_len + 1) > size_left) { | 244 | if (!buffer || (name_len + 1) > size_left) { |
254 | ret = -ERANGE; | 245 | ret = -ERANGE; |
@@ -261,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
261 | 252 | ||
262 | size_left -= name_len + 1; | 253 | size_left -= name_len + 1; |
263 | buffer += name_len + 1; | 254 | buffer += name_len + 1; |
255 | next: | ||
256 | path->slots[0]++; | ||
264 | } | 257 | } |
265 | ret = total_size; | 258 | ret = total_size; |
266 | 259 | ||
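[Editor's note] The btrfs_listxattr() rewrite above removes the advance/nritems bookkeeping in favour of a single advance point: bump path->slots[0] at the bottom of the loop and call btrfs_next_leaf() only when the slot runs off the current leaf. A skeleton of that pattern against the btrfs types, with the item processing elided:

	static int walk_items(struct btrfs_root *root, struct btrfs_path *path)
	{
		struct extent_buffer *leaf;
		int slot, ret;

		while (1) {
			leaf = path->nodes[0];
			slot = path->slots[0];

			if (slot >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;	/* lookup/IO error */
				if (ret > 0)
					break;		/* walked off the tree */
				continue;		/* leaf changed, reload */
			}

			/* ... examine the item at (leaf, slot) ... */

			path->slots[0]++;		/* the one and only advance */
		}
		return 0;
	}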
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index f5ec2d44150d..faccd47c6c46 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void) | |||
57 | if (!workspace) | 57 | if (!workspace) |
58 | return ERR_PTR(-ENOMEM); | 58 | return ERR_PTR(-ENOMEM); |
59 | 59 | ||
60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( |
61 | MAX_WBITS, MAX_MEM_LEVEL)); | ||
61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 62 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 63 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
63 | if (!workspace->def_strm.workspace || | 64 | if (!workspace->def_strm.workspace || |
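[Editor's note] The zlib hunk tracks a kernel API change: zlib_deflate_workspacesize() now takes the window bits and memory level, so passing MAX_WBITS and MAX_MEM_LEVEL requests the same maximal workspace the old no-argument form sized for. A hypothetical wrapper showing the call; the real code above checks all three allocations together rather than returning early:

	#include <linux/vmalloc.h>
	#include <linux/zlib.h>

	static void *alloc_deflate_ws(void)
	{
		/* largest workspace: 32 KiB window, maximum memory level */
		return vmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
	}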