about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chris Mason <clm@fb.com> 2016-03-01 11:13:56 -0500
committer Chris Mason <clm@fb.com> 2016-03-01 11:13:56 -0500
commit c05c5ee5ea40fe40df5e29471f583a0eb7c531e3 (patch)
tree c56a3336bf631d9e97f038f1253e8e99726eb3e5
parent fc77dbd34c5c99bce46d40a2491937c3bcbd10af (diff)
parent f5bc27c71a1b0741cb93dbec0f216b012b21d93f (diff)
Merge tag 'for-chris' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux into for-linus-4.6
Btrfs patchsets for 4.6
-rw-r--r-- Documentation/filesystems/btrfs.txt 19
-rw-r--r-- fs/btrfs/backref.c 12
-rw-r--r-- fs/btrfs/ctree.c 36
-rw-r--r-- fs/btrfs/ctree.h 69
-rw-r--r-- fs/btrfs/delayed-inode.c 3
-rw-r--r-- fs/btrfs/delayed-ref.c 12
-rw-r--r-- fs/btrfs/dev-replace.c 132
-rw-r--r-- fs/btrfs/dev-replace.h 7
-rw-r--r-- fs/btrfs/disk-io.c 68
-rw-r--r-- fs/btrfs/extent-tree.c 38
-rw-r--r-- fs/btrfs/extent_io.c 40
-rw-r--r-- fs/btrfs/extent_io.h 5
-rw-r--r-- fs/btrfs/extent_map.c 3
-rw-r--r-- fs/btrfs/file-item.c 92
-rw-r--r-- fs/btrfs/file.c 146
-rw-r--r-- fs/btrfs/inode.c 301
-rw-r--r-- fs/btrfs/ioctl.c 28
-rw-r--r-- fs/btrfs/ordered-data.c 3
-rw-r--r-- fs/btrfs/print-tree.c 23
-rw-r--r-- fs/btrfs/reada.c 268
-rw-r--r-- fs/btrfs/root-tree.c 2
-rw-r--r-- fs/btrfs/scrub.c 30
-rw-r--r-- fs/btrfs/send.c 36
-rw-r--r-- fs/btrfs/super.c 48
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 6
-rw-r--r-- fs/btrfs/tests/free-space-tree-tests.c 1
-rw-r--r-- fs/btrfs/transaction.c 13
-rw-r--r-- fs/btrfs/volumes.c 47
-rw-r--r-- fs/btrfs/xattr.c 2
29 files changed, 881 insertions, 609 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index c772b47e7ef0..6593d2e415c5 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -168,10 +168,23 @@ Options with (*) are default options and will not show in the mount options.
168 notreelog 168 notreelog
169 Enable/disable the tree logging used for fsync and O_SYNC writes. 169 Enable/disable the tree logging used for fsync and O_SYNC writes.
170 170
171 recovery 171 nologreplay
172 Enable autorecovery attempts if a bad tree root is found at mount time. 172 Disable the log tree replay at mount time to prevent filesystem
173 Currently this scans a list of several previous tree roots and tries to 173 from getting modified.
174 Must be used with 'ro' mount option.
175 A filesystem mounted with this option cannot transition to a
176 read-write mount via remount,rw - the filesystem must be unmounted
177 and mounted back again if read-write access is desired.
178
179 usebackuproot
180 Enable attempts to use backup tree roots if a bad tree root is found at
181 mount time.
182 Currently this scans a list of 4 previous tree roots and tries to
174 use the first readable. 183 use the first readable.
184 And since the mount option doesn't affect any behavior after mount,
185 it won't be shown in mount info.
186 Prior to 4.6, this was done by 'recovery' option that has been
187 deprecated, but will work.
175 188
176 rescan_uuid_tree 189 rescan_uuid_tree
177 Force check and rebuild procedure of the UUID tree. This should not 190 Force check and rebuild procedure of the UUID tree. This should not
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f6dac40f87ff..80e8472d618b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void)
148 148
149void btrfs_prelim_ref_exit(void) 149void btrfs_prelim_ref_exit(void)
150{ 150{
151 if (btrfs_prelim_ref_cache) 151 kmem_cache_destroy(btrfs_prelim_ref_cache);
152 kmem_cache_destroy(btrfs_prelim_ref_cache);
153} 152}
154 153
155/* 154/*
@@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode)
566 struct __prelim_ref *pos2 = pos1, *tmp; 565 struct __prelim_ref *pos2 = pos1, *tmp;
567 566
568 list_for_each_entry_safe_continue(pos2, tmp, head, list) { 567 list_for_each_entry_safe_continue(pos2, tmp, head, list) {
569 struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2; 568 struct __prelim_ref *ref1 = pos1, *ref2 = pos2;
570 struct extent_inode_elem *eie; 569 struct extent_inode_elem *eie;
571 570
572 if (!ref_for_same_block(ref1, ref2)) 571 if (!ref_for_same_block(ref1, ref2))
573 continue; 572 continue;
574 if (mode == 1) { 573 if (mode == 1) {
575 if (!ref1->parent && ref2->parent) { 574 if (!ref1->parent && ref2->parent)
576 xchg = ref1; 575 swap(ref1, ref2);
577 ref1 = ref2;
578 ref2 = xchg;
579 }
580 } else { 576 } else {
581 if (ref1->parent != ref2->parent) 577 if (ref1->parent != ref2->parent)
582 continue; 578 continue;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 769e0ff1b4ce..77592931ab4f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -311,7 +311,7 @@ struct tree_mod_root {
311 311
312struct tree_mod_elem { 312struct tree_mod_elem {
313 struct rb_node node; 313 struct rb_node node;
314 u64 index; /* shifted logical */ 314 u64 logical;
315 u64 seq; 315 u64 seq;
316 enum mod_log_op op; 316 enum mod_log_op op;
317 317
@@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
435 435
436/* 436/*
437 * key order of the log: 437 * key order of the log:
438 * index -> sequence 438 * node/leaf start address -> sequence
439 * 439 *
440 * the index is the shifted logical of the *new* root node for root replace 440 * The 'start address' is the logical address of the *new* root node
441 * operations, or the shifted logical of the affected block for all other 441 * for root replace operations, or the logical address of the affected
442 * operations. 442 * block for all other operations.
443 * 443 *
444 * Note: must be called with write lock (tree_mod_log_write_lock). 444 * Note: must be called with write lock (tree_mod_log_write_lock).
445 */ 445 */
@@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
460 while (*new) { 460 while (*new) {
461 cur = container_of(*new, struct tree_mod_elem, node); 461 cur = container_of(*new, struct tree_mod_elem, node);
462 parent = *new; 462 parent = *new;
463 if (cur->index < tm->index) 463 if (cur->logical < tm->logical)
464 new = &((*new)->rb_left); 464 new = &((*new)->rb_left);
465 else if (cur->index > tm->index) 465 else if (cur->logical > tm->logical)
466 new = &((*new)->rb_right); 466 new = &((*new)->rb_right);
467 else if (cur->seq < tm->seq) 467 else if (cur->seq < tm->seq)
468 new = &((*new)->rb_left); 468 new = &((*new)->rb_left);
@@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
523 if (!tm) 523 if (!tm)
524 return NULL; 524 return NULL;
525 525
526 tm->index = eb->start >> PAGE_CACHE_SHIFT; 526 tm->logical = eb->start;
527 if (op != MOD_LOG_KEY_ADD) { 527 if (op != MOD_LOG_KEY_ADD) {
528 btrfs_node_key(eb, &tm->key, slot); 528 btrfs_node_key(eb, &tm->key, slot);
529 tm->blockptr = btrfs_node_blockptr(eb, slot); 529 tm->blockptr = btrfs_node_blockptr(eb, slot);
@@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
588 goto free_tms; 588 goto free_tms;
589 } 589 }
590 590
591 tm->index = eb->start >> PAGE_CACHE_SHIFT; 591 tm->logical = eb->start;
592 tm->slot = src_slot; 592 tm->slot = src_slot;
593 tm->move.dst_slot = dst_slot; 593 tm->move.dst_slot = dst_slot;
594 tm->move.nr_items = nr_items; 594 tm->move.nr_items = nr_items;
@@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
699 goto free_tms; 699 goto free_tms;
700 } 700 }
701 701
702 tm->index = new_root->start >> PAGE_CACHE_SHIFT; 702 tm->logical = new_root->start;
703 tm->old_root.logical = old_root->start; 703 tm->old_root.logical = old_root->start;
704 tm->old_root.level = btrfs_header_level(old_root); 704 tm->old_root.level = btrfs_header_level(old_root);
705 tm->generation = btrfs_header_generation(old_root); 705 tm->generation = btrfs_header_generation(old_root);
@@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
739 struct rb_node *node; 739 struct rb_node *node;
740 struct tree_mod_elem *cur = NULL; 740 struct tree_mod_elem *cur = NULL;
741 struct tree_mod_elem *found = NULL; 741 struct tree_mod_elem *found = NULL;
742 u64 index = start >> PAGE_CACHE_SHIFT;
743 742
744 tree_mod_log_read_lock(fs_info); 743 tree_mod_log_read_lock(fs_info);
745 tm_root = &fs_info->tree_mod_log; 744 tm_root = &fs_info->tree_mod_log;
746 node = tm_root->rb_node; 745 node = tm_root->rb_node;
747 while (node) { 746 while (node) {
748 cur = container_of(node, struct tree_mod_elem, node); 747 cur = container_of(node, struct tree_mod_elem, node);
749 if (cur->index < index) { 748 if (cur->logical < start) {
750 node = node->rb_left; 749 node = node->rb_left;
751 } else if (cur->index > index) { 750 } else if (cur->logical > start) {
752 node = node->rb_right; 751 node = node->rb_right;
753 } else if (cur->seq < min_seq) { 752 } else if (cur->seq < min_seq) {
754 node = node->rb_left; 753 node = node->rb_left;
@@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1230 return NULL; 1229 return NULL;
1231 1230
1232 /* 1231 /*
1233 * the very last operation that's logged for a root is the replacement 1232 * the very last operation that's logged for a root is the
1234 * operation (if it is replaced at all). this has the index of the *new* 1233 * replacement operation (if it is replaced at all). this has
1235 * root, making it the very first operation that's logged for this root. 1234 * the logical address of the *new* root, making it the very
1235 * first operation that's logged for this root.
1236 */ 1236 */
1237 while (1) { 1237 while (1) {
1238 tm = tree_mod_log_search_oldest(fs_info, root_logical, 1238 tm = tree_mod_log_search_oldest(fs_info, root_logical,
@@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1336 if (!next) 1336 if (!next)
1337 break; 1337 break;
1338 tm = container_of(next, struct tree_mod_elem, node); 1338 tm = container_of(next, struct tree_mod_elem, node);
1339 if (tm->index != first_tm->index) 1339 if (tm->logical != first_tm->logical)
1340 break; 1340 break;
1341 } 1341 }
1342 tree_mod_log_read_unlock(fs_info); 1342 tree_mod_log_read_unlock(fs_info);
@@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5361 goto out; 5361 goto out;
5362 } 5362 }
5363 5363
5364 tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); 5364 tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL);
5365 if (!tmp_buf) { 5365 if (!tmp_buf) {
5366 ret = -ENOMEM; 5366 ret = -ENOMEM;
5367 goto out; 5367 goto out;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bfe4a337fb4d..b69ad1305b71 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum;
100/* tracks free space in block groups. */ 100/* tracks free space in block groups. */
101#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL 101#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
102 102
103/* device stats in the device tree */
104#define BTRFS_DEV_STATS_OBJECTID 0ULL
105
103/* for storing balance parameters in the root tree */ 106/* for storing balance parameters in the root tree */
104#define BTRFS_BALANCE_OBJECTID -4ULL 107#define BTRFS_BALANCE_OBJECTID -4ULL
105 108
@@ -1002,8 +1005,10 @@ struct btrfs_dev_replace {
1002 pid_t lock_owner; 1005 pid_t lock_owner;
1003 atomic_t nesting_level; 1006 atomic_t nesting_level;
1004 struct mutex lock_finishing_cancel_unmount; 1007 struct mutex lock_finishing_cancel_unmount;
1005 struct mutex lock_management_lock; 1008 rwlock_t lock;
1006 struct mutex lock; 1009 atomic_t read_locks;
1010 atomic_t blocking_readers;
1011 wait_queue_head_t read_lock_wq;
1007 1012
1008 struct btrfs_scrub_progress scrub_progress; 1013 struct btrfs_scrub_progress scrub_progress;
1009}; 1014};
@@ -1822,6 +1827,9 @@ struct btrfs_fs_info {
1822 spinlock_t reada_lock; 1827 spinlock_t reada_lock;
1823 struct radix_tree_root reada_tree; 1828 struct radix_tree_root reada_tree;
1824 1829
1830 /* readahead works cnt */
1831 atomic_t reada_works_cnt;
1832
1825 /* Extent buffer radix tree */ 1833 /* Extent buffer radix tree */
1826 spinlock_t buffer_lock; 1834 spinlock_t buffer_lock;
1827 struct radix_tree_root buffer_radix; 1835 struct radix_tree_root buffer_radix;
@@ -2185,13 +2193,43 @@ struct btrfs_ioctl_defrag_range_args {
2185 */ 2193 */
2186#define BTRFS_QGROUP_RELATION_KEY 246 2194#define BTRFS_QGROUP_RELATION_KEY 246
2187 2195
2196/*
2197 * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
2198 */
2188#define BTRFS_BALANCE_ITEM_KEY 248 2199#define BTRFS_BALANCE_ITEM_KEY 248
2189 2200
2190/* 2201/*
2191 * Persistantly stores the io stats in the device tree. 2202 * The key type for tree items that are stored persistently, but do not need to
2192 * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). 2203 * exist for extended period of time. The items can exist in any tree.
2204 *
2205 * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
2206 *
2207 * Existing items:
2208 *
2209 * - balance status item
2210 * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
2193 */ 2211 */
2194#define BTRFS_DEV_STATS_KEY 249 2212#define BTRFS_TEMPORARY_ITEM_KEY 248
2213
2214/*
2215 * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
2216 */
2217#define BTRFS_DEV_STATS_KEY 249
2218
2219/*
2220 * The key type for tree items that are stored persistently and usually exist
2221 * for a long period, eg. filesystem lifetime. The item kinds can be status
2222 * information, stats or preference values. The item can exist in any tree.
2223 *
2224 * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
2225 *
2226 * Existing items:
2227 *
2228 * - device statistics, store IO stats in the device tree, one key for all
2229 * stats
2230 * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
2231 */
2232#define BTRFS_PERSISTENT_ITEM_KEY 249
2195 2233
2196/* 2234/*
2197 * Persistantly stores the device replace state in the device tree. 2235 * Persistantly stores the device replace state in the device tree.
@@ -2241,7 +2279,7 @@ struct btrfs_ioctl_defrag_range_args {
2241#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 2279#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
2242#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 2280#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
2243#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) 2281#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
2244#define BTRFS_MOUNT_RECOVERY (1 << 18) 2282#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18)
2245#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) 2283#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
2246#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) 2284#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
2247#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 2285#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
@@ -2250,9 +2288,10 @@ struct btrfs_ioctl_defrag_range_args {
2250#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) 2288#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
2251#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) 2289#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
2252#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) 2290#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
2291#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
2253 2292
2254#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) 2293#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
2255#define BTRFS_DEFAULT_MAX_INLINE (8192) 2294#define BTRFS_DEFAULT_MAX_INLINE (2048)
2256 2295
2257#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 2296#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
2258#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 2297#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2353,6 +2392,9 @@ struct btrfs_map_token {
2353 unsigned long offset; 2392 unsigned long offset;
2354}; 2393};
2355 2394
2395#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
2396 ((bytes) >> (fs_info)->sb->s_blocksize_bits)
2397
2356static inline void btrfs_init_map_token (struct btrfs_map_token *token) 2398static inline void btrfs_init_map_token (struct btrfs_map_token *token)
2357{ 2399{
2358 token->kaddr = NULL; 2400 token->kaddr = NULL;
@@ -3448,8 +3490,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
3448static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 3490static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3449 unsigned num_items) 3491 unsigned num_items)
3450{ 3492{
3451 return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3493 return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
3452 2 * num_items;
3453} 3494}
3454 3495
3455/* 3496/*
@@ -4027,7 +4068,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4027 struct btrfs_root *root, 4068 struct btrfs_root *root,
4028 struct inode *dir, u64 objectid, 4069 struct inode *dir, u64 objectid,
4029 const char *name, int name_len); 4070 const char *name, int name_len);
4030int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, 4071int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
4031 int front); 4072 int front);
4032int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 4073int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4033 struct btrfs_root *root, 4074 struct btrfs_root *root,
@@ -4089,6 +4130,7 @@ void btrfs_test_inode_set_ops(struct inode *inode);
4089 4130
4090/* ioctl.c */ 4131/* ioctl.c */
4091long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 4132long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
4133int btrfs_ioctl_get_supported_features(void __user *arg);
4092void btrfs_update_iflags(struct inode *inode); 4134void btrfs_update_iflags(struct inode *inode);
4093void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); 4135void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
4094int btrfs_is_empty_uuid(u8 *uuid); 4136int btrfs_is_empty_uuid(u8 *uuid);
@@ -4151,7 +4193,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
4151ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 4193ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
4152 4194
4153/* super.c */ 4195/* super.c */
4154int btrfs_parse_options(struct btrfs_root *root, char *options); 4196int btrfs_parse_options(struct btrfs_root *root, char *options,
4197 unsigned long new_flags);
4155int btrfs_sync_fs(struct super_block *sb, int wait); 4198int btrfs_sync_fs(struct super_block *sb, int wait);
4156 4199
4157#ifdef CONFIG_PRINTK 4200#ifdef CONFIG_PRINTK
@@ -4525,8 +4568,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
4525 struct btrfs_key *start, struct btrfs_key *end); 4568 struct btrfs_key *start, struct btrfs_key *end);
4526int btrfs_reada_wait(void *handle); 4569int btrfs_reada_wait(void *handle);
4527void btrfs_reada_detach(void *handle); 4570void btrfs_reada_detach(void *handle);
4528int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 4571int btree_readahead_hook(struct btrfs_fs_info *fs_info,
4529 u64 start, int err); 4572 struct extent_buffer *eb, u64 start, int err);
4530 4573
4531static inline int is_fstree(u64 rootid) 4574static inline int is_fstree(u64 rootid)
4532{ 4575{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b57daa895cea..a20d541bb190 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void)
43 43
44void btrfs_delayed_inode_exit(void) 44void btrfs_delayed_inode_exit(void)
45{ 45{
46 if (delayed_node_cache) 46 kmem_cache_destroy(delayed_node_cache);
47 kmem_cache_destroy(delayed_node_cache);
48} 47}
49 48
50static inline void btrfs_init_delayed_node( 49static inline void btrfs_init_delayed_node(
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 914ac13bd92f..430b3689b112 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
929 929
930void btrfs_delayed_ref_exit(void) 930void btrfs_delayed_ref_exit(void)
931{ 931{
932 if (btrfs_delayed_ref_head_cachep) 932 kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
933 kmem_cache_destroy(btrfs_delayed_ref_head_cachep); 933 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
934 if (btrfs_delayed_tree_ref_cachep) 934 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
935 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); 935 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
936 if (btrfs_delayed_data_ref_cachep)
937 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
938 if (btrfs_delayed_extent_op_cachep)
939 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
940} 936}
941 937
942int btrfs_delayed_ref_init(void) 938int btrfs_delayed_ref_init(void)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index cbb7dbfb3fff..ff2db7a6c894 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
202 struct btrfs_dev_replace_item *ptr; 202 struct btrfs_dev_replace_item *ptr;
203 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 203 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
204 204
205 btrfs_dev_replace_lock(dev_replace); 205 btrfs_dev_replace_lock(dev_replace, 0);
206 if (!dev_replace->is_valid || 206 if (!dev_replace->is_valid ||
207 !dev_replace->item_needs_writeback) { 207 !dev_replace->item_needs_writeback) {
208 btrfs_dev_replace_unlock(dev_replace); 208 btrfs_dev_replace_unlock(dev_replace, 0);
209 return 0; 209 return 0;
210 } 210 }
211 btrfs_dev_replace_unlock(dev_replace); 211 btrfs_dev_replace_unlock(dev_replace, 0);
212 212
213 key.objectid = 0; 213 key.objectid = 0;
214 key.type = BTRFS_DEV_REPLACE_KEY; 214 key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
264 ptr = btrfs_item_ptr(eb, path->slots[0], 264 ptr = btrfs_item_ptr(eb, path->slots[0],
265 struct btrfs_dev_replace_item); 265 struct btrfs_dev_replace_item);
266 266
267 btrfs_dev_replace_lock(dev_replace); 267 btrfs_dev_replace_lock(dev_replace, 1);
268 if (dev_replace->srcdev) 268 if (dev_replace->srcdev)
269 btrfs_set_dev_replace_src_devid(eb, ptr, 269 btrfs_set_dev_replace_src_devid(eb, ptr,
270 dev_replace->srcdev->devid); 270 dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
287 btrfs_set_dev_replace_cursor_right(eb, ptr, 287 btrfs_set_dev_replace_cursor_right(eb, ptr,
288 dev_replace->cursor_right); 288 dev_replace->cursor_right);
289 dev_replace->item_needs_writeback = 0; 289 dev_replace->item_needs_writeback = 0;
290 btrfs_dev_replace_unlock(dev_replace); 290 btrfs_dev_replace_unlock(dev_replace, 1);
291 291
292 btrfs_mark_buffer_dirty(eb); 292 btrfs_mark_buffer_dirty(eb);
293 293
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
356 return PTR_ERR(trans); 356 return PTR_ERR(trans);
357 } 357 }
358 358
359 btrfs_dev_replace_lock(dev_replace); 359 btrfs_dev_replace_lock(dev_replace, 1);
360 switch (dev_replace->replace_state) { 360 switch (dev_replace->replace_state) {
361 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 361 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
362 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 362 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
395 dev_replace->is_valid = 1; 395 dev_replace->is_valid = 1;
396 dev_replace->item_needs_writeback = 1; 396 dev_replace->item_needs_writeback = 1;
397 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 397 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
398 btrfs_dev_replace_unlock(dev_replace); 398 btrfs_dev_replace_unlock(dev_replace, 1);
399 399
400 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); 400 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
401 if (ret) 401 if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
407 trans = btrfs_start_transaction(root, 0); 407 trans = btrfs_start_transaction(root, 0);
408 if (IS_ERR(trans)) { 408 if (IS_ERR(trans)) {
409 ret = PTR_ERR(trans); 409 ret = PTR_ERR(trans);
410 btrfs_dev_replace_lock(dev_replace); 410 btrfs_dev_replace_lock(dev_replace, 1);
411 goto leave; 411 goto leave;
412 } 412 }
413 413
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
433leave: 433leave:
434 dev_replace->srcdev = NULL; 434 dev_replace->srcdev = NULL;
435 dev_replace->tgtdev = NULL; 435 dev_replace->tgtdev = NULL;
436 btrfs_dev_replace_unlock(dev_replace); 436 btrfs_dev_replace_unlock(dev_replace, 1);
437 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 437 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
438 return ret; 438 return ret;
439} 439}
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
471 /* don't allow cancel or unmount to disturb the finishing procedure */ 471 /* don't allow cancel or unmount to disturb the finishing procedure */
472 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 472 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
473 473
474 btrfs_dev_replace_lock(dev_replace); 474 btrfs_dev_replace_lock(dev_replace, 0);
475 /* was the operation canceled, or is it finished? */ 475 /* was the operation canceled, or is it finished? */
476 if (dev_replace->replace_state != 476 if (dev_replace->replace_state !=
477 BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { 477 BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
478 btrfs_dev_replace_unlock(dev_replace); 478 btrfs_dev_replace_unlock(dev_replace, 0);
479 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 479 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
480 return 0; 480 return 0;
481 } 481 }
482 482
483 tgt_device = dev_replace->tgtdev; 483 tgt_device = dev_replace->tgtdev;
484 src_device = dev_replace->srcdev; 484 src_device = dev_replace->srcdev;
485 btrfs_dev_replace_unlock(dev_replace); 485 btrfs_dev_replace_unlock(dev_replace, 0);
486 486
487 /* 487 /*
488 * flush all outstanding I/O and inode extent mappings before the 488 * flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
507 /* keep away write_all_supers() during the finishing procedure */ 507 /* keep away write_all_supers() during the finishing procedure */
508 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 508 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
509 mutex_lock(&root->fs_info->chunk_mutex); 509 mutex_lock(&root->fs_info->chunk_mutex);
510 btrfs_dev_replace_lock(dev_replace); 510 btrfs_dev_replace_lock(dev_replace, 1);
511 dev_replace->replace_state = 511 dev_replace->replace_state =
512 scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 512 scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
513 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; 513 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
528 rcu_str_deref(src_device->name), 528 rcu_str_deref(src_device->name),
529 src_device->devid, 529 src_device->devid,
530 rcu_str_deref(tgt_device->name), scrub_ret); 530 rcu_str_deref(tgt_device->name), scrub_ret);
531 btrfs_dev_replace_unlock(dev_replace); 531 btrfs_dev_replace_unlock(dev_replace, 1);
532 mutex_unlock(&root->fs_info->chunk_mutex); 532 mutex_unlock(&root->fs_info->chunk_mutex);
533 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 533 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
534 mutex_unlock(&uuid_mutex); 534 mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
565 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 565 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
566 fs_info->fs_devices->rw_devices++; 566 fs_info->fs_devices->rw_devices++;
567 567
568 btrfs_dev_replace_unlock(dev_replace); 568 btrfs_dev_replace_unlock(dev_replace, 1);
569 569
570 btrfs_rm_dev_replace_blocked(fs_info); 570 btrfs_rm_dev_replace_blocked(fs_info);
571 571
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
649 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 649 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
650 struct btrfs_device *srcdev; 650 struct btrfs_device *srcdev;
651 651
652 btrfs_dev_replace_lock(dev_replace); 652 btrfs_dev_replace_lock(dev_replace, 0);
653 /* even if !dev_replace_is_valid, the values are good enough for 653 /* even if !dev_replace_is_valid, the values are good enough for
654 * the replace_status ioctl */ 654 * the replace_status ioctl */
655 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 655 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
675 div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); 675 div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
676 break; 676 break;
677 } 677 }
678 btrfs_dev_replace_unlock(dev_replace); 678 btrfs_dev_replace_unlock(dev_replace, 0);
679} 679}
680 680
681int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, 681int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
698 return -EROFS; 698 return -EROFS;
699 699
700 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 700 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
701 btrfs_dev_replace_lock(dev_replace); 701 btrfs_dev_replace_lock(dev_replace, 1);
702 switch (dev_replace->replace_state) { 702 switch (dev_replace->replace_state) {
703 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 703 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
704 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 704 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
705 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 705 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
706 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; 706 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
707 btrfs_dev_replace_unlock(dev_replace); 707 btrfs_dev_replace_unlock(dev_replace, 1);
708 goto leave; 708 goto leave;
709 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 709 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
710 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 710 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
717 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 717 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
718 dev_replace->time_stopped = get_seconds(); 718 dev_replace->time_stopped = get_seconds();
719 dev_replace->item_needs_writeback = 1; 719 dev_replace->item_needs_writeback = 1;
720 btrfs_dev_replace_unlock(dev_replace); 720 btrfs_dev_replace_unlock(dev_replace, 1);
721 btrfs_scrub_cancel(fs_info); 721 btrfs_scrub_cancel(fs_info);
722 722
723 trans = btrfs_start_transaction(root, 0); 723 trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
740 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 740 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
741 741
742 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 742 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
743 btrfs_dev_replace_lock(dev_replace); 743 btrfs_dev_replace_lock(dev_replace, 1);
744 switch (dev_replace->replace_state) { 744 switch (dev_replace->replace_state) {
745 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 745 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
746 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 746 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
756 break; 756 break;
757 } 757 }
758 758
759 btrfs_dev_replace_unlock(dev_replace); 759 btrfs_dev_replace_unlock(dev_replace, 1);
760 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 760 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
761} 761}
762 762
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
766 struct task_struct *task; 766 struct task_struct *task;
767 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 767 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
768 768
769 btrfs_dev_replace_lock(dev_replace); 769 btrfs_dev_replace_lock(dev_replace, 1);
770 switch (dev_replace->replace_state) { 770 switch (dev_replace->replace_state) {
771 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 771 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
772 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 772 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
773 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 773 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
774 btrfs_dev_replace_unlock(dev_replace); 774 btrfs_dev_replace_unlock(dev_replace, 1);
775 return 0; 775 return 0;
776 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 776 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
777 break; 777 break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
784 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); 784 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
785 btrfs_info(fs_info, 785 btrfs_info(fs_info,
786 "you may cancel the operation after 'mount -o degraded'"); 786 "you may cancel the operation after 'mount -o degraded'");
787 btrfs_dev_replace_unlock(dev_replace); 787 btrfs_dev_replace_unlock(dev_replace, 1);
788 return 0; 788 return 0;
789 } 789 }
790 btrfs_dev_replace_unlock(dev_replace); 790 btrfs_dev_replace_unlock(dev_replace, 1);
791 791
792 WARN_ON(atomic_xchg( 792 WARN_ON(atomic_xchg(
793 &fs_info->mutually_exclusive_operation_running, 1)); 793 &fs_info->mutually_exclusive_operation_running, 1));
@@ -802,7 +802,7 @@ static int btrfs_dev_replace_kthread(void *data)
802 struct btrfs_ioctl_dev_replace_args *status_args; 802 struct btrfs_ioctl_dev_replace_args *status_args;
803 u64 progress; 803 u64 progress;
804 804
805 status_args = kzalloc(sizeof(*status_args), GFP_NOFS); 805 status_args = kzalloc(sizeof(*status_args), GFP_KERNEL);
806 if (status_args) { 806 if (status_args) {
807 btrfs_dev_replace_status(fs_info, status_args); 807 btrfs_dev_replace_status(fs_info, status_args);
808 progress = status_args->status.progress_1000; 808 progress = status_args->status.progress_1000;
@@ -865,48 +865,58 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
865 return 1; 865 return 1;
866} 866}
867 867
868void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) 868void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
869{ 869{
870 /* the beginning is just an optimization for the typical case */ 870 if (rw == 1) {
871 if (atomic_read(&dev_replace->nesting_level) == 0) { 871 /* write */
872acquire_lock: 872again:
873 /* this is not a nested case where the same thread 873 wait_event(dev_replace->read_lock_wq,
873 * is trying to acquire the same lock twice */ 874 atomic_read(&dev_replace->blocking_readers) == 0);
875 mutex_lock(&dev_replace->lock); 875 write_lock(&dev_replace->lock);
876 mutex_lock(&dev_replace->lock_management_lock); 876 if (atomic_read(&dev_replace->blocking_readers)) {
877 dev_replace->lock_owner = current->pid; 877 write_unlock(&dev_replace->lock);
878 atomic_inc(&dev_replace->nesting_level); 878 goto again;
879 mutex_unlock(&dev_replace->lock_management_lock); 879 }
880 return; 880 } else {
881 read_lock(&dev_replace->lock);
882 atomic_inc(&dev_replace->read_locks);
881 } 883 }
884}
882 885
883 mutex_lock(&dev_replace->lock_management_lock); 886void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
884 if (atomic_read(&dev_replace->nesting_level) > 0 && 887{
885 dev_replace->lock_owner == current->pid) { 888 if (rw == 1) {
886 WARN_ON(!mutex_is_locked(&dev_replace->lock)); 889 /* write */
887 atomic_inc(&dev_replace->nesting_level); 890 ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
888 mutex_unlock(&dev_replace->lock_management_lock); 891 write_unlock(&dev_replace->lock);
889 return; 892 } else {
893 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
894 atomic_dec(&dev_replace->read_locks);
895 read_unlock(&dev_replace->lock);
890 } 896 }
897}
891 898
892 mutex_unlock(&dev_replace->lock_management_lock); 899/* inc blocking cnt and release read lock */
893 goto acquire_lock; 900void btrfs_dev_replace_set_lock_blocking(
901 struct btrfs_dev_replace *dev_replace)
902{
903 /* only set blocking for read lock */
904 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
905 atomic_inc(&dev_replace->blocking_readers);
906 read_unlock(&dev_replace->lock);
894} 907}
895 908
896void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) 909/* acquire read lock and dec blocking cnt */
910void btrfs_dev_replace_clear_lock_blocking(
911 struct btrfs_dev_replace *dev_replace)
897{ 912{
898 WARN_ON(!mutex_is_locked(&dev_replace->lock)); 913 /* only clear blocking for read lock */
899 mutex_lock(&dev_replace->lock_management_lock); 914 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
900 WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); 915 ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
901 WARN_ON(dev_replace->lock_owner != current->pid); 916 read_lock(&dev_replace->lock);
902 atomic_dec(&dev_replace->nesting_level); 917 if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
903 if (atomic_read(&dev_replace->nesting_level) == 0) { 918 waitqueue_active(&dev_replace->read_lock_wq))
904 dev_replace->lock_owner = 0; 919 wake_up(&dev_replace->read_lock_wq);
905 mutex_unlock(&dev_replace->lock_management_lock);
906 mutex_unlock(&dev_replace->lock);
907 } else {
908 mutex_unlock(&dev_replace->lock_management_lock);
909 }
910} 920}
911 921
912void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) 922void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 20035cbbf021..29e3ef5f96bd 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
34void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); 34void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
35int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); 35int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
36int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); 36int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
37void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); 37void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
38void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); 38void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
39void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
40void btrfs_dev_replace_clear_lock_blocking(
41 struct btrfs_dev_replace *dev_replace);
39 42
40static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) 43static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
41{ 44{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4545e2e2ad45..a998ef15ec6d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -110,8 +110,7 @@ int __init btrfs_end_io_wq_init(void)
110 110
111void btrfs_end_io_wq_exit(void) 111void btrfs_end_io_wq_exit(void)
112{ 112{
113 if (btrfs_end_io_wq_cache) 113 kmem_cache_destroy(btrfs_end_io_wq_cache);
114 kmem_cache_destroy(btrfs_end_io_wq_cache);
115} 114}
116 115
117/* 116/*
@@ -612,6 +611,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
612 int found_level; 611 int found_level;
613 struct extent_buffer *eb; 612 struct extent_buffer *eb;
614 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 613 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
614 struct btrfs_fs_info *fs_info = root->fs_info;
615 int ret = 0; 615 int ret = 0;
616 int reads_done; 616 int reads_done;
617 617
@@ -637,21 +637,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
637 637
638 found_start = btrfs_header_bytenr(eb); 638 found_start = btrfs_header_bytenr(eb);
639 if (found_start != eb->start) { 639 if (found_start != eb->start) {
640 btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", 640 btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
641 found_start, eb->start); 641 found_start, eb->start);
642 ret = -EIO; 642 ret = -EIO;
643 goto err; 643 goto err;
644 } 644 }
645 if (check_tree_block_fsid(root->fs_info, eb)) { 645 if (check_tree_block_fsid(fs_info, eb)) {
646 btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", 646 btrfs_err_rl(fs_info, "bad fsid on block %llu",
647 eb->start); 647 eb->start);
648 ret = -EIO; 648 ret = -EIO;
649 goto err; 649 goto err;
650 } 650 }
651 found_level = btrfs_header_level(eb); 651 found_level = btrfs_header_level(eb);
652 if (found_level >= BTRFS_MAX_LEVEL) { 652 if (found_level >= BTRFS_MAX_LEVEL) {
653 btrfs_err(root->fs_info, "bad tree block level %d", 653 btrfs_err(fs_info, "bad tree block level %d",
654 (int)btrfs_header_level(eb)); 654 (int)btrfs_header_level(eb));
655 ret = -EIO; 655 ret = -EIO;
656 goto err; 656 goto err;
657 } 657 }
@@ -659,7 +659,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
659 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 659 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
660 eb, found_level); 660 eb, found_level);
661 661
662 ret = csum_tree_block(root->fs_info, eb, 1); 662 ret = csum_tree_block(fs_info, eb, 1);
663 if (ret) { 663 if (ret) {
664 ret = -EIO; 664 ret = -EIO;
665 goto err; 665 goto err;
@@ -680,7 +680,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
680err: 680err:
681 if (reads_done && 681 if (reads_done &&
682 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 682 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
683 btree_readahead_hook(root, eb, eb->start, ret); 683 btree_readahead_hook(fs_info, eb, eb->start, ret);
684 684
685 if (ret) { 685 if (ret) {
686 /* 686 /*
@@ -699,14 +699,13 @@ out:
699static int btree_io_failed_hook(struct page *page, int failed_mirror) 699static int btree_io_failed_hook(struct page *page, int failed_mirror)
700{ 700{
701 struct extent_buffer *eb; 701 struct extent_buffer *eb;
702 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
703 702
704 eb = (struct extent_buffer *)page->private; 703 eb = (struct extent_buffer *)page->private;
705 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); 704 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
706 eb->read_mirror = failed_mirror; 705 eb->read_mirror = failed_mirror;
707 atomic_dec(&eb->io_pages); 706 atomic_dec(&eb->io_pages);
708 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 707 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
709 btree_readahead_hook(root, eb, eb->start, -EIO); 708 btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
710 return -EIO; /* we fixed nothing */ 709 return -EIO; /* we fixed nothing */
711} 710}
712 711
@@ -1296,9 +1295,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
1296 spin_lock_init(&root->root_item_lock); 1295 spin_lock_init(&root->root_item_lock);
1297} 1296}
1298 1297
1299static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) 1298static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
1299 gfp_t flags)
1300{ 1300{
1301 struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); 1301 struct btrfs_root *root = kzalloc(sizeof(*root), flags);
1302 if (root) 1302 if (root)
1303 root->fs_info = fs_info; 1303 root->fs_info = fs_info;
1304 return root; 1304 return root;
@@ -1310,7 +1310,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
1310{ 1310{
1311 struct btrfs_root *root; 1311 struct btrfs_root *root;
1312 1312
1313 root = btrfs_alloc_root(NULL); 1313 root = btrfs_alloc_root(NULL, GFP_KERNEL);
1314 if (!root) 1314 if (!root)
1315 return ERR_PTR(-ENOMEM); 1315 return ERR_PTR(-ENOMEM);
1316 __setup_root(4096, 4096, 4096, root, NULL, 1); 1316 __setup_root(4096, 4096, 4096, root, NULL, 1);
@@ -1332,7 +1332,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1332 int ret = 0; 1332 int ret = 0;
1333 uuid_le uuid; 1333 uuid_le uuid;
1334 1334
1335 root = btrfs_alloc_root(fs_info); 1335 root = btrfs_alloc_root(fs_info, GFP_KERNEL);
1336 if (!root) 1336 if (!root)
1337 return ERR_PTR(-ENOMEM); 1337 return ERR_PTR(-ENOMEM);
1338 1338
@@ -1408,7 +1408,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1408 struct btrfs_root *tree_root = fs_info->tree_root; 1408 struct btrfs_root *tree_root = fs_info->tree_root;
1409 struct extent_buffer *leaf; 1409 struct extent_buffer *leaf;
1410 1410
1411 root = btrfs_alloc_root(fs_info); 1411 root = btrfs_alloc_root(fs_info, GFP_NOFS);
1412 if (!root) 1412 if (!root)
1413 return ERR_PTR(-ENOMEM); 1413 return ERR_PTR(-ENOMEM);
1414 1414
@@ -1506,7 +1506,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1506 if (!path) 1506 if (!path)
1507 return ERR_PTR(-ENOMEM); 1507 return ERR_PTR(-ENOMEM);
1508 1508
1509 root = btrfs_alloc_root(fs_info); 1509 root = btrfs_alloc_root(fs_info, GFP_NOFS);
1510 if (!root) { 1510 if (!root) {
1511 ret = -ENOMEM; 1511 ret = -ENOMEM;
1512 goto alloc_fail; 1512 goto alloc_fail;
@@ -2272,9 +2272,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
2272 fs_info->dev_replace.lock_owner = 0; 2272 fs_info->dev_replace.lock_owner = 0;
2273 atomic_set(&fs_info->dev_replace.nesting_level, 0); 2273 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2274 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); 2274 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2275 mutex_init(&fs_info->dev_replace.lock_management_lock); 2275 rwlock_init(&fs_info->dev_replace.lock);
2276 mutex_init(&fs_info->dev_replace.lock); 2276 atomic_set(&fs_info->dev_replace.read_locks, 0);
2277 atomic_set(&fs_info->dev_replace.blocking_readers, 0);
2277 init_waitqueue_head(&fs_info->replace_wait); 2278 init_waitqueue_head(&fs_info->replace_wait);
2279 init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
2278} 2280}
2279 2281
2280static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) 2282static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2385,7 +2387,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2385 return -EIO; 2387 return -EIO;
2386 } 2388 }
2387 2389
2388 log_tree_root = btrfs_alloc_root(fs_info); 2390 log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
2389 if (!log_tree_root) 2391 if (!log_tree_root)
2390 return -ENOMEM; 2392 return -ENOMEM;
2391 2393
@@ -2510,8 +2512,8 @@ int open_ctree(struct super_block *sb,
2510 int backup_index = 0; 2512 int backup_index = 0;
2511 int max_active; 2513 int max_active;
2512 2514
2513 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2515 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
2514 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2516 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
2515 if (!tree_root || !chunk_root) { 2517 if (!tree_root || !chunk_root) {
2516 err = -ENOMEM; 2518 err = -ENOMEM;
2517 goto fail; 2519 goto fail;
@@ -2603,6 +2605,7 @@ int open_ctree(struct super_block *sb,
2603 atomic_set(&fs_info->nr_async_bios, 0); 2605 atomic_set(&fs_info->nr_async_bios, 0);
2604 atomic_set(&fs_info->defrag_running, 0); 2606 atomic_set(&fs_info->defrag_running, 0);
2605 atomic_set(&fs_info->qgroup_op_seq, 0); 2607 atomic_set(&fs_info->qgroup_op_seq, 0);
2608 atomic_set(&fs_info->reada_works_cnt, 0);
2606 atomic64_set(&fs_info->tree_mod_seq, 0); 2609 atomic64_set(&fs_info->tree_mod_seq, 0);
2607 fs_info->sb = sb; 2610 fs_info->sb = sb;
2608 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2611 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2622,7 +2625,7 @@ int open_ctree(struct super_block *sb,
2622 INIT_LIST_HEAD(&fs_info->ordered_roots); 2625 INIT_LIST_HEAD(&fs_info->ordered_roots);
2623 spin_lock_init(&fs_info->ordered_root_lock); 2626 spin_lock_init(&fs_info->ordered_root_lock);
2624 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), 2627 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
2625 GFP_NOFS); 2628 GFP_KERNEL);
2626 if (!fs_info->delayed_root) { 2629 if (!fs_info->delayed_root) {
2627 err = -ENOMEM; 2630 err = -ENOMEM;
2628 goto fail_iput; 2631 goto fail_iput;
@@ -2750,7 +2753,7 @@ int open_ctree(struct super_block *sb,
2750 */ 2753 */
2751 fs_info->compress_type = BTRFS_COMPRESS_ZLIB; 2754 fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
2752 2755
2753 ret = btrfs_parse_options(tree_root, options); 2756 ret = btrfs_parse_options(tree_root, options, sb->s_flags);
2754 if (ret) { 2757 if (ret) {
2755 err = ret; 2758 err = ret;
2756 goto fail_alloc; 2759 goto fail_alloc;
@@ -3029,8 +3032,9 @@ retry_root_backup:
3029 if (ret) 3032 if (ret)
3030 goto fail_trans_kthread; 3033 goto fail_trans_kthread;
3031 3034
3032 /* do not make disk changes in broken FS */ 3035 /* do not make disk changes in broken FS or if nologreplay is given */
3033 if (btrfs_super_log_root(disk_super) != 0) { 3036 if (btrfs_super_log_root(disk_super) != 0 &&
3037 !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
3034 ret = btrfs_replay_log(fs_info, fs_devices); 3038 ret = btrfs_replay_log(fs_info, fs_devices);
3035 if (ret) { 3039 if (ret) {
3036 err = ret; 3040 err = ret;
@@ -3146,6 +3150,12 @@ retry_root_backup:
3146 3150
3147 fs_info->open = 1; 3151 fs_info->open = 1;
3148 3152
3153 /*
3154 * backuproot only affects mount behavior, and if open_ctree succeeded,
3155 * no need to keep the flag
3156 */
3157 btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
3158
3149 return 0; 3159 return 0;
3150 3160
3151fail_qgroup: 3161fail_qgroup:
@@ -3200,7 +3210,7 @@ fail:
3200 return err; 3210 return err;
3201 3211
3202recovery_tree_root: 3212recovery_tree_root:
3203 if (!btrfs_test_opt(tree_root, RECOVERY)) 3213 if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
3204 goto fail_tree_roots; 3214 goto fail_tree_roots;
3205 3215
3206 free_root_pointers(fs_info, 0); 3216 free_root_pointers(fs_info, 0);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2287c7c10be..083783b53536 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4838 u64 thresh = div_factor_fine(space_info->total_bytes, 98); 4838 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4839 4839
4840 /* If we're just plain full then async reclaim just slows us down. */ 4840 /* If we're just plain full then async reclaim just slows us down. */
4841 if (space_info->bytes_used >= thresh) 4841 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4842 return 0; 4842 return 0;
4843 4843
4844 return (used >= thresh && !btrfs_fs_closing(fs_info) && 4844 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5373 5373
5374 block_rsv->size = min_t(u64, num_bytes, SZ_512M); 5374 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5375 5375
5376 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 5376 if (block_rsv->reserved < block_rsv->size) {
5377 sinfo->bytes_reserved + sinfo->bytes_readonly + 5377 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
5378 sinfo->bytes_may_use; 5378 sinfo->bytes_reserved + sinfo->bytes_readonly +
5379 5379 sinfo->bytes_may_use;
5380 if (sinfo->total_bytes > num_bytes) { 5380 if (sinfo->total_bytes > num_bytes) {
5381 num_bytes = sinfo->total_bytes - num_bytes; 5381 num_bytes = sinfo->total_bytes - num_bytes;
5382 block_rsv->reserved += num_bytes; 5382 num_bytes = min(num_bytes,
5383 sinfo->bytes_may_use += num_bytes; 5383 block_rsv->size - block_rsv->reserved);
5384 trace_btrfs_space_reservation(fs_info, "space_info", 5384 block_rsv->reserved += num_bytes;
5385 sinfo->flags, num_bytes, 1); 5385 sinfo->bytes_may_use += num_bytes;
5386 } 5386 trace_btrfs_space_reservation(fs_info, "space_info",
5387 5387 sinfo->flags, num_bytes,
5388 if (block_rsv->reserved >= block_rsv->size) { 5388 1);
5389 }
5390 } else if (block_rsv->reserved > block_rsv->size) {
5389 num_bytes = block_rsv->reserved - block_rsv->size; 5391 num_bytes = block_rsv->reserved - block_rsv->size;
5390 sinfo->bytes_may_use -= num_bytes; 5392 sinfo->bytes_may_use -= num_bytes;
5391 trace_btrfs_space_reservation(fs_info, "space_info", 5393 trace_btrfs_space_reservation(fs_info, "space_info",
5392 sinfo->flags, num_bytes, 0); 5394 sinfo->flags, num_bytes, 0);
5393 block_rsv->reserved = block_rsv->size; 5395 block_rsv->reserved = block_rsv->size;
5394 block_rsv->full = 1;
5395 } 5396 }
5396 5397
5398 if (block_rsv->reserved == block_rsv->size)
5399 block_rsv->full = 1;
5400 else
5401 block_rsv->full = 0;
5402
5397 spin_unlock(&block_rsv->lock); 5403 spin_unlock(&block_rsv->lock);
5398 spin_unlock(&sinfo->lock); 5404 spin_unlock(&sinfo->lock);
5399} 5405}
@@ -7018,7 +7024,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7018 struct btrfs_free_cluster *cluster, 7024 struct btrfs_free_cluster *cluster,
7019 int delalloc) 7025 int delalloc)
7020{ 7026{
7021 struct btrfs_block_group_cache *used_bg; 7027 struct btrfs_block_group_cache *used_bg = NULL;
7022 bool locked = false; 7028 bool locked = false;
7023again: 7029again:
7024 spin_lock(&cluster->refill_lock); 7030 spin_lock(&cluster->refill_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 392592dc7010..76a0c8597d98 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -206,10 +206,8 @@ void extent_io_exit(void)
206 * destroy caches. 206 * destroy caches.
207 */ 207 */
208 rcu_barrier(); 208 rcu_barrier();
209 if (extent_state_cache) 209 kmem_cache_destroy(extent_state_cache);
210 kmem_cache_destroy(extent_state_cache); 210 kmem_cache_destroy(extent_buffer_cache);
211 if (extent_buffer_cache)
212 kmem_cache_destroy(extent_buffer_cache);
213 if (btrfs_bioset) 211 if (btrfs_bioset)
214 bioset_free(btrfs_bioset); 212 bioset_free(btrfs_bioset);
215} 213}
@@ -232,7 +230,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
232 if (!state) 230 if (!state)
233 return state; 231 return state;
234 state->state = 0; 232 state->state = 0;
235 state->private = 0; 233 state->failrec = NULL;
236 RB_CLEAR_NODE(&state->rb_node); 234 RB_CLEAR_NODE(&state->rb_node);
237 btrfs_leak_debug_add(&state->leak_list, &states); 235 btrfs_leak_debug_add(&state->leak_list, &states);
238 atomic_set(&state->refs, 1); 236 atomic_set(&state->refs, 1);
@@ -1844,7 +1842,8 @@ out:
1844 * set the private field for a given byte offset in the tree. If there isn't 1842 * set the private field for a given byte offset in the tree. If there isn't
1845 * an extent_state there already, this does nothing. 1843 * an extent_state there already, this does nothing.
1846 */ 1844 */
1847static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1845static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1846 struct io_failure_record *failrec)
1848{ 1847{
1849 struct rb_node *node; 1848 struct rb_node *node;
1850 struct extent_state *state; 1849 struct extent_state *state;
@@ -1865,13 +1864,14 @@ static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private
1865 ret = -ENOENT; 1864 ret = -ENOENT;
1866 goto out; 1865 goto out;
1867 } 1866 }
1868 state->private = private; 1867 state->failrec = failrec;
1869out: 1868out:
1870 spin_unlock(&tree->lock); 1869 spin_unlock(&tree->lock);
1871 return ret; 1870 return ret;
1872} 1871}
1873 1872
1874int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1873static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1874 struct io_failure_record **failrec)
1875{ 1875{
1876 struct rb_node *node; 1876 struct rb_node *node;
1877 struct extent_state *state; 1877 struct extent_state *state;
@@ -1892,7 +1892,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1892 ret = -ENOENT; 1892 ret = -ENOENT;
1893 goto out; 1893 goto out;
1894 } 1894 }
1895 *private = state->private; 1895 *failrec = state->failrec;
1896out: 1896out:
1897 spin_unlock(&tree->lock); 1897 spin_unlock(&tree->lock);
1898 return ret; 1898 return ret;
@@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1972 int err = 0; 1972 int err = 0;
1973 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 1973 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1974 1974
1975 set_state_private(failure_tree, rec->start, 0); 1975 set_state_failrec(failure_tree, rec->start, NULL);
1976 ret = clear_extent_bits(failure_tree, rec->start, 1976 ret = clear_extent_bits(failure_tree, rec->start,
1977 rec->start + rec->len - 1, 1977 rec->start + rec->len - 1,
1978 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); 1978 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
@@ -2089,7 +2089,6 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2089 unsigned int pg_offset) 2089 unsigned int pg_offset)
2090{ 2090{
2091 u64 private; 2091 u64 private;
2092 u64 private_failure;
2093 struct io_failure_record *failrec; 2092 struct io_failure_record *failrec;
2094 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; 2093 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2095 struct extent_state *state; 2094 struct extent_state *state;
@@ -2102,12 +2101,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2102 if (!ret) 2101 if (!ret)
2103 return 0; 2102 return 0;
2104 2103
2105 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, 2104 ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start,
2106 &private_failure); 2105 &failrec);
2107 if (ret) 2106 if (ret)
2108 return 0; 2107 return 0;
2109 2108
2110 failrec = (struct io_failure_record *)(unsigned long) private_failure;
2111 BUG_ON(!failrec->this_mirror); 2109 BUG_ON(!failrec->this_mirror);
2112 2110
2113 if (failrec->in_validation) { 2111 if (failrec->in_validation) {
@@ -2167,7 +2165,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2167 2165
2168 next = next_state(state); 2166 next = next_state(state);
2169 2167
2170 failrec = (struct io_failure_record *)(unsigned long)state->private; 2168 failrec = state->failrec;
2171 free_extent_state(state); 2169 free_extent_state(state);
2172 kfree(failrec); 2170 kfree(failrec);
2173 2171
@@ -2177,10 +2175,9 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2177} 2175}
2178 2176
2179int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, 2177int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2180 struct io_failure_record **failrec_ret) 2178 struct io_failure_record **failrec_ret)
2181{ 2179{
2182 struct io_failure_record *failrec; 2180 struct io_failure_record *failrec;
2183 u64 private;
2184 struct extent_map *em; 2181 struct extent_map *em;
2185 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 2182 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2186 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2183 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@@ -2188,7 +2185,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2188 int ret; 2185 int ret;
2189 u64 logical; 2186 u64 logical;
2190 2187
2191 ret = get_state_private(failure_tree, start, &private); 2188 ret = get_state_failrec(failure_tree, start, &failrec);
2192 if (ret) { 2189 if (ret) {
2193 failrec = kzalloc(sizeof(*failrec), GFP_NOFS); 2190 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2194 if (!failrec) 2191 if (!failrec)
@@ -2237,8 +2234,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2237 ret = set_extent_bits(failure_tree, start, end, 2234 ret = set_extent_bits(failure_tree, start, end,
2238 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); 2235 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2239 if (ret >= 0) 2236 if (ret >= 0)
2240 ret = set_state_private(failure_tree, start, 2237 ret = set_state_failrec(failure_tree, start, failrec);
2241 (u64)(unsigned long)failrec);
2242 /* set the bits in the inode's tree */ 2238 /* set the bits in the inode's tree */
2243 if (ret >= 0) 2239 if (ret >= 0)
2244 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, 2240 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
@@ -2248,7 +2244,6 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2248 return ret; 2244 return ret;
2249 } 2245 }
2250 } else { 2246 } else {
2251 failrec = (struct io_failure_record *)(unsigned long)private;
2252 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", 2247 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2253 failrec->logical, failrec->start, failrec->len, 2248 failrec->logical, failrec->start, failrec->len,
2254 failrec->in_validation); 2249 failrec->in_validation);
@@ -3177,7 +3172,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
3177 3172
3178 while (1) { 3173 while (1) {
3179 lock_extent(tree, start, end); 3174 lock_extent(tree, start, end);
3180 ordered = btrfs_lookup_ordered_extent(inode, start); 3175 ordered = btrfs_lookup_ordered_range(inode, start,
3176 PAGE_CACHE_SIZE);
3181 if (!ordered) 3177 if (!ordered)
3182 break; 3178 break;
3183 unlock_extent(tree, start, end); 3179 unlock_extent(tree, start, end);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 880d5292e972..5dbf92e68fbd 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -61,6 +61,7 @@
61struct extent_state; 61struct extent_state;
62struct btrfs_root; 62struct btrfs_root;
63struct btrfs_io_bio; 63struct btrfs_io_bio;
64struct io_failure_record;
64 65
65typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 66typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
66 struct bio *bio, int mirror_num, 67 struct bio *bio, int mirror_num,
@@ -111,8 +112,7 @@ struct extent_state {
111 atomic_t refs; 112 atomic_t refs;
112 unsigned state; 113 unsigned state;
113 114
114 /* for use by the FS */ 115 struct io_failure_record *failrec;
115 u64 private;
116 116
117#ifdef CONFIG_BTRFS_DEBUG 117#ifdef CONFIG_BTRFS_DEBUG
118 struct list_head leak_list; 118 struct list_head leak_list;
@@ -342,7 +342,6 @@ int extent_readpages(struct extent_io_tree *tree,
342 get_extent_t get_extent); 342 get_extent_t get_extent);
343int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 343int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
344 __u64 start, __u64 len, get_extent_t *get_extent); 344 __u64 start, __u64 len, get_extent_t *get_extent);
345int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
346void set_page_extent_mapped(struct page *page); 345void set_page_extent_mapped(struct page *page);
347 346
348struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 347struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 84fb56d5c018..cdbadeaef202 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -20,8 +20,7 @@ int __init extent_map_init(void)
20 20
21void extent_map_exit(void) 21void extent_map_exit(void)
22{ 22{
23 if (extent_map_cache) 23 kmem_cache_destroy(extent_map_cache);
24 kmem_cache_destroy(extent_map_cache);
25} 24}
26 25
27/** 26/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a67e1c828d0f..1c50a7b09b4e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -172,6 +172,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
172 u64 item_start_offset = 0; 172 u64 item_start_offset = 0;
173 u64 item_last_offset = 0; 173 u64 item_last_offset = 0;
174 u64 disk_bytenr; 174 u64 disk_bytenr;
175 u64 page_bytes_left;
175 u32 diff; 176 u32 diff;
176 int nblocks; 177 int nblocks;
177 int bio_index = 0; 178 int bio_index = 0;
@@ -220,6 +221,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
220 disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; 221 disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
221 if (dio) 222 if (dio)
222 offset = logical_offset; 223 offset = logical_offset;
224
225 page_bytes_left = bvec->bv_len;
223 while (bio_index < bio->bi_vcnt) { 226 while (bio_index < bio->bi_vcnt) {
224 if (!dio) 227 if (!dio)
225 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 228 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
@@ -243,7 +246,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
243 if (BTRFS_I(inode)->root->root_key.objectid == 246 if (BTRFS_I(inode)->root->root_key.objectid ==
244 BTRFS_DATA_RELOC_TREE_OBJECTID) { 247 BTRFS_DATA_RELOC_TREE_OBJECTID) {
245 set_extent_bits(io_tree, offset, 248 set_extent_bits(io_tree, offset,
246 offset + bvec->bv_len - 1, 249 offset + root->sectorsize - 1,
247 EXTENT_NODATASUM, GFP_NOFS); 250 EXTENT_NODATASUM, GFP_NOFS);
248 } else { 251 } else {
249 btrfs_info(BTRFS_I(inode)->root->fs_info, 252 btrfs_info(BTRFS_I(inode)->root->fs_info,
@@ -281,11 +284,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
281found: 284found:
282 csum += count * csum_size; 285 csum += count * csum_size;
283 nblocks -= count; 286 nblocks -= count;
284 bio_index += count; 287
285 while (count--) { 288 while (count--) {
286 disk_bytenr += bvec->bv_len; 289 disk_bytenr += root->sectorsize;
287 offset += bvec->bv_len; 290 offset += root->sectorsize;
288 bvec++; 291 page_bytes_left -= root->sectorsize;
292 if (!page_bytes_left) {
293 bio_index++;
294 bvec++;
295 page_bytes_left = bvec->bv_len;
296 }
297
289 } 298 }
290 } 299 }
291 btrfs_free_path(path); 300 btrfs_free_path(path);
@@ -432,6 +441,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
432 struct bio_vec *bvec = bio->bi_io_vec; 441 struct bio_vec *bvec = bio->bi_io_vec;
433 int bio_index = 0; 442 int bio_index = 0;
434 int index; 443 int index;
444 int nr_sectors;
445 int i;
435 unsigned long total_bytes = 0; 446 unsigned long total_bytes = 0;
436 unsigned long this_sum_bytes = 0; 447 unsigned long this_sum_bytes = 0;
437 u64 offset; 448 u64 offset;
@@ -459,41 +470,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
459 if (!contig) 470 if (!contig)
460 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 471 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
461 472
462 if (offset >= ordered->file_offset + ordered->len || 473 data = kmap_atomic(bvec->bv_page);
463 offset < ordered->file_offset) {
464 unsigned long bytes_left;
465 sums->len = this_sum_bytes;
466 this_sum_bytes = 0;
467 btrfs_add_ordered_sum(inode, ordered, sums);
468 btrfs_put_ordered_extent(ordered);
469 474
470 bytes_left = bio->bi_iter.bi_size - total_bytes; 475 nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
476 bvec->bv_len + root->sectorsize
477 - 1);
478
479 for (i = 0; i < nr_sectors; i++) {
480 if (offset >= ordered->file_offset + ordered->len ||
481 offset < ordered->file_offset) {
482 unsigned long bytes_left;
483
484 kunmap_atomic(data);
485 sums->len = this_sum_bytes;
486 this_sum_bytes = 0;
487 btrfs_add_ordered_sum(inode, ordered, sums);
488 btrfs_put_ordered_extent(ordered);
489
490 bytes_left = bio->bi_iter.bi_size - total_bytes;
491
492 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
493 GFP_NOFS);
494 BUG_ON(!sums); /* -ENOMEM */
495 sums->len = bytes_left;
496 ordered = btrfs_lookup_ordered_extent(inode,
497 offset);
498 ASSERT(ordered); /* Logic error */
499 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
500 + total_bytes;
501 index = 0;
502
503 data = kmap_atomic(bvec->bv_page);
504 }
471 505
472 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), 506 sums->sums[index] = ~(u32)0;
473 GFP_NOFS); 507 sums->sums[index]
474 BUG_ON(!sums); /* -ENOMEM */ 508 = btrfs_csum_data(data + bvec->bv_offset
475 sums->len = bytes_left; 509 + (i * root->sectorsize),
476 ordered = btrfs_lookup_ordered_extent(inode, offset); 510 sums->sums[index],
477 BUG_ON(!ordered); /* Logic error */ 511 root->sectorsize);
478 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + 512 btrfs_csum_final(sums->sums[index],
479 total_bytes; 513 (char *)(sums->sums + index));
480 index = 0; 514 index++;
515 offset += root->sectorsize;
516 this_sum_bytes += root->sectorsize;
517 total_bytes += root->sectorsize;
481 } 518 }
482 519
483 data = kmap_atomic(bvec->bv_page);
484 sums->sums[index] = ~(u32)0;
485 sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
486 sums->sums[index],
487 bvec->bv_len);
488 kunmap_atomic(data); 520 kunmap_atomic(data);
489 btrfs_csum_final(sums->sums[index],
490 (char *)(sums->sums + index));
491 521
492 bio_index++; 522 bio_index++;
493 index++;
494 total_bytes += bvec->bv_len;
495 this_sum_bytes += bvec->bv_len;
496 offset += bvec->bv_len;
497 bvec++; 523 bvec++;
498 } 524 }
499 this_sum_bytes = 0; 525 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 098bb8f690c9..03de2466db23 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -498,7 +498,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
498 loff_t isize = i_size_read(inode); 498 loff_t isize = i_size_read(inode);
499 499
500 start_pos = pos & ~((u64)root->sectorsize - 1); 500 start_pos = pos & ~((u64)root->sectorsize - 1);
501 num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); 501 num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
502 502
503 end_of_last_block = start_pos + num_bytes - 1; 503 end_of_last_block = start_pos + num_bytes - 1;
504 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 504 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1379,16 +1379,19 @@ fail:
1379static noinline int 1379static noinline int
1380lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, 1380lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1381 size_t num_pages, loff_t pos, 1381 size_t num_pages, loff_t pos,
1382 size_t write_bytes,
1382 u64 *lockstart, u64 *lockend, 1383 u64 *lockstart, u64 *lockend,
1383 struct extent_state **cached_state) 1384 struct extent_state **cached_state)
1384{ 1385{
1386 struct btrfs_root *root = BTRFS_I(inode)->root;
1385 u64 start_pos; 1387 u64 start_pos;
1386 u64 last_pos; 1388 u64 last_pos;
1387 int i; 1389 int i;
1388 int ret = 0; 1390 int ret = 0;
1389 1391
1390 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); 1392 start_pos = round_down(pos, root->sectorsize);
1391 last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1; 1393 last_pos = start_pos
1394 + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
1392 1395
1393 if (start_pos < inode->i_size) { 1396 if (start_pos < inode->i_size) {
1394 struct btrfs_ordered_extent *ordered; 1397 struct btrfs_ordered_extent *ordered;
@@ -1503,6 +1506,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1503 1506
1504 while (iov_iter_count(i) > 0) { 1507 while (iov_iter_count(i) > 0) {
1505 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1508 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1509 size_t sector_offset;
1506 size_t write_bytes = min(iov_iter_count(i), 1510 size_t write_bytes = min(iov_iter_count(i),
1507 nrptrs * (size_t)PAGE_CACHE_SIZE - 1511 nrptrs * (size_t)PAGE_CACHE_SIZE -
1508 offset); 1512 offset);
@@ -1511,6 +1515,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1511 size_t reserve_bytes; 1515 size_t reserve_bytes;
1512 size_t dirty_pages; 1516 size_t dirty_pages;
1513 size_t copied; 1517 size_t copied;
1518 size_t dirty_sectors;
1519 size_t num_sectors;
1514 1520
1515 WARN_ON(num_pages > nrptrs); 1521 WARN_ON(num_pages > nrptrs);
1516 1522
@@ -1523,29 +1529,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1523 break; 1529 break;
1524 } 1530 }
1525 1531
1526 reserve_bytes = num_pages << PAGE_CACHE_SHIFT; 1532 sector_offset = pos & (root->sectorsize - 1);
1533 reserve_bytes = round_up(write_bytes + sector_offset,
1534 root->sectorsize);
1527 1535
1528 if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | 1536 if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1529 BTRFS_INODE_PREALLOC)) { 1537 BTRFS_INODE_PREALLOC)) &&
1530 ret = check_can_nocow(inode, pos, &write_bytes); 1538 check_can_nocow(inode, pos, &write_bytes) > 0) {
1531 if (ret < 0) 1539 /*
1532 break; 1540 * For nodata cow case, no need to reserve
1533 if (ret > 0) { 1541 * data space.
1534 /* 1542 */
1535 * For nodata cow case, no need to reserve 1543 only_release_metadata = true;
1536 * data space. 1544 /*
1537 */ 1545 * our prealloc extent may be smaller than
1538 only_release_metadata = true; 1546 * write_bytes, so scale down.
1539 /* 1547 */
1540 * our prealloc extent may be smaller than 1548 num_pages = DIV_ROUND_UP(write_bytes + offset,
1541 * write_bytes, so scale down. 1549 PAGE_CACHE_SIZE);
1542 */ 1550 reserve_bytes = round_up(write_bytes + sector_offset,
1543 num_pages = DIV_ROUND_UP(write_bytes + offset, 1551 root->sectorsize);
1544 PAGE_CACHE_SIZE); 1552 goto reserve_metadata;
1545 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1546 goto reserve_metadata;
1547 }
1548 } 1553 }
1554
1549 ret = btrfs_check_data_free_space(inode, pos, write_bytes); 1555 ret = btrfs_check_data_free_space(inode, pos, write_bytes);
1550 if (ret < 0) 1556 if (ret < 0)
1551 break; 1557 break;
@@ -1576,8 +1582,8 @@ again:
1576 break; 1582 break;
1577 1583
1578 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, 1584 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
1579 pos, &lockstart, &lockend, 1585 pos, write_bytes, &lockstart,
1580 &cached_state); 1586 &lockend, &cached_state);
1581 if (ret < 0) { 1587 if (ret < 0) {
1582 if (ret == -EAGAIN) 1588 if (ret == -EAGAIN)
1583 goto again; 1589 goto again;
@@ -1612,9 +1618,16 @@ again:
1612 * we still have an outstanding extent for the chunk we actually 1618 * we still have an outstanding extent for the chunk we actually
1613 * managed to copy. 1619 * managed to copy.
1614 */ 1620 */
1615 if (num_pages > dirty_pages) { 1621 num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1616 release_bytes = (num_pages - dirty_pages) << 1622 reserve_bytes);
1617 PAGE_CACHE_SHIFT; 1623 dirty_sectors = round_up(copied + sector_offset,
1624 root->sectorsize);
1625 dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1626 dirty_sectors);
1627
1628 if (num_sectors > dirty_sectors) {
1629 release_bytes = (write_bytes - copied)
1630 & ~((u64)root->sectorsize - 1);
1618 if (copied > 0) { 1631 if (copied > 0) {
1619 spin_lock(&BTRFS_I(inode)->lock); 1632 spin_lock(&BTRFS_I(inode)->lock);
1620 BTRFS_I(inode)->outstanding_extents++; 1633 BTRFS_I(inode)->outstanding_extents++;
@@ -1633,7 +1646,8 @@ again:
1633 } 1646 }
1634 } 1647 }
1635 1648
1636 release_bytes = dirty_pages << PAGE_CACHE_SHIFT; 1649 release_bytes = round_up(copied + sector_offset,
1650 root->sectorsize);
1637 1651
1638 if (copied > 0) 1652 if (copied > 0)
1639 ret = btrfs_dirty_pages(root, inode, pages, 1653 ret = btrfs_dirty_pages(root, inode, pages,
@@ -1654,8 +1668,7 @@ again:
1654 1668
1655 if (only_release_metadata && copied > 0) { 1669 if (only_release_metadata && copied > 0) {
1656 lockstart = round_down(pos, root->sectorsize); 1670 lockstart = round_down(pos, root->sectorsize);
1657 lockend = lockstart + 1671 lockend = round_up(pos + copied, root->sectorsize) - 1;
1658 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1659 1672
1660 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 1673 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
1661 lockend, EXTENT_NORESERVE, NULL, 1674 lockend, EXTENT_NORESERVE, NULL,
@@ -1761,6 +1774,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1761 ssize_t err; 1774 ssize_t err;
1762 loff_t pos; 1775 loff_t pos;
1763 size_t count; 1776 size_t count;
1777 loff_t oldsize;
1778 int clean_page = 0;
1764 1779
1765 inode_lock(inode); 1780 inode_lock(inode);
1766 err = generic_write_checks(iocb, from); 1781 err = generic_write_checks(iocb, from);
@@ -1799,14 +1814,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1799 pos = iocb->ki_pos; 1814 pos = iocb->ki_pos;
1800 count = iov_iter_count(from); 1815 count = iov_iter_count(from);
1801 start_pos = round_down(pos, root->sectorsize); 1816 start_pos = round_down(pos, root->sectorsize);
1802 if (start_pos > i_size_read(inode)) { 1817 oldsize = i_size_read(inode);
1818 if (start_pos > oldsize) {
1803 /* Expand hole size to cover write data, preventing empty gap */ 1819 /* Expand hole size to cover write data, preventing empty gap */
1804 end_pos = round_up(pos + count, root->sectorsize); 1820 end_pos = round_up(pos + count, root->sectorsize);
1805 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); 1821 err = btrfs_cont_expand(inode, oldsize, end_pos);
1806 if (err) { 1822 if (err) {
1807 inode_unlock(inode); 1823 inode_unlock(inode);
1808 goto out; 1824 goto out;
1809 } 1825 }
1826 if (start_pos > round_up(oldsize, root->sectorsize))
1827 clean_page = 1;
1810 } 1828 }
1811 1829
1812 if (sync) 1830 if (sync)
@@ -1818,6 +1836,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1818 num_written = __btrfs_buffered_write(file, from, pos); 1836 num_written = __btrfs_buffered_write(file, from, pos);
1819 if (num_written > 0) 1837 if (num_written > 0)
1820 iocb->ki_pos = pos + num_written; 1838 iocb->ki_pos = pos + num_written;
1839 if (clean_page)
1840 pagecache_isize_extended(inode, oldsize,
1841 i_size_read(inode));
1821 } 1842 }
1822 1843
1823 inode_unlock(inode); 1844 inode_unlock(inode);
@@ -2293,10 +2314,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2293 int ret = 0; 2314 int ret = 0;
2294 int err = 0; 2315 int err = 0;
2295 unsigned int rsv_count; 2316 unsigned int rsv_count;
2296 bool same_page; 2317 bool same_block;
2297 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2318 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2298 u64 ino_size; 2319 u64 ino_size;
2299 bool truncated_page = false; 2320 bool truncated_block = false;
2300 bool updated_inode = false; 2321 bool updated_inode = false;
2301 2322
2302 ret = btrfs_wait_ordered_range(inode, offset, len); 2323 ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2304,7 +2325,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2304 return ret; 2325 return ret;
2305 2326
2306 inode_lock(inode); 2327 inode_lock(inode);
2307 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); 2328 ino_size = round_up(inode->i_size, root->sectorsize);
2308 ret = find_first_non_hole(inode, &offset, &len); 2329 ret = find_first_non_hole(inode, &offset, &len);
2309 if (ret < 0) 2330 if (ret < 0)
2310 goto out_only_mutex; 2331 goto out_only_mutex;
@@ -2317,31 +2338,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2317 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); 2338 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2318 lockend = round_down(offset + len, 2339 lockend = round_down(offset + len,
2319 BTRFS_I(inode)->root->sectorsize) - 1; 2340 BTRFS_I(inode)->root->sectorsize) - 1;
2320 same_page = ((offset >> PAGE_CACHE_SHIFT) == 2341 same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
2321 ((offset + len - 1) >> PAGE_CACHE_SHIFT)); 2342 == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
2322
2323 /* 2343 /*
2324 * We needn't truncate any page which is beyond the end of the file 2344 * We needn't truncate any block which is beyond the end of the file
2325 * because we are sure there is no data there. 2345 * because we are sure there is no data there.
2326 */ 2346 */
2327 /* 2347 /*
2328 * Only do this if we are in the same page and we aren't doing the 2348 * Only do this if we are in the same block and we aren't doing the
2329 * entire page. 2349 * entire block.
2330 */ 2350 */
2331 if (same_page && len < PAGE_CACHE_SIZE) { 2351 if (same_block && len < root->sectorsize) {
2332 if (offset < ino_size) { 2352 if (offset < ino_size) {
2333 truncated_page = true; 2353 truncated_block = true;
2334 ret = btrfs_truncate_page(inode, offset, len, 0); 2354 ret = btrfs_truncate_block(inode, offset, len, 0);
2335 } else { 2355 } else {
2336 ret = 0; 2356 ret = 0;
2337 } 2357 }
2338 goto out_only_mutex; 2358 goto out_only_mutex;
2339 } 2359 }
2340 2360
2341 /* zero back part of the first page */ 2361 /* zero back part of the first block */
2342 if (offset < ino_size) { 2362 if (offset < ino_size) {
2343 truncated_page = true; 2363 truncated_block = true;
2344 ret = btrfs_truncate_page(inode, offset, 0, 0); 2364 ret = btrfs_truncate_block(inode, offset, 0, 0);
2345 if (ret) { 2365 if (ret) {
2346 inode_unlock(inode); 2366 inode_unlock(inode);
2347 return ret; 2367 return ret;
@@ -2376,9 +2396,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2376 if (!ret) { 2396 if (!ret) {
2377 /* zero the front end of the last page */ 2397 /* zero the front end of the last page */
2378 if (tail_start + tail_len < ino_size) { 2398 if (tail_start + tail_len < ino_size) {
2379 truncated_page = true; 2399 truncated_block = true;
2380 ret = btrfs_truncate_page(inode, 2400 ret = btrfs_truncate_block(inode,
2381 tail_start + tail_len, 0, 1); 2401 tail_start + tail_len,
2402 0, 1);
2382 if (ret) 2403 if (ret)
2383 goto out_only_mutex; 2404 goto out_only_mutex;
2384 } 2405 }
@@ -2544,7 +2565,7 @@ out_trans:
2544 goto out_free; 2565 goto out_free;
2545 2566
2546 inode_inc_iversion(inode); 2567 inode_inc_iversion(inode);
2547 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2568 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
2548 2569
2549 trans->block_rsv = &root->fs_info->trans_block_rsv; 2570 trans->block_rsv = &root->fs_info->trans_block_rsv;
2550 ret = btrfs_update_inode(trans, root, inode); 2571 ret = btrfs_update_inode(trans, root, inode);
@@ -2558,7 +2579,7 @@ out:
2558 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2579 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2559 &cached_state, GFP_NOFS); 2580 &cached_state, GFP_NOFS);
2560out_only_mutex: 2581out_only_mutex:
2561 if (!updated_inode && truncated_page && !ret && !err) { 2582 if (!updated_inode && truncated_block && !ret && !err) {
2562 /* 2583 /*
2563 * If we only end up zeroing part of a page, we still need to 2584 * If we only end up zeroing part of a page, we still need to
2564 * update the inode item, so that all the time fields are 2585 * update the inode item, so that all the time fields are
@@ -2611,7 +2632,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
2611 return 0; 2632 return 0;
2612 } 2633 }
2613insert: 2634insert:
2614 range = kmalloc(sizeof(*range), GFP_NOFS); 2635 range = kmalloc(sizeof(*range), GFP_KERNEL);
2615 if (!range) 2636 if (!range)
2616 return -ENOMEM; 2637 return -ENOMEM;
2617 range->start = start; 2638 range->start = start;
@@ -2678,10 +2699,10 @@ static long btrfs_fallocate(struct file *file, int mode,
2678 } else if (offset + len > inode->i_size) { 2699 } else if (offset + len > inode->i_size) {
2679 /* 2700 /*
2680 * If we are fallocating from the end of the file onward we 2701 * If we are fallocating from the end of the file onward we
2681 * need to zero out the end of the page if i_size lands in the 2702 * need to zero out the end of the block if i_size lands in the
2682 * middle of a page. 2703 * middle of a block.
2683 */ 2704 */
2684 ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); 2705 ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
2685 if (ret) 2706 if (ret)
2686 goto out; 2707 goto out;
2687 } 2708 }
@@ -2712,7 +2733,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2712 btrfs_put_ordered_extent(ordered); 2733 btrfs_put_ordered_extent(ordered);
2713 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 2734 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
2714 alloc_start, locked_end, 2735 alloc_start, locked_end,
2715 &cached_state, GFP_NOFS); 2736 &cached_state, GFP_KERNEL);
2716 /* 2737 /*
2717 * we can't wait on the range with the transaction 2738 * we can't wait on the range with the transaction
2718 * running or with the extent lock held 2739 * running or with the extent lock held
@@ -2794,7 +2815,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2794 if (IS_ERR(trans)) { 2815 if (IS_ERR(trans)) {
2795 ret = PTR_ERR(trans); 2816 ret = PTR_ERR(trans);
2796 } else { 2817 } else {
2797 inode->i_ctime = CURRENT_TIME; 2818 inode->i_ctime = current_fs_time(inode->i_sb);
2798 i_size_write(inode, actual_end); 2819 i_size_write(inode, actual_end);
2799 btrfs_ordered_update_i_size(inode, actual_end, NULL); 2820 btrfs_ordered_update_i_size(inode, actual_end, NULL);
2800 ret = btrfs_update_inode(trans, root, inode); 2821 ret = btrfs_update_inode(trans, root, inode);
@@ -2806,7 +2827,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2806 } 2827 }
2807out_unlock: 2828out_unlock:
2808 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 2829 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
2809 &cached_state, GFP_NOFS); 2830 &cached_state, GFP_KERNEL);
2810out: 2831out:
2811 /* 2832 /*
2812 * As we waited the extent range, the data_rsv_map must be empty 2833 * As we waited the extent range, the data_rsv_map must be empty
@@ -2939,8 +2960,7 @@ const struct file_operations btrfs_file_operations = {
2939 2960
2940void btrfs_auto_defrag_exit(void) 2961void btrfs_auto_defrag_exit(void)
2941{ 2962{
2942 if (btrfs_inode_defrag_cachep) 2963 kmem_cache_destroy(btrfs_inode_defrag_cachep);
2943 kmem_cache_destroy(btrfs_inode_defrag_cachep);
2944} 2964}
2945 2965
2946int btrfs_auto_defrag_init(void) 2966int btrfs_auto_defrag_init(void)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d96f5cf38a2d..25dcff71e451 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -263,7 +263,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
263 data_len = compressed_size; 263 data_len = compressed_size;
264 264
265 if (start > 0 || 265 if (start > 0 ||
266 actual_end > PAGE_CACHE_SIZE || 266 actual_end > root->sectorsize ||
267 data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || 267 data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
268 (!compressed_size && 268 (!compressed_size &&
269 (actual_end & (root->sectorsize - 1)) == 0) || 269 (actual_end & (root->sectorsize - 1)) == 0) ||
@@ -2002,7 +2002,8 @@ again:
2002 if (PagePrivate2(page)) 2002 if (PagePrivate2(page))
2003 goto out; 2003 goto out;
2004 2004
2005 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2005 ordered = btrfs_lookup_ordered_range(inode, page_start,
2006 PAGE_CACHE_SIZE);
2006 if (ordered) { 2007 if (ordered) {
2007 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, 2008 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
2008 page_end, &cached_state, GFP_NOFS); 2009 page_end, &cached_state, GFP_NOFS);
@@ -4013,7 +4014,8 @@ err:
4013 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 4014 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
4014 inode_inc_iversion(inode); 4015 inode_inc_iversion(inode);
4015 inode_inc_iversion(dir); 4016 inode_inc_iversion(dir);
4016 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; 4017 inode->i_ctime = dir->i_mtime =
4018 dir->i_ctime = current_fs_time(inode->i_sb);
4017 ret = btrfs_update_inode(trans, root, dir); 4019 ret = btrfs_update_inode(trans, root, dir);
4018out: 4020out:
4019 return ret; 4021 return ret;
@@ -4156,7 +4158,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4156 4158
4157 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 4159 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
4158 inode_inc_iversion(dir); 4160 inode_inc_iversion(dir);
4159 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 4161 dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
4160 ret = btrfs_update_inode_fallback(trans, root, dir); 4162 ret = btrfs_update_inode_fallback(trans, root, dir);
4161 if (ret) 4163 if (ret)
4162 btrfs_abort_transaction(trans, root, ret); 4164 btrfs_abort_transaction(trans, root, ret);
@@ -4211,11 +4213,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
4211{ 4213{
4212 int ret; 4214 int ret;
4213 4215
4216 /*
4217 * This is only used to apply pressure to the enospc system, we don't
4218 * intend to use this reservation at all.
4219 */
4214 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); 4220 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
4221 bytes_deleted *= root->nodesize;
4215 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, 4222 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
4216 bytes_deleted, BTRFS_RESERVE_NO_FLUSH); 4223 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
4217 if (!ret) 4224 if (!ret) {
4225 trace_btrfs_space_reservation(root->fs_info, "transaction",
4226 trans->transid,
4227 bytes_deleted, 1);
4218 trans->bytes_reserved += bytes_deleted; 4228 trans->bytes_reserved += bytes_deleted;
4229 }
4219 return ret; 4230 return ret;
4220 4231
4221} 4232}
@@ -4248,7 +4259,8 @@ static int truncate_inline_extent(struct inode *inode,
4248 * read the extent item from disk (data not in the page cache). 4259 * read the extent item from disk (data not in the page cache).
4249 */ 4260 */
4250 btrfs_release_path(path); 4261 btrfs_release_path(path);
4251 return btrfs_truncate_page(inode, offset, page_end - offset, 0); 4262 return btrfs_truncate_block(inode, offset, page_end - offset,
4263 0);
4252 } 4264 }
4253 4265
4254 btrfs_set_file_extent_ram_bytes(leaf, fi, size); 4266 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
@@ -4601,17 +4613,17 @@ error:
4601} 4613}
4602 4614
4603/* 4615/*
4604 * btrfs_truncate_page - read, zero a chunk and write a page 4616 * btrfs_truncate_block - read, zero a chunk and write a block
4605 * @inode - inode that we're zeroing 4617 * @inode - inode that we're zeroing
4606 * @from - the offset to start zeroing 4618 * @from - the offset to start zeroing
4607 * @len - the length to zero, 0 to zero the entire range respective to the 4619 * @len - the length to zero, 0 to zero the entire range respective to the
4608 * offset 4620 * offset
4609 * @front - zero up to the offset instead of from the offset on 4621 * @front - zero up to the offset instead of from the offset on
4610 * 4622 *
4611 * This will find the page for the "from" offset and cow the page and zero the 4623 * This will find the block for the "from" offset and cow the block and zero the
4612 * part we want to zero. This is used with truncate and hole punching. 4624 * part we want to zero. This is used with truncate and hole punching.
4613 */ 4625 */
4614int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, 4626int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
4615 int front) 4627 int front)
4616{ 4628{
4617 struct address_space *mapping = inode->i_mapping; 4629 struct address_space *mapping = inode->i_mapping;
@@ -4622,18 +4634,19 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
4622 char *kaddr; 4634 char *kaddr;
4623 u32 blocksize = root->sectorsize; 4635 u32 blocksize = root->sectorsize;
4624 pgoff_t index = from >> PAGE_CACHE_SHIFT; 4636 pgoff_t index = from >> PAGE_CACHE_SHIFT;
4625 unsigned offset = from & (PAGE_CACHE_SIZE-1); 4637 unsigned offset = from & (blocksize - 1);
4626 struct page *page; 4638 struct page *page;
4627 gfp_t mask = btrfs_alloc_write_mask(mapping); 4639 gfp_t mask = btrfs_alloc_write_mask(mapping);
4628 int ret = 0; 4640 int ret = 0;
4629 u64 page_start; 4641 u64 block_start;
4630 u64 page_end; 4642 u64 block_end;
4631 4643
4632 if ((offset & (blocksize - 1)) == 0 && 4644 if ((offset & (blocksize - 1)) == 0 &&
4633 (!len || ((len & (blocksize - 1)) == 0))) 4645 (!len || ((len & (blocksize - 1)) == 0)))
4634 goto out; 4646 goto out;
4647
4635 ret = btrfs_delalloc_reserve_space(inode, 4648 ret = btrfs_delalloc_reserve_space(inode,
4636 round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE); 4649 round_down(from, blocksize), blocksize);
4637 if (ret) 4650 if (ret)
4638 goto out; 4651 goto out;
4639 4652
@@ -4641,14 +4654,14 @@ again:
4641 page = find_or_create_page(mapping, index, mask); 4654 page = find_or_create_page(mapping, index, mask);
4642 if (!page) { 4655 if (!page) {
4643 btrfs_delalloc_release_space(inode, 4656 btrfs_delalloc_release_space(inode,
4644 round_down(from, PAGE_CACHE_SIZE), 4657 round_down(from, blocksize),
4645 PAGE_CACHE_SIZE); 4658 blocksize);
4646 ret = -ENOMEM; 4659 ret = -ENOMEM;
4647 goto out; 4660 goto out;
4648 } 4661 }
4649 4662
4650 page_start = page_offset(page); 4663 block_start = round_down(from, blocksize);
4651 page_end = page_start + PAGE_CACHE_SIZE - 1; 4664 block_end = block_start + blocksize - 1;
4652 4665
4653 if (!PageUptodate(page)) { 4666 if (!PageUptodate(page)) {
4654 ret = btrfs_readpage(NULL, page); 4667 ret = btrfs_readpage(NULL, page);
@@ -4665,12 +4678,12 @@ again:
4665 } 4678 }
4666 wait_on_page_writeback(page); 4679 wait_on_page_writeback(page);
4667 4680
4668 lock_extent_bits(io_tree, page_start, page_end, &cached_state); 4681 lock_extent_bits(io_tree, block_start, block_end, &cached_state);
4669 set_page_extent_mapped(page); 4682 set_page_extent_mapped(page);
4670 4683
4671 ordered = btrfs_lookup_ordered_extent(inode, page_start); 4684 ordered = btrfs_lookup_ordered_extent(inode, block_start);
4672 if (ordered) { 4685 if (ordered) {
4673 unlock_extent_cached(io_tree, page_start, page_end, 4686 unlock_extent_cached(io_tree, block_start, block_end,
4674 &cached_state, GFP_NOFS); 4687 &cached_state, GFP_NOFS);
4675 unlock_page(page); 4688 unlock_page(page);
4676 page_cache_release(page); 4689 page_cache_release(page);
@@ -4679,39 +4692,41 @@ again:
4679 goto again; 4692 goto again;
4680 } 4693 }
4681 4694
4682 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, 4695 clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
4683 EXTENT_DIRTY | EXTENT_DELALLOC | 4696 EXTENT_DIRTY | EXTENT_DELALLOC |
4684 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 4697 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4685 0, 0, &cached_state, GFP_NOFS); 4698 0, 0, &cached_state, GFP_NOFS);
4686 4699
4687 ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 4700 ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
4688 &cached_state); 4701 &cached_state);
4689 if (ret) { 4702 if (ret) {
4690 unlock_extent_cached(io_tree, page_start, page_end, 4703 unlock_extent_cached(io_tree, block_start, block_end,
4691 &cached_state, GFP_NOFS); 4704 &cached_state, GFP_NOFS);
4692 goto out_unlock; 4705 goto out_unlock;
4693 } 4706 }
4694 4707
4695 if (offset != PAGE_CACHE_SIZE) { 4708 if (offset != blocksize) {
4696 if (!len) 4709 if (!len)
4697 len = PAGE_CACHE_SIZE - offset; 4710 len = blocksize - offset;
4698 kaddr = kmap(page); 4711 kaddr = kmap(page);
4699 if (front) 4712 if (front)
4700 memset(kaddr, 0, offset); 4713 memset(kaddr + (block_start - page_offset(page)),
4714 0, offset);
4701 else 4715 else
4702 memset(kaddr + offset, 0, len); 4716 memset(kaddr + (block_start - page_offset(page)) + offset,
4717 0, len);
4703 flush_dcache_page(page); 4718 flush_dcache_page(page);
4704 kunmap(page); 4719 kunmap(page);
4705 } 4720 }
4706 ClearPageChecked(page); 4721 ClearPageChecked(page);
4707 set_page_dirty(page); 4722 set_page_dirty(page);
4708 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, 4723 unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
4709 GFP_NOFS); 4724 GFP_NOFS);
4710 4725
4711out_unlock: 4726out_unlock:
4712 if (ret) 4727 if (ret)
4713 btrfs_delalloc_release_space(inode, page_start, 4728 btrfs_delalloc_release_space(inode, block_start,
4714 PAGE_CACHE_SIZE); 4729 blocksize);
4715 unlock_page(page); 4730 unlock_page(page);
4716 page_cache_release(page); 4731 page_cache_release(page);
4717out: 4732out:
@@ -4782,11 +4797,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4782 int err = 0; 4797 int err = 0;
4783 4798
4784 /* 4799 /*
4785 * If our size started in the middle of a page we need to zero out the 4800 * If our size started in the middle of a block we need to zero out the
4786 * rest of the page before we expand the i_size, otherwise we could 4801 * rest of the block before we expand the i_size, otherwise we could
4787 * expose stale data. 4802 * expose stale data.
4788 */ 4803 */
4789 err = btrfs_truncate_page(inode, oldsize, 0, 0); 4804 err = btrfs_truncate_block(inode, oldsize, 0, 0);
4790 if (err) 4805 if (err)
4791 return err; 4806 return err;
4792 4807
@@ -4895,7 +4910,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4895 } 4910 }
4896 4911
4897 if (newsize > oldsize) { 4912 if (newsize > oldsize) {
4898 truncate_pagecache(inode, newsize);
4899 /* 4913 /*
4900 * Don't do an expanding truncate while snapshoting is ongoing. 4914 * Don't do an expanding truncate while snapshoting is ongoing.
4901 * This is to ensure the snapshot captures a fully consistent 4915 * This is to ensure the snapshot captures a fully consistent
@@ -4918,6 +4932,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4918 4932
4919 i_size_write(inode, newsize); 4933 i_size_write(inode, newsize);
4920 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); 4934 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
4935 pagecache_isize_extended(inode, oldsize, newsize);
4921 ret = btrfs_update_inode(trans, root, inode); 4936 ret = btrfs_update_inode(trans, root, inode);
4922 btrfs_end_write_no_snapshoting(root); 4937 btrfs_end_write_no_snapshoting(root);
4923 btrfs_end_transaction(trans, root); 4938 btrfs_end_transaction(trans, root);
@@ -5588,7 +5603,7 @@ static struct inode *new_simple_dir(struct super_block *s,
5588 inode->i_op = &btrfs_dir_ro_inode_operations; 5603 inode->i_op = &btrfs_dir_ro_inode_operations;
5589 inode->i_fop = &simple_dir_operations; 5604 inode->i_fop = &simple_dir_operations;
5590 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; 5605 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5591 inode->i_mtime = CURRENT_TIME; 5606 inode->i_mtime = current_fs_time(inode->i_sb);
5592 inode->i_atime = inode->i_mtime; 5607 inode->i_atime = inode->i_mtime;
5593 inode->i_ctime = inode->i_mtime; 5608 inode->i_ctime = inode->i_mtime;
5594 BTRFS_I(inode)->i_otime = inode->i_mtime; 5609 BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -5790,7 +5805,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5790 if (name_len <= sizeof(tmp_name)) { 5805 if (name_len <= sizeof(tmp_name)) {
5791 name_ptr = tmp_name; 5806 name_ptr = tmp_name;
5792 } else { 5807 } else {
5793 name_ptr = kmalloc(name_len, GFP_NOFS); 5808 name_ptr = kmalloc(name_len, GFP_KERNEL);
5794 if (!name_ptr) { 5809 if (!name_ptr) {
5795 ret = -ENOMEM; 5810 ret = -ENOMEM;
5796 goto err; 5811 goto err;
@@ -6172,7 +6187,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
6172 inode_init_owner(inode, dir, mode); 6187 inode_init_owner(inode, dir, mode);
6173 inode_set_bytes(inode, 0); 6188 inode_set_bytes(inode, 0);
6174 6189
6175 inode->i_mtime = CURRENT_TIME; 6190 inode->i_mtime = current_fs_time(inode->i_sb);
6176 inode->i_atime = inode->i_mtime; 6191 inode->i_atime = inode->i_mtime;
6177 inode->i_ctime = inode->i_mtime; 6192 inode->i_ctime = inode->i_mtime;
6178 BTRFS_I(inode)->i_otime = inode->i_mtime; 6193 BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -6285,7 +6300,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
6285 btrfs_i_size_write(parent_inode, parent_inode->i_size + 6300 btrfs_i_size_write(parent_inode, parent_inode->i_size +
6286 name_len * 2); 6301 name_len * 2);
6287 inode_inc_iversion(parent_inode); 6302 inode_inc_iversion(parent_inode);
6288 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 6303 parent_inode->i_mtime = parent_inode->i_ctime =
6304 current_fs_time(parent_inode->i_sb);
6289 ret = btrfs_update_inode(trans, root, parent_inode); 6305 ret = btrfs_update_inode(trans, root, parent_inode);
6290 if (ret) 6306 if (ret)
6291 btrfs_abort_transaction(trans, root, ret); 6307 btrfs_abort_transaction(trans, root, ret);
@@ -6503,7 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6503 BTRFS_I(inode)->dir_index = 0ULL; 6519 BTRFS_I(inode)->dir_index = 0ULL;
6504 inc_nlink(inode); 6520 inc_nlink(inode);
6505 inode_inc_iversion(inode); 6521 inode_inc_iversion(inode);
6506 inode->i_ctime = CURRENT_TIME; 6522 inode->i_ctime = current_fs_time(inode->i_sb);
6507 ihold(inode); 6523 ihold(inode);
6508 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); 6524 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6509 6525
@@ -7764,9 +7780,9 @@ static int btrfs_check_dio_repairable(struct inode *inode,
7764} 7780}
7765 7781
7766static int dio_read_error(struct inode *inode, struct bio *failed_bio, 7782static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7767 struct page *page, u64 start, u64 end, 7783 struct page *page, unsigned int pgoff,
7768 int failed_mirror, bio_end_io_t *repair_endio, 7784 u64 start, u64 end, int failed_mirror,
7769 void *repair_arg) 7785 bio_end_io_t *repair_endio, void *repair_arg)
7770{ 7786{
7771 struct io_failure_record *failrec; 7787 struct io_failure_record *failrec;
7772 struct bio *bio; 7788 struct bio *bio;
@@ -7787,7 +7803,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7787 return -EIO; 7803 return -EIO;
7788 } 7804 }
7789 7805
7790 if (failed_bio->bi_vcnt > 1) 7806 if ((failed_bio->bi_vcnt > 1)
7807 || (failed_bio->bi_io_vec->bv_len
7808 > BTRFS_I(inode)->root->sectorsize))
7791 read_mode = READ_SYNC | REQ_FAILFAST_DEV; 7809 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
7792 else 7810 else
7793 read_mode = READ_SYNC; 7811 read_mode = READ_SYNC;
@@ -7795,7 +7813,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7795 isector = start - btrfs_io_bio(failed_bio)->logical; 7813 isector = start - btrfs_io_bio(failed_bio)->logical;
7796 isector >>= inode->i_sb->s_blocksize_bits; 7814 isector >>= inode->i_sb->s_blocksize_bits;
7797 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, 7815 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
7798 0, isector, repair_endio, repair_arg); 7816 pgoff, isector, repair_endio, repair_arg);
7799 if (!bio) { 7817 if (!bio) {
7800 free_io_failure(inode, failrec); 7818 free_io_failure(inode, failrec);
7801 return -EIO; 7819 return -EIO;
@@ -7825,12 +7843,17 @@ struct btrfs_retry_complete {
7825static void btrfs_retry_endio_nocsum(struct bio *bio) 7843static void btrfs_retry_endio_nocsum(struct bio *bio)
7826{ 7844{
7827 struct btrfs_retry_complete *done = bio->bi_private; 7845 struct btrfs_retry_complete *done = bio->bi_private;
7846 struct inode *inode;
7828 struct bio_vec *bvec; 7847 struct bio_vec *bvec;
7829 int i; 7848 int i;
7830 7849
7831 if (bio->bi_error) 7850 if (bio->bi_error)
7832 goto end; 7851 goto end;
7833 7852
7853 ASSERT(bio->bi_vcnt == 1);
7854 inode = bio->bi_io_vec->bv_page->mapping->host;
7855 ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
7856
7834 done->uptodate = 1; 7857 done->uptodate = 1;
7835 bio_for_each_segment_all(bvec, bio, i) 7858 bio_for_each_segment_all(bvec, bio, i)
7836 clean_io_failure(done->inode, done->start, bvec->bv_page, 0); 7859 clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
@@ -7842,25 +7865,35 @@ end:
7842static int __btrfs_correct_data_nocsum(struct inode *inode, 7865static int __btrfs_correct_data_nocsum(struct inode *inode,
7843 struct btrfs_io_bio *io_bio) 7866 struct btrfs_io_bio *io_bio)
7844{ 7867{
7868 struct btrfs_fs_info *fs_info;
7845 struct bio_vec *bvec; 7869 struct bio_vec *bvec;
7846 struct btrfs_retry_complete done; 7870 struct btrfs_retry_complete done;
7847 u64 start; 7871 u64 start;
7872 unsigned int pgoff;
7873 u32 sectorsize;
7874 int nr_sectors;
7848 int i; 7875 int i;
7849 int ret; 7876 int ret;
7850 7877
7878 fs_info = BTRFS_I(inode)->root->fs_info;
7879 sectorsize = BTRFS_I(inode)->root->sectorsize;
7880
7851 start = io_bio->logical; 7881 start = io_bio->logical;
7852 done.inode = inode; 7882 done.inode = inode;
7853 7883
7854 bio_for_each_segment_all(bvec, &io_bio->bio, i) { 7884 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7855try_again: 7885 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
7886 pgoff = bvec->bv_offset;
7887
7888next_block_or_try_again:
7856 done.uptodate = 0; 7889 done.uptodate = 0;
7857 done.start = start; 7890 done.start = start;
7858 init_completion(&done.done); 7891 init_completion(&done.done);
7859 7892
7860 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, 7893 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
7861 start + bvec->bv_len - 1, 7894 pgoff, start, start + sectorsize - 1,
7862 io_bio->mirror_num, 7895 io_bio->mirror_num,
7863 btrfs_retry_endio_nocsum, &done); 7896 btrfs_retry_endio_nocsum, &done);
7864 if (ret) 7897 if (ret)
7865 return ret; 7898 return ret;
7866 7899
@@ -7868,10 +7901,15 @@ try_again:
7868 7901
7869 if (!done.uptodate) { 7902 if (!done.uptodate) {
7870 /* We might have another mirror, so try again */ 7903 /* We might have another mirror, so try again */
7871 goto try_again; 7904 goto next_block_or_try_again;
7872 } 7905 }
7873 7906
7874 start += bvec->bv_len; 7907 start += sectorsize;
7908
7909 if (nr_sectors--) {
7910 pgoff += sectorsize;
7911 goto next_block_or_try_again;
7912 }
7875 } 7913 }
7876 7914
7877 return 0; 7915 return 0;
@@ -7881,7 +7919,9 @@ static void btrfs_retry_endio(struct bio *bio)
7881{ 7919{
7882 struct btrfs_retry_complete *done = bio->bi_private; 7920 struct btrfs_retry_complete *done = bio->bi_private;
7883 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 7921 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7922 struct inode *inode;
7884 struct bio_vec *bvec; 7923 struct bio_vec *bvec;
7924 u64 start;
7885 int uptodate; 7925 int uptodate;
7886 int ret; 7926 int ret;
7887 int i; 7927 int i;
@@ -7890,13 +7930,20 @@ static void btrfs_retry_endio(struct bio *bio)
7890 goto end; 7930 goto end;
7891 7931
7892 uptodate = 1; 7932 uptodate = 1;
7933
7934 start = done->start;
7935
7936 ASSERT(bio->bi_vcnt == 1);
7937 inode = bio->bi_io_vec->bv_page->mapping->host;
7938 ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
7939
7893 bio_for_each_segment_all(bvec, bio, i) { 7940 bio_for_each_segment_all(bvec, bio, i) {
7894 ret = __readpage_endio_check(done->inode, io_bio, i, 7941 ret = __readpage_endio_check(done->inode, io_bio, i,
7895 bvec->bv_page, 0, 7942 bvec->bv_page, bvec->bv_offset,
7896 done->start, bvec->bv_len); 7943 done->start, bvec->bv_len);
7897 if (!ret) 7944 if (!ret)
7898 clean_io_failure(done->inode, done->start, 7945 clean_io_failure(done->inode, done->start,
7899 bvec->bv_page, 0); 7946 bvec->bv_page, bvec->bv_offset);
7900 else 7947 else
7901 uptodate = 0; 7948 uptodate = 0;
7902 } 7949 }
@@ -7910,20 +7957,34 @@ end:
7910static int __btrfs_subio_endio_read(struct inode *inode, 7957static int __btrfs_subio_endio_read(struct inode *inode,
7911 struct btrfs_io_bio *io_bio, int err) 7958 struct btrfs_io_bio *io_bio, int err)
7912{ 7959{
7960 struct btrfs_fs_info *fs_info;
7913 struct bio_vec *bvec; 7961 struct bio_vec *bvec;
7914 struct btrfs_retry_complete done; 7962 struct btrfs_retry_complete done;
7915 u64 start; 7963 u64 start;
7916 u64 offset = 0; 7964 u64 offset = 0;
7965 u32 sectorsize;
7966 int nr_sectors;
7967 unsigned int pgoff;
7968 int csum_pos;
7917 int i; 7969 int i;
7918 int ret; 7970 int ret;
7919 7971
7972 fs_info = BTRFS_I(inode)->root->fs_info;
7973 sectorsize = BTRFS_I(inode)->root->sectorsize;
7974
7920 err = 0; 7975 err = 0;
7921 start = io_bio->logical; 7976 start = io_bio->logical;
7922 done.inode = inode; 7977 done.inode = inode;
7923 7978
7924 bio_for_each_segment_all(bvec, &io_bio->bio, i) { 7979 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7925 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, 7980 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
7926 0, start, bvec->bv_len); 7981
7982 pgoff = bvec->bv_offset;
7983next_block:
7984 csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
7985 ret = __readpage_endio_check(inode, io_bio, csum_pos,
7986 bvec->bv_page, pgoff, start,
7987 sectorsize);
7927 if (likely(!ret)) 7988 if (likely(!ret))
7928 goto next; 7989 goto next;
7929try_again: 7990try_again:
@@ -7931,10 +7992,10 @@ try_again:
7931 done.start = start; 7992 done.start = start;
7932 init_completion(&done.done); 7993 init_completion(&done.done);
7933 7994
7934 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, 7995 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
7935 start + bvec->bv_len - 1, 7996 pgoff, start, start + sectorsize - 1,
7936 io_bio->mirror_num, 7997 io_bio->mirror_num,
7937 btrfs_retry_endio, &done); 7998 btrfs_retry_endio, &done);
7938 if (ret) { 7999 if (ret) {
7939 err = ret; 8000 err = ret;
7940 goto next; 8001 goto next;
@@ -7947,8 +8008,15 @@ try_again:
7947 goto try_again; 8008 goto try_again;
7948 } 8009 }
7949next: 8010next:
7950 offset += bvec->bv_len; 8011 offset += sectorsize;
7951 start += bvec->bv_len; 8012 start += sectorsize;
8013
8014 ASSERT(nr_sectors);
8015
8016 if (--nr_sectors) {
8017 pgoff += sectorsize;
8018 goto next_block;
8019 }
7952 } 8020 }
7953 8021
7954 return err; 8022 return err;
@@ -8202,9 +8270,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
8202 u64 file_offset = dip->logical_offset; 8270 u64 file_offset = dip->logical_offset;
8203 u64 submit_len = 0; 8271 u64 submit_len = 0;
8204 u64 map_length; 8272 u64 map_length;
8205 int nr_pages = 0; 8273 u32 blocksize = root->sectorsize;
8206 int ret;
8207 int async_submit = 0; 8274 int async_submit = 0;
8275 int nr_sectors;
8276 int ret;
8277 int i;
8208 8278
8209 map_length = orig_bio->bi_iter.bi_size; 8279 map_length = orig_bio->bi_iter.bi_size;
8210 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 8280 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
@@ -8234,9 +8304,12 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
8234 atomic_inc(&dip->pending_bios); 8304 atomic_inc(&dip->pending_bios);
8235 8305
8236 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 8306 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
8237 if (map_length < submit_len + bvec->bv_len || 8307 nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
8238 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 8308 i = 0;
8239 bvec->bv_offset) < bvec->bv_len) { 8309next_block:
8310 if (unlikely(map_length < submit_len + blocksize ||
8311 bio_add_page(bio, bvec->bv_page, blocksize,
8312 bvec->bv_offset + (i * blocksize)) < blocksize)) {
8240 /* 8313 /*
8241 * inc the count before we submit the bio so 8314 * inc the count before we submit the bio so
8242 * we know the end IO handler won't happen before 8315 * we know the end IO handler won't happen before
@@ -8257,7 +8330,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
8257 file_offset += submit_len; 8330 file_offset += submit_len;
8258 8331
8259 submit_len = 0; 8332 submit_len = 0;
8260 nr_pages = 0;
8261 8333
8262 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, 8334 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
8263 start_sector, GFP_NOFS); 8335 start_sector, GFP_NOFS);
@@ -8275,9 +8347,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
8275 bio_put(bio); 8347 bio_put(bio);
8276 goto out_err; 8348 goto out_err;
8277 } 8349 }
8350
8351 goto next_block;
8278 } else { 8352 } else {
8279 submit_len += bvec->bv_len; 8353 submit_len += blocksize;
8280 nr_pages++; 8354 if (--nr_sectors) {
8355 i++;
8356 goto next_block;
8357 }
8281 bvec++; 8358 bvec++;
8282 } 8359 }
8283 } 8360 }
@@ -8642,6 +8719,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
8642 struct extent_state *cached_state = NULL; 8719 struct extent_state *cached_state = NULL;
8643 u64 page_start = page_offset(page); 8720 u64 page_start = page_offset(page);
8644 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 8721 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
8722 u64 start;
8723 u64 end;
8645 int inode_evicting = inode->i_state & I_FREEING; 8724 int inode_evicting = inode->i_state & I_FREEING;
8646 8725
8647 /* 8726 /*
@@ -8661,14 +8740,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
8661 8740
8662 if (!inode_evicting) 8741 if (!inode_evicting)
8663 lock_extent_bits(tree, page_start, page_end, &cached_state); 8742 lock_extent_bits(tree, page_start, page_end, &cached_state);
8664 ordered = btrfs_lookup_ordered_extent(inode, page_start); 8743again:
8744 start = page_start;
8745 ordered = btrfs_lookup_ordered_range(inode, start,
8746 page_end - start + 1);
8665 if (ordered) { 8747 if (ordered) {
8748 end = min(page_end, ordered->file_offset + ordered->len - 1);
8666 /* 8749 /*
8667 * IO on this page will never be started, so we need 8750 * IO on this page will never be started, so we need
8668 * to account for any ordered extents now 8751 * to account for any ordered extents now
8669 */ 8752 */
8670 if (!inode_evicting) 8753 if (!inode_evicting)
8671 clear_extent_bit(tree, page_start, page_end, 8754 clear_extent_bit(tree, start, end,
8672 EXTENT_DIRTY | EXTENT_DELALLOC | 8755 EXTENT_DIRTY | EXTENT_DELALLOC |
8673 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 8756 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
8674 EXTENT_DEFRAG, 1, 0, &cached_state, 8757 EXTENT_DEFRAG, 1, 0, &cached_state,
@@ -8685,22 +8768,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
8685 8768
8686 spin_lock_irq(&tree->lock); 8769 spin_lock_irq(&tree->lock);
8687 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); 8770 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
8688 new_len = page_start - ordered->file_offset; 8771 new_len = start - ordered->file_offset;
8689 if (new_len < ordered->truncated_len) 8772 if (new_len < ordered->truncated_len)
8690 ordered->truncated_len = new_len; 8773 ordered->truncated_len = new_len;
8691 spin_unlock_irq(&tree->lock); 8774 spin_unlock_irq(&tree->lock);
8692 8775
8693 if (btrfs_dec_test_ordered_pending(inode, &ordered, 8776 if (btrfs_dec_test_ordered_pending(inode, &ordered,
8694 page_start, 8777 start,
8695 PAGE_CACHE_SIZE, 1)) 8778 end - start + 1, 1))
8696 btrfs_finish_ordered_io(ordered); 8779 btrfs_finish_ordered_io(ordered);
8697 } 8780 }
8698 btrfs_put_ordered_extent(ordered); 8781 btrfs_put_ordered_extent(ordered);
8699 if (!inode_evicting) { 8782 if (!inode_evicting) {
8700 cached_state = NULL; 8783 cached_state = NULL;
8701 lock_extent_bits(tree, page_start, page_end, 8784 lock_extent_bits(tree, start, end,
8702 &cached_state); 8785 &cached_state);
8703 } 8786 }
8787
8788 start = end + 1;
8789 if (start < page_end)
8790 goto again;
8704 } 8791 }
8705 8792
8706 /* 8793 /*
@@ -8761,15 +8848,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
8761 loff_t size; 8848 loff_t size;
8762 int ret; 8849 int ret;
8763 int reserved = 0; 8850 int reserved = 0;
8851 u64 reserved_space;
8764 u64 page_start; 8852 u64 page_start;
8765 u64 page_end; 8853 u64 page_end;
8854 u64 end;
8855
8856 reserved_space = PAGE_CACHE_SIZE;
8766 8857
8767 sb_start_pagefault(inode->i_sb); 8858 sb_start_pagefault(inode->i_sb);
8768 page_start = page_offset(page); 8859 page_start = page_offset(page);
8769 page_end = page_start + PAGE_CACHE_SIZE - 1; 8860 page_end = page_start + PAGE_CACHE_SIZE - 1;
8861 end = page_end;
8770 8862
8863 /*
8864 * Reserving delalloc space after obtaining the page lock can lead to
8865 * deadlock. For example, if a dirty page is locked by this function
8866 * and the call to btrfs_delalloc_reserve_space() ends up triggering
8867 * dirty page write out, then the btrfs_writepage() function could
8868 * end up waiting indefinitely to get a lock on the page currently
8869 * being processed by btrfs_page_mkwrite() function.
8870 */
8771 ret = btrfs_delalloc_reserve_space(inode, page_start, 8871 ret = btrfs_delalloc_reserve_space(inode, page_start,
8772 PAGE_CACHE_SIZE); 8872 reserved_space);
8773 if (!ret) { 8873 if (!ret) {
8774 ret = file_update_time(vma->vm_file); 8874 ret = file_update_time(vma->vm_file);
8775 reserved = 1; 8875 reserved = 1;
@@ -8803,7 +8903,7 @@ again:
8803 * we can't set the delalloc bits if there are pending ordered 8903 * we can't set the delalloc bits if there are pending ordered
8804 * extents. Drop our locks and wait for them to finish 8904 * extents. Drop our locks and wait for them to finish
8805 */ 8905 */
8806 ordered = btrfs_lookup_ordered_extent(inode, page_start); 8906 ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
8807 if (ordered) { 8907 if (ordered) {
8808 unlock_extent_cached(io_tree, page_start, page_end, 8908 unlock_extent_cached(io_tree, page_start, page_end,
8809 &cached_state, GFP_NOFS); 8909 &cached_state, GFP_NOFS);
@@ -8813,6 +8913,18 @@ again:
8813 goto again; 8913 goto again;
8814 } 8914 }
8815 8915
8916 if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
8917 reserved_space = round_up(size - page_start, root->sectorsize);
8918 if (reserved_space < PAGE_CACHE_SIZE) {
8919 end = page_start + reserved_space - 1;
8920 spin_lock(&BTRFS_I(inode)->lock);
8921 BTRFS_I(inode)->outstanding_extents++;
8922 spin_unlock(&BTRFS_I(inode)->lock);
8923 btrfs_delalloc_release_space(inode, page_start,
8924 PAGE_CACHE_SIZE - reserved_space);
8925 }
8926 }
8927
8816 /* 8928 /*
8817 * XXX - page_mkwrite gets called every time the page is dirtied, even 8929 * XXX - page_mkwrite gets called every time the page is dirtied, even
8818 * if it was already dirty, so for space accounting reasons we need to 8930 * if it was already dirty, so for space accounting reasons we need to
@@ -8820,12 +8932,12 @@ again:
8820 * is probably a better way to do this, but for now keep consistent with 8932 * is probably a better way to do this, but for now keep consistent with
8821 * prepare_pages in the normal write path. 8933 * prepare_pages in the normal write path.
8822 */ 8934 */
8823 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, 8935 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
8824 EXTENT_DIRTY | EXTENT_DELALLOC | 8936 EXTENT_DIRTY | EXTENT_DELALLOC |
8825 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 8937 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
8826 0, 0, &cached_state, GFP_NOFS); 8938 0, 0, &cached_state, GFP_NOFS);
8827 8939
8828 ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 8940 ret = btrfs_set_extent_delalloc(inode, page_start, end,
8829 &cached_state); 8941 &cached_state);
8830 if (ret) { 8942 if (ret) {
8831 unlock_extent_cached(io_tree, page_start, page_end, 8943 unlock_extent_cached(io_tree, page_start, page_end,
@@ -8864,7 +8976,7 @@ out_unlock:
8864 } 8976 }
8865 unlock_page(page); 8977 unlock_page(page);
8866out: 8978out:
8867 btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE); 8979 btrfs_delalloc_release_space(inode, page_start, reserved_space);
8868out_noreserve: 8980out_noreserve:
8869 sb_end_pagefault(inode->i_sb); 8981 sb_end_pagefault(inode->i_sb);
8870 return ret; 8982 return ret;
@@ -9190,16 +9302,11 @@ void btrfs_destroy_cachep(void)
9190 * destroy cache. 9302 * destroy cache.
9191 */ 9303 */
9192 rcu_barrier(); 9304 rcu_barrier();
9193 if (btrfs_inode_cachep) 9305 kmem_cache_destroy(btrfs_inode_cachep);
9194 kmem_cache_destroy(btrfs_inode_cachep); 9306 kmem_cache_destroy(btrfs_trans_handle_cachep);
9195 if (btrfs_trans_handle_cachep) 9307 kmem_cache_destroy(btrfs_transaction_cachep);
9196 kmem_cache_destroy(btrfs_trans_handle_cachep); 9308 kmem_cache_destroy(btrfs_path_cachep);
9197 if (btrfs_transaction_cachep) 9309 kmem_cache_destroy(btrfs_free_space_cachep);
9198 kmem_cache_destroy(btrfs_transaction_cachep);
9199 if (btrfs_path_cachep)
9200 kmem_cache_destroy(btrfs_path_cachep);
9201 if (btrfs_free_space_cachep)
9202 kmem_cache_destroy(btrfs_free_space_cachep);
9203} 9310}
9204 9311
9205int btrfs_init_cachep(void) 9312int btrfs_init_cachep(void)
@@ -9250,7 +9357,6 @@ static int btrfs_getattr(struct vfsmount *mnt,
9250 9357
9251 generic_fillattr(inode, stat); 9358 generic_fillattr(inode, stat);
9252 stat->dev = BTRFS_I(inode)->root->anon_dev; 9359 stat->dev = BTRFS_I(inode)->root->anon_dev;
9253 stat->blksize = PAGE_CACHE_SIZE;
9254 9360
9255 spin_lock(&BTRFS_I(inode)->lock); 9361 spin_lock(&BTRFS_I(inode)->lock);
9256 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; 9362 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
@@ -9268,7 +9374,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9268 struct btrfs_root *dest = BTRFS_I(new_dir)->root; 9374 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9269 struct inode *new_inode = d_inode(new_dentry); 9375 struct inode *new_inode = d_inode(new_dentry);
9270 struct inode *old_inode = d_inode(old_dentry); 9376 struct inode *old_inode = d_inode(old_dentry);
9271 struct timespec ctime = CURRENT_TIME;
9272 u64 index = 0; 9377 u64 index = 0;
9273 u64 root_objectid; 9378 u64 root_objectid;
9274 int ret; 9379 int ret;
@@ -9365,9 +9470,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9365 inode_inc_iversion(old_dir); 9470 inode_inc_iversion(old_dir);
9366 inode_inc_iversion(new_dir); 9471 inode_inc_iversion(new_dir);
9367 inode_inc_iversion(old_inode); 9472 inode_inc_iversion(old_inode);
9368 old_dir->i_ctime = old_dir->i_mtime = ctime; 9473 old_dir->i_ctime = old_dir->i_mtime =
9369 new_dir->i_ctime = new_dir->i_mtime = ctime; 9474 new_dir->i_ctime = new_dir->i_mtime =
9370 old_inode->i_ctime = ctime; 9475 old_inode->i_ctime = current_fs_time(old_dir->i_sb);
9371 9476
9372 if (old_dentry->d_parent != new_dentry->d_parent) 9477 if (old_dentry->d_parent != new_dentry->d_parent)
9373 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 9478 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
@@ -9392,7 +9497,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9392 9497
9393 if (new_inode) { 9498 if (new_inode) {
9394 inode_inc_iversion(new_inode); 9499 inode_inc_iversion(new_inode);
9395 new_inode->i_ctime = CURRENT_TIME; 9500 new_inode->i_ctime = current_fs_time(new_inode->i_sb);
9396 if (unlikely(btrfs_ino(new_inode) == 9501 if (unlikely(btrfs_ino(new_inode) ==
9397 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 9502 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
9398 root_objectid = BTRFS_I(new_inode)->location.objectid; 9503 root_objectid = BTRFS_I(new_inode)->location.objectid;
@@ -9870,7 +9975,7 @@ next:
9870 *alloc_hint = ins.objectid + ins.offset; 9975 *alloc_hint = ins.objectid + ins.offset;
9871 9976
9872 inode_inc_iversion(inode); 9977 inode_inc_iversion(inode);
9873 inode->i_ctime = CURRENT_TIME; 9978 inode->i_ctime = current_fs_time(inode->i_sb);
9874 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 9979 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
9875 if (!(mode & FALLOC_FL_KEEP_SIZE) && 9980 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
9876 (actual_len > inode->i_size) && 9981 (actual_len > inode->i_size) &&
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 48aee9846329..86249cf78897 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -347,7 +347,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
347 347
348 btrfs_update_iflags(inode); 348 btrfs_update_iflags(inode);
349 inode_inc_iversion(inode); 349 inode_inc_iversion(inode);
350 inode->i_ctime = CURRENT_TIME; 350 inode->i_ctime = current_fs_time(inode->i_sb);
351 ret = btrfs_update_inode(trans, root, inode); 351 ret = btrfs_update_inode(trans, root, inode);
352 352
353 btrfs_end_transaction(trans, root); 353 btrfs_end_transaction(trans, root);
@@ -443,7 +443,7 @@ static noinline int create_subvol(struct inode *dir,
443 struct btrfs_root *root = BTRFS_I(dir)->root; 443 struct btrfs_root *root = BTRFS_I(dir)->root;
444 struct btrfs_root *new_root; 444 struct btrfs_root *new_root;
445 struct btrfs_block_rsv block_rsv; 445 struct btrfs_block_rsv block_rsv;
446 struct timespec cur_time = CURRENT_TIME; 446 struct timespec cur_time = current_fs_time(dir->i_sb);
447 struct inode *inode; 447 struct inode *inode;
448 int ret; 448 int ret;
449 int err; 449 int err;
@@ -844,10 +844,6 @@ static noinline int btrfs_mksubvol(struct path *parent,
844 if (IS_ERR(dentry)) 844 if (IS_ERR(dentry))
845 goto out_unlock; 845 goto out_unlock;
846 846
847 error = -EEXIST;
848 if (d_really_is_positive(dentry))
849 goto out_dput;
850
851 error = btrfs_may_create(dir, dentry); 847 error = btrfs_may_create(dir, dentry);
852 if (error) 848 if (error)
853 goto out_dput; 849 goto out_dput;
@@ -2097,8 +2093,6 @@ static noinline int search_ioctl(struct inode *inode,
2097 key.offset = (u64)-1; 2093 key.offset = (u64)-1;
2098 root = btrfs_read_fs_root_no_name(info, &key); 2094 root = btrfs_read_fs_root_no_name(info, &key);
2099 if (IS_ERR(root)) { 2095 if (IS_ERR(root)) {
2100 btrfs_err(info, "could not find root %llu",
2101 sk->tree_id);
2102 btrfs_free_path(path); 2096 btrfs_free_path(path);
2103 return -ENOENT; 2097 return -ENOENT;
2104 } 2098 }
@@ -2960,8 +2954,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
2960 * of the array is bounded by len, which is in turn bounded by 2954 * of the array is bounded by len, which is in turn bounded by
2961 * BTRFS_MAX_DEDUPE_LEN. 2955 * BTRFS_MAX_DEDUPE_LEN.
2962 */ 2956 */
2963 src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); 2957 src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
2964 dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); 2958 dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
2965 if (!src_pgarr || !dst_pgarr) { 2959 if (!src_pgarr || !dst_pgarr) {
2966 kfree(src_pgarr); 2960 kfree(src_pgarr);
2967 kfree(dst_pgarr); 2961 kfree(dst_pgarr);
@@ -3217,7 +3211,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
3217 3211
3218 inode_inc_iversion(inode); 3212 inode_inc_iversion(inode);
3219 if (!no_time_update) 3213 if (!no_time_update)
3220 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3214 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
3221 /* 3215 /*
3222 * We round up to the block size at eof when determining which 3216 * We round up to the block size at eof when determining which
3223 * extents to clone above, but shouldn't round up the file size. 3217 * extents to clone above, but shouldn't round up the file size.
@@ -3889,8 +3883,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
3889 * Truncate page cache pages so that future reads will see the cloned 3883 * Truncate page cache pages so that future reads will see the cloned
3890 * data immediately and not the previous data. 3884 * data immediately and not the previous data.
3891 */ 3885 */
3892 truncate_inode_pages_range(&inode->i_data, destoff, 3886 truncate_inode_pages_range(&inode->i_data,
3893 PAGE_CACHE_ALIGN(destoff + len) - 1); 3887 round_down(destoff, PAGE_CACHE_SIZE),
3888 round_up(destoff + len, PAGE_CACHE_SIZE) - 1);
3894out_unlock: 3889out_unlock:
3895 if (!same_inode) 3890 if (!same_inode)
3896 btrfs_double_inode_unlock(src, inode); 3891 btrfs_double_inode_unlock(src, inode);
@@ -5031,7 +5026,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
5031 struct btrfs_root *root = BTRFS_I(inode)->root; 5026 struct btrfs_root *root = BTRFS_I(inode)->root;
5032 struct btrfs_root_item *root_item = &root->root_item; 5027 struct btrfs_root_item *root_item = &root->root_item;
5033 struct btrfs_trans_handle *trans; 5028 struct btrfs_trans_handle *trans;
5034 struct timespec ct = CURRENT_TIME; 5029 struct timespec ct = current_fs_time(inode->i_sb);
5035 int ret = 0; 5030 int ret = 0;
5036 int received_uuid_changed; 5031 int received_uuid_changed;
5037 5032
@@ -5262,8 +5257,7 @@ out_unlock:
5262 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ 5257 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
5263 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } 5258 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
5264 5259
5265static int btrfs_ioctl_get_supported_features(struct file *file, 5260int btrfs_ioctl_get_supported_features(void __user *arg)
5266 void __user *arg)
5267{ 5261{
5268 static const struct btrfs_ioctl_feature_flags features[3] = { 5262 static const struct btrfs_ioctl_feature_flags features[3] = {
5269 INIT_FEATURE_FLAGS(SUPP), 5263 INIT_FEATURE_FLAGS(SUPP),
@@ -5542,7 +5536,7 @@ long btrfs_ioctl(struct file *file, unsigned int
5542 case BTRFS_IOC_SET_FSLABEL: 5536 case BTRFS_IOC_SET_FSLABEL:
5543 return btrfs_ioctl_set_fslabel(file, argp); 5537 return btrfs_ioctl_set_fslabel(file, argp);
5544 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 5538 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
5545 return btrfs_ioctl_get_supported_features(file, argp); 5539 return btrfs_ioctl_get_supported_features(argp);
5546 case BTRFS_IOC_GET_FEATURES: 5540 case BTRFS_IOC_GET_FEATURES:
5547 return btrfs_ioctl_get_features(file, argp); 5541 return btrfs_ioctl_get_features(file, argp);
5548 case BTRFS_IOC_SET_FEATURES: 5542 case BTRFS_IOC_SET_FEATURES:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8c27292ea9ea..988eb1513aa5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1114,6 +1114,5 @@ int __init ordered_data_init(void)
1114 1114
1115void ordered_data_exit(void) 1115void ordered_data_exit(void)
1116{ 1116{
1117 if (btrfs_ordered_extent_cache) 1117 kmem_cache_destroy(btrfs_ordered_extent_cache);
1118 kmem_cache_destroy(btrfs_ordered_extent_cache);
1119} 1118}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 647ab12fdf5d..147dc6ca5de1 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
295 btrfs_dev_extent_chunk_offset(l, dev_extent), 295 btrfs_dev_extent_chunk_offset(l, dev_extent),
296 btrfs_dev_extent_length(l, dev_extent)); 296 btrfs_dev_extent_length(l, dev_extent));
297 break; 297 break;
298 case BTRFS_DEV_STATS_KEY: 298 case BTRFS_PERSISTENT_ITEM_KEY:
299 printk(KERN_INFO "\t\tdevice stats\n"); 299 printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n",
300 key.objectid, key.offset);
301 switch (key.objectid) {
302 case BTRFS_DEV_STATS_OBJECTID:
303 printk(KERN_INFO "\t\tdevice stats\n");
304 break;
305 default:
306 printk(KERN_INFO "\t\tunknown persistent item\n");
307 }
308 break;
309 case BTRFS_TEMPORARY_ITEM_KEY:
310 printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n",
311 key.objectid, key.offset);
312 switch (key.objectid) {
313 case BTRFS_BALANCE_OBJECTID:
314 printk(KERN_INFO "\t\tbalance status\n");
315 break;
316 default:
317 printk(KERN_INFO "\t\tunknown temporary item\n");
318 }
300 break; 319 break;
301 case BTRFS_DEV_REPLACE_KEY: 320 case BTRFS_DEV_REPLACE_KEY:
302 printk(KERN_INFO "\t\tdev replace\n"); 321 printk(KERN_INFO "\t\tdev replace\n");
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 619f92963e27..b892914968c1 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -72,7 +72,7 @@ struct reada_extent {
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
75 struct btrfs_device *scheduled_for; 75 int scheduled;
76}; 76};
77 77
78struct reada_zone { 78struct reada_zone {
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
101static void __reada_start_machine(struct btrfs_fs_info *fs_info); 101static void __reada_start_machine(struct btrfs_fs_info *fs_info);
102 102
103static int reada_add_block(struct reada_control *rc, u64 logical, 103static int reada_add_block(struct reada_control *rc, u64 logical,
104 struct btrfs_key *top, int level, u64 generation); 104 struct btrfs_key *top, u64 generation);
105 105
106/* recurses */ 106/* recurses */
107/* in case of err, eb might be NULL */ 107/* in case of err, eb might be NULL */
108static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 108static void __readahead_hook(struct btrfs_fs_info *fs_info,
109 u64 start, int err) 109 struct reada_extent *re, struct extent_buffer *eb,
110 u64 start, int err)
110{ 111{
111 int level = 0; 112 int level = 0;
112 int nritems; 113 int nritems;
113 int i; 114 int i;
114 u64 bytenr; 115 u64 bytenr;
115 u64 generation; 116 u64 generation;
116 struct reada_extent *re;
117 struct btrfs_fs_info *fs_info = root->fs_info;
118 struct list_head list; 117 struct list_head list;
119 unsigned long index = start >> PAGE_CACHE_SHIFT;
120 struct btrfs_device *for_dev;
121 118
122 if (eb) 119 if (eb)
123 level = btrfs_header_level(eb); 120 level = btrfs_header_level(eb);
124 121
125 /* find extent */
126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re)
129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock);
131
132 if (!re)
133 return -1;
134
135 spin_lock(&re->lock); 122 spin_lock(&re->lock);
136 /* 123 /*
137 * just take the full list from the extent. afterwards we 124 * just take the full list from the extent. afterwards we
138 * don't need the lock anymore 125 * don't need the lock anymore
139 */ 126 */
140 list_replace_init(&re->extctl, &list); 127 list_replace_init(&re->extctl, &list);
141 for_dev = re->scheduled_for; 128 re->scheduled = 0;
142 re->scheduled_for = NULL;
143 spin_unlock(&re->lock); 129 spin_unlock(&re->lock);
144 130
145 if (err == 0) { 131 /*
146 nritems = level ? btrfs_header_nritems(eb) : 0; 132 * this is the error case, the extent buffer has not been
147 generation = btrfs_header_generation(eb); 133 * read correctly. We won't access anything from it and
148 /* 134 * just cleanup our data structures. Effectively this will
149 * FIXME: currently we just set nritems to 0 if this is a leaf, 135 * cut the branch below this node from read ahead.
150 * effectively ignoring the content. In a next step we could 136 */
151 * trigger more readahead depending from the content, e.g. 137 if (err)
152 * fetch the checksums for the extents in the leaf. 138 goto cleanup;
153 */
154 } else {
155 /*
156 * this is the error case, the extent buffer has not been
157 * read correctly. We won't access anything from it and
158 * just cleanup our data structures. Effectively this will
159 * cut the branch below this node from read ahead.
160 */
161 nritems = 0;
162 generation = 0;
163 }
164 139
140 /*
141 * FIXME: currently we just set nritems to 0 if this is a leaf,
142 * effectively ignoring the content. In a next step we could
143 * trigger more readahead depending from the content, e.g.
144 * fetch the checksums for the extents in the leaf.
145 */
146 if (!level)
147 goto cleanup;
148
149 nritems = btrfs_header_nritems(eb);
150 generation = btrfs_header_generation(eb);
165 for (i = 0; i < nritems; i++) { 151 for (i = 0; i < nritems; i++) {
166 struct reada_extctl *rec; 152 struct reada_extctl *rec;
167 u64 n_gen; 153 u64 n_gen;
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
188 */ 174 */
189#ifdef DEBUG 175#ifdef DEBUG
190 if (rec->generation != generation) { 176 if (rec->generation != generation) {
191 btrfs_debug(root->fs_info, 177 btrfs_debug(fs_info,
192 "generation mismatch for (%llu,%d,%llu) %llu != %llu", 178 "generation mismatch for (%llu,%d,%llu) %llu != %llu",
193 key.objectid, key.type, key.offset, 179 key.objectid, key.type, key.offset,
194 rec->generation, generation); 180 rec->generation, generation);
195 } 181 }
196#endif 182#endif
197 if (rec->generation == generation && 183 if (rec->generation == generation &&
198 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && 184 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
199 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) 185 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
200 reada_add_block(rc, bytenr, &next_key, 186 reada_add_block(rc, bytenr, &next_key, n_gen);
201 level - 1, n_gen);
202 } 187 }
203 } 188 }
189
190cleanup:
204 /* 191 /*
205 * free extctl records 192 * free extctl records
206 */ 193 */
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
222 209
223 reada_extent_put(fs_info, re); /* one ref for each entry */ 210 reada_extent_put(fs_info, re); /* one ref for each entry */
224 } 211 }
225 reada_extent_put(fs_info, re); /* our ref */
226 if (for_dev)
227 atomic_dec(&for_dev->reada_in_flight);
228 212
229 return 0; 213 return;
230} 214}
231 215
232/* 216/*
233 * start is passed separately in case eb in NULL, which may be the case with 217 * start is passed separately in case eb in NULL, which may be the case with
234 * failed I/O 218 * failed I/O
235 */ 219 */
236int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 220int btree_readahead_hook(struct btrfs_fs_info *fs_info,
237 u64 start, int err) 221 struct extent_buffer *eb, u64 start, int err)
238{ 222{
239 int ret; 223 int ret = 0;
224 struct reada_extent *re;
240 225
241 ret = __readahead_hook(root, eb, start, err); 226 /* find extent */
227 spin_lock(&fs_info->reada_lock);
228 re = radix_tree_lookup(&fs_info->reada_tree,
229 start >> PAGE_CACHE_SHIFT);
230 if (re)
231 re->refcnt++;
232 spin_unlock(&fs_info->reada_lock);
233 if (!re) {
234 ret = -1;
235 goto start_machine;
236 }
242 237
243 reada_start_machine(root->fs_info); 238 __readahead_hook(fs_info, re, eb, start, err);
239 reada_extent_put(fs_info, re); /* our ref */
244 240
241start_machine:
242 reada_start_machine(fs_info);
245 return ret; 243 return ret;
246} 244}
247 245
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
260 spin_lock(&fs_info->reada_lock); 258 spin_lock(&fs_info->reada_lock);
261 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 259 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
262 logical >> PAGE_CACHE_SHIFT, 1); 260 logical >> PAGE_CACHE_SHIFT, 1);
263 if (ret == 1) 261 if (ret == 1 && logical >= zone->start && logical <= zone->end) {
264 kref_get(&zone->refcnt); 262 kref_get(&zone->refcnt);
265 spin_unlock(&fs_info->reada_lock);
266
267 if (ret == 1) {
268 if (logical >= zone->start && logical < zone->end)
269 return zone;
270 spin_lock(&fs_info->reada_lock);
271 kref_put(&zone->refcnt, reada_zone_release);
272 spin_unlock(&fs_info->reada_lock); 263 spin_unlock(&fs_info->reada_lock);
264 return zone;
273 } 265 }
274 266
267 spin_unlock(&fs_info->reada_lock);
268
275 cache = btrfs_lookup_block_group(fs_info, logical); 269 cache = btrfs_lookup_block_group(fs_info, logical);
276 if (!cache) 270 if (!cache)
277 return NULL; 271 return NULL;
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
280 end = start + cache->key.offset - 1; 274 end = start + cache->key.offset - 1;
281 btrfs_put_block_group(cache); 275 btrfs_put_block_group(cache);
282 276
283 zone = kzalloc(sizeof(*zone), GFP_NOFS); 277 zone = kzalloc(sizeof(*zone), GFP_KERNEL);
284 if (!zone) 278 if (!zone)
285 return NULL; 279 return NULL;
286 280
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
307 kfree(zone); 301 kfree(zone);
308 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 302 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
309 logical >> PAGE_CACHE_SHIFT, 1); 303 logical >> PAGE_CACHE_SHIFT, 1);
310 if (ret == 1) 304 if (ret == 1 && logical >= zone->start && logical <= zone->end)
311 kref_get(&zone->refcnt); 305 kref_get(&zone->refcnt);
306 else
307 zone = NULL;
312 } 308 }
313 spin_unlock(&fs_info->reada_lock); 309 spin_unlock(&fs_info->reada_lock);
314 310
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
317 313
318static struct reada_extent *reada_find_extent(struct btrfs_root *root, 314static struct reada_extent *reada_find_extent(struct btrfs_root *root,
319 u64 logical, 315 u64 logical,
320 struct btrfs_key *top, int level) 316 struct btrfs_key *top)
321{ 317{
322 int ret; 318 int ret;
323 struct reada_extent *re = NULL; 319 struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
330 u64 length; 326 u64 length;
331 int real_stripes; 327 int real_stripes;
332 int nzones = 0; 328 int nzones = 0;
333 int i;
334 unsigned long index = logical >> PAGE_CACHE_SHIFT; 329 unsigned long index = logical >> PAGE_CACHE_SHIFT;
335 int dev_replace_is_ongoing; 330 int dev_replace_is_ongoing;
331 int have_zone = 0;
336 332
337 spin_lock(&fs_info->reada_lock); 333 spin_lock(&fs_info->reada_lock);
338 re = radix_tree_lookup(&fs_info->reada_tree, index); 334 re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
343 if (re) 339 if (re)
344 return re; 340 return re;
345 341
346 re = kzalloc(sizeof(*re), GFP_NOFS); 342 re = kzalloc(sizeof(*re), GFP_KERNEL);
347 if (!re) 343 if (!re)
348 return NULL; 344 return NULL;
349 345
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
375 struct reada_zone *zone; 371 struct reada_zone *zone;
376 372
377 dev = bbio->stripes[nzones].dev; 373 dev = bbio->stripes[nzones].dev;
374
375 /* cannot read ahead on missing device. */
376 if (!dev->bdev)
377 continue;
378
378 zone = reada_find_zone(fs_info, dev, logical, bbio); 379 zone = reada_find_zone(fs_info, dev, logical, bbio);
379 if (!zone) 380 if (!zone)
380 break; 381 continue;
381 382
382 re->zones[nzones] = zone; 383 re->zones[re->nzones++] = zone;
383 spin_lock(&zone->lock); 384 spin_lock(&zone->lock);
384 if (!zone->elems) 385 if (!zone->elems)
385 kref_get(&zone->refcnt); 386 kref_get(&zone->refcnt);
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
389 kref_put(&zone->refcnt, reada_zone_release); 390 kref_put(&zone->refcnt, reada_zone_release);
390 spin_unlock(&fs_info->reada_lock); 391 spin_unlock(&fs_info->reada_lock);
391 } 392 }
392 re->nzones = nzones; 393 if (re->nzones == 0) {
393 if (nzones == 0) {
394 /* not a single zone found, error and out */ 394 /* not a single zone found, error and out */
395 goto error; 395 goto error;
396 } 396 }
397 397
398 /* insert extent in reada_tree + all per-device trees, all or nothing */ 398 /* insert extent in reada_tree + all per-device trees, all or nothing */
399 btrfs_dev_replace_lock(&fs_info->dev_replace); 399 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
400 spin_lock(&fs_info->reada_lock); 400 spin_lock(&fs_info->reada_lock);
401 ret = radix_tree_insert(&fs_info->reada_tree, index, re); 401 ret = radix_tree_insert(&fs_info->reada_tree, index, re);
402 if (ret == -EEXIST) { 402 if (ret == -EEXIST) {
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
404 BUG_ON(!re_exist); 404 BUG_ON(!re_exist);
405 re_exist->refcnt++; 405 re_exist->refcnt++;
406 spin_unlock(&fs_info->reada_lock); 406 spin_unlock(&fs_info->reada_lock);
407 btrfs_dev_replace_unlock(&fs_info->dev_replace); 407 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
408 goto error; 408 goto error;
409 } 409 }
410 if (ret) { 410 if (ret) {
411 spin_unlock(&fs_info->reada_lock); 411 spin_unlock(&fs_info->reada_lock);
412 btrfs_dev_replace_unlock(&fs_info->dev_replace); 412 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
413 goto error; 413 goto error;
414 } 414 }
415 prev_dev = NULL; 415 prev_dev = NULL;
416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( 416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
417 &fs_info->dev_replace); 417 &fs_info->dev_replace);
418 for (i = 0; i < nzones; ++i) { 418 for (nzones = 0; nzones < re->nzones; ++nzones) {
419 dev = bbio->stripes[i].dev; 419 dev = re->zones[nzones]->device;
420
420 if (dev == prev_dev) { 421 if (dev == prev_dev) {
421 /* 422 /*
422 * in case of DUP, just add the first zone. As both 423 * in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
427 */ 428 */
428 continue; 429 continue;
429 } 430 }
430 if (!dev->bdev) { 431 if (!dev->bdev)
431 /* 432 continue;
432 * cannot read ahead on missing device, but for RAID5/6, 433
433 * REQ_GET_READ_MIRRORS return 1. So don't skip missing
434 * device for such case.
435 */
436 if (nzones > 1)
437 continue;
438 }
439 if (dev_replace_is_ongoing && 434 if (dev_replace_is_ongoing &&
440 dev == fs_info->dev_replace.tgtdev) { 435 dev == fs_info->dev_replace.tgtdev) {
441 /* 436 /*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
447 prev_dev = dev; 442 prev_dev = dev;
448 ret = radix_tree_insert(&dev->reada_extents, index, re); 443 ret = radix_tree_insert(&dev->reada_extents, index, re);
449 if (ret) { 444 if (ret) {
450 while (--i >= 0) { 445 while (--nzones >= 0) {
451 dev = bbio->stripes[i].dev; 446 dev = re->zones[nzones]->device;
452 BUG_ON(dev == NULL); 447 BUG_ON(dev == NULL);
453 /* ignore whether the entry was inserted */ 448 /* ignore whether the entry was inserted */
454 radix_tree_delete(&dev->reada_extents, index); 449 radix_tree_delete(&dev->reada_extents, index);
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
456 BUG_ON(fs_info == NULL); 451 BUG_ON(fs_info == NULL);
457 radix_tree_delete(&fs_info->reada_tree, index); 452 radix_tree_delete(&fs_info->reada_tree, index);
458 spin_unlock(&fs_info->reada_lock); 453 spin_unlock(&fs_info->reada_lock);
459 btrfs_dev_replace_unlock(&fs_info->dev_replace); 454 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
460 goto error; 455 goto error;
461 } 456 }
457 have_zone = 1;
462 } 458 }
463 spin_unlock(&fs_info->reada_lock); 459 spin_unlock(&fs_info->reada_lock);
464 btrfs_dev_replace_unlock(&fs_info->dev_replace); 460 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
461
462 if (!have_zone)
463 goto error;
465 464
466 btrfs_put_bbio(bbio); 465 btrfs_put_bbio(bbio);
467 return re; 466 return re;
468 467
469error: 468error:
470 while (nzones) { 469 for (nzones = 0; nzones < re->nzones; ++nzones) {
471 struct reada_zone *zone; 470 struct reada_zone *zone;
472 471
473 --nzones;
474 zone = re->zones[nzones]; 472 zone = re->zones[nzones];
475 kref_get(&zone->refcnt); 473 kref_get(&zone->refcnt);
476 spin_lock(&zone->lock); 474 spin_lock(&zone->lock);
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
531 kref_put(&zone->refcnt, reada_zone_release); 529 kref_put(&zone->refcnt, reada_zone_release);
532 spin_unlock(&fs_info->reada_lock); 530 spin_unlock(&fs_info->reada_lock);
533 } 531 }
534 if (re->scheduled_for)
535 atomic_dec(&re->scheduled_for->reada_in_flight);
536 532
537 kfree(re); 533 kfree(re);
538} 534}
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref)
556} 552}
557 553
558static int reada_add_block(struct reada_control *rc, u64 logical, 554static int reada_add_block(struct reada_control *rc, u64 logical,
559 struct btrfs_key *top, int level, u64 generation) 555 struct btrfs_key *top, u64 generation)
560{ 556{
561 struct btrfs_root *root = rc->root; 557 struct btrfs_root *root = rc->root;
562 struct reada_extent *re; 558 struct reada_extent *re;
563 struct reada_extctl *rec; 559 struct reada_extctl *rec;
564 560
565 re = reada_find_extent(root, logical, top, level); /* takes one ref */ 561 re = reada_find_extent(root, logical, top); /* takes one ref */
566 if (!re) 562 if (!re)
567 return -1; 563 return -1;
568 564
569 rec = kzalloc(sizeof(*rec), GFP_NOFS); 565 rec = kzalloc(sizeof(*rec), GFP_KERNEL);
570 if (!rec) { 566 if (!rec) {
571 reada_extent_put(root->fs_info, re); 567 reada_extent_put(root->fs_info, re);
572 return -ENOMEM; 568 return -ENOMEM;
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
662 u64 logical; 658 u64 logical;
663 int ret; 659 int ret;
664 int i; 660 int i;
665 int need_kick = 0;
666 661
667 spin_lock(&fs_info->reada_lock); 662 spin_lock(&fs_info->reada_lock);
668 if (dev->reada_curr_zone == NULL) { 663 if (dev->reada_curr_zone == NULL) {
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
679 */ 674 */
680 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, 675 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
681 dev->reada_next >> PAGE_CACHE_SHIFT, 1); 676 dev->reada_next >> PAGE_CACHE_SHIFT, 1);
682 if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { 677 if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
683 ret = reada_pick_zone(dev); 678 ret = reada_pick_zone(dev);
684 if (!ret) { 679 if (!ret) {
685 spin_unlock(&fs_info->reada_lock); 680 spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
698 693
699 spin_unlock(&fs_info->reada_lock); 694 spin_unlock(&fs_info->reada_lock);
700 695
696 spin_lock(&re->lock);
697 if (re->scheduled || list_empty(&re->extctl)) {
698 spin_unlock(&re->lock);
699 reada_extent_put(fs_info, re);
700 return 0;
701 }
702 re->scheduled = 1;
703 spin_unlock(&re->lock);
704
701 /* 705 /*
702 * find mirror num 706 * find mirror num
703 */ 707 */
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
709 } 713 }
710 logical = re->logical; 714 logical = re->logical;
711 715
712 spin_lock(&re->lock);
713 if (re->scheduled_for == NULL) {
714 re->scheduled_for = dev;
715 need_kick = 1;
716 }
717 spin_unlock(&re->lock);
718
719 reada_extent_put(fs_info, re);
720
721 if (!need_kick)
722 return 0;
723
724 atomic_inc(&dev->reada_in_flight); 716 atomic_inc(&dev->reada_in_flight);
725 ret = reada_tree_block_flagged(fs_info->extent_root, logical, 717 ret = reada_tree_block_flagged(fs_info->extent_root, logical,
726 mirror_num, &eb); 718 mirror_num, &eb);
727 if (ret) 719 if (ret)
728 __readahead_hook(fs_info->extent_root, NULL, logical, ret); 720 __readahead_hook(fs_info, re, NULL, logical, ret);
729 else if (eb) 721 else if (eb)
730 __readahead_hook(fs_info->extent_root, eb, eb->start, ret); 722 __readahead_hook(fs_info, re, eb, eb->start, ret);
731 723
732 if (eb) 724 if (eb)
733 free_extent_buffer(eb); 725 free_extent_buffer(eb);
734 726
727 atomic_dec(&dev->reada_in_flight);
728 reada_extent_put(fs_info, re);
729
735 return 1; 730 return 1;
736 731
737} 732}
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
752 set_task_ioprio(current, BTRFS_IOPRIO_READA); 747 set_task_ioprio(current, BTRFS_IOPRIO_READA);
753 __reada_start_machine(fs_info); 748 __reada_start_machine(fs_info);
754 set_task_ioprio(current, old_ioprio); 749 set_task_ioprio(current, old_ioprio);
750
751 atomic_dec(&fs_info->reada_works_cnt);
755} 752}
756 753
757static void __reada_start_machine(struct btrfs_fs_info *fs_info) 754static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
783 * enqueue to workers to finish it. This will distribute the load to 780 * enqueue to workers to finish it. This will distribute the load to
784 * the cores. 781 * the cores.
785 */ 782 */
786 for (i = 0; i < 2; ++i) 783 for (i = 0; i < 2; ++i) {
787 reada_start_machine(fs_info); 784 reada_start_machine(fs_info);
785 if (atomic_read(&fs_info->reada_works_cnt) >
786 BTRFS_MAX_MIRRORS * 2)
787 break;
788 }
788} 789}
789 790
790static void reada_start_machine(struct btrfs_fs_info *fs_info) 791static void reada_start_machine(struct btrfs_fs_info *fs_info)
791{ 792{
792 struct reada_machine_work *rmw; 793 struct reada_machine_work *rmw;
793 794
794 rmw = kzalloc(sizeof(*rmw), GFP_NOFS); 795 rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
795 if (!rmw) { 796 if (!rmw) {
796 /* FIXME we cannot handle this properly right now */ 797 /* FIXME we cannot handle this properly right now */
797 BUG(); 798 BUG();
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
801 rmw->fs_info = fs_info; 802 rmw->fs_info = fs_info;
802 803
803 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
805 atomic_inc(&fs_info->reada_works_cnt);
804} 806}
805 807
806#ifdef DEBUG 808#ifdef DEBUG
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
848 if (ret == 0) 850 if (ret == 0)
849 break; 851 break;
850 printk(KERN_DEBUG 852 printk(KERN_DEBUG
851 " re: logical %llu size %u empty %d for %lld", 853 " re: logical %llu size %u empty %d scheduled %d",
852 re->logical, fs_info->tree_root->nodesize, 854 re->logical, fs_info->tree_root->nodesize,
853 list_empty(&re->extctl), re->scheduled_for ? 855 list_empty(&re->extctl), re->scheduled);
854 re->scheduled_for->devid : -1);
855 856
856 for (i = 0; i < re->nzones; ++i) { 857 for (i = 0; i < re->nzones; ++i) {
857 printk(KERN_CONT " zone %llu-%llu devs", 858 printk(KERN_CONT " zone %llu-%llu devs",
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
878 index, 1); 879 index, 1);
879 if (ret == 0) 880 if (ret == 0)
880 break; 881 break;
881 if (!re->scheduled_for) { 882 if (!re->scheduled) {
882 index = (re->logical >> PAGE_CACHE_SHIFT) + 1; 883 index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
883 continue; 884 continue;
884 } 885 }
885 printk(KERN_DEBUG 886 printk(KERN_DEBUG
886 "re: logical %llu size %u list empty %d for %lld", 887 "re: logical %llu size %u list empty %d scheduled %d",
887 re->logical, fs_info->tree_root->nodesize, 888 re->logical, fs_info->tree_root->nodesize,
888 list_empty(&re->extctl), 889 list_empty(&re->extctl), re->scheduled);
889 re->scheduled_for ? re->scheduled_for->devid : -1);
890 for (i = 0; i < re->nzones; ++i) { 890 for (i = 0; i < re->nzones; ++i) {
891 printk(KERN_CONT " zone %llu-%llu devs", 891 printk(KERN_CONT " zone %llu-%llu devs",
892 re->zones[i]->start, 892 re->zones[i]->start,
893 re->zones[i]->end); 893 re->zones[i]->end);
894 for (i = 0; i < re->nzones; ++i) { 894 for (j = 0; j < re->zones[i]->ndevs; ++j) {
895 printk(KERN_CONT " zone %llu-%llu devs", 895 printk(KERN_CONT " %lld",
896 re->zones[i]->start, 896 re->zones[i]->devs[j]->devid);
897 re->zones[i]->end);
898 for (j = 0; j < re->zones[i]->ndevs; ++j) {
899 printk(KERN_CONT " %lld",
900 re->zones[i]->devs[j]->devid);
901 }
902 } 897 }
903 } 898 }
904 printk(KERN_CONT "\n"); 899 printk(KERN_CONT "\n");
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
917 struct reada_control *rc; 912 struct reada_control *rc;
918 u64 start; 913 u64 start;
919 u64 generation; 914 u64 generation;
920 int level;
921 int ret; 915 int ret;
922 struct extent_buffer *node; 916 struct extent_buffer *node;
923 static struct btrfs_key max_key = { 917 static struct btrfs_key max_key = {
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
926 .offset = (u64)-1 920 .offset = (u64)-1
927 }; 921 };
928 922
929 rc = kzalloc(sizeof(*rc), GFP_NOFS); 923 rc = kzalloc(sizeof(*rc), GFP_KERNEL);
930 if (!rc) 924 if (!rc)
931 return ERR_PTR(-ENOMEM); 925 return ERR_PTR(-ENOMEM);
932 926
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
940 934
941 node = btrfs_root_node(root); 935 node = btrfs_root_node(root);
942 start = node->start; 936 start = node->start;
943 level = btrfs_header_level(node);
944 generation = btrfs_header_generation(node); 937 generation = btrfs_header_generation(node);
945 free_extent_buffer(node); 938 free_extent_buffer(node);
946 939
947 ret = reada_add_block(rc, start, &max_key, level, generation); 940 ret = reada_add_block(rc, start, &max_key, generation);
948 if (ret) { 941 if (ret) {
949 kfree(rc); 942 kfree(rc);
950 return ERR_PTR(ret); 943 return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
959int btrfs_reada_wait(void *handle) 952int btrfs_reada_wait(void *handle)
960{ 953{
961 struct reada_control *rc = handle; 954 struct reada_control *rc = handle;
955 struct btrfs_fs_info *fs_info = rc->root->fs_info;
962 956
963 while (atomic_read(&rc->elems)) { 957 while (atomic_read(&rc->elems)) {
958 if (!atomic_read(&fs_info->reada_works_cnt))
959 reada_start_machine(fs_info);
964 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 960 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
965 5 * HZ); 961 5 * HZ);
966 dump_devs(rc->root->fs_info, 962 dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
977int btrfs_reada_wait(void *handle) 973int btrfs_reada_wait(void *handle)
978{ 974{
979 struct reada_control *rc = handle; 975 struct reada_control *rc = handle;
976 struct btrfs_fs_info *fs_info = rc->root->fs_info;
980 977
981 while (atomic_read(&rc->elems)) { 978 while (atomic_read(&rc->elems)) {
982 wait_event(rc->wait, atomic_read(&rc->elems) == 0); 979 if (!atomic_read(&fs_info->reada_works_cnt))
980 reada_start_machine(fs_info);
981 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
982 (HZ + 9) / 10);
983 } 983 }
984 984
985 kref_put(&rc->refcnt, reada_control_release); 985 kref_put(&rc->refcnt, reada_control_release);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..a25f3b21491b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -488,7 +488,7 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root) 488 struct btrfs_root *root)
489{ 489{
490 struct btrfs_root_item *item = &root->root_item; 490 struct btrfs_root_item *item = &root->root_item;
491 struct timespec ct = CURRENT_TIME; 491 struct timespec ct = current_fs_time(root->fs_info->sb);
492 492
493 spin_lock(&root->root_item_lock); 493 spin_lock(&root->root_item_lock);
494 btrfs_set_root_ctransid(item, trans->transid); 494 btrfs_set_root_ctransid(item, trans->transid);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 92bf5ee732fb..e42aa27c96e9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
461 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 461 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
462 int ret; 462 int ret;
463 463
464 sctx = kzalloc(sizeof(*sctx), GFP_NOFS); 464 sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
465 if (!sctx) 465 if (!sctx)
466 goto nomem; 466 goto nomem;
467 atomic_set(&sctx->refs, 1); 467 atomic_set(&sctx->refs, 1);
@@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
472 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { 472 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
473 struct scrub_bio *sbio; 473 struct scrub_bio *sbio;
474 474
475 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 475 sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
476 if (!sbio) 476 if (!sbio)
477 goto nomem; 477 goto nomem;
478 sctx->bios[i] = sbio; 478 sctx->bios[i] = sbio;
@@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1654again: 1654again:
1655 if (!wr_ctx->wr_curr_bio) { 1655 if (!wr_ctx->wr_curr_bio) {
1656 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), 1656 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1657 GFP_NOFS); 1657 GFP_KERNEL);
1658 if (!wr_ctx->wr_curr_bio) { 1658 if (!wr_ctx->wr_curr_bio) {
1659 mutex_unlock(&wr_ctx->wr_lock); 1659 mutex_unlock(&wr_ctx->wr_lock);
1660 return -ENOMEM; 1660 return -ENOMEM;
@@ -1671,7 +1671,8 @@ again:
1671 sbio->dev = wr_ctx->tgtdev; 1671 sbio->dev = wr_ctx->tgtdev;
1672 bio = sbio->bio; 1672 bio = sbio->bio;
1673 if (!bio) { 1673 if (!bio) {
1674 bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); 1674 bio = btrfs_io_bio_alloc(GFP_KERNEL,
1675 wr_ctx->pages_per_wr_bio);
1675 if (!bio) { 1676 if (!bio) {
1676 mutex_unlock(&wr_ctx->wr_lock); 1677 mutex_unlock(&wr_ctx->wr_lock);
1677 return -ENOMEM; 1678 return -ENOMEM;
@@ -2076,7 +2077,8 @@ again:
2076 sbio->dev = spage->dev; 2077 sbio->dev = spage->dev;
2077 bio = sbio->bio; 2078 bio = sbio->bio;
2078 if (!bio) { 2079 if (!bio) {
2079 bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); 2080 bio = btrfs_io_bio_alloc(GFP_KERNEL,
2081 sctx->pages_per_rd_bio);
2080 if (!bio) 2082 if (!bio)
2081 return -ENOMEM; 2083 return -ENOMEM;
2082 sbio->bio = bio; 2084 sbio->bio = bio;
@@ -2241,7 +2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2241 struct scrub_block *sblock; 2243 struct scrub_block *sblock;
2242 int index; 2244 int index;
2243 2245
2244 sblock = kzalloc(sizeof(*sblock), GFP_NOFS); 2246 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2245 if (!sblock) { 2247 if (!sblock) {
2246 spin_lock(&sctx->stat_lock); 2248 spin_lock(&sctx->stat_lock);
2247 sctx->stat.malloc_errors++; 2249 sctx->stat.malloc_errors++;
@@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2259 struct scrub_page *spage; 2261 struct scrub_page *spage;
2260 u64 l = min_t(u64, len, PAGE_SIZE); 2262 u64 l = min_t(u64, len, PAGE_SIZE);
2261 2263
2262 spage = kzalloc(sizeof(*spage), GFP_NOFS); 2264 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2263 if (!spage) { 2265 if (!spage) {
2264leave_nomem: 2266leave_nomem:
2265 spin_lock(&sctx->stat_lock); 2267 spin_lock(&sctx->stat_lock);
@@ -2286,7 +2288,7 @@ leave_nomem:
2286 spage->have_csum = 0; 2288 spage->have_csum = 0;
2287 } 2289 }
2288 sblock->page_count++; 2290 sblock->page_count++;
2289 spage->page = alloc_page(GFP_NOFS); 2291 spage->page = alloc_page(GFP_KERNEL);
2290 if (!spage->page) 2292 if (!spage->page)
2291 goto leave_nomem; 2293 goto leave_nomem;
2292 len -= l; 2294 len -= l;
@@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
2541 struct scrub_block *sblock; 2543 struct scrub_block *sblock;
2542 int index; 2544 int index;
2543 2545
2544 sblock = kzalloc(sizeof(*sblock), GFP_NOFS); 2546 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2545 if (!sblock) { 2547 if (!sblock) {
2546 spin_lock(&sctx->stat_lock); 2548 spin_lock(&sctx->stat_lock);
2547 sctx->stat.malloc_errors++; 2549 sctx->stat.malloc_errors++;
@@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
2561 struct scrub_page *spage; 2563 struct scrub_page *spage;
2562 u64 l = min_t(u64, len, PAGE_SIZE); 2564 u64 l = min_t(u64, len, PAGE_SIZE);
2563 2565
2564 spage = kzalloc(sizeof(*spage), GFP_NOFS); 2566 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2565 if (!spage) { 2567 if (!spage) {
2566leave_nomem: 2568leave_nomem:
2567 spin_lock(&sctx->stat_lock); 2569 spin_lock(&sctx->stat_lock);
@@ -2591,7 +2593,7 @@ leave_nomem:
2591 spage->have_csum = 0; 2593 spage->have_csum = 0;
2592 } 2594 }
2593 sblock->page_count++; 2595 sblock->page_count++;
2594 spage->page = alloc_page(GFP_NOFS); 2596 spage->page = alloc_page(GFP_KERNEL);
2595 if (!spage->page) 2597 if (!spage->page)
2596 goto leave_nomem; 2598 goto leave_nomem;
2597 len -= l; 2599 len -= l;
@@ -3857,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3857 return -EIO; 3859 return -EIO;
3858 } 3860 }
3859 3861
3860 btrfs_dev_replace_lock(&fs_info->dev_replace); 3862 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
3861 if (dev->scrub_device || 3863 if (dev->scrub_device ||
3862 (!is_dev_replace && 3864 (!is_dev_replace &&
3863 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { 3865 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3864 btrfs_dev_replace_unlock(&fs_info->dev_replace); 3866 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3865 mutex_unlock(&fs_info->scrub_lock); 3867 mutex_unlock(&fs_info->scrub_lock);
3866 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 3868 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3867 return -EINPROGRESS; 3869 return -EINPROGRESS;
3868 } 3870 }
3869 btrfs_dev_replace_unlock(&fs_info->dev_replace); 3871 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3870 3872
3871 ret = scrub_workers_get(fs_info, is_dev_replace); 3873 ret = scrub_workers_get(fs_info, is_dev_replace);
3872 if (ret) { 3874 if (ret) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 63a6152be04b..d2e29925f1da 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -304,7 +304,7 @@ static struct fs_path *fs_path_alloc(void)
304{ 304{
305 struct fs_path *p; 305 struct fs_path *p;
306 306
307 p = kmalloc(sizeof(*p), GFP_NOFS); 307 p = kmalloc(sizeof(*p), GFP_KERNEL);
308 if (!p) 308 if (!p)
309 return NULL; 309 return NULL;
310 p->reversed = 0; 310 p->reversed = 0;
@@ -363,11 +363,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
363 * First time the inline_buf does not suffice 363 * First time the inline_buf does not suffice
364 */ 364 */
365 if (p->buf == p->inline_buf) { 365 if (p->buf == p->inline_buf) {
366 tmp_buf = kmalloc(len, GFP_NOFS); 366 tmp_buf = kmalloc(len, GFP_KERNEL);
367 if (tmp_buf) 367 if (tmp_buf)
368 memcpy(tmp_buf, p->buf, old_buf_len); 368 memcpy(tmp_buf, p->buf, old_buf_len);
369 } else { 369 } else {
370 tmp_buf = krealloc(p->buf, len, GFP_NOFS); 370 tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
371 } 371 }
372 if (!tmp_buf) 372 if (!tmp_buf)
373 return -ENOMEM; 373 return -ENOMEM;
@@ -995,7 +995,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
995 * values are small. 995 * values are small.
996 */ 996 */
997 buf_len = PATH_MAX; 997 buf_len = PATH_MAX;
998 buf = kmalloc(buf_len, GFP_NOFS); 998 buf = kmalloc(buf_len, GFP_KERNEL);
999 if (!buf) { 999 if (!buf) {
1000 ret = -ENOMEM; 1000 ret = -ENOMEM;
1001 goto out; 1001 goto out;
@@ -1042,7 +1042,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
1042 buf = NULL; 1042 buf = NULL;
1043 } else { 1043 } else {
1044 char *tmp = krealloc(buf, buf_len, 1044 char *tmp = krealloc(buf, buf_len,
1045 GFP_NOFS | __GFP_NOWARN); 1045 GFP_KERNEL | __GFP_NOWARN);
1046 1046
1047 if (!tmp) 1047 if (!tmp)
1048 kfree(buf); 1048 kfree(buf);
@@ -1303,7 +1303,7 @@ static int find_extent_clone(struct send_ctx *sctx,
1303 /* We only use this path under the commit sem */ 1303 /* We only use this path under the commit sem */
1304 tmp_path->need_commit_sem = 0; 1304 tmp_path->need_commit_sem = 0;
1305 1305
1306 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); 1306 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
1307 if (!backref_ctx) { 1307 if (!backref_ctx) {
1308 ret = -ENOMEM; 1308 ret = -ENOMEM;
1309 goto out; 1309 goto out;
@@ -1984,7 +1984,7 @@ static int name_cache_insert(struct send_ctx *sctx,
1984 nce_head = radix_tree_lookup(&sctx->name_cache, 1984 nce_head = radix_tree_lookup(&sctx->name_cache,
1985 (unsigned long)nce->ino); 1985 (unsigned long)nce->ino);
1986 if (!nce_head) { 1986 if (!nce_head) {
1987 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1987 nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
1988 if (!nce_head) { 1988 if (!nce_head) {
1989 kfree(nce); 1989 kfree(nce);
1990 return -ENOMEM; 1990 return -ENOMEM;
@@ -2179,7 +2179,7 @@ out_cache:
2179 /* 2179 /*
2180 * Store the result of the lookup in the name cache. 2180 * Store the result of the lookup in the name cache.
2181 */ 2181 */
2182 nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); 2182 nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
2183 if (!nce) { 2183 if (!nce) {
2184 ret = -ENOMEM; 2184 ret = -ENOMEM;
2185 goto out; 2185 goto out;
@@ -2315,7 +2315,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
2315 if (!path) 2315 if (!path)
2316 return -ENOMEM; 2316 return -ENOMEM;
2317 2317
2318 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2318 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
2319 if (!name) { 2319 if (!name) {
2320 btrfs_free_path(path); 2320 btrfs_free_path(path);
2321 return -ENOMEM; 2321 return -ENOMEM;
@@ -2730,7 +2730,7 @@ static int __record_ref(struct list_head *head, u64 dir,
2730{ 2730{
2731 struct recorded_ref *ref; 2731 struct recorded_ref *ref;
2732 2732
2733 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2733 ref = kmalloc(sizeof(*ref), GFP_KERNEL);
2734 if (!ref) 2734 if (!ref)
2735 return -ENOMEM; 2735 return -ENOMEM;
2736 2736
@@ -2755,7 +2755,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2755{ 2755{
2756 struct recorded_ref *new; 2756 struct recorded_ref *new;
2757 2757
2758 new = kmalloc(sizeof(*ref), GFP_NOFS); 2758 new = kmalloc(sizeof(*ref), GFP_KERNEL);
2759 if (!new) 2759 if (!new)
2760 return -ENOMEM; 2760 return -ENOMEM;
2761 2761
@@ -2818,7 +2818,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2818 struct rb_node *parent = NULL; 2818 struct rb_node *parent = NULL;
2819 struct orphan_dir_info *entry, *odi; 2819 struct orphan_dir_info *entry, *odi;
2820 2820
2821 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2821 odi = kmalloc(sizeof(*odi), GFP_KERNEL);
2822 if (!odi) 2822 if (!odi)
2823 return ERR_PTR(-ENOMEM); 2823 return ERR_PTR(-ENOMEM);
2824 odi->ino = dir_ino; 2824 odi->ino = dir_ino;
@@ -2973,7 +2973,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
2973 struct rb_node *parent = NULL; 2973 struct rb_node *parent = NULL;
2974 struct waiting_dir_move *entry, *dm; 2974 struct waiting_dir_move *entry, *dm;
2975 2975
2976 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2976 dm = kmalloc(sizeof(*dm), GFP_KERNEL);
2977 if (!dm) 2977 if (!dm)
2978 return -ENOMEM; 2978 return -ENOMEM;
2979 dm->ino = ino; 2979 dm->ino = ino;
@@ -3040,7 +3040,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
3040 int exists = 0; 3040 int exists = 0;
3041 int ret; 3041 int ret;
3042 3042
3043 pm = kmalloc(sizeof(*pm), GFP_NOFS); 3043 pm = kmalloc(sizeof(*pm), GFP_KERNEL);
3044 if (!pm) 3044 if (!pm)
3045 return -ENOMEM; 3045 return -ENOMEM;
3046 pm->parent_ino = parent_ino; 3046 pm->parent_ino = parent_ino;
@@ -4280,7 +4280,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key,
4280 strncmp(name, ctx->name, name_len) == 0) { 4280 strncmp(name, ctx->name, name_len) == 0) {
4281 ctx->found_idx = num; 4281 ctx->found_idx = num;
4282 ctx->found_data_len = data_len; 4282 ctx->found_data_len = data_len;
4283 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4283 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
4284 if (!ctx->found_data) 4284 if (!ctx->found_data)
4285 return -ENOMEM; 4285 return -ENOMEM;
4286 return 1; 4286 return 1;
@@ -4481,7 +4481,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4481 while (index <= last_index) { 4481 while (index <= last_index) {
4482 unsigned cur_len = min_t(unsigned, len, 4482 unsigned cur_len = min_t(unsigned, len,
4483 PAGE_CACHE_SIZE - pg_offset); 4483 PAGE_CACHE_SIZE - pg_offset);
4484 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4484 page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
4485 if (!page) { 4485 if (!page) {
4486 ret = -ENOMEM; 4486 ret = -ENOMEM;
4487 break; 4487 break;
@@ -5989,7 +5989,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5989 goto out; 5989 goto out;
5990 } 5990 }
5991 5991
5992 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 5992 sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
5993 if (!sctx) { 5993 if (!sctx) {
5994 ret = -ENOMEM; 5994 ret = -ENOMEM;
5995 goto out; 5995 goto out;
@@ -5997,7 +5997,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5997 5997
5998 INIT_LIST_HEAD(&sctx->new_refs); 5998 INIT_LIST_HEAD(&sctx->new_refs);
5999 INIT_LIST_HEAD(&sctx->deleted_refs); 5999 INIT_LIST_HEAD(&sctx->deleted_refs);
6000 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 6000 INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
6001 INIT_LIST_HEAD(&sctx->name_cache_list); 6001 INIT_LIST_HEAD(&sctx->name_cache_list);
6002 6002
6003 sctx->flags = arg->flags; 6003 sctx->flags = arg->flags;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d41e09fe8e38..a958f625793b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -303,7 +303,8 @@ enum {
303 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, 303 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
304 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, 304 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
305 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, 305 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
306 Opt_datasum, Opt_treelog, Opt_noinode_cache, 306 Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
307 Opt_nologreplay, Opt_norecovery,
307#ifdef CONFIG_BTRFS_DEBUG 308#ifdef CONFIG_BTRFS_DEBUG
308 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, 309 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
309#endif 310#endif
@@ -335,6 +336,8 @@ static const match_table_t tokens = {
335 {Opt_noacl, "noacl"}, 336 {Opt_noacl, "noacl"},
336 {Opt_notreelog, "notreelog"}, 337 {Opt_notreelog, "notreelog"},
337 {Opt_treelog, "treelog"}, 338 {Opt_treelog, "treelog"},
339 {Opt_nologreplay, "nologreplay"},
340 {Opt_norecovery, "norecovery"},
338 {Opt_flushoncommit, "flushoncommit"}, 341 {Opt_flushoncommit, "flushoncommit"},
339 {Opt_noflushoncommit, "noflushoncommit"}, 342 {Opt_noflushoncommit, "noflushoncommit"},
340 {Opt_ratio, "metadata_ratio=%d"}, 343 {Opt_ratio, "metadata_ratio=%d"},
@@ -352,7 +355,8 @@ static const match_table_t tokens = {
352 {Opt_inode_cache, "inode_cache"}, 355 {Opt_inode_cache, "inode_cache"},
353 {Opt_noinode_cache, "noinode_cache"}, 356 {Opt_noinode_cache, "noinode_cache"},
354 {Opt_no_space_cache, "nospace_cache"}, 357 {Opt_no_space_cache, "nospace_cache"},
355 {Opt_recovery, "recovery"}, 358 {Opt_recovery, "recovery"}, /* deprecated */
359 {Opt_usebackuproot, "usebackuproot"},
356 {Opt_skip_balance, "skip_balance"}, 360 {Opt_skip_balance, "skip_balance"},
357 {Opt_check_integrity, "check_int"}, 361 {Opt_check_integrity, "check_int"},
358 {Opt_check_integrity_including_extent_data, "check_int_data"}, 362 {Opt_check_integrity_including_extent_data, "check_int_data"},
@@ -373,7 +377,8 @@ static const match_table_t tokens = {
373 * reading in a new superblock is parsed here. 377 * reading in a new superblock is parsed here.
374 * XXX JDM: This needs to be cleaned up for remount. 378 * XXX JDM: This needs to be cleaned up for remount.
375 */ 379 */
376int btrfs_parse_options(struct btrfs_root *root, char *options) 380int btrfs_parse_options(struct btrfs_root *root, char *options,
381 unsigned long new_flags)
377{ 382{
378 struct btrfs_fs_info *info = root->fs_info; 383 struct btrfs_fs_info *info = root->fs_info;
379 substring_t args[MAX_OPT_ARGS]; 384 substring_t args[MAX_OPT_ARGS];
@@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
393 else if (cache_gen) 398 else if (cache_gen)
394 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 399 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
395 400
401 /*
402 * Even the options are empty, we still need to do extra check
403 * against new flags
404 */
396 if (!options) 405 if (!options)
397 goto out; 406 goto check;
398 407
399 /* 408 /*
400 * strsep changes the string, duplicate it because parse_options 409 * strsep changes the string, duplicate it because parse_options
@@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
606 btrfs_clear_and_info(root, NOTREELOG, 615 btrfs_clear_and_info(root, NOTREELOG,
607 "enabling tree log"); 616 "enabling tree log");
608 break; 617 break;
618 case Opt_norecovery:
619 case Opt_nologreplay:
620 btrfs_set_and_info(root, NOLOGREPLAY,
621 "disabling log replay at mount time");
622 break;
609 case Opt_flushoncommit: 623 case Opt_flushoncommit:
610 btrfs_set_and_info(root, FLUSHONCOMMIT, 624 btrfs_set_and_info(root, FLUSHONCOMMIT,
611 "turning on flush-on-commit"); 625 "turning on flush-on-commit");
@@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
696 "disabling auto defrag"); 710 "disabling auto defrag");
697 break; 711 break;
698 case Opt_recovery: 712 case Opt_recovery:
699 btrfs_info(root->fs_info, "enabling auto recovery"); 713 btrfs_warn(root->fs_info,
700 btrfs_set_opt(info->mount_opt, RECOVERY); 714 "'recovery' is deprecated, use 'usebackuproot' instead");
715 case Opt_usebackuproot:
716 btrfs_info(root->fs_info,
717 "trying to use backup root at mount time");
718 btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
701 break; 719 break;
702 case Opt_skip_balance: 720 case Opt_skip_balance:
703 btrfs_set_opt(info->mount_opt, SKIP_BALANCE); 721 btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
@@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
792 break; 810 break;
793 } 811 }
794 } 812 }
813check:
814 /*
815 * Extra check for current option against current flag
816 */
817 if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
818 btrfs_err(root->fs_info,
819 "nologreplay must be used with ro mount option");
820 ret = -EINVAL;
821 }
795out: 822out:
796 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && 823 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
797 !btrfs_test_opt(root, FREE_SPACE_TREE) && 824 !btrfs_test_opt(root, FREE_SPACE_TREE) &&
@@ -1202,6 +1229,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1202 seq_puts(seq, ",ssd"); 1229 seq_puts(seq, ",ssd");
1203 if (btrfs_test_opt(root, NOTREELOG)) 1230 if (btrfs_test_opt(root, NOTREELOG))
1204 seq_puts(seq, ",notreelog"); 1231 seq_puts(seq, ",notreelog");
1232 if (btrfs_test_opt(root, NOLOGREPLAY))
1233 seq_puts(seq, ",nologreplay");
1205 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 1234 if (btrfs_test_opt(root, FLUSHONCOMMIT))
1206 seq_puts(seq, ",flushoncommit"); 1235 seq_puts(seq, ",flushoncommit");
1207 if (btrfs_test_opt(root, DISCARD)) 1236 if (btrfs_test_opt(root, DISCARD))
@@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1228 seq_puts(seq, ",inode_cache"); 1257 seq_puts(seq, ",inode_cache");
1229 if (btrfs_test_opt(root, SKIP_BALANCE)) 1258 if (btrfs_test_opt(root, SKIP_BALANCE))
1230 seq_puts(seq, ",skip_balance"); 1259 seq_puts(seq, ",skip_balance");
1231 if (btrfs_test_opt(root, RECOVERY))
1232 seq_puts(seq, ",recovery");
1233#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1260#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1234 if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) 1261 if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1235 seq_puts(seq, ",check_int_data"); 1262 seq_puts(seq, ",check_int_data");
@@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1685 } 1712 }
1686 } 1713 }
1687 1714
1688 ret = btrfs_parse_options(root, data); 1715 ret = btrfs_parse_options(root, data, *flags);
1689 if (ret) { 1716 if (ret) {
1690 ret = -EINVAL; 1717 ret = -EINVAL;
1691 goto restore; 1718 goto restore;
@@ -2163,6 +2190,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2163 break; 2190 break;
2164 ret = !(fs_devices->num_devices == fs_devices->total_devices); 2191 ret = !(fs_devices->num_devices == fs_devices->total_devices);
2165 break; 2192 break;
2193 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2194 ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2195 break;
2166 } 2196 }
2167 2197
2168 kfree(vol); 2198 kfree(vol);
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 0e1e61a7ec23..d39f714dabeb 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -189,12 +189,6 @@ btrfs_alloc_dummy_block_group(unsigned long length)
189 kfree(cache); 189 kfree(cache);
190 return NULL; 190 return NULL;
191 } 191 }
192 cache->fs_info = btrfs_alloc_dummy_fs_info();
193 if (!cache->fs_info) {
194 kfree(cache->free_space_ctl);
195 kfree(cache);
196 return NULL;
197 }
198 192
199 cache->key.objectid = 0; 193 cache->key.objectid = 0;
200 cache->key.offset = length; 194 cache->key.offset = length;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index d05fe1ab4808..7cea4462acd5 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -485,6 +485,7 @@ static int run_test(test_func_t test_func, int bitmaps)
485 cache->bitmap_low_thresh = 0; 485 cache->bitmap_low_thresh = 0;
486 cache->bitmap_high_thresh = (u32)-1; 486 cache->bitmap_high_thresh = (u32)-1;
487 cache->needs_free_space = 1; 487 cache->needs_free_space = 1;
488 cache->fs_info = root->fs_info;
488 489
489 btrfs_init_dummy_trans(&trans); 490 btrfs_init_dummy_trans(&trans);
490 491
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b6031ce474f7..43885e51b882 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
637 637
638 trans->block_rsv = &root->fs_info->trans_block_rsv; 638 trans->block_rsv = &root->fs_info->trans_block_rsv;
639 trans->bytes_reserved = num_bytes; 639 trans->bytes_reserved = num_bytes;
640 trace_btrfs_space_reservation(root->fs_info, "transaction",
641 trans->transid, num_bytes, 1);
640 642
641 return trans; 643 return trans;
642} 644}
@@ -1333,7 +1335,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1333 struct dentry *dentry; 1335 struct dentry *dentry;
1334 struct extent_buffer *tmp; 1336 struct extent_buffer *tmp;
1335 struct extent_buffer *old; 1337 struct extent_buffer *old;
1336 struct timespec cur_time = CURRENT_TIME; 1338 struct timespec cur_time;
1337 int ret = 0; 1339 int ret = 0;
1338 u64 to_reserve = 0; 1340 u64 to_reserve = 0;
1339 u64 index = 0; 1341 u64 index = 0;
@@ -1375,12 +1377,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1375 rsv = trans->block_rsv; 1377 rsv = trans->block_rsv;
1376 trans->block_rsv = &pending->block_rsv; 1378 trans->block_rsv = &pending->block_rsv;
1377 trans->bytes_reserved = trans->block_rsv->reserved; 1379 trans->bytes_reserved = trans->block_rsv->reserved;
1378 1380 trace_btrfs_space_reservation(root->fs_info, "transaction",
1381 trans->transid,
1382 trans->bytes_reserved, 1);
1379 dentry = pending->dentry; 1383 dentry = pending->dentry;
1380 parent_inode = pending->dir; 1384 parent_inode = pending->dir;
1381 parent_root = BTRFS_I(parent_inode)->root; 1385 parent_root = BTRFS_I(parent_inode)->root;
1382 record_root_in_trans(trans, parent_root); 1386 record_root_in_trans(trans, parent_root);
1383 1387
1388 cur_time = current_fs_time(parent_inode->i_sb);
1389
1384 /* 1390 /*
1385 * insert the directory item 1391 * insert the directory item
1386 */ 1392 */
@@ -1523,7 +1529,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1523 1529
1524 btrfs_i_size_write(parent_inode, parent_inode->i_size + 1530 btrfs_i_size_write(parent_inode, parent_inode->i_size +
1525 dentry->d_name.len * 2); 1531 dentry->d_name.len * 2);
1526 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 1532 parent_inode->i_mtime = parent_inode->i_ctime =
1533 current_fs_time(parent_inode->i_sb);
1527 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); 1534 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
1528 if (ret) { 1535 if (ret) {
1529 btrfs_abort_transaction(trans, root, ret); 1536 btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 366b335946fa..80857b4646c0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -138,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void)
138{ 138{
139 struct btrfs_fs_devices *fs_devs; 139 struct btrfs_fs_devices *fs_devs;
140 140
141 fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); 141 fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
142 if (!fs_devs) 142 if (!fs_devs)
143 return ERR_PTR(-ENOMEM); 143 return ERR_PTR(-ENOMEM);
144 144
@@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void)
220{ 220{
221 struct btrfs_device *dev; 221 struct btrfs_device *dev;
222 222
223 dev = kzalloc(sizeof(*dev), GFP_NOFS); 223 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
224 if (!dev) 224 if (!dev)
225 return ERR_PTR(-ENOMEM); 225 return ERR_PTR(-ENOMEM);
226 226
@@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
733 * uuid mutex so nothing we touch in here is going to disappear. 733 * uuid mutex so nothing we touch in here is going to disappear.
734 */ 734 */
735 if (orig_dev->name) { 735 if (orig_dev->name) {
736 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); 736 name = rcu_string_strdup(orig_dev->name->str,
737 GFP_KERNEL);
737 if (!name) { 738 if (!name) {
738 kfree(device); 739 kfree(device);
739 goto error; 740 goto error;
@@ -1714,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1714 } while (read_seqretry(&root->fs_info->profiles_lock, seq)); 1715 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
1715 1716
1716 num_devices = root->fs_info->fs_devices->num_devices; 1717 num_devices = root->fs_info->fs_devices->num_devices;
1717 btrfs_dev_replace_lock(&root->fs_info->dev_replace); 1718 btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
1718 if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { 1719 if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
1719 WARN_ON(num_devices < 1); 1720 WARN_ON(num_devices < 1);
1720 num_devices--; 1721 num_devices--;
1721 } 1722 }
1722 btrfs_dev_replace_unlock(&root->fs_info->dev_replace); 1723 btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
1723 1724
1724 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { 1725 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
1725 ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; 1726 ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
2287 goto error; 2288 goto error;
2288 } 2289 }
2289 2290
2290 name = rcu_string_strdup(device_path, GFP_NOFS); 2291 name = rcu_string_strdup(device_path, GFP_KERNEL);
2291 if (!name) { 2292 if (!name) {
2292 kfree(device); 2293 kfree(device);
2293 ret = -ENOMEM; 2294 ret = -ENOMEM;
@@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root,
2966 } 2967 }
2967 2968
2968 key.objectid = BTRFS_BALANCE_OBJECTID; 2969 key.objectid = BTRFS_BALANCE_OBJECTID;
2969 key.type = BTRFS_BALANCE_ITEM_KEY; 2970 key.type = BTRFS_TEMPORARY_ITEM_KEY;
2970 key.offset = 0; 2971 key.offset = 0;
2971 2972
2972 ret = btrfs_insert_empty_item(trans, root, path, &key, 2973 ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root)
3015 } 3016 }
3016 3017
3017 key.objectid = BTRFS_BALANCE_OBJECTID; 3018 key.objectid = BTRFS_BALANCE_OBJECTID;
3018 key.type = BTRFS_BALANCE_ITEM_KEY; 3019 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3019 key.offset = 0; 3020 key.offset = 0;
3020 3021
3021 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 3022 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
@@ -3686,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3686 } 3687 }
3687 3688
3688 num_devices = fs_info->fs_devices->num_devices; 3689 num_devices = fs_info->fs_devices->num_devices;
3689 btrfs_dev_replace_lock(&fs_info->dev_replace); 3690 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
3690 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { 3691 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
3691 BUG_ON(num_devices < 1); 3692 BUG_ON(num_devices < 1);
3692 num_devices--; 3693 num_devices--;
3693 } 3694 }
3694 btrfs_dev_replace_unlock(&fs_info->dev_replace); 3695 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3695 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 3696 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3696 if (num_devices == 1) 3697 if (num_devices == 1)
3697 allowed |= BTRFS_BLOCK_GROUP_DUP; 3698 allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3867 return -ENOMEM; 3868 return -ENOMEM;
3868 3869
3869 key.objectid = BTRFS_BALANCE_OBJECTID; 3870 key.objectid = BTRFS_BALANCE_OBJECTID;
3870 key.type = BTRFS_BALANCE_ITEM_KEY; 3871 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3871 key.offset = 0; 3872 key.offset = 0;
3872 3873
3873 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 3874 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
@@ -5062,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5062 ret = 1; 5063 ret = 1;
5063 free_extent_map(em); 5064 free_extent_map(em);
5064 5065
5065 btrfs_dev_replace_lock(&fs_info->dev_replace); 5066 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
5066 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) 5067 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
5067 ret++; 5068 ret++;
5068 btrfs_dev_replace_unlock(&fs_info->dev_replace); 5069 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
5069 5070
5070 return ret; 5071 return ret;
5071} 5072}
@@ -5325,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5325 if (!bbio_ret) 5326 if (!bbio_ret)
5326 goto out; 5327 goto out;
5327 5328
5328 btrfs_dev_replace_lock(dev_replace); 5329 btrfs_dev_replace_lock(dev_replace, 0);
5329 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); 5330 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
5330 if (!dev_replace_is_ongoing) 5331 if (!dev_replace_is_ongoing)
5331 btrfs_dev_replace_unlock(dev_replace); 5332 btrfs_dev_replace_unlock(dev_replace, 0);
5333 else
5334 btrfs_dev_replace_set_lock_blocking(dev_replace);
5332 5335
5333 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && 5336 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
5334 !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && 5337 !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5751,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5751 bbio->mirror_num = map->num_stripes + 1; 5754 bbio->mirror_num = map->num_stripes + 1;
5752 } 5755 }
5753out: 5756out:
5754 if (dev_replace_is_ongoing) 5757 if (dev_replace_is_ongoing) {
5755 btrfs_dev_replace_unlock(dev_replace); 5758 btrfs_dev_replace_clear_lock_blocking(dev_replace);
5759 btrfs_dev_replace_unlock(dev_replace, 0);
5760 }
5756 free_extent_map(em); 5761 free_extent_map(em);
5757 return ret; 5762 return ret;
5758} 5763}
@@ -6705,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
6705 int item_size; 6710 int item_size;
6706 struct btrfs_dev_stats_item *ptr; 6711 struct btrfs_dev_stats_item *ptr;
6707 6712
6708 key.objectid = 0; 6713 key.objectid = BTRFS_DEV_STATS_OBJECTID;
6709 key.type = BTRFS_DEV_STATS_KEY; 6714 key.type = BTRFS_PERSISTENT_ITEM_KEY;
6710 key.offset = device->devid; 6715 key.offset = device->devid;
6711 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 6716 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
6712 if (ret) { 6717 if (ret) {
@@ -6753,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
6753 int ret; 6758 int ret;
6754 int i; 6759 int i;
6755 6760
6756 key.objectid = 0; 6761 key.objectid = BTRFS_DEV_STATS_OBJECTID;
6757 key.type = BTRFS_DEV_STATS_KEY; 6762 key.type = BTRFS_PERSISTENT_ITEM_KEY;
6758 key.offset = device->devid; 6763 key.offset = device->devid;
6759 6764
6760 path = btrfs_alloc_path(); 6765 path = btrfs_alloc_path();
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6c68d6356197..f2a20d52b9db 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -249,7 +249,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
249 goto out; 249 goto out;
250 250
251 inode_inc_iversion(inode); 251 inode_inc_iversion(inode);
252 inode->i_ctime = CURRENT_TIME; 252 inode->i_ctime = current_fs_time(inode->i_sb);
253 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); 253 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
254 ret = btrfs_update_inode(trans, root, inode); 254 ret = btrfs_update_inode(trans, root, inode);
255 BUG_ON(ret); 255 BUG_ON(ret);