about summary refs log tree commit diff stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Michal Marek <mmarek@suse.cz> 2014-01-02 08:02:06 -0500
committer Michal Marek <mmarek@suse.cz> 2014-01-02 08:02:06 -0500
commit 37e2c2a775fc887acd1432908478dfd532f7f00f (patch)
tree e51ebc699d8e262fd47e0913be6a711cb1a7b565 /fs/btrfs
parent 1c8ddae09f4c102b97c9086cc70347e89468a547 (diff)
parent 6ce4eac1f600b34f2f7f58f9cd8f0503d79e42ae (diff)
Merge commit v3.13-rc1 into kbuild/misc
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/Kconfig 18
-rw-r--r-- fs/btrfs/Makefile 4
-rw-r--r-- fs/btrfs/acl.c 2
-rw-r--r-- fs/btrfs/async-thread.c 28
-rw-r--r-- fs/btrfs/async-thread.h 2
-rw-r--r-- fs/btrfs/backref.c 8
-rw-r--r-- fs/btrfs/btrfs_inode.h 25
-rw-r--r-- fs/btrfs/check-integrity.c 43
-rw-r--r-- fs/btrfs/compat.h 7
-rw-r--r-- fs/btrfs/compression.c 3
-rw-r--r-- fs/btrfs/ctree.c 82
-rw-r--r-- fs/btrfs/ctree.h 64
-rw-r--r-- fs/btrfs/delayed-inode.c 19
-rw-r--r-- fs/btrfs/dev-replace.c 33
-rw-r--r-- fs/btrfs/dir-item.c 8
-rw-r--r-- fs/btrfs/disk-io.c 280
-rw-r--r-- fs/btrfs/disk-io.h 17
-rw-r--r-- fs/btrfs/export.c 1
-rw-r--r-- fs/btrfs/extent-tree.c 225
-rw-r--r-- fs/btrfs/extent_io.c 168
-rw-r--r-- fs/btrfs/extent_io.h 8
-rw-r--r-- fs/btrfs/extent_map.h 8
-rw-r--r-- fs/btrfs/file-item.c 7
-rw-r--r-- fs/btrfs/file.c 167
-rw-r--r-- fs/btrfs/free-space-cache.c 88
-rw-r--r-- fs/btrfs/free-space-cache.h 9
-rw-r--r-- fs/btrfs/inode-item.c 2
-rw-r--r-- fs/btrfs/inode-map.c 13
-rw-r--r-- fs/btrfs/inode.c 232
-rw-r--r-- fs/btrfs/ioctl.c 147
-rw-r--r-- fs/btrfs/ordered-data.c 73
-rw-r--r-- fs/btrfs/ordered-data.h 7
-rw-r--r-- fs/btrfs/print-tree.c 2
-rw-r--r-- fs/btrfs/raid56.c 1
-rw-r--r-- fs/btrfs/relocation.c 137
-rw-r--r-- fs/btrfs/root-tree.c 8
-rw-r--r-- fs/btrfs/scrub.c 164
-rw-r--r-- fs/btrfs/send.c 193
-rw-r--r-- fs/btrfs/super.c 50
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 74
-rw-r--r-- fs/btrfs/tests/btrfs-tests.h 25
-rw-r--r-- fs/btrfs/tests/extent-buffer-tests.c 229
-rw-r--r-- fs/btrfs/tests/extent-io-tests.c 276
-rw-r--r-- fs/btrfs/tests/inode-tests.c 955
-rw-r--r-- fs/btrfs/transaction.c 92
-rw-r--r-- fs/btrfs/transaction.h 2
-rw-r--r-- fs/btrfs/tree-defrag.c 5
-rw-r--r-- fs/btrfs/tree-log.c 205
-rw-r--r-- fs/btrfs/uuid-tree.c 6
-rw-r--r-- fs/btrfs/volumes.c 42
-rw-r--r-- fs/btrfs/volumes.h 24
51 files changed, 3065 insertions, 1223 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 398cbd517be2..aa976eced2d2 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,12 +9,17 @@ config BTRFS_FS
9 select XOR_BLOCKS 9 select XOR_BLOCKS
10 10
11 help 11 help
12 Btrfs is a new filesystem with extents, writable snapshotting, 12 Btrfs is a general purpose copy-on-write filesystem with extents,
13 support for multiple devices and many more features. 13 writable snapshotting, support for multiple devices and many more
14 features focused on fault tolerance, repair and easy administration.
14 15
15 Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET 16 The filesystem disk format is no longer unstable, and it's not
16 FINALIZED. You should say N here unless you are interested in 17 expected to change unless there are strong reasons to do so. If there
17 testing Btrfs with non-critical data. 18 is a format change, file systems with an unchanged format will
19 continue to be mountable and usable by newer kernels.
20
21 For more information, please see the web pages at
22 http://btrfs.wiki.kernel.org.
18 23
19 To compile this file system support as a module, choose M here. The 24 To compile this file system support as a module, choose M here. The
20 module will be called btrfs. 25 module will be called btrfs.
@@ -59,7 +64,8 @@ config BTRFS_FS_RUN_SANITY_TESTS
59 help 64 help
60 This will run some basic sanity tests on the free space cache 65 This will run some basic sanity tests on the free space cache
61 code to make sure it is acting as it should. These are mostly 66 code to make sure it is acting as it should. These are mostly
62 regression tests and are only really interesting to btrfs devlopers. 67 regression tests and are only really interesting to btrfs
68 developers.
63 69
64 If unsure, say N. 70 If unsure, say N.
65 71
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a91a6a355cc5..1a44e42d602a 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -14,4 +14,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
14btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o 14btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
15btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o 15btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
16 16
17btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o 17btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
18 tests/extent-buffer-tests.o tests/btrfs-tests.o \
19 tests/extent-io-tests.o tests/inode-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0d8d3b..0890c83643e9 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
229 if (ret > 0) { 229 if (ret > 0) {
230 /* we need an acl */ 230 /* we need an acl */
231 ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); 231 ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
232 } else { 232 } else if (ret < 0) {
233 cache_no_acl(inode); 233 cache_no_acl(inode);
234 } 234 }
235 } else { 235 } else {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 58b7d14b08ee..c1e0b0caf9cc 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
107 worker->idle = 1; 107 worker->idle = 1;
108 108
109 /* the list may be empty if the worker is just starting */ 109 /* the list may be empty if the worker is just starting */
110 if (!list_empty(&worker->worker_list)) { 110 if (!list_empty(&worker->worker_list) &&
111 !worker->workers->stopping) {
111 list_move(&worker->worker_list, 112 list_move(&worker->worker_list,
112 &worker->workers->idle_list); 113 &worker->workers->idle_list);
113 } 114 }
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
127 spin_lock_irqsave(&worker->workers->lock, flags); 128 spin_lock_irqsave(&worker->workers->lock, flags);
128 worker->idle = 0; 129 worker->idle = 0;
129 130
130 if (!list_empty(&worker->worker_list)) { 131 if (!list_empty(&worker->worker_list) &&
132 !worker->workers->stopping) {
131 list_move_tail(&worker->worker_list, 133 list_move_tail(&worker->worker_list,
132 &worker->workers->worker_list); 134 &worker->workers->worker_list);
133 } 135 }
@@ -260,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
260 struct btrfs_work *work = NULL; 262 struct btrfs_work *work = NULL;
261 struct list_head *cur = NULL; 263 struct list_head *cur = NULL;
262 264
263 if(!list_empty(prio_head)) 265 if (!list_empty(prio_head))
264 cur = prio_head->next; 266 cur = prio_head->next;
265 267
266 smp_mb(); 268 smp_mb();
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
412 int can_stop; 414 int can_stop;
413 415
414 spin_lock_irq(&workers->lock); 416 spin_lock_irq(&workers->lock);
417 workers->stopping = 1;
415 list_splice_init(&workers->idle_list, &workers->worker_list); 418 list_splice_init(&workers->idle_list, &workers->worker_list);
416 while (!list_empty(&workers->worker_list)) { 419 while (!list_empty(&workers->worker_list)) {
417 cur = workers->worker_list.next; 420 cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
455 workers->ordered = 0; 458 workers->ordered = 0;
456 workers->atomic_start_pending = 0; 459 workers->atomic_start_pending = 0;
457 workers->atomic_worker_start = async_helper; 460 workers->atomic_worker_start = async_helper;
461 workers->stopping = 0;
458} 462}
459 463
460/* 464/*
@@ -480,15 +484,20 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
480 atomic_set(&worker->num_pending, 0); 484 atomic_set(&worker->num_pending, 0);
481 atomic_set(&worker->refs, 1); 485 atomic_set(&worker->refs, 1);
482 worker->workers = workers; 486 worker->workers = workers;
483 worker->task = kthread_run(worker_loop, worker, 487 worker->task = kthread_create(worker_loop, worker,
484 "btrfs-%s-%d", workers->name, 488 "btrfs-%s-%d", workers->name,
485 workers->num_workers + 1); 489 workers->num_workers + 1);
486 if (IS_ERR(worker->task)) { 490 if (IS_ERR(worker->task)) {
487 ret = PTR_ERR(worker->task); 491 ret = PTR_ERR(worker->task);
488 kfree(worker);
489 goto fail; 492 goto fail;
490 } 493 }
494
491 spin_lock_irq(&workers->lock); 495 spin_lock_irq(&workers->lock);
496 if (workers->stopping) {
497 spin_unlock_irq(&workers->lock);
498 ret = -EINVAL;
499 goto fail_kthread;
500 }
492 list_add_tail(&worker->worker_list, &workers->idle_list); 501 list_add_tail(&worker->worker_list, &workers->idle_list);
493 worker->idle = 1; 502 worker->idle = 1;
494 workers->num_workers++; 503 workers->num_workers++;
@@ -496,8 +505,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
496 WARN_ON(workers->num_workers_starting < 0); 505 WARN_ON(workers->num_workers_starting < 0);
497 spin_unlock_irq(&workers->lock); 506 spin_unlock_irq(&workers->lock);
498 507
508 wake_up_process(worker->task);
499 return 0; 509 return 0;
510
511fail_kthread:
512 kthread_stop(worker->task);
500fail: 513fail:
514 kfree(worker);
501 spin_lock_irq(&workers->lock); 515 spin_lock_irq(&workers->lock);
502 workers->num_workers_starting--; 516 workers->num_workers_starting--;
503 spin_unlock_irq(&workers->lock); 517 spin_unlock_irq(&workers->lock);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 063698b90ce2..1f26792683ed 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -107,6 +107,8 @@ struct btrfs_workers {
107 107
108 /* extra name for this worker, used for current->name */ 108 /* extra name for this worker, used for current->name */
109 char *name; 109 char *name;
110
111 int stopping;
110}; 112};
111 113
112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 114void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0552a599b28f..3775947429b2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -185,6 +185,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
185{ 185{
186 struct __prelim_ref *ref; 186 struct __prelim_ref *ref;
187 187
188 if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
189 return 0;
190
188 ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); 191 ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
189 if (!ref) 192 if (!ref)
190 return -ENOMEM; 193 return -ENOMEM;
@@ -323,8 +326,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
323 326
324 eb = path->nodes[level]; 327 eb = path->nodes[level];
325 while (!eb) { 328 while (!eb) {
326 if (!level) { 329 if (WARN_ON(!level)) {
327 WARN_ON(1);
328 ret = 1; 330 ret = 1;
329 goto out; 331 goto out;
330 } 332 }
@@ -1619,7 +1621,7 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
1619 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1621 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1620 btrfs_release_path(path); 1622 btrfs_release_path(path);
1621 1623
1622 item = btrfs_item_nr(eb, slot); 1624 item = btrfs_item_nr(slot);
1623 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); 1625 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1624 1626
1625 for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { 1627 for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d0ae226926ee..ac0b39db27d1 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,6 +19,7 @@
19#ifndef __BTRFS_I__ 19#ifndef __BTRFS_I__
20#define __BTRFS_I__ 20#define __BTRFS_I__
21 21
22#include <linux/hash.h>
22#include "extent_map.h" 23#include "extent_map.h"
23#include "extent_io.h" 24#include "extent_io.h"
24#include "ordered-data.h" 25#include "ordered-data.h"
@@ -179,6 +180,25 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
179 return container_of(inode, struct btrfs_inode, vfs_inode); 180 return container_of(inode, struct btrfs_inode, vfs_inode);
180} 181}
181 182
183static inline unsigned long btrfs_inode_hash(u64 objectid,
184 const struct btrfs_root *root)
185{
186 u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
187
188#if BITS_PER_LONG == 32
189 h = (h >> 32) ^ (h & 0xffffffff);
190#endif
191
192 return (unsigned long)h;
193}
194
195static inline void btrfs_insert_inode_hash(struct inode *inode)
196{
197 unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
198
199 __insert_inode_hash(inode, h);
200}
201
182static inline u64 btrfs_ino(struct inode *inode) 202static inline u64 btrfs_ino(struct inode *inode)
183{ 203{
184 u64 ino = BTRFS_I(inode)->location.objectid; 204 u64 ino = BTRFS_I(inode)->location.objectid;
@@ -213,7 +233,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
213static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) 233static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
214{ 234{
215 if (BTRFS_I(inode)->logged_trans == generation && 235 if (BTRFS_I(inode)->logged_trans == generation &&
216 BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) 236 BTRFS_I(inode)->last_sub_trans <=
237 BTRFS_I(inode)->last_log_commit &&
238 BTRFS_I(inode)->last_sub_trans <=
239 BTRFS_I(inode)->root->last_log_commit)
217 return 1; 240 return 1;
218 return 0; 241 return 0;
219} 242}
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 1c47be187240..b50764bef141 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,6 +77,15 @@
77 * the integrity of (super)-block write requests, do not 77 * the integrity of (super)-block write requests, do not
78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79 * include and compile the integrity check tool. 79 * include and compile the integrity check tool.
80 *
81 * Expect millions of lines of information in the kernel log with an
82 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
83 * kernel config to at least 26 (which is 64MB). Usually the value is
84 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
85 * changed like this before LOG_BUF_SHIFT can be set to a high value:
86 * config LOG_BUF_SHIFT
87 * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
88 * range 12 30
80 */ 89 */
81 90
82#include <linux/sched.h> 91#include <linux/sched.h>
@@ -124,6 +133,7 @@
124#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 133#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
125#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 134#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
126#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 135#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
136#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
127 137
128struct btrfsic_dev_state; 138struct btrfsic_dev_state;
129struct btrfsic_state; 139struct btrfsic_state;
@@ -1038,7 +1048,7 @@ leaf_item_out_of_bounce_error:
1038 disk_item_offset, 1048 disk_item_offset,
1039 sizeof(struct btrfs_item)); 1049 sizeof(struct btrfs_item));
1040 item_offset = btrfs_stack_item_offset(&disk_item); 1050 item_offset = btrfs_stack_item_offset(&disk_item);
1041 item_size = btrfs_stack_item_offset(&disk_item); 1051 item_size = btrfs_stack_item_size(&disk_item);
1042 disk_key = &disk_item.key; 1052 disk_key = &disk_item.key;
1043 type = btrfs_disk_key_type(disk_key); 1053 type = btrfs_disk_key_type(disk_key);
1044 1054
@@ -1900,7 +1910,9 @@ again:
1900 dev_state, 1910 dev_state,
1901 dev_bytenr); 1911 dev_bytenr);
1902 } 1912 }
1903 if (block->logical_bytenr != bytenr) { 1913 if (block->logical_bytenr != bytenr &&
1914 !(!block->is_metadata &&
1915 block->logical_bytenr == 0))
1904 printk(KERN_INFO 1916 printk(KERN_INFO
1905 "Written block @%llu (%s/%llu/%d)" 1917 "Written block @%llu (%s/%llu/%d)"
1906 " found in hash table, %c," 1918 " found in hash table, %c,"
@@ -1910,15 +1922,14 @@ again:
1910 block->mirror_num, 1922 block->mirror_num,
1911 btrfsic_get_block_type(state, block), 1923 btrfsic_get_block_type(state, block),
1912 block->logical_bytenr); 1924 block->logical_bytenr);
1913 block->logical_bytenr = bytenr; 1925 else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1914 } else if (state->print_mask &
1915 BTRFSIC_PRINT_MASK_VERBOSE)
1916 printk(KERN_INFO 1926 printk(KERN_INFO
1917 "Written block @%llu (%s/%llu/%d)" 1927 "Written block @%llu (%s/%llu/%d)"
1918 " found in hash table, %c.\n", 1928 " found in hash table, %c.\n",
1919 bytenr, dev_state->name, dev_bytenr, 1929 bytenr, dev_state->name, dev_bytenr,
1920 block->mirror_num, 1930 block->mirror_num,
1921 btrfsic_get_block_type(state, block)); 1931 btrfsic_get_block_type(state, block));
1932 block->logical_bytenr = bytenr;
1922 } else { 1933 } else {
1923 if (num_pages * PAGE_CACHE_SIZE < 1934 if (num_pages * PAGE_CACHE_SIZE <
1924 state->datablock_size) { 1935 state->datablock_size) {
@@ -2463,10 +2474,8 @@ static int btrfsic_process_written_superblock(
2463 } 2474 }
2464 } 2475 }
2465 2476
2466 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2477 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2467 WARN_ON(1);
2468 btrfsic_dump_tree(state); 2478 btrfsic_dump_tree(state);
2469 }
2470 2479
2471 return 0; 2480 return 0;
2472} 2481}
@@ -2906,7 +2915,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2906 btrfsic_release_block_ctx(&block_ctx); 2915 btrfsic_release_block_ctx(&block_ctx);
2907 } 2916 }
2908 2917
2909 if (!match) { 2918 if (WARN_ON(!match)) {
2910 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2919 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2911 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 2920 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2912 " phys_bytenr=%llu)!\n", 2921 " phys_bytenr=%llu)!\n",
@@ -2923,7 +2932,6 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2923 bytenr, block_ctx.dev->name, 2932 bytenr, block_ctx.dev->name,
2924 block_ctx.dev_bytenr, mirror_num); 2933 block_ctx.dev_bytenr, mirror_num);
2925 } 2934 }
2926 WARN_ON(1);
2927 } 2935 }
2928} 2936}
2929 2937
@@ -3017,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
3017 (rw & WRITE) && NULL != bio->bi_io_vec) { 3025 (rw & WRITE) && NULL != bio->bi_io_vec) {
3018 unsigned int i; 3026 unsigned int i;
3019 u64 dev_bytenr; 3027 u64 dev_bytenr;
3028 u64 cur_bytenr;
3020 int bio_is_patched; 3029 int bio_is_patched;
3021 char **mapped_datav; 3030 char **mapped_datav;
3022 3031
@@ -3035,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
3035 GFP_NOFS); 3044 GFP_NOFS);
3036 if (!mapped_datav) 3045 if (!mapped_datav)
3037 goto leave; 3046 goto leave;
3047 cur_bytenr = dev_bytenr;
3038 for (i = 0; i < bio->bi_vcnt; i++) { 3048 for (i = 0; i < bio->bi_vcnt; i++) {
3039 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); 3049 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3040 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); 3050 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3046,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
3046 kfree(mapped_datav); 3056 kfree(mapped_datav);
3047 goto leave; 3057 goto leave;
3048 } 3058 }
3049 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3059 if (dev_state->state->print_mask &
3050 BTRFSIC_PRINT_MASK_VERBOSE) == 3060 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
3051 (dev_state->state->print_mask &
3052 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3053 BTRFSIC_PRINT_MASK_VERBOSE)))
3054 printk(KERN_INFO 3061 printk(KERN_INFO
3055 "#%u: page=%p, len=%u, offset=%u\n", 3062 "#%u: bytenr=%llu, len=%u, offset=%u\n",
3056 i, bio->bi_io_vec[i].bv_page, 3063 i, cur_bytenr, bio->bi_io_vec[i].bv_len,
3057 bio->bi_io_vec[i].bv_len,
3058 bio->bi_io_vec[i].bv_offset); 3064 bio->bi_io_vec[i].bv_offset);
3065 cur_bytenr += bio->bi_io_vec[i].bv_len;
3059 } 3066 }
3060 btrfsic_process_written_block(dev_state, dev_bytenr, 3067 btrfsic_process_written_block(dev_state, dev_bytenr,
3061 mapped_datav, bio->bi_vcnt, 3068 mapped_datav, bio->bi_vcnt,
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
deleted file mode 100644
index 7c4503ef6efd..000000000000
--- a/fs/btrfs/compat.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef _COMPAT_H_
2#define _COMPAT_H_
3
4#define btrfs_drop_nlink(inode) drop_nlink(inode)
5#define btrfs_inc_nlink(inode) inc_nlink(inode)
6
7#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6aad98cb343f..1499b27b4186 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,7 +32,6 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/bit_spinlock.h> 33#include <linux/bit_spinlock.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include "compat.h"
36#include "ctree.h" 35#include "ctree.h"
37#include "disk-io.h" 36#include "disk-io.h"
38#include "transaction.h" 37#include "transaction.h"
@@ -360,7 +359,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
360 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 359 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
361 360
362 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); 361 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
363 if(!bio) { 362 if (!bio) {
364 kfree(cb); 363 kfree(cb);
365 return -ENOMEM; 364 return -ENOMEM;
366 } 365 }
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 64346721173f..316136bd6dd7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,7 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
274 else 274 else
275 btrfs_set_header_owner(cow, new_root_objectid); 275 btrfs_set_header_owner(cow, new_root_objectid);
276 276
277 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), 277 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
278 BTRFS_FSID_SIZE); 278 BTRFS_FSID_SIZE);
279 279
280 WARN_ON(btrfs_header_generation(buf) > trans->transid); 280 WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -996,7 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
996 else 996 else
997 btrfs_set_header_owner(cow, root->root_key.objectid); 997 btrfs_set_header_owner(cow, root->root_key.objectid);
998 998
999 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), 999 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
1000 BTRFS_FSID_SIZE); 1000 BTRFS_FSID_SIZE);
1001 1001
1002 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 1002 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1005 return ret; 1005 return ret;
1006 } 1006 }
1007 1007
1008 if (root->ref_cows) 1008 if (root->ref_cows) {
1009 btrfs_reloc_cow_block(trans, root, buf, cow); 1009 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
1010 if (ret)
1011 return ret;
1012 }
1010 1013
1011 if (buf == root->node) { 1014 if (buf == root->node) {
1012 WARN_ON(parent && parent != buf); 1015 WARN_ON(parent && parent != buf);
@@ -1282,11 +1285,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1282 free_extent_buffer(eb_root); 1285 free_extent_buffer(eb_root);
1283 blocksize = btrfs_level_size(root, old_root->level); 1286 blocksize = btrfs_level_size(root, old_root->level);
1284 old = read_tree_block(root, logical, blocksize, 0); 1287 old = read_tree_block(root, logical, blocksize, 0);
1285 if (!old || !extent_buffer_uptodate(old)) { 1288 if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
1286 free_extent_buffer(old); 1289 free_extent_buffer(old);
1287 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", 1290 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
1288 logical); 1291 logical);
1289 WARN_ON(1);
1290 } else { 1292 } else {
1291 eb = btrfs_clone_extent_buffer(old); 1293 eb = btrfs_clone_extent_buffer(old);
1292 free_extent_buffer(old); 1294 free_extent_buffer(old);
@@ -2755,7 +2757,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2755 int level; 2757 int level;
2756 int lowest_unlock = 1; 2758 int lowest_unlock = 1;
2757 u8 lowest_level = 0; 2759 u8 lowest_level = 0;
2758 int prev_cmp; 2760 int prev_cmp = -1;
2759 2761
2760 lowest_level = p->lowest_level; 2762 lowest_level = p->lowest_level;
2761 WARN_ON(p->nodes[0] != NULL); 2763 WARN_ON(p->nodes[0] != NULL);
@@ -2766,7 +2768,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2766 } 2768 }
2767 2769
2768again: 2770again:
2769 prev_cmp = -1;
2770 b = get_old_root(root, time_seq); 2771 b = get_old_root(root, time_seq);
2771 level = btrfs_header_level(b); 2772 level = btrfs_header_level(b);
2772 p->locks[level] = BTRFS_READ_LOCK; 2773 p->locks[level] = BTRFS_READ_LOCK;
@@ -2784,6 +2785,11 @@ again:
2784 */ 2785 */
2785 btrfs_unlock_up_safe(p, level + 1); 2786 btrfs_unlock_up_safe(p, level + 1);
2786 2787
2788 /*
2789 * Since we can unwind eb's we want to do a real search every
2790 * time.
2791 */
2792 prev_cmp = -1;
2787 ret = key_search(b, key, level, &prev_cmp, &slot); 2793 ret = key_search(b, key, level, &prev_cmp, &slot);
2788 2794
2789 if (level != 0) { 2795 if (level != 0) {
@@ -3145,7 +3151,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3145 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 3151 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
3146 btrfs_set_header_owner(c, root->root_key.objectid); 3152 btrfs_set_header_owner(c, root->root_key.objectid);
3147 3153
3148 write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), 3154 write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
3149 BTRFS_FSID_SIZE); 3155 BTRFS_FSID_SIZE);
3150 3156
3151 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 3157 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
@@ -3284,7 +3290,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3284 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 3290 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
3285 btrfs_set_header_owner(split, root->root_key.objectid); 3291 btrfs_set_header_owner(split, root->root_key.objectid);
3286 write_extent_buffer(split, root->fs_info->fsid, 3292 write_extent_buffer(split, root->fs_info->fsid,
3287 btrfs_header_fsid(split), BTRFS_FSID_SIZE); 3293 btrfs_header_fsid(), BTRFS_FSID_SIZE);
3288 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 3294 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
3289 btrfs_header_chunk_tree_uuid(split), 3295 btrfs_header_chunk_tree_uuid(split),
3290 BTRFS_UUID_SIZE); 3296 BTRFS_UUID_SIZE);
@@ -3334,8 +3340,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
3334 if (!nr) 3340 if (!nr)
3335 return 0; 3341 return 0;
3336 btrfs_init_map_token(&token); 3342 btrfs_init_map_token(&token);
3337 start_item = btrfs_item_nr(l, start); 3343 start_item = btrfs_item_nr(start);
3338 end_item = btrfs_item_nr(l, end); 3344 end_item = btrfs_item_nr(end);
3339 data_len = btrfs_token_item_offset(l, start_item, &token) + 3345 data_len = btrfs_token_item_offset(l, start_item, &token) +
3340 btrfs_token_item_size(l, start_item, &token); 3346 btrfs_token_item_size(l, start_item, &token);
3341 data_len = data_len - btrfs_token_item_offset(l, end_item, &token); 3347 data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3403,7 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3403 slot = path->slots[1]; 3409 slot = path->slots[1];
3404 i = left_nritems - 1; 3410 i = left_nritems - 1;
3405 while (i >= nr) { 3411 while (i >= nr) {
3406 item = btrfs_item_nr(left, i); 3412 item = btrfs_item_nr(i);
3407 3413
3408 if (!empty && push_items > 0) { 3414 if (!empty && push_items > 0) {
3409 if (path->slots[0] > i) 3415 if (path->slots[0] > i)
@@ -3467,7 +3473,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3467 btrfs_set_header_nritems(right, right_nritems); 3473 btrfs_set_header_nritems(right, right_nritems);
3468 push_space = BTRFS_LEAF_DATA_SIZE(root); 3474 push_space = BTRFS_LEAF_DATA_SIZE(root);
3469 for (i = 0; i < right_nritems; i++) { 3475 for (i = 0; i < right_nritems; i++) {
3470 item = btrfs_item_nr(right, i); 3476 item = btrfs_item_nr(i);
3471 push_space -= btrfs_token_item_size(right, item, &token); 3477 push_space -= btrfs_token_item_size(right, item, &token);
3472 btrfs_set_token_item_offset(right, item, push_space, &token); 3478 btrfs_set_token_item_offset(right, item, push_space, &token);
3473 } 3479 }
@@ -3609,7 +3615,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3609 nr = min(right_nritems - 1, max_slot); 3615 nr = min(right_nritems - 1, max_slot);
3610 3616
3611 for (i = 0; i < nr; i++) { 3617 for (i = 0; i < nr; i++) {
3612 item = btrfs_item_nr(right, i); 3618 item = btrfs_item_nr(i);
3613 3619
3614 if (!empty && push_items > 0) { 3620 if (!empty && push_items > 0) {
3615 if (path->slots[0] < i) 3621 if (path->slots[0] < i)
@@ -3636,8 +3642,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3636 ret = 1; 3642 ret = 1;
3637 goto out; 3643 goto out;
3638 } 3644 }
3639 if (!empty && push_items == btrfs_header_nritems(right)) 3645 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
3640 WARN_ON(1);
3641 3646
3642 /* push data from right to left */ 3647 /* push data from right to left */
3643 copy_extent_buffer(left, right, 3648 copy_extent_buffer(left, right,
@@ -3660,7 +3665,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3660 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 3665 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
3661 u32 ioff; 3666 u32 ioff;
3662 3667
3663 item = btrfs_item_nr(left, i); 3668 item = btrfs_item_nr(i);
3664 3669
3665 ioff = btrfs_token_item_offset(left, item, &token); 3670 ioff = btrfs_token_item_offset(left, item, &token);
3666 btrfs_set_token_item_offset(left, item, 3671 btrfs_set_token_item_offset(left, item,
@@ -3691,7 +3696,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3691 btrfs_set_header_nritems(right, right_nritems); 3696 btrfs_set_header_nritems(right, right_nritems);
3692 push_space = BTRFS_LEAF_DATA_SIZE(root); 3697 push_space = BTRFS_LEAF_DATA_SIZE(root);
3693 for (i = 0; i < right_nritems; i++) { 3698 for (i = 0; i < right_nritems; i++) {
3694 item = btrfs_item_nr(right, i); 3699 item = btrfs_item_nr(i);
3695 3700
3696 push_space = push_space - btrfs_token_item_size(right, 3701 push_space = push_space - btrfs_token_item_size(right,
3697 item, &token); 3702 item, &token);
@@ -3832,7 +3837,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3832 btrfs_item_end_nr(l, mid); 3837 btrfs_item_end_nr(l, mid);
3833 3838
3834 for (i = 0; i < nritems; i++) { 3839 for (i = 0; i < nritems; i++) {
3835 struct btrfs_item *item = btrfs_item_nr(right, i); 3840 struct btrfs_item *item = btrfs_item_nr(i);
3836 u32 ioff; 3841 u32 ioff;
3837 3842
3838 ioff = btrfs_token_item_offset(right, item, &token); 3843 ioff = btrfs_token_item_offset(right, item, &token);
@@ -4013,7 +4018,7 @@ again:
4013 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 4018 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
4014 if (data_size && !tried_avoid_double) 4019 if (data_size && !tried_avoid_double)
4015 goto push_for_double; 4020 goto push_for_double;
4016 split = 2 ; 4021 split = 2;
4017 } 4022 }
4018 } 4023 }
4019 } 4024 }
@@ -4039,7 +4044,7 @@ again:
4039 btrfs_set_header_owner(right, root->root_key.objectid); 4044 btrfs_set_header_owner(right, root->root_key.objectid);
4040 btrfs_set_header_level(right, 0); 4045 btrfs_set_header_level(right, 0);
4041 write_extent_buffer(right, root->fs_info->fsid, 4046 write_extent_buffer(right, root->fs_info->fsid,
4042 btrfs_header_fsid(right), BTRFS_FSID_SIZE); 4047 btrfs_header_fsid(), BTRFS_FSID_SIZE);
4043 4048
4044 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 4049 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
4045 btrfs_header_chunk_tree_uuid(right), 4050 btrfs_header_chunk_tree_uuid(right),
@@ -4174,7 +4179,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
4174 4179
4175 btrfs_set_path_blocking(path); 4180 btrfs_set_path_blocking(path);
4176 4181
4177 item = btrfs_item_nr(leaf, path->slots[0]); 4182 item = btrfs_item_nr(path->slots[0]);
4178 orig_offset = btrfs_item_offset(leaf, item); 4183 orig_offset = btrfs_item_offset(leaf, item);
4179 item_size = btrfs_item_size(leaf, item); 4184 item_size = btrfs_item_size(leaf, item);
4180 4185
@@ -4197,7 +4202,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
4197 btrfs_cpu_key_to_disk(&disk_key, new_key); 4202 btrfs_cpu_key_to_disk(&disk_key, new_key);
4198 btrfs_set_item_key(leaf, &disk_key, slot); 4203 btrfs_set_item_key(leaf, &disk_key, slot);
4199 4204
4200 new_item = btrfs_item_nr(leaf, slot); 4205 new_item = btrfs_item_nr(slot);
4201 4206
4202 btrfs_set_item_offset(leaf, new_item, orig_offset); 4207 btrfs_set_item_offset(leaf, new_item, orig_offset);
4203 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 4208 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4336,7 +4341,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4336 /* first correct the data pointers */ 4341 /* first correct the data pointers */
4337 for (i = slot; i < nritems; i++) { 4342 for (i = slot; i < nritems; i++) {
4338 u32 ioff; 4343 u32 ioff;
4339 item = btrfs_item_nr(leaf, i); 4344 item = btrfs_item_nr(i);
4340 4345
4341 ioff = btrfs_token_item_offset(leaf, item, &token); 4346 ioff = btrfs_token_item_offset(leaf, item, &token);
4342 btrfs_set_token_item_offset(leaf, item, 4347 btrfs_set_token_item_offset(leaf, item,
@@ -4384,7 +4389,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4384 fixup_low_keys(root, path, &disk_key, 1); 4389 fixup_low_keys(root, path, &disk_key, 1);
4385 } 4390 }
4386 4391
4387 item = btrfs_item_nr(leaf, slot); 4392 item = btrfs_item_nr(slot);
4388 btrfs_set_item_size(leaf, item, new_size); 4393 btrfs_set_item_size(leaf, item, new_size);
4389 btrfs_mark_buffer_dirty(leaf); 4394 btrfs_mark_buffer_dirty(leaf);
4390 4395
@@ -4438,7 +4443,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4438 /* first correct the data pointers */ 4443 /* first correct the data pointers */
4439 for (i = slot; i < nritems; i++) { 4444 for (i = slot; i < nritems; i++) {
4440 u32 ioff; 4445 u32 ioff;
4441 item = btrfs_item_nr(leaf, i); 4446 item = btrfs_item_nr(i);
4442 4447
4443 ioff = btrfs_token_item_offset(leaf, item, &token); 4448 ioff = btrfs_token_item_offset(leaf, item, &token);
4444 btrfs_set_token_item_offset(leaf, item, 4449 btrfs_set_token_item_offset(leaf, item,
@@ -4452,7 +4457,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4452 4457
4453 data_end = old_data; 4458 data_end = old_data;
4454 old_size = btrfs_item_size_nr(leaf, slot); 4459 old_size = btrfs_item_size_nr(leaf, slot);
4455 item = btrfs_item_nr(leaf, slot); 4460 item = btrfs_item_nr(slot);
4456 btrfs_set_item_size(leaf, item, old_size + data_size); 4461 btrfs_set_item_size(leaf, item, old_size + data_size);
4457 btrfs_mark_buffer_dirty(leaf); 4462 btrfs_mark_buffer_dirty(leaf);
4458 4463
@@ -4511,7 +4516,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4511 for (i = slot; i < nritems; i++) { 4516 for (i = slot; i < nritems; i++) {
4512 u32 ioff; 4517 u32 ioff;
4513 4518
4514 item = btrfs_item_nr(leaf, i); 4519 item = btrfs_item_nr( i);
4515 ioff = btrfs_token_item_offset(leaf, item, &token); 4520 ioff = btrfs_token_item_offset(leaf, item, &token);
4516 btrfs_set_token_item_offset(leaf, item, 4521 btrfs_set_token_item_offset(leaf, item,
4517 ioff - total_data, &token); 4522 ioff - total_data, &token);
@@ -4532,7 +4537,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4532 for (i = 0; i < nr; i++) { 4537 for (i = 0; i < nr; i++) {
4533 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 4538 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4534 btrfs_set_item_key(leaf, &disk_key, slot + i); 4539 btrfs_set_item_key(leaf, &disk_key, slot + i);
4535 item = btrfs_item_nr(leaf, slot + i); 4540 item = btrfs_item_nr(slot + i);
4536 btrfs_set_token_item_offset(leaf, item, 4541 btrfs_set_token_item_offset(leaf, item,
4537 data_end - data_size[i], &token); 4542 data_end - data_size[i], &token);
4538 data_end -= data_size[i]; 4543 data_end -= data_size[i];
@@ -4727,7 +4732,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4727 for (i = slot + nr; i < nritems; i++) { 4732 for (i = slot + nr; i < nritems; i++) {
4728 u32 ioff; 4733 u32 ioff;
4729 4734
4730 item = btrfs_item_nr(leaf, i); 4735 item = btrfs_item_nr(i);
4731 ioff = btrfs_token_item_offset(leaf, item, &token); 4736 ioff = btrfs_token_item_offset(leaf, item, &token);
4732 btrfs_set_token_item_offset(leaf, item, 4737 btrfs_set_token_item_offset(leaf, item,
4733 ioff + dsize, &token); 4738 ioff + dsize, &token);
@@ -4820,14 +4825,18 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4820 4825
4821 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 4826 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4822 4827
4823 if (key.offset > 0) 4828 if (key.offset > 0) {
4824 key.offset--; 4829 key.offset--;
4825 else if (key.type > 0) 4830 } else if (key.type > 0) {
4826 key.type--; 4831 key.type--;
4827 else if (key.objectid > 0) 4832 key.offset = (u64)-1;
4833 } else if (key.objectid > 0) {
4828 key.objectid--; 4834 key.objectid--;
4829 else 4835 key.type = (u8)-1;
4836 key.offset = (u64)-1;
4837 } else {
4830 return 1; 4838 return 1;
4839 }
4831 4840
4832 btrfs_release_path(path); 4841 btrfs_release_path(path);
4833 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4842 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4863,7 +4872,6 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4863 * was nothing in the tree that matched the search criteria. 4872 * was nothing in the tree that matched the search criteria.
4864 */ 4873 */
4865int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 4874int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4866 struct btrfs_key *max_key,
4867 struct btrfs_path *path, 4875 struct btrfs_path *path,
4868 u64 min_trans) 4876 u64 min_trans)
4869{ 4877{
@@ -4908,10 +4916,8 @@ again:
4908 * If it is too old, old, skip to the next one. 4916 * If it is too old, old, skip to the next one.
4909 */ 4917 */
4910 while (slot < nritems) { 4918 while (slot < nritems) {
4911 u64 blockptr;
4912 u64 gen; 4919 u64 gen;
4913 4920
4914 blockptr = btrfs_node_blockptr(cur, slot);
4915 gen = btrfs_node_ptr_generation(cur, slot); 4921 gen = btrfs_node_ptr_generation(cur, slot);
4916 if (gen < min_trans) { 4922 if (gen < min_trans) {
4917 slot++; 4923 slot++;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3c1da6f98a4d..54ab86127f7a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -47,6 +47,12 @@ extern struct kmem_cache *btrfs_path_cachep;
47extern struct kmem_cache *btrfs_free_space_cachep; 47extern struct kmem_cache *btrfs_free_space_cachep;
48struct btrfs_ordered_sum; 48struct btrfs_ordered_sum;
49 49
50#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
51#define STATIC noinline
52#else
53#define STATIC static noinline
54#endif
55
50#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ 56#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
51 57
52#define BTRFS_MAX_MIRRORS 3 58#define BTRFS_MAX_MIRRORS 3
@@ -1118,15 +1124,6 @@ struct btrfs_space_info {
1118 */ 1124 */
1119 struct percpu_counter total_bytes_pinned; 1125 struct percpu_counter total_bytes_pinned;
1120 1126
1121 /*
1122 * we bump reservation progress every time we decrement
1123 * bytes_reserved. This way people waiting for reservations
1124 * know something good has happened and they can check
1125 * for progress. The number here isn't to be trusted, it
1126 * just shows reclaim activity
1127 */
1128 unsigned long reservation_progress;
1129
1130 unsigned int full:1; /* indicates that we cannot allocate any more 1127 unsigned int full:1; /* indicates that we cannot allocate any more
1131 chunks for this space */ 1128 chunks for this space */
1132 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ 1129 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@@ -1589,7 +1586,6 @@ struct btrfs_fs_info {
1589 atomic_t scrubs_paused; 1586 atomic_t scrubs_paused;
1590 atomic_t scrub_cancel_req; 1587 atomic_t scrub_cancel_req;
1591 wait_queue_head_t scrub_pause_wait; 1588 wait_queue_head_t scrub_pause_wait;
1592 struct rw_semaphore scrub_super_lock;
1593 int scrub_workers_refcnt; 1589 int scrub_workers_refcnt;
1594 struct btrfs_workers scrub_workers; 1590 struct btrfs_workers scrub_workers;
1595 struct btrfs_workers scrub_wr_completion_workers; 1591 struct btrfs_workers scrub_wr_completion_workers;
@@ -1733,7 +1729,9 @@ struct btrfs_root {
1733 int ref_cows; 1729 int ref_cows;
1734 int track_dirty; 1730 int track_dirty;
1735 int in_radix; 1731 int in_radix;
1736 1732#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1733 int dummy_root;
1734#endif
1737 u64 defrag_trans_start; 1735 u64 defrag_trans_start;
1738 struct btrfs_key defrag_progress; 1736 struct btrfs_key defrag_progress;
1739 struct btrfs_key defrag_max; 1737 struct btrfs_key defrag_max;
@@ -2470,8 +2468,7 @@ static inline unsigned long btrfs_item_nr_offset(int nr)
2470 sizeof(struct btrfs_item) * nr; 2468 sizeof(struct btrfs_item) * nr;
2471} 2469}
2472 2470
2473static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, 2471static inline struct btrfs_item *btrfs_item_nr(int nr)
2474 int nr)
2475{ 2472{
2476 return (struct btrfs_item *)btrfs_item_nr_offset(nr); 2473 return (struct btrfs_item *)btrfs_item_nr_offset(nr);
2477} 2474}
@@ -2484,30 +2481,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb,
2484 2481
2485static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) 2482static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
2486{ 2483{
2487 return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); 2484 return btrfs_item_end(eb, btrfs_item_nr(nr));
2488} 2485}
2489 2486
2490static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) 2487static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
2491{ 2488{
2492 return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); 2489 return btrfs_item_offset(eb, btrfs_item_nr(nr));
2493} 2490}
2494 2491
2495static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) 2492static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
2496{ 2493{
2497 return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); 2494 return btrfs_item_size(eb, btrfs_item_nr(nr));
2498} 2495}
2499 2496
2500static inline void btrfs_item_key(struct extent_buffer *eb, 2497static inline void btrfs_item_key(struct extent_buffer *eb,
2501 struct btrfs_disk_key *disk_key, int nr) 2498 struct btrfs_disk_key *disk_key, int nr)
2502{ 2499{
2503 struct btrfs_item *item = btrfs_item_nr(eb, nr); 2500 struct btrfs_item *item = btrfs_item_nr(nr);
2504 read_eb_member(eb, item, struct btrfs_item, key, disk_key); 2501 read_eb_member(eb, item, struct btrfs_item, key, disk_key);
2505} 2502}
2506 2503
2507static inline void btrfs_set_item_key(struct extent_buffer *eb, 2504static inline void btrfs_set_item_key(struct extent_buffer *eb,
2508 struct btrfs_disk_key *disk_key, int nr) 2505 struct btrfs_disk_key *disk_key, int nr)
2509{ 2506{
2510 struct btrfs_item *item = btrfs_item_nr(eb, nr); 2507 struct btrfs_item *item = btrfs_item_nr(nr);
2511 write_eb_member(eb, item, struct btrfs_item, key, disk_key); 2508 write_eb_member(eb, item, struct btrfs_item, key, disk_key);
2512} 2509}
2513 2510
@@ -2675,7 +2672,7 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
2675 btrfs_set_header_flags(eb, flags); 2672 btrfs_set_header_flags(eb, flags);
2676} 2673}
2677 2674
2678static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) 2675static inline unsigned long btrfs_header_fsid(void)
2679{ 2676{
2680 return offsetof(struct btrfs_header, fsid); 2677 return offsetof(struct btrfs_header, fsid);
2681} 2678}
@@ -3114,11 +3111,6 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
3114 ((unsigned long)(btrfs_leaf_data(leaf) + \ 3111 ((unsigned long)(btrfs_leaf_data(leaf) + \
3115 btrfs_item_offset_nr(leaf, slot))) 3112 btrfs_item_offset_nr(leaf, slot)))
3116 3113
3117static inline struct dentry *fdentry(struct file *file)
3118{
3119 return file->f_path.dentry;
3120}
3121
3122static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) 3114static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
3123{ 3115{
3124 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && 3116 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@@ -3135,7 +3127,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3135 unsigned num_items) 3127 unsigned num_items)
3136{ 3128{
3137 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3129 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3138 3 * num_items; 3130 2 * num_items;
3139} 3131}
3140 3132
3141/* 3133/*
@@ -3317,7 +3309,6 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3317 struct btrfs_key *key, int lowest_level, 3309 struct btrfs_key *key, int lowest_level,
3318 u64 min_trans); 3310 u64 min_trans);
3319int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 3311int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3320 struct btrfs_key *max_key,
3321 struct btrfs_path *path, 3312 struct btrfs_path *path,
3322 u64 min_trans); 3313 u64 min_trans);
3323enum btrfs_compare_tree_result { 3314enum btrfs_compare_tree_result {
@@ -3622,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
3622 struct btrfs_ordered_sum *sums); 3613 struct btrfs_ordered_sum *sums);
3623int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 3614int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
3624 struct bio *bio, u64 file_start, int contig); 3615 struct bio *bio, u64 file_start, int contig);
3625int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
3626 struct btrfs_root *root, struct btrfs_path *path,
3627 u64 isize);
3628int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 3616int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
3629 struct list_head *list, int search_commit); 3617 struct list_head *list, int search_commit);
3630/* inode.c */ 3618/* inode.c */
@@ -3684,8 +3672,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3684 u32 min_type); 3672 u32 min_type);
3685 3673
3686int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 3674int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
3687int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, 3675int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
3688 int delay_iput);
3689int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 3676int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
3690 struct extent_state **cached_state); 3677 struct extent_state **cached_state);
3691int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 3678int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -3754,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
3754int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); 3741int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
3755void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 3742void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
3756 int skip_pinned); 3743 int skip_pinned);
3757int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
3758 u64 start, u64 end, int skip_pinned,
3759 int modified);
3760extern const struct file_operations btrfs_file_operations; 3744extern const struct file_operations btrfs_file_operations;
3761int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 3745int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3762 struct btrfs_root *root, struct inode *inode, 3746 struct btrfs_root *root, struct inode *inode,
@@ -3939,9 +3923,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
3939 struct btrfs_root *root); 3923 struct btrfs_root *root);
3940int btrfs_recover_relocation(struct btrfs_root *root); 3924int btrfs_recover_relocation(struct btrfs_root *root);
3941int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); 3925int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
3942void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 3926int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
3943 struct btrfs_root *root, struct extent_buffer *buf, 3927 struct btrfs_root *root, struct extent_buffer *buf,
3944 struct extent_buffer *cow); 3928 struct extent_buffer *cow);
3945void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 3929void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
3946 struct btrfs_pending_snapshot *pending, 3930 struct btrfs_pending_snapshot *pending,
3947 u64 *bytes_to_reserve); 3931 u64 *bytes_to_reserve);
@@ -3953,9 +3937,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3953 u64 end, struct btrfs_scrub_progress *progress, 3937 u64 end, struct btrfs_scrub_progress *progress,
3954 int readonly, int is_dev_replace); 3938 int readonly, int is_dev_replace);
3955void btrfs_scrub_pause(struct btrfs_root *root); 3939void btrfs_scrub_pause(struct btrfs_root *root);
3956void btrfs_scrub_pause_super(struct btrfs_root *root);
3957void btrfs_scrub_continue(struct btrfs_root *root); 3940void btrfs_scrub_continue(struct btrfs_root *root);
3958void btrfs_scrub_continue_super(struct btrfs_root *root);
3959int btrfs_scrub_cancel(struct btrfs_fs_info *info); 3941int btrfs_scrub_cancel(struct btrfs_fs_info *info);
3960int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, 3942int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
3961 struct btrfs_device *dev); 3943 struct btrfs_device *dev);
@@ -4037,5 +4019,9 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
4037 return signal_pending(current); 4019 return signal_pending(current);
4038} 4020}
4039 4021
4022/* Sanity test specific functions */
4023#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4024void btrfs_test_destroy_inode(struct inode *inode);
4025#endif
4040 4026
4041#endif 4027#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cbd9523ad09c..8d292fbae659 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -108,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
108 return node; 108 return node;
109 } 109 }
110 btrfs_inode->delayed_node = node; 110 btrfs_inode->delayed_node = node;
111 atomic_inc(&node->refs); /* can be accessed */ 111 /* can be accessed and cached in the inode */
112 atomic_inc(&node->refs); /* cached in the inode */ 112 atomic_add(2, &node->refs);
113 spin_unlock(&root->inode_lock); 113 spin_unlock(&root->inode_lock);
114 return node; 114 return node;
115 } 115 }
@@ -138,8 +138,8 @@ again:
138 return ERR_PTR(-ENOMEM); 138 return ERR_PTR(-ENOMEM);
139 btrfs_init_delayed_node(node, root, ino); 139 btrfs_init_delayed_node(node, root, ino);
140 140
141 atomic_inc(&node->refs); /* cached in the btrfs inode */ 141 /* cached in the btrfs inode and can be accessed */
142 atomic_inc(&node->refs); /* can be accessed */ 142 atomic_add(2, &node->refs);
143 143
144 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 144 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
145 if (ret) { 145 if (ret) {
@@ -649,14 +649,13 @@ static int btrfs_delayed_inode_reserve_metadata(
649 goto out; 649 goto out;
650 650
651 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 651 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
652 if (!ret) 652 if (!WARN_ON(ret))
653 goto out; 653 goto out;
654 654
655 /* 655 /*
656 * Ok this is a problem, let's just steal from the global rsv 656 * Ok this is a problem, let's just steal from the global rsv
657 * since this really shouldn't happen that often. 657 * since this really shouldn't happen that often.
658 */ 658 */
659 WARN_ON(1);
660 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, 659 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
661 dst_rsv, num_bytes); 660 dst_rsv, num_bytes);
662 goto out; 661 goto out;
@@ -771,13 +770,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
771 */ 770 */
772 btrfs_set_path_blocking(path); 771 btrfs_set_path_blocking(path);
773 772
774 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS); 773 keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
775 if (!keys) { 774 if (!keys) {
776 ret = -ENOMEM; 775 ret = -ENOMEM;
777 goto out; 776 goto out;
778 } 777 }
779 778
780 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS); 779 data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
781 if (!data_size) { 780 if (!data_size) {
782 ret = -ENOMEM; 781 ret = -ENOMEM;
783 goto error; 782 goto error;
@@ -1174,8 +1173,10 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1174 mutex_unlock(&delayed_node->mutex); 1173 mutex_unlock(&delayed_node->mutex);
1175 1174
1176 path = btrfs_alloc_path(); 1175 path = btrfs_alloc_path();
1177 if (!path) 1176 if (!path) {
1177 btrfs_release_delayed_node(delayed_node);
1178 return -ENOMEM; 1178 return -ENOMEM;
1179 }
1179 path->leave_spinning = 1; 1180 path->leave_spinning = 1;
1180 1181
1181 block_rsv = trans->block_rsv; 1182 block_rsv = trans->block_rsv;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a64435359385..2cfc3dfff64f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -26,7 +26,6 @@
26#include <linux/kthread.h> 26#include <linux/kthread.h>
27#include <linux/math64.h> 27#include <linux/math64.h>
28#include <asm/div64.h> 28#include <asm/div64.h>
29#include "compat.h"
30#include "ctree.h" 29#include "ctree.h"
31#include "extent_map.h" 30#include "extent_map.h"
32#include "disk-io.h" 31#include "disk-io.h"
@@ -38,7 +37,6 @@
38#include "rcu-string.h" 37#include "rcu-string.h"
39#include "dev-replace.h" 38#include "dev-replace.h"
40 39
41static u64 btrfs_get_seconds_since_1970(void);
42static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, 40static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
43 int scrub_ret); 41 int scrub_ret);
44static void btrfs_dev_replace_update_device_in_mapping_tree( 42static void btrfs_dev_replace_update_device_in_mapping_tree(
@@ -296,13 +294,6 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
296 dev_replace->cursor_left_last_write_of_item; 294 dev_replace->cursor_left_last_write_of_item;
297} 295}
298 296
299static u64 btrfs_get_seconds_since_1970(void)
300{
301 struct timespec t = CURRENT_TIME_SEC;
302
303 return t.tv_sec;
304}
305
306int btrfs_dev_replace_start(struct btrfs_root *root, 297int btrfs_dev_replace_start(struct btrfs_root *root,
307 struct btrfs_ioctl_dev_replace_args *args) 298 struct btrfs_ioctl_dev_replace_args *args)
308{ 299{
@@ -375,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
375 dev_replace->tgtdev = tgt_device; 366 dev_replace->tgtdev = tgt_device;
376 367
377 printk_in_rcu(KERN_INFO 368 printk_in_rcu(KERN_INFO
378 "btrfs: dev_replace from %s (devid %llu) to %s) started\n", 369 "btrfs: dev_replace from %s (devid %llu) to %s started\n",
379 src_device->missing ? "<missing disk>" : 370 src_device->missing ? "<missing disk>" :
380 rcu_str_deref(src_device->name), 371 rcu_str_deref(src_device->name),
381 src_device->devid, 372 src_device->devid,
@@ -390,7 +381,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
390 * go to the tgtdev as well (refer to btrfs_map_block()). 381 * go to the tgtdev as well (refer to btrfs_map_block()).
391 */ 382 */
392 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; 383 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
393 dev_replace->time_started = btrfs_get_seconds_since_1970(); 384 dev_replace->time_started = get_seconds();
394 dev_replace->cursor_left = 0; 385 dev_replace->cursor_left = 0;
395 dev_replace->committed_cursor_left = 0; 386 dev_replace->committed_cursor_left = 0;
396 dev_replace->cursor_left_last_write_of_item = 0; 387 dev_replace->cursor_left_last_write_of_item = 0;
@@ -400,7 +391,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
400 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 391 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
401 btrfs_dev_replace_unlock(dev_replace); 392 btrfs_dev_replace_unlock(dev_replace);
402 393
403 btrfs_wait_all_ordered_extents(root->fs_info, 0); 394 btrfs_wait_ordered_roots(root->fs_info, -1);
404 395
405 /* force writing the updated state information to disk */ 396 /* force writing the updated state information to disk */
406 trans = btrfs_start_transaction(root, 0); 397 trans = btrfs_start_transaction(root, 0);
@@ -470,12 +461,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
470 * flush all outstanding I/O and inode extent mappings before the 461 * flush all outstanding I/O and inode extent mappings before the
471 * copy operation is declared as being finished 462 * copy operation is declared as being finished
472 */ 463 */
473 ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); 464 ret = btrfs_start_delalloc_roots(root->fs_info, 0);
474 if (ret) { 465 if (ret) {
475 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 466 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
476 return ret; 467 return ret;
477 } 468 }
478 btrfs_wait_all_ordered_extents(root->fs_info, 0); 469 btrfs_wait_ordered_roots(root->fs_info, -1);
479 470
480 trans = btrfs_start_transaction(root, 0); 471 trans = btrfs_start_transaction(root, 0);
481 if (IS_ERR(trans)) { 472 if (IS_ERR(trans)) {
@@ -493,7 +484,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
493 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; 484 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
494 dev_replace->tgtdev = NULL; 485 dev_replace->tgtdev = NULL;
495 dev_replace->srcdev = NULL; 486 dev_replace->srcdev = NULL;
496 dev_replace->time_stopped = btrfs_get_seconds_since_1970(); 487 dev_replace->time_stopped = get_seconds();
497 dev_replace->item_needs_writeback = 1; 488 dev_replace->item_needs_writeback = 1;
498 489
499 if (scrub_ret) { 490 if (scrub_ret) {
@@ -535,10 +526,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
535 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 526 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
536 527
537 btrfs_rm_dev_replace_srcdev(fs_info, src_device); 528 btrfs_rm_dev_replace_srcdev(fs_info, src_device);
538 if (src_device->bdev) { 529
539 /* zero out the old super */
540 btrfs_scratch_superblock(src_device);
541 }
542 /* 530 /*
543 * this is again a consistent state where no dev_replace procedure 531 * this is again a consistent state where no dev_replace procedure
544 * is running, the target device is part of the filesystem, the 532 * is running, the target device is part of the filesystem, the
@@ -653,6 +641,9 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
653 u64 result; 641 u64 result;
654 int ret; 642 int ret;
655 643
644 if (fs_info->sb->s_flags & MS_RDONLY)
645 return -EROFS;
646
656 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 647 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
657 btrfs_dev_replace_lock(dev_replace); 648 btrfs_dev_replace_lock(dev_replace);
658 switch (dev_replace->replace_state) { 649 switch (dev_replace->replace_state) {
@@ -671,7 +662,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
671 break; 662 break;
672 } 663 }
673 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 664 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
674 dev_replace->time_stopped = btrfs_get_seconds_since_1970(); 665 dev_replace->time_stopped = get_seconds();
675 dev_replace->item_needs_writeback = 1; 666 dev_replace->item_needs_writeback = 1;
676 btrfs_dev_replace_unlock(dev_replace); 667 btrfs_dev_replace_unlock(dev_replace);
677 btrfs_scrub_cancel(fs_info); 668 btrfs_scrub_cancel(fs_info);
@@ -706,7 +697,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
706 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 697 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
707 dev_replace->replace_state = 698 dev_replace->replace_state =
708 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; 699 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
709 dev_replace->time_stopped = btrfs_get_seconds_since_1970(); 700 dev_replace->time_stopped = get_seconds();
710 dev_replace->item_needs_writeback = 1; 701 dev_replace->item_needs_writeback = 1;
711 pr_info("btrfs: suspending dev_replace for unmount\n"); 702 pr_info("btrfs: suspending dev_replace for unmount\n");
712 break; 703 break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 79e594e341c7..c031ea3fd70f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
58 return ERR_PTR(ret); 58 return ERR_PTR(ret);
59 WARN_ON(ret > 0); 59 WARN_ON(ret > 0);
60 leaf = path->nodes[0]; 60 leaf = path->nodes[0];
61 item = btrfs_item_nr(leaf, path->slots[0]); 61 item = btrfs_item_nr(path->slots[0]);
62 ptr = btrfs_item_ptr(leaf, path->slots[0], char); 62 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
63 BUG_ON(data_size > btrfs_item_size(leaf, item)); 63 BUG_ON(data_size > btrfs_item_size(leaf, item));
64 ptr += btrfs_item_size(leaf, item) - data_size; 64 ptr += btrfs_item_size(leaf, item) - data_size;
@@ -474,8 +474,10 @@ int verify_dir_item(struct btrfs_root *root,
474 } 474 }
475 475
476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ 476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
477 if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { 477 if ((btrfs_dir_data_len(leaf, dir_item) +
478 printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", 478 btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
479 printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
480 (unsigned)btrfs_dir_name_len(leaf, dir_item),
479 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 481 (unsigned)btrfs_dir_data_len(leaf, dir_item));
480 return 1; 482 return 1;
481 } 483 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4cbb00af92ff..8072cfa8a3b1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -33,7 +33,6 @@
33#include <linux/uuid.h> 33#include <linux/uuid.h>
34#include <linux/semaphore.h> 34#include <linux/semaphore.h>
35#include <asm/unaligned.h> 35#include <asm/unaligned.h>
36#include "compat.h"
37#include "ctree.h" 36#include "ctree.h"
38#include "disk-io.h" 37#include "disk-io.h"
39#include "transaction.h" 38#include "transaction.h"
@@ -64,7 +63,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
64static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 63static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
65static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 64static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
66 struct btrfs_root *root); 65 struct btrfs_root *root);
67static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
68static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 66static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
69static int btrfs_destroy_marked_extents(struct btrfs_root *root, 67static int btrfs_destroy_marked_extents(struct btrfs_root *root,
70 struct extent_io_tree *dirty_pages, 68 struct extent_io_tree *dirty_pages,
@@ -157,6 +155,7 @@ static struct btrfs_lockdep_keyset {
157 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 155 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
158 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 156 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
159 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 157 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
158 { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
160 { .id = 0, .name_stem = "tree" }, 159 { .id = 0, .name_stem = "tree" },
161}; 160};
162 161
@@ -476,14 +475,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
476 if (page != eb->pages[0]) 475 if (page != eb->pages[0])
477 return 0; 476 return 0;
478 found_start = btrfs_header_bytenr(eb); 477 found_start = btrfs_header_bytenr(eb);
479 if (found_start != start) { 478 if (WARN_ON(found_start != start || !PageUptodate(page)))
480 WARN_ON(1);
481 return 0; 479 return 0;
482 }
483 if (!PageUptodate(page)) {
484 WARN_ON(1);
485 return 0;
486 }
487 csum_tree_block(root, eb, 0); 480 csum_tree_block(root, eb, 0);
488 return 0; 481 return 0;
489} 482}
@@ -495,7 +488,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
495 u8 fsid[BTRFS_UUID_SIZE]; 488 u8 fsid[BTRFS_UUID_SIZE];
496 int ret = 1; 489 int ret = 1;
497 490
498 read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); 491 read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
499 while (fs_devices) { 492 while (fs_devices) {
500 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { 493 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
501 ret = 0; 494 ret = 0;
@@ -1104,8 +1097,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
1104{ 1097{
1105 struct inode *btree_inode = root->fs_info->btree_inode; 1098 struct inode *btree_inode = root->fs_info->btree_inode;
1106 struct extent_buffer *eb; 1099 struct extent_buffer *eb;
1107 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 1100 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
1108 bytenr, blocksize);
1109 return eb; 1101 return eb;
1110} 1102}
1111 1103
@@ -1228,14 +1220,18 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1228 atomic_set(&root->refs, 1); 1220 atomic_set(&root->refs, 1);
1229 root->log_transid = 0; 1221 root->log_transid = 0;
1230 root->last_log_commit = 0; 1222 root->last_log_commit = 0;
1231 extent_io_tree_init(&root->dirty_log_pages, 1223 if (fs_info)
1232 fs_info->btree_inode->i_mapping); 1224 extent_io_tree_init(&root->dirty_log_pages,
1225 fs_info->btree_inode->i_mapping);
1233 1226
1234 memset(&root->root_key, 0, sizeof(root->root_key)); 1227 memset(&root->root_key, 0, sizeof(root->root_key));
1235 memset(&root->root_item, 0, sizeof(root->root_item)); 1228 memset(&root->root_item, 0, sizeof(root->root_item));
1236 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 1229 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
1237 memset(&root->root_kobj, 0, sizeof(root->root_kobj)); 1230 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
1238 root->defrag_trans_start = fs_info->generation; 1231 if (fs_info)
1232 root->defrag_trans_start = fs_info->generation;
1233 else
1234 root->defrag_trans_start = 0;
1239 init_completion(&root->kobj_unregister); 1235 init_completion(&root->kobj_unregister);
1240 root->defrag_running = 0; 1236 root->defrag_running = 0;
1241 root->root_key.objectid = objectid; 1237 root->root_key.objectid = objectid;
@@ -1252,6 +1248,22 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
1252 return root; 1248 return root;
1253} 1249}
1254 1250
1251#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1252/* Should only be used by the testing infrastructure */
1253struct btrfs_root *btrfs_alloc_dummy_root(void)
1254{
1255 struct btrfs_root *root;
1256
1257 root = btrfs_alloc_root(NULL);
1258 if (!root)
1259 return ERR_PTR(-ENOMEM);
1260 __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
1261 root->dummy_root = 1;
1262
1263 return root;
1264}
1265#endif
1266
1255struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 1267struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1256 struct btrfs_fs_info *fs_info, 1268 struct btrfs_fs_info *fs_info,
1257 u64 objectid) 1269 u64 objectid)
@@ -1291,7 +1303,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 btrfs_set_header_owner(leaf, objectid); 1303 btrfs_set_header_owner(leaf, objectid);
1292 root->node = leaf; 1304 root->node = leaf;
1293 1305
1294 write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), 1306 write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(),
1295 BTRFS_FSID_SIZE); 1307 BTRFS_FSID_SIZE);
1296 write_extent_buffer(leaf, fs_info->chunk_tree_uuid, 1308 write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
1297 btrfs_header_chunk_tree_uuid(leaf), 1309 btrfs_header_chunk_tree_uuid(leaf),
@@ -1378,7 +1390,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1378 root->node = leaf; 1390 root->node = leaf;
1379 1391
1380 write_extent_buffer(root->node, root->fs_info->fsid, 1392 write_extent_buffer(root->node, root->fs_info->fsid,
1381 btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); 1393 btrfs_header_fsid(), BTRFS_FSID_SIZE);
1382 btrfs_mark_buffer_dirty(root->node); 1394 btrfs_mark_buffer_dirty(root->node);
1383 btrfs_tree_unlock(root->node); 1395 btrfs_tree_unlock(root->node);
1384 return root; 1396 return root;
@@ -1560,8 +1572,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
1560 return ret; 1572 return ret;
1561} 1573}
1562 1574
1563struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1575struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
1564 struct btrfs_key *location) 1576 struct btrfs_key *location,
1577 bool check_ref)
1565{ 1578{
1566 struct btrfs_root *root; 1579 struct btrfs_root *root;
1567 int ret; 1580 int ret;
@@ -1585,7 +1598,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1585again: 1598again:
1586 root = btrfs_lookup_fs_root(fs_info, location->objectid); 1599 root = btrfs_lookup_fs_root(fs_info, location->objectid);
1587 if (root) { 1600 if (root) {
1588 if (btrfs_root_refs(&root->root_item) == 0) 1601 if (check_ref && btrfs_root_refs(&root->root_item) == 0)
1589 return ERR_PTR(-ENOENT); 1602 return ERR_PTR(-ENOENT);
1590 return root; 1603 return root;
1591 } 1604 }
@@ -1594,7 +1607,7 @@ again:
1594 if (IS_ERR(root)) 1607 if (IS_ERR(root))
1595 return root; 1608 return root;
1596 1609
1597 if (btrfs_root_refs(&root->root_item) == 0) { 1610 if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
1598 ret = -ENOENT; 1611 ret = -ENOENT;
1599 goto fail; 1612 goto fail;
1600 } 1613 }
@@ -1778,6 +1791,9 @@ sleep:
1778 wake_up_process(root->fs_info->cleaner_kthread); 1791 wake_up_process(root->fs_info->cleaner_kthread);
1779 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1792 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1780 1793
1794 if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
1795 &root->fs_info->fs_state)))
1796 btrfs_cleanup_transaction(root);
1781 if (!try_to_freeze()) { 1797 if (!try_to_freeze()) {
1782 set_current_state(TASK_INTERRUPTIBLE); 1798 set_current_state(TASK_INTERRUPTIBLE);
1783 if (!kthread_should_stop() && 1799 if (!kthread_should_stop() &&
@@ -2011,50 +2027,28 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
2011 btrfs_stop_workers(&fs_info->qgroup_rescan_workers); 2027 btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
2012} 2028}
2013 2029
2030static void free_root_extent_buffers(struct btrfs_root *root)
2031{
2032 if (root) {
2033 free_extent_buffer(root->node);
2034 free_extent_buffer(root->commit_root);
2035 root->node = NULL;
2036 root->commit_root = NULL;
2037 }
2038}
2039
2014/* helper to cleanup tree roots */ 2040/* helper to cleanup tree roots */
2015static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) 2041static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
2016{ 2042{
2017 free_extent_buffer(info->tree_root->node); 2043 free_root_extent_buffers(info->tree_root);
2018 free_extent_buffer(info->tree_root->commit_root); 2044
2019 info->tree_root->node = NULL; 2045 free_root_extent_buffers(info->dev_root);
2020 info->tree_root->commit_root = NULL; 2046 free_root_extent_buffers(info->extent_root);
2021 2047 free_root_extent_buffers(info->csum_root);
2022 if (info->dev_root) { 2048 free_root_extent_buffers(info->quota_root);
2023 free_extent_buffer(info->dev_root->node); 2049 free_root_extent_buffers(info->uuid_root);
2024 free_extent_buffer(info->dev_root->commit_root); 2050 if (chunk_root)
2025 info->dev_root->node = NULL; 2051 free_root_extent_buffers(info->chunk_root);
2026 info->dev_root->commit_root = NULL;
2027 }
2028 if (info->extent_root) {
2029 free_extent_buffer(info->extent_root->node);
2030 free_extent_buffer(info->extent_root->commit_root);
2031 info->extent_root->node = NULL;
2032 info->extent_root->commit_root = NULL;
2033 }
2034 if (info->csum_root) {
2035 free_extent_buffer(info->csum_root->node);
2036 free_extent_buffer(info->csum_root->commit_root);
2037 info->csum_root->node = NULL;
2038 info->csum_root->commit_root = NULL;
2039 }
2040 if (info->quota_root) {
2041 free_extent_buffer(info->quota_root->node);
2042 free_extent_buffer(info->quota_root->commit_root);
2043 info->quota_root->node = NULL;
2044 info->quota_root->commit_root = NULL;
2045 }
2046 if (info->uuid_root) {
2047 free_extent_buffer(info->uuid_root->node);
2048 free_extent_buffer(info->uuid_root->commit_root);
2049 info->uuid_root->node = NULL;
2050 info->uuid_root->commit_root = NULL;
2051 }
2052 if (chunk_root) {
2053 free_extent_buffer(info->chunk_root->node);
2054 free_extent_buffer(info->chunk_root->commit_root);
2055 info->chunk_root->node = NULL;
2056 info->chunk_root->commit_root = NULL;
2057 }
2058} 2052}
2059 2053
2060static void del_fs_roots(struct btrfs_fs_info *fs_info) 2054static void del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2228,7 +2222,6 @@ int open_ctree(struct super_block *sb,
2228 atomic_set(&fs_info->scrubs_paused, 0); 2222 atomic_set(&fs_info->scrubs_paused, 0);
2229 atomic_set(&fs_info->scrub_cancel_req, 0); 2223 atomic_set(&fs_info->scrub_cancel_req, 0);
2230 init_waitqueue_head(&fs_info->scrub_pause_wait); 2224 init_waitqueue_head(&fs_info->scrub_pause_wait);
2231 init_rwsem(&fs_info->scrub_super_lock);
2232 fs_info->scrub_workers_refcnt = 0; 2225 fs_info->scrub_workers_refcnt = 0;
2233#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2226#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2234 fs_info->check_integrity_print_mask = 0; 2227 fs_info->check_integrity_print_mask = 0;
@@ -2270,7 +2263,7 @@ int open_ctree(struct super_block *sb,
2270 sizeof(struct btrfs_key)); 2263 sizeof(struct btrfs_key));
2271 set_bit(BTRFS_INODE_DUMMY, 2264 set_bit(BTRFS_INODE_DUMMY,
2272 &BTRFS_I(fs_info->btree_inode)->runtime_flags); 2265 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2273 insert_inode_hash(fs_info->btree_inode); 2266 btrfs_insert_inode_hash(fs_info->btree_inode);
2274 2267
2275 spin_lock_init(&fs_info->block_group_cache_lock); 2268 spin_lock_init(&fs_info->block_group_cache_lock);
2276 fs_info->block_group_cache_tree = RB_ROOT; 2269 fs_info->block_group_cache_tree = RB_ROOT;
@@ -2668,6 +2661,7 @@ retry_root_backup:
2668 2661
2669 btrfs_set_root_node(&tree_root->root_item, tree_root->node); 2662 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
2670 tree_root->commit_root = btrfs_root_node(tree_root); 2663 tree_root->commit_root = btrfs_root_node(tree_root);
2664 btrfs_set_root_refs(&tree_root->root_item, 1);
2671 2665
2672 location.objectid = BTRFS_EXTENT_TREE_OBJECTID; 2666 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2673 location.type = BTRFS_ROOT_ITEM_KEY; 2667 location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3415,6 +3409,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3415 if (total_errors > max_errors) { 3409 if (total_errors > max_errors) {
3416 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3410 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
3417 total_errors); 3411 total_errors);
3412 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3418 3413
3419 /* FUA is masked off if unsupported and can't be the reason */ 3414 /* FUA is masked off if unsupported and can't be the reason */
3420 btrfs_error(root->fs_info, -EIO, 3415 btrfs_error(root->fs_info, -EIO,
@@ -3445,10 +3440,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3445int write_ctree_super(struct btrfs_trans_handle *trans, 3440int write_ctree_super(struct btrfs_trans_handle *trans,
3446 struct btrfs_root *root, int max_mirrors) 3441 struct btrfs_root *root, int max_mirrors)
3447{ 3442{
3448 int ret; 3443 return write_all_supers(root, max_mirrors);
3449
3450 ret = write_all_supers(root, max_mirrors);
3451 return ret;
3452} 3444}
3453 3445
3454/* Drop a fs root from the radix tree and free it. */ 3446/* Drop a fs root from the radix tree and free it. */
@@ -3525,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
3525int btrfs_commit_super(struct btrfs_root *root) 3517int btrfs_commit_super(struct btrfs_root *root)
3526{ 3518{
3527 struct btrfs_trans_handle *trans; 3519 struct btrfs_trans_handle *trans;
3528 int ret;
3529 3520
3530 mutex_lock(&root->fs_info->cleaner_mutex); 3521 mutex_lock(&root->fs_info->cleaner_mutex);
3531 btrfs_run_delayed_iputs(root); 3522 btrfs_run_delayed_iputs(root);
@@ -3539,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root)
3539 trans = btrfs_join_transaction(root); 3530 trans = btrfs_join_transaction(root);
3540 if (IS_ERR(trans)) 3531 if (IS_ERR(trans))
3541 return PTR_ERR(trans); 3532 return PTR_ERR(trans);
3542 ret = btrfs_commit_transaction(trans, root); 3533 return btrfs_commit_transaction(trans, root);
3543 if (ret)
3544 return ret;
3545 /* run commit again to drop the original snapshot */
3546 trans = btrfs_join_transaction(root);
3547 if (IS_ERR(trans))
3548 return PTR_ERR(trans);
3549 ret = btrfs_commit_transaction(trans, root);
3550 if (ret)
3551 return ret;
3552 ret = btrfs_write_and_wait_transaction(NULL, root);
3553 if (ret) {
3554 btrfs_error(root->fs_info, ret,
3555 "Failed to sync btree inode to disk.");
3556 return ret;
3557 }
3558
3559 ret = write_ctree_super(NULL, root, 0);
3560 return ret;
3561} 3534}
3562 3535
3563int close_ctree(struct btrfs_root *root) 3536int close_ctree(struct btrfs_root *root)
@@ -3611,12 +3584,12 @@ int close_ctree(struct btrfs_root *root)
3611 percpu_counter_sum(&fs_info->delalloc_bytes)); 3584 percpu_counter_sum(&fs_info->delalloc_bytes));
3612 } 3585 }
3613 3586
3587 del_fs_roots(fs_info);
3588
3614 btrfs_free_block_groups(fs_info); 3589 btrfs_free_block_groups(fs_info);
3615 3590
3616 btrfs_stop_all_workers(fs_info); 3591 btrfs_stop_all_workers(fs_info);
3617 3592
3618 del_fs_roots(fs_info);
3619
3620 free_root_pointers(fs_info, 1); 3593 free_root_pointers(fs_info, 1);
3621 3594
3622 iput(fs_info->btree_inode); 3595 iput(fs_info->btree_inode);
@@ -3666,10 +3639,20 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
3666 3639
3667void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 3640void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3668{ 3641{
3669 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; 3642 struct btrfs_root *root;
3670 u64 transid = btrfs_header_generation(buf); 3643 u64 transid = btrfs_header_generation(buf);
3671 int was_dirty; 3644 int was_dirty;
3672 3645
3646#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3647 /*
3648 * This is a fast path so only do this check if we have sanity tests
3649 * enabled. Normal people shouldn't be marking dummy buffers as dirty
3650 * outside of the sanity tests.
3651 */
3652 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
3653 return;
3654#endif
3655 root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3673 btrfs_assert_tree_locked(buf); 3656 btrfs_assert_tree_locked(buf);
3674 if (transid != root->fs_info->generation) 3657 if (transid != root->fs_info->generation)
3675 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " 3658 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
@@ -3799,7 +3782,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
3799 while (!list_empty(&splice)) { 3782 while (!list_empty(&splice)) {
3800 root = list_first_entry(&splice, struct btrfs_root, 3783 root = list_first_entry(&splice, struct btrfs_root,
3801 ordered_root); 3784 ordered_root);
3802 list_del_init(&root->ordered_root); 3785 list_move_tail(&root->ordered_root,
3786 &fs_info->ordered_roots);
3803 3787
3804 btrfs_destroy_ordered_extents(root); 3788 btrfs_destroy_ordered_extents(root);
3805 3789
@@ -3877,24 +3861,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3877 return ret; 3861 return ret;
3878} 3862}
3879 3863
3880static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
3881{
3882 struct btrfs_pending_snapshot *snapshot;
3883 struct list_head splice;
3884
3885 INIT_LIST_HEAD(&splice);
3886
3887 list_splice_init(&t->pending_snapshots, &splice);
3888
3889 while (!list_empty(&splice)) {
3890 snapshot = list_entry(splice.next,
3891 struct btrfs_pending_snapshot,
3892 list);
3893 snapshot->error = -ECANCELED;
3894 list_del_init(&snapshot->list);
3895 }
3896}
3897
3898static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 3864static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3899{ 3865{
3900 struct btrfs_inode *btrfs_inode; 3866 struct btrfs_inode *btrfs_inode;
@@ -4024,15 +3990,13 @@ again:
4024void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 3990void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4025 struct btrfs_root *root) 3991 struct btrfs_root *root)
4026{ 3992{
3993 btrfs_destroy_ordered_operations(cur_trans, root);
3994
4027 btrfs_destroy_delayed_refs(cur_trans, root); 3995 btrfs_destroy_delayed_refs(cur_trans, root);
4028 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
4029 cur_trans->dirty_pages.dirty_bytes);
4030 3996
4031 cur_trans->state = TRANS_STATE_COMMIT_START; 3997 cur_trans->state = TRANS_STATE_COMMIT_START;
4032 wake_up(&root->fs_info->transaction_blocked_wait); 3998 wake_up(&root->fs_info->transaction_blocked_wait);
4033 3999
4034 btrfs_evict_pending_snapshots(cur_trans);
4035
4036 cur_trans->state = TRANS_STATE_UNBLOCKED; 4000 cur_trans->state = TRANS_STATE_UNBLOCKED;
4037 wake_up(&root->fs_info->transaction_wait); 4001 wake_up(&root->fs_info->transaction_wait);
4038 4002
@@ -4056,63 +4020,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4056static int btrfs_cleanup_transaction(struct btrfs_root *root) 4020static int btrfs_cleanup_transaction(struct btrfs_root *root)
4057{ 4021{
4058 struct btrfs_transaction *t; 4022 struct btrfs_transaction *t;
4059 LIST_HEAD(list);
4060 4023
4061 mutex_lock(&root->fs_info->transaction_kthread_mutex); 4024 mutex_lock(&root->fs_info->transaction_kthread_mutex);
4062 4025
4063 spin_lock(&root->fs_info->trans_lock); 4026 spin_lock(&root->fs_info->trans_lock);
4064 list_splice_init(&root->fs_info->trans_list, &list); 4027 while (!list_empty(&root->fs_info->trans_list)) {
4065 root->fs_info->running_transaction = NULL; 4028 t = list_first_entry(&root->fs_info->trans_list,
4066 spin_unlock(&root->fs_info->trans_lock); 4029 struct btrfs_transaction, list);
4067 4030 if (t->state >= TRANS_STATE_COMMIT_START) {
4068 while (!list_empty(&list)) { 4031 atomic_inc(&t->use_count);
4069 t = list_entry(list.next, struct btrfs_transaction, list); 4032 spin_unlock(&root->fs_info->trans_lock);
4070 4033 btrfs_wait_for_commit(root, t->transid);
4071 btrfs_destroy_ordered_operations(t, root); 4034 btrfs_put_transaction(t);
4072 4035 spin_lock(&root->fs_info->trans_lock);
4073 btrfs_destroy_all_ordered_extents(root->fs_info); 4036 continue;
4074 4037 }
4075 btrfs_destroy_delayed_refs(t, root); 4038 if (t == root->fs_info->running_transaction) {
4076 4039 t->state = TRANS_STATE_COMMIT_DOING;
4077 /* 4040 spin_unlock(&root->fs_info->trans_lock);
4078 * FIXME: cleanup wait for commit 4041 /*
4079 * We needn't acquire the lock here, because we are during 4042 * We wait for 0 num_writers since we don't hold a trans
4080 * the umount, there is no other task which will change it. 4043 * handle open currently for this transaction.
4081 */ 4044 */
4082 t->state = TRANS_STATE_COMMIT_START; 4045 wait_event(t->writer_wait,
4083 smp_mb(); 4046 atomic_read(&t->num_writers) == 0);
4084 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 4047 } else {
4085 wake_up(&root->fs_info->transaction_blocked_wait); 4048 spin_unlock(&root->fs_info->trans_lock);
4086 4049 }
4087 btrfs_evict_pending_snapshots(t); 4050 btrfs_cleanup_one_transaction(t, root);
4088
4089 t->state = TRANS_STATE_UNBLOCKED;
4090 smp_mb();
4091 if (waitqueue_active(&root->fs_info->transaction_wait))
4092 wake_up(&root->fs_info->transaction_wait);
4093
4094 btrfs_destroy_delayed_inodes(root);
4095 btrfs_assert_delayed_root_empty(root);
4096
4097 btrfs_destroy_all_delalloc_inodes(root->fs_info);
4098
4099 btrfs_destroy_marked_extents(root, &t->dirty_pages,
4100 EXTENT_DIRTY);
4101
4102 btrfs_destroy_pinned_extent(root,
4103 root->fs_info->pinned_extents);
4104
4105 t->state = TRANS_STATE_COMPLETED;
4106 smp_mb();
4107 if (waitqueue_active(&t->commit_wait))
4108 wake_up(&t->commit_wait);
4109 4051
4110 atomic_set(&t->use_count, 0); 4052 spin_lock(&root->fs_info->trans_lock);
4053 if (t == root->fs_info->running_transaction)
4054 root->fs_info->running_transaction = NULL;
4111 list_del_init(&t->list); 4055 list_del_init(&t->list);
4112 memset(t, 0, sizeof(*t)); 4056 spin_unlock(&root->fs_info->trans_lock);
4113 kmem_cache_free(btrfs_transaction_cachep, t);
4114 }
4115 4057
4058 btrfs_put_transaction(t);
4059 trace_btrfs_transaction_commit(root);
4060 spin_lock(&root->fs_info->trans_lock);
4061 }
4062 spin_unlock(&root->fs_info->trans_lock);
4063 btrfs_destroy_all_ordered_extents(root->fs_info);
4064 btrfs_destroy_delayed_inodes(root);
4065 btrfs_assert_delayed_root_empty(root);
4066 btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents);
4067 btrfs_destroy_all_delalloc_inodes(root->fs_info);
4116 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 4068 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
4117 4069
4118 return 0; 4070 return 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b71acd6e1e5b..53059df350f8 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
68int btrfs_init_fs_root(struct btrfs_root *root); 68int btrfs_init_fs_root(struct btrfs_root *root);
69int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, 69int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
70 struct btrfs_root *root); 70 struct btrfs_root *root);
71struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 71
72 struct btrfs_key *location); 72struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
73 struct btrfs_key *key,
74 bool check_ref);
75static inline struct btrfs_root *
76btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
77 struct btrfs_key *location)
78{
79 return btrfs_get_fs_root(fs_info, location, true);
80}
81
73int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 82int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
74void btrfs_btree_balance_dirty(struct btrfs_root *root); 83void btrfs_btree_balance_dirty(struct btrfs_root *root);
75void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); 84void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
@@ -77,6 +86,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
77 struct btrfs_root *root); 86 struct btrfs_root *root);
78void btrfs_free_fs_root(struct btrfs_root *root); 87void btrfs_free_fs_root(struct btrfs_root *root);
79 88
89#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
90struct btrfs_root *btrfs_alloc_dummy_root(void);
91#endif
92
80/* 93/*
81 * This function is used to grab the root, and avoid it is freed when we 94 * This function is used to grab the root, and avoid it is freed when we
82 * access it. But it doesn't ensure that the tree is not dropped. 95 * access it. But it doesn't ensure that the tree is not dropped.
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 4b8691607373..41422a3de8ed 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,7 +5,6 @@
5#include "btrfs_inode.h" 5#include "btrfs_inode.h"
6#include "print-tree.h" 6#include "print-tree.h"
7#include "export.h" 7#include "export.h"
8#include "compat.h"
9 8
10#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ 9#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
11 parent_objectid) / 4) 10 parent_objectid) / 4)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cfb3cf711b34..45d98d01028f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/ratelimit.h> 26#include <linux/ratelimit.h>
27#include <linux/percpu_counter.h> 27#include <linux/percpu_counter.h>
28#include "compat.h"
29#include "hash.h" 28#include "hash.h"
30#include "ctree.h" 29#include "ctree.h"
31#include "disk-io.h" 30#include "disk-io.h"
@@ -1551,9 +1550,8 @@ again:
1551 if (ret && !insert) { 1550 if (ret && !insert) {
1552 err = -ENOENT; 1551 err = -ENOENT;
1553 goto out; 1552 goto out;
1554 } else if (ret) { 1553 } else if (WARN_ON(ret)) {
1555 err = -EIO; 1554 err = -EIO;
1556 WARN_ON(1);
1557 goto out; 1555 goto out;
1558 } 1556 }
1559 1557
@@ -1979,7 +1977,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1979 struct btrfs_extent_item *item; 1977 struct btrfs_extent_item *item;
1980 u64 refs; 1978 u64 refs;
1981 int ret; 1979 int ret;
1982 int err = 0;
1983 1980
1984 path = btrfs_alloc_path(); 1981 path = btrfs_alloc_path();
1985 if (!path) 1982 if (!path)
@@ -1992,13 +1989,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1992 path, bytenr, num_bytes, parent, 1989 path, bytenr, num_bytes, parent,
1993 root_objectid, owner, offset, 1990 root_objectid, owner, offset,
1994 refs_to_add, extent_op); 1991 refs_to_add, extent_op);
1995 if (ret == 0) 1992 if (ret != -EAGAIN)
1996 goto out;
1997
1998 if (ret != -EAGAIN) {
1999 err = ret;
2000 goto out; 1993 goto out;
2001 }
2002 1994
2003 leaf = path->nodes[0]; 1995 leaf = path->nodes[0];
2004 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1996 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -2021,7 +2013,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2021 btrfs_abort_transaction(trans, root, ret); 2013 btrfs_abort_transaction(trans, root, ret);
2022out: 2014out:
2023 btrfs_free_path(path); 2015 btrfs_free_path(path);
2024 return err; 2016 return ret;
2025} 2017}
2026 2018
2027static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 2019static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
@@ -2137,15 +2129,28 @@ again:
2137 } 2129 }
2138 if (ret > 0) { 2130 if (ret > 0) {
2139 if (metadata) { 2131 if (metadata) {
2140 btrfs_release_path(path); 2132 if (path->slots[0] > 0) {
2141 metadata = 0; 2133 path->slots[0]--;
2134 btrfs_item_key_to_cpu(path->nodes[0], &key,
2135 path->slots[0]);
2136 if (key.objectid == node->bytenr &&
2137 key.type == BTRFS_EXTENT_ITEM_KEY &&
2138 key.offset == node->num_bytes)
2139 ret = 0;
2140 }
2141 if (ret > 0) {
2142 btrfs_release_path(path);
2143 metadata = 0;
2142 2144
2143 key.offset = node->num_bytes; 2145 key.objectid = node->bytenr;
2144 key.type = BTRFS_EXTENT_ITEM_KEY; 2146 key.offset = node->num_bytes;
2145 goto again; 2147 key.type = BTRFS_EXTENT_ITEM_KEY;
2148 goto again;
2149 }
2150 } else {
2151 err = -EIO;
2152 goto out;
2146 } 2153 }
2147 err = -EIO;
2148 goto out;
2149 } 2154 }
2150 2155
2151 leaf = path->nodes[0]; 2156 leaf = path->nodes[0];
@@ -2234,8 +2239,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2234{ 2239{
2235 int ret = 0; 2240 int ret = 0;
2236 2241
2237 if (trans->aborted) 2242 if (trans->aborted) {
2243 if (insert_reserved)
2244 btrfs_pin_extent(root, node->bytenr,
2245 node->num_bytes, 1);
2238 return 0; 2246 return 0;
2247 }
2239 2248
2240 if (btrfs_delayed_ref_is_head(node)) { 2249 if (btrfs_delayed_ref_is_head(node)) {
2241 struct btrfs_delayed_ref_head *head; 2250 struct btrfs_delayed_ref_head *head;
@@ -2411,6 +2420,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2411 btrfs_free_delayed_extent_op(extent_op); 2420 btrfs_free_delayed_extent_op(extent_op);
2412 2421
2413 if (ret) { 2422 if (ret) {
2423 /*
2424 * Need to reset must_insert_reserved if
2425 * there was an error so the abort stuff
2426 * can cleanup the reserved space
2427 * properly.
2428 */
2429 if (must_insert_reserved)
2430 locked_ref->must_insert_reserved = 1;
2414 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2431 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2415 spin_lock(&delayed_refs->lock); 2432 spin_lock(&delayed_refs->lock);
2416 btrfs_delayed_ref_unlock(locked_ref); 2433 btrfs_delayed_ref_unlock(locked_ref);
@@ -3197,8 +3214,7 @@ again:
3197 if (ret) 3214 if (ret)
3198 goto out_put; 3215 goto out_put;
3199 3216
3200 ret = btrfs_truncate_free_space_cache(root, trans, path, 3217 ret = btrfs_truncate_free_space_cache(root, trans, inode);
3201 inode);
3202 if (ret) 3218 if (ret)
3203 goto out_put; 3219 goto out_put;
3204 } 3220 }
@@ -3318,10 +3334,9 @@ again:
3318 last = cache->key.objectid + cache->key.offset; 3334 last = cache->key.objectid + cache->key.offset;
3319 3335
3320 err = write_one_cache_group(trans, root, path, cache); 3336 err = write_one_cache_group(trans, root, path, cache);
3337 btrfs_put_block_group(cache);
3321 if (err) /* File system offline */ 3338 if (err) /* File system offline */
3322 goto out; 3339 goto out;
3323
3324 btrfs_put_block_group(cache);
3325 } 3340 }
3326 3341
3327 while (1) { 3342 while (1) {
@@ -3605,10 +3620,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3605 /* make sure bytes are sectorsize aligned */ 3620 /* make sure bytes are sectorsize aligned */
3606 bytes = ALIGN(bytes, root->sectorsize); 3621 bytes = ALIGN(bytes, root->sectorsize);
3607 3622
3608 if (root == root->fs_info->tree_root || 3623 if (btrfs_is_free_space_inode(inode)) {
3609 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3610 alloc_chunk = 0;
3611 committed = 1; 3624 committed = 1;
3625 ASSERT(current->journal_info);
3612 } 3626 }
3613 3627
3614 data_sinfo = fs_info->data_sinfo; 3628 data_sinfo = fs_info->data_sinfo;
@@ -3636,6 +3650,16 @@ again:
3636 spin_unlock(&data_sinfo->lock); 3650 spin_unlock(&data_sinfo->lock);
3637alloc: 3651alloc:
3638 alloc_target = btrfs_get_alloc_profile(root, 1); 3652 alloc_target = btrfs_get_alloc_profile(root, 1);
3653 /*
3654 * It is ugly that we don't call nolock join
3655 * transaction for the free space inode case here.
3656 * But it is safe because we only do the data space
3657 * reservation for the free space cache in the
3658 * transaction context, the common join transaction
3659 * just increase the counter of the current transaction
3660 * handler, doesn't try to acquire the trans_lock of
3661 * the fs.
3662 */
3639 trans = btrfs_join_transaction(root); 3663 trans = btrfs_join_transaction(root);
3640 if (IS_ERR(trans)) 3664 if (IS_ERR(trans))
3641 return PTR_ERR(trans); 3665 return PTR_ERR(trans);
@@ -3681,6 +3705,9 @@ commit_trans:
3681 goto again; 3705 goto again;
3682 } 3706 }
3683 3707
3708 trace_btrfs_space_reservation(root->fs_info,
3709 "space_info:enospc",
3710 data_sinfo->flags, bytes, 1);
3684 return -ENOSPC; 3711 return -ENOSPC;
3685 } 3712 }
3686 data_sinfo->bytes_may_use += bytes; 3713 data_sinfo->bytes_may_use += bytes;
@@ -3925,7 +3952,6 @@ static int can_overcommit(struct btrfs_root *root,
3925 u64 space_size; 3952 u64 space_size;
3926 u64 avail; 3953 u64 avail;
3927 u64 used; 3954 u64 used;
3928 u64 to_add;
3929 3955
3930 used = space_info->bytes_used + space_info->bytes_reserved + 3956 used = space_info->bytes_used + space_info->bytes_reserved +
3931 space_info->bytes_pinned + space_info->bytes_readonly; 3957 space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3959,25 +3985,17 @@ static int can_overcommit(struct btrfs_root *root,
3959 BTRFS_BLOCK_GROUP_RAID10)) 3985 BTRFS_BLOCK_GROUP_RAID10))
3960 avail >>= 1; 3986 avail >>= 1;
3961 3987
3962 to_add = space_info->total_bytes;
3963
3964 /* 3988 /*
3965 * If we aren't flushing all things, let us overcommit up to 3989 * If we aren't flushing all things, let us overcommit up to
3966 * 1/2th of the space. If we can flush, don't let us overcommit 3990 * 1/2th of the space. If we can flush, don't let us overcommit
3967 * too much, let it overcommit up to 1/8 of the space. 3991 * too much, let it overcommit up to 1/8 of the space.
3968 */ 3992 */
3969 if (flush == BTRFS_RESERVE_FLUSH_ALL) 3993 if (flush == BTRFS_RESERVE_FLUSH_ALL)
3970 to_add >>= 3; 3994 avail >>= 3;
3971 else 3995 else
3972 to_add >>= 1; 3996 avail >>= 1;
3973
3974 /*
3975 * Limit the overcommit to the amount of free space we could possibly
3976 * allocate for chunks.
3977 */
3978 to_add = min(avail, to_add);
3979 3997
3980 if (used + bytes < space_info->total_bytes + to_add) 3998 if (used + bytes < space_info->total_bytes + avail)
3981 return 1; 3999 return 1;
3982 return 0; 4000 return 0;
3983} 4001}
@@ -3998,12 +4016,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3998 * the filesystem is readonly(all dirty pages are written to 4016 * the filesystem is readonly(all dirty pages are written to
3999 * the disk). 4017 * the disk).
4000 */ 4018 */
4001 btrfs_start_all_delalloc_inodes(root->fs_info, 0); 4019 btrfs_start_delalloc_roots(root->fs_info, 0);
4002 if (!current->journal_info) 4020 if (!current->journal_info)
4003 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4021 btrfs_wait_ordered_roots(root->fs_info, -1);
4004 } 4022 }
4005} 4023}
4006 4024
4025static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4026{
4027 u64 bytes;
4028 int nr;
4029
4030 bytes = btrfs_calc_trans_metadata_size(root, 1);
4031 nr = (int)div64_u64(to_reclaim, bytes);
4032 if (!nr)
4033 nr = 1;
4034 return nr;
4035}
4036
4037#define EXTENT_SIZE_PER_ITEM (256 * 1024)
4038
4007/* 4039/*
4008 * shrink metadata reservation for delalloc 4040 * shrink metadata reservation for delalloc
4009 */ 4041 */
@@ -4016,24 +4048,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4016 u64 delalloc_bytes; 4048 u64 delalloc_bytes;
4017 u64 max_reclaim; 4049 u64 max_reclaim;
4018 long time_left; 4050 long time_left;
4019 unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 4051 unsigned long nr_pages;
4020 int loops = 0; 4052 int loops;
4053 int items;
4021 enum btrfs_reserve_flush_enum flush; 4054 enum btrfs_reserve_flush_enum flush;
4022 4055
4056 /* Calc the number of the pages we need flush for space reservation */
4057 items = calc_reclaim_items_nr(root, to_reclaim);
4058 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4059
4023 trans = (struct btrfs_trans_handle *)current->journal_info; 4060 trans = (struct btrfs_trans_handle *)current->journal_info;
4024 block_rsv = &root->fs_info->delalloc_block_rsv; 4061 block_rsv = &root->fs_info->delalloc_block_rsv;
4025 space_info = block_rsv->space_info; 4062 space_info = block_rsv->space_info;
4026 4063
4027 smp_mb();
4028 delalloc_bytes = percpu_counter_sum_positive( 4064 delalloc_bytes = percpu_counter_sum_positive(
4029 &root->fs_info->delalloc_bytes); 4065 &root->fs_info->delalloc_bytes);
4030 if (delalloc_bytes == 0) { 4066 if (delalloc_bytes == 0) {
4031 if (trans) 4067 if (trans)
4032 return; 4068 return;
4033 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4069 if (wait_ordered)
4070 btrfs_wait_ordered_roots(root->fs_info, items);
4034 return; 4071 return;
4035 } 4072 }
4036 4073
4074 loops = 0;
4037 while (delalloc_bytes && loops < 3) { 4075 while (delalloc_bytes && loops < 3) {
4038 max_reclaim = min(delalloc_bytes, to_reclaim); 4076 max_reclaim = min(delalloc_bytes, to_reclaim);
4039 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; 4077 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
@@ -4042,9 +4080,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4042 * We need to wait for the async pages to actually start before 4080 * We need to wait for the async pages to actually start before
4043 * we do anything. 4081 * we do anything.
4044 */ 4082 */
4045 wait_event(root->fs_info->async_submit_wait, 4083 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4046 !atomic_read(&root->fs_info->async_delalloc_pages)); 4084 if (!max_reclaim)
4085 goto skip_async;
4047 4086
4087 if (max_reclaim <= nr_pages)
4088 max_reclaim = 0;
4089 else
4090 max_reclaim -= nr_pages;
4091
4092 wait_event(root->fs_info->async_submit_wait,
4093 atomic_read(&root->fs_info->async_delalloc_pages) <=
4094 (int)max_reclaim);
4095skip_async:
4048 if (!trans) 4096 if (!trans)
4049 flush = BTRFS_RESERVE_FLUSH_ALL; 4097 flush = BTRFS_RESERVE_FLUSH_ALL;
4050 else 4098 else
@@ -4058,13 +4106,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4058 4106
4059 loops++; 4107 loops++;
4060 if (wait_ordered && !trans) { 4108 if (wait_ordered && !trans) {
4061 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4109 btrfs_wait_ordered_roots(root->fs_info, items);
4062 } else { 4110 } else {
4063 time_left = schedule_timeout_killable(1); 4111 time_left = schedule_timeout_killable(1);
4064 if (time_left) 4112 if (time_left)
4065 break; 4113 break;
4066 } 4114 }
4067 smp_mb();
4068 delalloc_bytes = percpu_counter_sum_positive( 4115 delalloc_bytes = percpu_counter_sum_positive(
4069 &root->fs_info->delalloc_bytes); 4116 &root->fs_info->delalloc_bytes);
4070 } 4117 }
@@ -4149,16 +4196,11 @@ static int flush_space(struct btrfs_root *root,
4149 switch (state) { 4196 switch (state) {
4150 case FLUSH_DELAYED_ITEMS_NR: 4197 case FLUSH_DELAYED_ITEMS_NR:
4151 case FLUSH_DELAYED_ITEMS: 4198 case FLUSH_DELAYED_ITEMS:
4152 if (state == FLUSH_DELAYED_ITEMS_NR) { 4199 if (state == FLUSH_DELAYED_ITEMS_NR)
4153 u64 bytes = btrfs_calc_trans_metadata_size(root, 1); 4200 nr = calc_reclaim_items_nr(root, num_bytes) * 2;
4154 4201 else
4155 nr = (int)div64_u64(num_bytes, bytes);
4156 if (!nr)
4157 nr = 1;
4158 nr *= 2;
4159 } else {
4160 nr = -1; 4202 nr = -1;
4161 } 4203
4162 trans = btrfs_join_transaction(root); 4204 trans = btrfs_join_transaction(root);
4163 if (IS_ERR(trans)) { 4205 if (IS_ERR(trans)) {
4164 ret = PTR_ERR(trans); 4206 ret = PTR_ERR(trans);
@@ -4341,6 +4383,10 @@ out:
4341 !block_rsv_use_bytes(global_rsv, orig_bytes)) 4383 !block_rsv_use_bytes(global_rsv, orig_bytes))
4342 ret = 0; 4384 ret = 0;
4343 } 4385 }
4386 if (ret == -ENOSPC)
4387 trace_btrfs_space_reservation(root->fs_info,
4388 "space_info:enospc",
4389 space_info->flags, orig_bytes, 1);
4344 if (flushing) { 4390 if (flushing) {
4345 spin_lock(&space_info->lock); 4391 spin_lock(&space_info->lock);
4346 space_info->flush = 0; 4392 space_info->flush = 0;
@@ -4465,7 +4511,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4465 space_info->bytes_may_use -= num_bytes; 4511 space_info->bytes_may_use -= num_bytes;
4466 trace_btrfs_space_reservation(fs_info, "space_info", 4512 trace_btrfs_space_reservation(fs_info, "space_info",
4467 space_info->flags, num_bytes, 0); 4513 space_info->flags, num_bytes, 0);
4468 space_info->reservation_progress++;
4469 spin_unlock(&space_info->lock); 4514 spin_unlock(&space_info->lock);
4470 } 4515 }
4471 } 4516 }
@@ -4666,7 +4711,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4666 sinfo->bytes_may_use -= num_bytes; 4711 sinfo->bytes_may_use -= num_bytes;
4667 trace_btrfs_space_reservation(fs_info, "space_info", 4712 trace_btrfs_space_reservation(fs_info, "space_info",
4668 sinfo->flags, num_bytes, 0); 4713 sinfo->flags, num_bytes, 0);
4669 sinfo->reservation_progress++;
4670 block_rsv->reserved = block_rsv->size; 4714 block_rsv->reserved = block_rsv->size;
4671 block_rsv->full = 1; 4715 block_rsv->full = 1;
4672 } 4716 }
@@ -4997,7 +5041,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4997 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); 5041 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4998 5042
4999 if (to_reserve) 5043 if (to_reserve)
5000 trace_btrfs_space_reservation(root->fs_info,"delalloc", 5044 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5001 btrfs_ino(inode), to_reserve, 1); 5045 btrfs_ino(inode), to_reserve, 1);
5002 block_rsv_add_bytes(block_rsv, to_reserve, 1); 5046 block_rsv_add_bytes(block_rsv, to_reserve, 1);
5003 5047
@@ -5275,6 +5319,8 @@ static int pin_down_extent(struct btrfs_root *root,
5275 5319
5276 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 5320 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
5277 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 5321 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
5322 if (reserved)
5323 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
5278 return 0; 5324 return 0;
5279} 5325}
5280 5326
@@ -5446,7 +5492,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5446 space_info->bytes_readonly += num_bytes; 5492 space_info->bytes_readonly += num_bytes;
5447 cache->reserved -= num_bytes; 5493 cache->reserved -= num_bytes;
5448 space_info->bytes_reserved -= num_bytes; 5494 space_info->bytes_reserved -= num_bytes;
5449 space_info->reservation_progress++;
5450 } 5495 }
5451 spin_unlock(&cache->lock); 5496 spin_unlock(&cache->lock);
5452 spin_unlock(&space_info->lock); 5497 spin_unlock(&space_info->lock);
@@ -5730,9 +5775,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5730 } 5775 }
5731 extent_slot = path->slots[0]; 5776 extent_slot = path->slots[0];
5732 } 5777 }
5733 } else if (ret == -ENOENT) { 5778 } else if (WARN_ON(ret == -ENOENT)) {
5734 btrfs_print_leaf(extent_root, path->nodes[0]); 5779 btrfs_print_leaf(extent_root, path->nodes[0]);
5735 WARN_ON(1);
5736 btrfs_err(info, 5780 btrfs_err(info,
5737 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 5781 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5738 bytenr, parent, root_objectid, owner_objectid, 5782 bytenr, parent, root_objectid, owner_objectid,
@@ -5979,6 +6023,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
5979 6023
5980 btrfs_add_free_space(cache, buf->start, buf->len); 6024 btrfs_add_free_space(cache, buf->start, buf->len);
5981 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); 6025 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
6026 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
5982 pin = 0; 6027 pin = 0;
5983 } 6028 }
5984out: 6029out:
@@ -6117,10 +6162,13 @@ enum btrfs_loop_type {
6117/* 6162/*
6118 * walks the btree of allocated extents and find a hole of a given size. 6163 * walks the btree of allocated extents and find a hole of a given size.
6119 * The key ins is changed to record the hole: 6164 * The key ins is changed to record the hole:
6120 * ins->objectid == block start 6165 * ins->objectid == start position
6121 * ins->flags = BTRFS_EXTENT_ITEM_KEY 6166 * ins->flags = BTRFS_EXTENT_ITEM_KEY
6122 * ins->offset == number of blocks 6167 * ins->offset == the size of the hole.
6123 * Any available blocks before search_start are skipped. 6168 * Any available blocks before search_start are skipped.
6169 *
6170 * If there is no suitable free space, we will record the max size of
6171 * the free space extent currently.
6124 */ 6172 */
6125static noinline int find_free_extent(struct btrfs_root *orig_root, 6173static noinline int find_free_extent(struct btrfs_root *orig_root,
6126 u64 num_bytes, u64 empty_size, 6174 u64 num_bytes, u64 empty_size,
@@ -6133,6 +6181,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6133 struct btrfs_block_group_cache *block_group = NULL; 6181 struct btrfs_block_group_cache *block_group = NULL;
6134 struct btrfs_block_group_cache *used_block_group; 6182 struct btrfs_block_group_cache *used_block_group;
6135 u64 search_start = 0; 6183 u64 search_start = 0;
6184 u64 max_extent_size = 0;
6136 int empty_cluster = 2 * 1024 * 1024; 6185 int empty_cluster = 2 * 1024 * 1024;
6137 struct btrfs_space_info *space_info; 6186 struct btrfs_space_info *space_info;
6138 int loop = 0; 6187 int loop = 0;
@@ -6292,7 +6341,10 @@ have_block_group:
6292 btrfs_get_block_group(used_block_group); 6341 btrfs_get_block_group(used_block_group);
6293 6342
6294 offset = btrfs_alloc_from_cluster(used_block_group, 6343 offset = btrfs_alloc_from_cluster(used_block_group,
6295 last_ptr, num_bytes, used_block_group->key.objectid); 6344 last_ptr,
6345 num_bytes,
6346 used_block_group->key.objectid,
6347 &max_extent_size);
6296 if (offset) { 6348 if (offset) {
6297 /* we have a block, we're done */ 6349 /* we have a block, we're done */
6298 spin_unlock(&last_ptr->refill_lock); 6350 spin_unlock(&last_ptr->refill_lock);
@@ -6355,8 +6407,10 @@ refill_cluster:
6355 * cluster 6407 * cluster
6356 */ 6408 */
6357 offset = btrfs_alloc_from_cluster(block_group, 6409 offset = btrfs_alloc_from_cluster(block_group,
6358 last_ptr, num_bytes, 6410 last_ptr,
6359 search_start); 6411 num_bytes,
6412 search_start,
6413 &max_extent_size);
6360 if (offset) { 6414 if (offset) {
6361 /* we found one, proceed */ 6415 /* we found one, proceed */
6362 spin_unlock(&last_ptr->refill_lock); 6416 spin_unlock(&last_ptr->refill_lock);
@@ -6391,13 +6445,18 @@ unclustered_alloc:
6391 if (cached && 6445 if (cached &&
6392 block_group->free_space_ctl->free_space < 6446 block_group->free_space_ctl->free_space <
6393 num_bytes + empty_cluster + empty_size) { 6447 num_bytes + empty_cluster + empty_size) {
6448 if (block_group->free_space_ctl->free_space >
6449 max_extent_size)
6450 max_extent_size =
6451 block_group->free_space_ctl->free_space;
6394 spin_unlock(&block_group->free_space_ctl->tree_lock); 6452 spin_unlock(&block_group->free_space_ctl->tree_lock);
6395 goto loop; 6453 goto loop;
6396 } 6454 }
6397 spin_unlock(&block_group->free_space_ctl->tree_lock); 6455 spin_unlock(&block_group->free_space_ctl->tree_lock);
6398 6456
6399 offset = btrfs_find_space_for_alloc(block_group, search_start, 6457 offset = btrfs_find_space_for_alloc(block_group, search_start,
6400 num_bytes, empty_size); 6458 num_bytes, empty_size,
6459 &max_extent_size);
6401 /* 6460 /*
6402 * If we didn't find a chunk, and we haven't failed on this 6461 * If we didn't find a chunk, and we haven't failed on this
6403 * block group before, and this block group is in the middle of 6462 * block group before, and this block group is in the middle of
@@ -6515,7 +6574,8 @@ loop:
6515 ret = 0; 6574 ret = 0;
6516 } 6575 }
6517out: 6576out:
6518 6577 if (ret == -ENOSPC)
6578 ins->offset = max_extent_size;
6519 return ret; 6579 return ret;
6520} 6580}
6521 6581
@@ -6573,8 +6633,8 @@ again:
6573 flags); 6633 flags);
6574 6634
6575 if (ret == -ENOSPC) { 6635 if (ret == -ENOSPC) {
6576 if (!final_tried) { 6636 if (!final_tried && ins->offset) {
6577 num_bytes = num_bytes >> 1; 6637 num_bytes = min(num_bytes >> 1, ins->offset);
6578 num_bytes = round_down(num_bytes, root->sectorsize); 6638 num_bytes = round_down(num_bytes, root->sectorsize);
6579 num_bytes = max(num_bytes, min_alloc_size); 6639 num_bytes = max(num_bytes, min_alloc_size);
6580 if (num_bytes == min_alloc_size) 6640 if (num_bytes == min_alloc_size)
@@ -6591,8 +6651,6 @@ again:
6591 } 6651 }
6592 } 6652 }
6593 6653
6594 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
6595
6596 return ret; 6654 return ret;
6597} 6655}
6598 6656
@@ -6704,6 +6762,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6704 ins->objectid, ins->offset); 6762 ins->objectid, ins->offset);
6705 BUG(); 6763 BUG();
6706 } 6764 }
6765 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
6707 return ret; 6766 return ret;
6708} 6767}
6709 6768
@@ -6728,13 +6787,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6728 size += sizeof(*block_info); 6787 size += sizeof(*block_info);
6729 6788
6730 path = btrfs_alloc_path(); 6789 path = btrfs_alloc_path();
6731 if (!path) 6790 if (!path) {
6791 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
6792 root->leafsize);
6732 return -ENOMEM; 6793 return -ENOMEM;
6794 }
6733 6795
6734 path->leave_spinning = 1; 6796 path->leave_spinning = 1;
6735 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 6797 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
6736 ins, size); 6798 ins, size);
6737 if (ret) { 6799 if (ret) {
6800 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
6801 root->leafsize);
6738 btrfs_free_path(path); 6802 btrfs_free_path(path);
6739 return ret; 6803 return ret;
6740 } 6804 }
@@ -6776,6 +6840,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6776 ins->objectid, ins->offset); 6840 ins->objectid, ins->offset);
6777 BUG(); 6841 BUG();
6778 } 6842 }
6843
6844 trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
6779 return ret; 6845 return ret;
6780} 6846}
6781 6847
@@ -7980,7 +8046,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
7980 8046
7981 spin_lock(&sinfo->lock); 8047 spin_lock(&sinfo->lock);
7982 8048
7983 for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) 8049 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
7984 if (!list_empty(&sinfo->block_groups[i])) 8050 if (!list_empty(&sinfo->block_groups[i]))
7985 free_bytes += __btrfs_get_ro_block_group_free_space( 8051 free_bytes += __btrfs_get_ro_block_group_free_space(
7986 &sinfo->block_groups[i]); 8052 &sinfo->block_groups[i]);
@@ -8268,15 +8334,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8268 8334
8269 release_global_block_rsv(info); 8335 release_global_block_rsv(info);
8270 8336
8271 while(!list_empty(&info->space_info)) { 8337 while (!list_empty(&info->space_info)) {
8272 space_info = list_entry(info->space_info.next, 8338 space_info = list_entry(info->space_info.next,
8273 struct btrfs_space_info, 8339 struct btrfs_space_info,
8274 list); 8340 list);
8275 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { 8341 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
8276 if (space_info->bytes_pinned > 0 || 8342 if (WARN_ON(space_info->bytes_pinned > 0 ||
8277 space_info->bytes_reserved > 0 || 8343 space_info->bytes_reserved > 0 ||
8278 space_info->bytes_may_use > 0) { 8344 space_info->bytes_may_use > 0)) {
8279 WARN_ON(1);
8280 dump_space_info(space_info, 0, 0); 8345 dump_space_info(space_info, 0, 0);
8281 } 8346 }
8282 } 8347 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 09582b81640c..8e457fca0a0b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,13 +13,13 @@
13#include <linux/cleancache.h> 13#include <linux/cleancache.h>
14#include "extent_io.h" 14#include "extent_io.h"
15#include "extent_map.h" 15#include "extent_map.h"
16#include "compat.h"
17#include "ctree.h" 16#include "ctree.h"
18#include "btrfs_inode.h" 17#include "btrfs_inode.h"
19#include "volumes.h" 18#include "volumes.h"
20#include "check-integrity.h" 19#include "check-integrity.h"
21#include "locking.h" 20#include "locking.h"
22#include "rcu-string.h" 21#include "rcu-string.h"
22#include "backref.h"
23 23
24static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
@@ -145,8 +145,16 @@ int __init extent_io_init(void)
145 offsetof(struct btrfs_io_bio, bio)); 145 offsetof(struct btrfs_io_bio, bio));
146 if (!btrfs_bioset) 146 if (!btrfs_bioset)
147 goto free_buffer_cache; 147 goto free_buffer_cache;
148
149 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
150 goto free_bioset;
151
148 return 0; 152 return 0;
149 153
154free_bioset:
155 bioset_free(btrfs_bioset);
156 btrfs_bioset = NULL;
157
150free_buffer_cache: 158free_buffer_cache:
151 kmem_cache_destroy(extent_buffer_cache); 159 kmem_cache_destroy(extent_buffer_cache);
152 extent_buffer_cache = NULL; 160 extent_buffer_cache = NULL;
@@ -1481,11 +1489,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1481 *end = state->end; 1489 *end = state->end;
1482 cur_start = state->end + 1; 1490 cur_start = state->end + 1;
1483 node = rb_next(node); 1491 node = rb_next(node);
1484 if (!node)
1485 break;
1486 total_bytes += state->end - state->start + 1; 1492 total_bytes += state->end - state->start + 1;
1487 if (total_bytes >= max_bytes) 1493 if (total_bytes >= max_bytes)
1488 break; 1494 break;
1495 if (!node)
1496 break;
1489 } 1497 }
1490out: 1498out:
1491 spin_unlock(&tree->lock); 1499 spin_unlock(&tree->lock);
@@ -1589,11 +1597,10 @@ done:
1589 * 1597 *
1590 * 1 is returned if we find something, 0 if nothing was in the tree 1598 * 1 is returned if we find something, 0 if nothing was in the tree
1591 */ 1599 */
1592static noinline u64 find_lock_delalloc_range(struct inode *inode, 1600STATIC u64 find_lock_delalloc_range(struct inode *inode,
1593 struct extent_io_tree *tree, 1601 struct extent_io_tree *tree,
1594 struct page *locked_page, 1602 struct page *locked_page, u64 *start,
1595 u64 *start, u64 *end, 1603 u64 *end, u64 max_bytes)
1596 u64 max_bytes)
1597{ 1604{
1598 u64 delalloc_start; 1605 u64 delalloc_start;
1599 u64 delalloc_end; 1606 u64 delalloc_end;
@@ -1612,7 +1619,7 @@ again:
1612 *start = delalloc_start; 1619 *start = delalloc_start;
1613 *end = delalloc_end; 1620 *end = delalloc_end;
1614 free_extent_state(cached_state); 1621 free_extent_state(cached_state);
1615 return found; 1622 return 0;
1616 } 1623 }
1617 1624
1618 /* 1625 /*
@@ -1625,10 +1632,9 @@ again:
1625 1632
1626 /* 1633 /*
1627 * make sure to limit the number of pages we try to lock down 1634 * make sure to limit the number of pages we try to lock down
1628 * if we're looping.
1629 */ 1635 */
1630 if (delalloc_end + 1 - delalloc_start > max_bytes && loops) 1636 if (delalloc_end + 1 - delalloc_start > max_bytes)
1631 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; 1637 delalloc_end = delalloc_start + max_bytes - 1;
1632 1638
1633 /* step two, lock all the pages after the page that has start */ 1639 /* step two, lock all the pages after the page that has start */
1634 ret = lock_delalloc_pages(inode, locked_page, 1640 ret = lock_delalloc_pages(inode, locked_page,
@@ -1639,8 +1645,7 @@ again:
1639 */ 1645 */
1640 free_extent_state(cached_state); 1646 free_extent_state(cached_state);
1641 if (!loops) { 1647 if (!loops) {
1642 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1648 max_bytes = PAGE_CACHE_SIZE;
1643 max_bytes = PAGE_CACHE_SIZE - offset;
1644 loops = 1; 1649 loops = 1;
1645 goto again; 1650 goto again;
1646 } else { 1651 } else {
@@ -1734,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
1734 u64 last = 0; 1739 u64 last = 0;
1735 int found = 0; 1740 int found = 0;
1736 1741
1737 if (search_end <= cur_start) { 1742 if (WARN_ON(search_end <= cur_start))
1738 WARN_ON(1);
1739 return 0; 1743 return 0;
1740 }
1741 1744
1742 spin_lock(&tree->lock); 1745 spin_lock(&tree->lock);
1743 if (cur_start == 0 && bits == EXTENT_DIRTY) { 1746 if (cur_start == 0 && bits == EXTENT_DIRTY) {
@@ -1977,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1977 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 1980 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
1978 int ret; 1981 int ret;
1979 1982
1983 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
1980 BUG_ON(!mirror_num); 1984 BUG_ON(!mirror_num);
1981 1985
1982 /* we can't repair anything in raid56 yet */ 1986 /* we can't repair anything in raid56 yet */
@@ -2033,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2033 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); 2037 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2034 int ret = 0; 2038 int ret = 0;
2035 2039
2040 if (root->fs_info->sb->s_flags & MS_RDONLY)
2041 return -EROFS;
2042
2036 for (i = 0; i < num_pages; i++) { 2043 for (i = 0; i < num_pages; i++) {
2037 struct page *p = extent_buffer_page(eb, i); 2044 struct page *p = extent_buffer_page(eb, i);
2038 ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, 2045 ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2054,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page)
2054 u64 private; 2061 u64 private;
2055 u64 private_failure; 2062 u64 private_failure;
2056 struct io_failure_record *failrec; 2063 struct io_failure_record *failrec;
2057 struct btrfs_fs_info *fs_info; 2064 struct inode *inode = page->mapping->host;
2065 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2058 struct extent_state *state; 2066 struct extent_state *state;
2059 int num_copies; 2067 int num_copies;
2060 int did_repair = 0; 2068 int did_repair = 0;
2061 int ret; 2069 int ret;
2062 struct inode *inode = page->mapping->host;
2063 2070
2064 private = 0; 2071 private = 0;
2065 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 2072 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2082,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
2082 did_repair = 1; 2089 did_repair = 1;
2083 goto out; 2090 goto out;
2084 } 2091 }
2092 if (fs_info->sb->s_flags & MS_RDONLY)
2093 goto out;
2085 2094
2086 spin_lock(&BTRFS_I(inode)->io_tree.lock); 2095 spin_lock(&BTRFS_I(inode)->io_tree.lock);
2087 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, 2096 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2091,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page)
2091 2100
2092 if (state && state->start <= failrec->start && 2101 if (state && state->start <= failrec->start &&
2093 state->end >= failrec->start + failrec->len - 1) { 2102 state->end >= failrec->start + failrec->len - 1) {
2094 fs_info = BTRFS_I(inode)->root->fs_info;
2095 num_copies = btrfs_num_copies(fs_info, failrec->logical, 2103 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2096 failrec->len); 2104 failrec->len);
2097 if (num_copies > 1) { 2105 if (num_copies > 1) {
@@ -3563,9 +3571,8 @@ retry:
3563 * but no sense in crashing the users box for something 3571 * but no sense in crashing the users box for something
3564 * we can survive anyway. 3572 * we can survive anyway.
3565 */ 3573 */
3566 if (!eb) { 3574 if (WARN_ON(!eb)) {
3567 spin_unlock(&mapping->private_lock); 3575 spin_unlock(&mapping->private_lock);
3568 WARN_ON(1);
3569 continue; 3576 continue;
3570 } 3577 }
3571 3578
@@ -4032,7 +4039,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
4032 if (offset >= last) 4039 if (offset >= last)
4033 return NULL; 4040 return NULL;
4034 4041
4035 while(1) { 4042 while (1) {
4036 len = last - offset; 4043 len = last - offset;
4037 if (len == 0) 4044 if (len == 0)
4038 break; 4045 break;
@@ -4056,6 +4063,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
4056 return NULL; 4063 return NULL;
4057} 4064}
4058 4065
4066static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
4067{
4068 unsigned long cnt = *((unsigned long *)ctx);
4069
4070 cnt++;
4071 *((unsigned long *)ctx) = cnt;
4072
4073 /* Now we're sure that the extent is shared. */
4074 if (cnt > 1)
4075 return 1;
4076 return 0;
4077}
4078
4059int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4079int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4060 __u64 start, __u64 len, get_extent_t *get_extent) 4080 __u64 start, __u64 len, get_extent_t *get_extent)
4061{ 4081{
@@ -4122,7 +4142,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4122 last = found_key.offset; 4142 last = found_key.offset;
4123 last_for_get_extent = last + 1; 4143 last_for_get_extent = last + 1;
4124 } 4144 }
4125 btrfs_free_path(path); 4145 btrfs_release_path(path);
4126 4146
4127 /* 4147 /*
4128 * we might have some extents allocated but more delalloc past those 4148 * we might have some extents allocated but more delalloc past those
@@ -4192,7 +4212,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4192 flags |= (FIEMAP_EXTENT_DELALLOC | 4212 flags |= (FIEMAP_EXTENT_DELALLOC |
4193 FIEMAP_EXTENT_UNKNOWN); 4213 FIEMAP_EXTENT_UNKNOWN);
4194 } else { 4214 } else {
4215 unsigned long ref_cnt = 0;
4216
4195 disko = em->block_start + offset_in_extent; 4217 disko = em->block_start + offset_in_extent;
4218
4219 /*
4220 * As btrfs supports shared space, this information
4221 * can be exported to userspace tools via
4222 * flag FIEMAP_EXTENT_SHARED.
4223 */
4224 ret = iterate_inodes_from_logical(
4225 em->block_start,
4226 BTRFS_I(inode)->root->fs_info,
4227 path, count_ext_ref, &ref_cnt);
4228 if (ret < 0 && ret != -ENOENT)
4229 goto out_free;
4230
4231 if (ref_cnt > 1)
4232 flags |= FIEMAP_EXTENT_SHARED;
4196 } 4233 }
4197 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 4234 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4198 flags |= FIEMAP_EXTENT_ENCODED; 4235 flags |= FIEMAP_EXTENT_ENCODED;
@@ -4224,6 +4261,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4224out_free: 4261out_free:
4225 free_extent_map(em); 4262 free_extent_map(em);
4226out: 4263out:
4264 btrfs_free_path(path);
4227 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, 4265 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4228 &cached_state, GFP_NOFS); 4266 &cached_state, GFP_NOFS);
4229 return ret; 4267 return ret;
@@ -4449,6 +4487,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
4449 } 4487 }
4450} 4488}
4451 4489
4490struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
4491 u64 start)
4492{
4493 struct extent_buffer *eb;
4494
4495 rcu_read_lock();
4496 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
4497 if (eb && atomic_inc_not_zero(&eb->refs)) {
4498 rcu_read_unlock();
4499 mark_extent_buffer_accessed(eb);
4500 return eb;
4501 }
4502 rcu_read_unlock();
4503
4504 return NULL;
4505}
4506
4452struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 4507struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
4453 u64 start, unsigned long len) 4508 u64 start, unsigned long len)
4454{ 4509{
@@ -4462,14 +4517,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
4462 int uptodate = 1; 4517 int uptodate = 1;
4463 int ret; 4518 int ret;
4464 4519
4465 rcu_read_lock(); 4520
4466 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4521 eb = find_extent_buffer(tree, start);
4467 if (eb && atomic_inc_not_zero(&eb->refs)) { 4522 if (eb)
4468 rcu_read_unlock();
4469 mark_extent_buffer_accessed(eb);
4470 return eb; 4523 return eb;
4471 }
4472 rcu_read_unlock();
4473 4524
4474 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); 4525 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
4475 if (!eb) 4526 if (!eb)
@@ -4528,24 +4579,17 @@ again:
4528 4579
4529 spin_lock(&tree->buffer_lock); 4580 spin_lock(&tree->buffer_lock);
4530 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); 4581 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
4582 spin_unlock(&tree->buffer_lock);
4583 radix_tree_preload_end();
4531 if (ret == -EEXIST) { 4584 if (ret == -EEXIST) {
4532 exists = radix_tree_lookup(&tree->buffer, 4585 exists = find_extent_buffer(tree, start);
4533 start >> PAGE_CACHE_SHIFT); 4586 if (exists)
4534 if (!atomic_inc_not_zero(&exists->refs)) { 4587 goto free_eb;
4535 spin_unlock(&tree->buffer_lock); 4588 else
4536 radix_tree_preload_end();
4537 exists = NULL;
4538 goto again; 4589 goto again;
4539 }
4540 spin_unlock(&tree->buffer_lock);
4541 radix_tree_preload_end();
4542 mark_extent_buffer_accessed(exists);
4543 goto free_eb;
4544 } 4590 }
4545 /* add one reference for the tree */ 4591 /* add one reference for the tree */
4546 check_buffer_tree_ref(eb); 4592 check_buffer_tree_ref(eb);
4547 spin_unlock(&tree->buffer_lock);
4548 radix_tree_preload_end();
4549 4593
4550 /* 4594 /*
4551 * there is a race where release page may have 4595 * there is a race where release page may have
@@ -4576,23 +4620,6 @@ free_eb:
4576 return exists; 4620 return exists;
4577} 4621}
4578 4622
4579struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
4580 u64 start, unsigned long len)
4581{
4582 struct extent_buffer *eb;
4583
4584 rcu_read_lock();
4585 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
4586 if (eb && atomic_inc_not_zero(&eb->refs)) {
4587 rcu_read_unlock();
4588 mark_extent_buffer_accessed(eb);
4589 return eb;
4590 }
4591 rcu_read_unlock();
4592
4593 return NULL;
4594}
4595
4596static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) 4623static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4597{ 4624{
4598 struct extent_buffer *eb = 4625 struct extent_buffer *eb =
@@ -5056,23 +5083,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5056 } 5083 }
5057} 5084}
5058 5085
5059static void move_pages(struct page *dst_page, struct page *src_page,
5060 unsigned long dst_off, unsigned long src_off,
5061 unsigned long len)
5062{
5063 char *dst_kaddr = page_address(dst_page);
5064 if (dst_page == src_page) {
5065 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
5066 } else {
5067 char *src_kaddr = page_address(src_page);
5068 char *p = dst_kaddr + dst_off + len;
5069 char *s = src_kaddr + src_off + len;
5070
5071 while (len--)
5072 *--p = *--s;
5073 }
5074}
5075
5076static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) 5086static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5077{ 5087{
5078 unsigned long distance = (src > dst) ? src - dst : dst - src; 5088 unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5183,7 +5193,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5183 5193
5184 cur = min_t(unsigned long, len, src_off_in_page + 1); 5194 cur = min_t(unsigned long, len, src_off_in_page + 1);
5185 cur = min(cur, dst_off_in_page + 1); 5195 cur = min(cur, dst_off_in_page + 1);
5186 move_pages(extent_buffer_page(dst, dst_i), 5196 copy_pages(extent_buffer_page(dst, dst_i),
5187 extent_buffer_page(dst, src_i), 5197 extent_buffer_page(dst, src_i),
5188 dst_off_in_page - cur + 1, 5198 dst_off_in_page - cur + 1,
5189 src_off_in_page - cur + 1, cur); 5199 src_off_in_page - cur + 1, cur);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6dbc645f1f3d..19620c58f096 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -271,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
271struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); 271struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
272struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); 272struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
273struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 273struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
274 u64 start, unsigned long len); 274 u64 start);
275void free_extent_buffer(struct extent_buffer *eb); 275void free_extent_buffer(struct extent_buffer *eb);
276void free_extent_buffer_stale(struct extent_buffer *eb); 276void free_extent_buffer_stale(struct extent_buffer *eb);
277#define WAIT_NONE 0 277#define WAIT_NONE 0
@@ -345,4 +345,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
345int end_extent_writepage(struct page *page, int err, u64 start, u64 end); 345int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
346int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, 346int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
347 int mirror_num); 347 int mirror_num);
348#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
349noinline u64 find_lock_delalloc_range(struct inode *inode,
350 struct extent_io_tree *tree,
351 struct page *locked_page, u64 *start,
352 u64 *end, u64 max_bytes);
353#endif
348#endif 354#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 61adc44b7805..93fba716d7f8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,10 @@
3 3
4#include <linux/rbtree.h> 4#include <linux/rbtree.h>
5 5
6#define EXTENT_MAP_LAST_BYTE (u64)-4 6#define EXTENT_MAP_LAST_BYTE ((u64)-4)
7#define EXTENT_MAP_HOLE (u64)-3 7#define EXTENT_MAP_HOLE ((u64)-3)
8#define EXTENT_MAP_INLINE (u64)-2 8#define EXTENT_MAP_INLINE ((u64)-2)
9#define EXTENT_MAP_DELALLOC (u64)-1 9#define EXTENT_MAP_DELALLOC ((u64)-1)
10 10
11/* bits for the flags field */ 11/* bits for the flags field */
12#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ 12#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4f53159bdb9d..6f3848860283 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -329,6 +329,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
329 u64 csum_end; 329 u64 csum_end;
330 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 330 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
331 331
332 ASSERT(start == ALIGN(start, root->sectorsize) &&
333 (end + 1) == ALIGN(end + 1, root->sectorsize));
334
332 path = btrfs_alloc_path(); 335 path = btrfs_alloc_path();
333 if (!path) 336 if (!path)
334 return -ENOMEM; 337 return -ENOMEM;
@@ -846,10 +849,8 @@ insert:
846 path->leave_spinning = 0; 849 path->leave_spinning = 0;
847 if (ret < 0) 850 if (ret < 0)
848 goto fail_unlock; 851 goto fail_unlock;
849 if (ret != 0) { 852 if (WARN_ON(ret != 0))
850 WARN_ON(1);
851 goto fail_unlock; 853 goto fail_unlock;
852 }
853 leaf = path->nodes[0]; 854 leaf = path->nodes[0];
854csum: 855csum:
855 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 856 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc5072b2db53..82d0342763c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,7 +39,6 @@
39#include "print-tree.h" 39#include "print-tree.h"
40#include "tree-log.h" 40#include "tree-log.h"
41#include "locking.h" 41#include "locking.h"
42#include "compat.h"
43#include "volumes.h" 42#include "volumes.h"
44 43
45static struct kmem_cache *btrfs_inode_defrag_cachep; 44static struct kmem_cache *btrfs_inode_defrag_cachep;
@@ -370,7 +369,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
370 u64 root_objectid = 0; 369 u64 root_objectid = 0;
371 370
372 atomic_inc(&fs_info->defrag_running); 371 atomic_inc(&fs_info->defrag_running);
373 while(1) { 372 while (1) {
374 /* Pause the auto defragger. */ 373 /* Pause the auto defragger. */
375 if (test_bit(BTRFS_FS_STATE_REMOUNTING, 374 if (test_bit(BTRFS_FS_STATE_REMOUNTING,
376 &fs_info->fs_state)) 375 &fs_info->fs_state))
@@ -1281,6 +1280,7 @@ again:
1281 } 1280 }
1282 wait_on_page_writeback(pages[i]); 1281 wait_on_page_writeback(pages[i]);
1283 } 1282 }
1283 faili = num_pages - 1;
1284 err = 0; 1284 err = 0;
1285 if (start_pos < inode->i_size) { 1285 if (start_pos < inode->i_size) {
1286 struct btrfs_ordered_extent *ordered; 1286 struct btrfs_ordered_extent *ordered;
@@ -1299,8 +1299,10 @@ again:
1299 unlock_page(pages[i]); 1299 unlock_page(pages[i]);
1300 page_cache_release(pages[i]); 1300 page_cache_release(pages[i]);
1301 } 1301 }
1302 btrfs_wait_ordered_range(inode, start_pos, 1302 err = btrfs_wait_ordered_range(inode, start_pos,
1303 last_pos - start_pos); 1303 last_pos - start_pos);
1304 if (err)
1305 goto fail;
1304 goto again; 1306 goto again;
1305 } 1307 }
1306 if (ordered) 1308 if (ordered)
@@ -1809,8 +1811,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1809 atomic_inc(&root->log_batch); 1811 atomic_inc(&root->log_batch);
1810 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 1812 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1811 &BTRFS_I(inode)->runtime_flags); 1813 &BTRFS_I(inode)->runtime_flags);
1812 if (full_sync) 1814 if (full_sync) {
1813 btrfs_wait_ordered_range(inode, start, end - start + 1); 1815 ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
1816 if (ret) {
1817 mutex_unlock(&inode->i_mutex);
1818 goto out;
1819 }
1820 }
1814 atomic_inc(&root->log_batch); 1821 atomic_inc(&root->log_batch);
1815 1822
1816 /* 1823 /*
@@ -1859,8 +1866,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1859 1866
1860 ret = btrfs_log_dentry_safe(trans, root, dentry); 1867 ret = btrfs_log_dentry_safe(trans, root, dentry);
1861 if (ret < 0) { 1868 if (ret < 0) {
1862 mutex_unlock(&inode->i_mutex); 1869 /* Fallthrough and commit/free transaction. */
1863 goto out; 1870 ret = 1;
1864 } 1871 }
1865 1872
1866 /* we've logged all the items and now have a consistent 1873 /* we've logged all the items and now have a consistent
@@ -1876,27 +1883,20 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1876 mutex_unlock(&inode->i_mutex); 1883 mutex_unlock(&inode->i_mutex);
1877 1884
1878 if (ret != BTRFS_NO_LOG_SYNC) { 1885 if (ret != BTRFS_NO_LOG_SYNC) {
1879 if (ret > 0) { 1886 if (!ret) {
1880 /*
1881 * If we didn't already wait for ordered extents we need
1882 * to do that now.
1883 */
1884 if (!full_sync)
1885 btrfs_wait_ordered_range(inode, start,
1886 end - start + 1);
1887 ret = btrfs_commit_transaction(trans, root);
1888 } else {
1889 ret = btrfs_sync_log(trans, root); 1887 ret = btrfs_sync_log(trans, root);
1890 if (ret == 0) { 1888 if (!ret) {
1891 ret = btrfs_end_transaction(trans, root); 1889 ret = btrfs_end_transaction(trans, root);
1892 } else { 1890 goto out;
1893 if (!full_sync)
1894 btrfs_wait_ordered_range(inode, start,
1895 end -
1896 start + 1);
1897 ret = btrfs_commit_transaction(trans, root);
1898 } 1891 }
1899 } 1892 }
1893 if (!full_sync) {
1894 ret = btrfs_wait_ordered_range(inode, start,
1895 end - start + 1);
1896 if (ret)
1897 goto out;
1898 }
1899 ret = btrfs_commit_transaction(trans, root);
1900 } else { 1900 } else {
1901 ret = btrfs_end_transaction(trans, root); 1901 ret = btrfs_end_transaction(trans, root);
1902 } 1902 }
@@ -2067,7 +2067,9 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2067 bool same_page = ((offset >> PAGE_CACHE_SHIFT) == 2067 bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2068 ((offset + len - 1) >> PAGE_CACHE_SHIFT)); 2068 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2069 2069
2070 btrfs_wait_ordered_range(inode, offset, len); 2070 ret = btrfs_wait_ordered_range(inode, offset, len);
2071 if (ret)
2072 return ret;
2071 2073
2072 mutex_lock(&inode->i_mutex); 2074 mutex_lock(&inode->i_mutex);
2073 /* 2075 /*
@@ -2136,8 +2138,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2136 btrfs_put_ordered_extent(ordered); 2138 btrfs_put_ordered_extent(ordered);
2137 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, 2139 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
2138 lockend, &cached_state, GFP_NOFS); 2140 lockend, &cached_state, GFP_NOFS);
2139 btrfs_wait_ordered_range(inode, lockstart, 2141 ret = btrfs_wait_ordered_range(inode, lockstart,
2140 lockend - lockstart + 1); 2142 lockend - lockstart + 1);
2143 if (ret) {
2144 mutex_unlock(&inode->i_mutex);
2145 return ret;
2146 }
2141 } 2147 }
2142 2148
2143 path = btrfs_alloc_path(); 2149 path = btrfs_alloc_path();
@@ -2308,7 +2314,10 @@ static long btrfs_fallocate(struct file *file, int mode,
2308 * wait for ordered IO before we have any locks. We'll loop again 2314 * wait for ordered IO before we have any locks. We'll loop again
2309 * below with the locks held. 2315 * below with the locks held.
2310 */ 2316 */
2311 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); 2317 ret = btrfs_wait_ordered_range(inode, alloc_start,
2318 alloc_end - alloc_start);
2319 if (ret)
2320 goto out;
2312 2321
2313 locked_end = alloc_end - 1; 2322 locked_end = alloc_end - 1;
2314 while (1) { 2323 while (1) {
@@ -2332,8 +2341,10 @@ static long btrfs_fallocate(struct file *file, int mode,
2332 * we can't wait on the range with the transaction 2341 * we can't wait on the range with the transaction
2333 * running or with the extent lock held 2342 * running or with the extent lock held
2334 */ 2343 */
2335 btrfs_wait_ordered_range(inode, alloc_start, 2344 ret = btrfs_wait_ordered_range(inode, alloc_start,
2336 alloc_end - alloc_start); 2345 alloc_end - alloc_start);
2346 if (ret)
2347 goto out;
2337 } else { 2348 } else {
2338 if (ordered) 2349 if (ordered)
2339 btrfs_put_ordered_extent(ordered); 2350 btrfs_put_ordered_extent(ordered);
@@ -2405,14 +2416,12 @@ out_reserve_fail:
2405static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) 2416static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2406{ 2417{
2407 struct btrfs_root *root = BTRFS_I(inode)->root; 2418 struct btrfs_root *root = BTRFS_I(inode)->root;
2408 struct extent_map *em; 2419 struct extent_map *em = NULL;
2409 struct extent_state *cached_state = NULL; 2420 struct extent_state *cached_state = NULL;
2410 u64 lockstart = *offset; 2421 u64 lockstart = *offset;
2411 u64 lockend = i_size_read(inode); 2422 u64 lockend = i_size_read(inode);
2412 u64 start = *offset; 2423 u64 start = *offset;
2413 u64 orig_start = *offset;
2414 u64 len = i_size_read(inode); 2424 u64 len = i_size_read(inode);
2415 u64 last_end = 0;
2416 int ret = 0; 2425 int ret = 0;
2417 2426
2418 lockend = max_t(u64, root->sectorsize, lockend); 2427 lockend = max_t(u64, root->sectorsize, lockend);
@@ -2429,89 +2438,35 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2429 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, 2438 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
2430 &cached_state); 2439 &cached_state);
2431 2440
2432 /* 2441 while (start < inode->i_size) {
2433 * Delalloc is such a pain. If we have a hole and we have pending
2434 * delalloc for a portion of the hole we will get back a hole that
2435 * exists for the entire range since it hasn't been actually written
2436 * yet. So to take care of this case we need to look for an extent just
2437 * before the position we want in case there is outstanding delalloc
2438 * going on here.
2439 */
2440 if (whence == SEEK_HOLE && start != 0) {
2441 if (start <= root->sectorsize)
2442 em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
2443 root->sectorsize, 0);
2444 else
2445 em = btrfs_get_extent_fiemap(inode, NULL, 0,
2446 start - root->sectorsize,
2447 root->sectorsize, 0);
2448 if (IS_ERR(em)) {
2449 ret = PTR_ERR(em);
2450 goto out;
2451 }
2452 last_end = em->start + em->len;
2453 if (em->block_start == EXTENT_MAP_DELALLOC)
2454 last_end = min_t(u64, last_end, inode->i_size);
2455 free_extent_map(em);
2456 }
2457
2458 while (1) {
2459 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); 2442 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
2460 if (IS_ERR(em)) { 2443 if (IS_ERR(em)) {
2461 ret = PTR_ERR(em); 2444 ret = PTR_ERR(em);
2445 em = NULL;
2462 break; 2446 break;
2463 } 2447 }
2464 2448
2465 if (em->block_start == EXTENT_MAP_HOLE) { 2449 if (whence == SEEK_HOLE &&
2466 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 2450 (em->block_start == EXTENT_MAP_HOLE ||
2467 if (last_end <= orig_start) { 2451 test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
2468 free_extent_map(em); 2452 break;
2469 ret = -ENXIO; 2453 else if (whence == SEEK_DATA &&
2470 break; 2454 (em->block_start != EXTENT_MAP_HOLE &&
2471 } 2455 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
2472 } 2456 break;
2473
2474 if (whence == SEEK_HOLE) {
2475 *offset = start;
2476 free_extent_map(em);
2477 break;
2478 }
2479 } else {
2480 if (whence == SEEK_DATA) {
2481 if (em->block_start == EXTENT_MAP_DELALLOC) {
2482 if (start >= inode->i_size) {
2483 free_extent_map(em);
2484 ret = -ENXIO;
2485 break;
2486 }
2487 }
2488
2489 if (!test_bit(EXTENT_FLAG_PREALLOC,
2490 &em->flags)) {
2491 *offset = start;
2492 free_extent_map(em);
2493 break;
2494 }
2495 }
2496 }
2497 2457
2498 start = em->start + em->len; 2458 start = em->start + em->len;
2499 last_end = em->start + em->len;
2500
2501 if (em->block_start == EXTENT_MAP_DELALLOC)
2502 last_end = min_t(u64, last_end, inode->i_size);
2503
2504 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
2505 free_extent_map(em);
2506 ret = -ENXIO;
2507 break;
2508 }
2509 free_extent_map(em); 2459 free_extent_map(em);
2460 em = NULL;
2510 cond_resched(); 2461 cond_resched();
2511 } 2462 }
2512 if (!ret) 2463 free_extent_map(em);
2513 *offset = min(*offset, inode->i_size); 2464 if (!ret) {
2514out: 2465 if (whence == SEEK_DATA && start >= inode->i_size)
2466 ret = -ENXIO;
2467 else
2468 *offset = min_t(loff_t, start, inode->i_size);
2469 }
2515 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2470 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2516 &cached_state, GFP_NOFS); 2471 &cached_state, GFP_NOFS);
2517 return ret; 2472 return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f0ddfce96e6..057be95b1e1e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -218,7 +218,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
218 218
219int btrfs_truncate_free_space_cache(struct btrfs_root *root, 219int btrfs_truncate_free_space_cache(struct btrfs_root *root,
220 struct btrfs_trans_handle *trans, 220 struct btrfs_trans_handle *trans,
221 struct btrfs_path *path,
222 struct inode *inode) 221 struct inode *inode)
223{ 222{
224 int ret = 0; 223 int ret = 0;
@@ -1009,8 +1008,13 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1009 if (ret) 1008 if (ret)
1010 goto out; 1009 goto out;
1011 1010
1012 1011 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1013 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1012 if (ret) {
1013 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1014 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
1015 GFP_NOFS);
1016 goto out;
1017 }
1014 1018
1015 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1019 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1016 key.offset = offset; 1020 key.offset = offset;
@@ -1431,13 +1435,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1431 ctl->free_space += bytes; 1435 ctl->free_space += bytes;
1432} 1436}
1433 1437
1438/*
1439 * If we can not find suitable extent, we will use bytes to record
1440 * the size of the max extent.
1441 */
1434static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1442static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1435 struct btrfs_free_space *bitmap_info, u64 *offset, 1443 struct btrfs_free_space *bitmap_info, u64 *offset,
1436 u64 *bytes) 1444 u64 *bytes)
1437{ 1445{
1438 unsigned long found_bits = 0; 1446 unsigned long found_bits = 0;
1447 unsigned long max_bits = 0;
1439 unsigned long bits, i; 1448 unsigned long bits, i;
1440 unsigned long next_zero; 1449 unsigned long next_zero;
1450 unsigned long extent_bits;
1441 1451
1442 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1452 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1443 max_t(u64, *offset, bitmap_info->offset)); 1453 max_t(u64, *offset, bitmap_info->offset));
@@ -1446,9 +1456,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1446 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1456 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1447 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1457 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1448 BITS_PER_BITMAP, i); 1458 BITS_PER_BITMAP, i);
1449 if ((next_zero - i) >= bits) { 1459 extent_bits = next_zero - i;
1450 found_bits = next_zero - i; 1460 if (extent_bits >= bits) {
1461 found_bits = extent_bits;
1451 break; 1462 break;
1463 } else if (extent_bits > max_bits) {
1464 max_bits = extent_bits;
1452 } 1465 }
1453 i = next_zero; 1466 i = next_zero;
1454 } 1467 }
@@ -1459,38 +1472,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1459 return 0; 1472 return 0;
1460 } 1473 }
1461 1474
1475 *bytes = (u64)(max_bits) * ctl->unit;
1462 return -1; 1476 return -1;
1463} 1477}
1464 1478
1479/* Cache the size of the max extent in bytes */
1465static struct btrfs_free_space * 1480static struct btrfs_free_space *
1466find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1481find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1467 unsigned long align) 1482 unsigned long align, u64 *max_extent_size)
1468{ 1483{
1469 struct btrfs_free_space *entry; 1484 struct btrfs_free_space *entry;
1470 struct rb_node *node; 1485 struct rb_node *node;
1471 u64 ctl_off;
1472 u64 tmp; 1486 u64 tmp;
1473 u64 align_off; 1487 u64 align_off;
1474 int ret; 1488 int ret;
1475 1489
1476 if (!ctl->free_space_offset.rb_node) 1490 if (!ctl->free_space_offset.rb_node)
1477 return NULL; 1491 goto out;
1478 1492
1479 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1493 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1480 if (!entry) 1494 if (!entry)
1481 return NULL; 1495 goto out;
1482 1496
1483 for (node = &entry->offset_index; node; node = rb_next(node)) { 1497 for (node = &entry->offset_index; node; node = rb_next(node)) {
1484 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1498 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1485 if (entry->bytes < *bytes) 1499 if (entry->bytes < *bytes) {
1500 if (entry->bytes > *max_extent_size)
1501 *max_extent_size = entry->bytes;
1486 continue; 1502 continue;
1503 }
1487 1504
1488 /* make sure the space returned is big enough 1505 /* make sure the space returned is big enough
1489 * to match our requested alignment 1506 * to match our requested alignment
1490 */ 1507 */
1491 if (*bytes >= align) { 1508 if (*bytes >= align) {
1492 ctl_off = entry->offset - ctl->start; 1509 tmp = entry->offset - ctl->start + align - 1;
1493 tmp = ctl_off + align - 1;;
1494 do_div(tmp, align); 1510 do_div(tmp, align);
1495 tmp = tmp * align + ctl->start; 1511 tmp = tmp * align + ctl->start;
1496 align_off = tmp - entry->offset; 1512 align_off = tmp - entry->offset;
@@ -1499,14 +1515,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1499 tmp = entry->offset; 1515 tmp = entry->offset;
1500 } 1516 }
1501 1517
1502 if (entry->bytes < *bytes + align_off) 1518 if (entry->bytes < *bytes + align_off) {
1519 if (entry->bytes > *max_extent_size)
1520 *max_extent_size = entry->bytes;
1503 continue; 1521 continue;
1522 }
1504 1523
1505 if (entry->bitmap) { 1524 if (entry->bitmap) {
1506 ret = search_bitmap(ctl, entry, &tmp, bytes); 1525 u64 size = *bytes;
1526
1527 ret = search_bitmap(ctl, entry, &tmp, &size);
1507 if (!ret) { 1528 if (!ret) {
1508 *offset = tmp; 1529 *offset = tmp;
1530 *bytes = size;
1509 return entry; 1531 return entry;
1532 } else if (size > *max_extent_size) {
1533 *max_extent_size = size;
1510 } 1534 }
1511 continue; 1535 continue;
1512 } 1536 }
@@ -1515,7 +1539,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1515 *bytes = entry->bytes - align_off; 1539 *bytes = entry->bytes - align_off;
1516 return entry; 1540 return entry;
1517 } 1541 }
1518 1542out:
1519 return NULL; 1543 return NULL;
1520} 1544}
1521 1545
@@ -2116,7 +2140,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2116} 2140}
2117 2141
2118u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2142u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2119 u64 offset, u64 bytes, u64 empty_size) 2143 u64 offset, u64 bytes, u64 empty_size,
2144 u64 *max_extent_size)
2120{ 2145{
2121 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2146 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2122 struct btrfs_free_space *entry = NULL; 2147 struct btrfs_free_space *entry = NULL;
@@ -2127,7 +2152,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2127 2152
2128 spin_lock(&ctl->tree_lock); 2153 spin_lock(&ctl->tree_lock);
2129 entry = find_free_space(ctl, &offset, &bytes_search, 2154 entry = find_free_space(ctl, &offset, &bytes_search,
2130 block_group->full_stripe_len); 2155 block_group->full_stripe_len, max_extent_size);
2131 if (!entry) 2156 if (!entry)
2132 goto out; 2157 goto out;
2133 2158
@@ -2137,7 +2162,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2137 if (!entry->bytes) 2162 if (!entry->bytes)
2138 free_bitmap(ctl, entry); 2163 free_bitmap(ctl, entry);
2139 } else { 2164 } else {
2140
2141 unlink_free_space(ctl, entry); 2165 unlink_free_space(ctl, entry);
2142 align_gap_len = offset - entry->offset; 2166 align_gap_len = offset - entry->offset;
2143 align_gap = entry->offset; 2167 align_gap = entry->offset;
@@ -2151,7 +2175,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2151 else 2175 else
2152 link_free_space(ctl, entry); 2176 link_free_space(ctl, entry);
2153 } 2177 }
2154
2155out: 2178out:
2156 spin_unlock(&ctl->tree_lock); 2179 spin_unlock(&ctl->tree_lock);
2157 2180
@@ -2206,7 +2229,8 @@ int btrfs_return_cluster_to_free_space(
2206static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2229static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2207 struct btrfs_free_cluster *cluster, 2230 struct btrfs_free_cluster *cluster,
2208 struct btrfs_free_space *entry, 2231 struct btrfs_free_space *entry,
2209 u64 bytes, u64 min_start) 2232 u64 bytes, u64 min_start,
2233 u64 *max_extent_size)
2210{ 2234{
2211 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2235 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2212 int err; 2236 int err;
@@ -2218,8 +2242,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2218 search_bytes = bytes; 2242 search_bytes = bytes;
2219 2243
2220 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2244 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2221 if (err) 2245 if (err) {
2246 if (search_bytes > *max_extent_size)
2247 *max_extent_size = search_bytes;
2222 return 0; 2248 return 0;
2249 }
2223 2250
2224 ret = search_start; 2251 ret = search_start;
2225 __bitmap_clear_bits(ctl, entry, ret, bytes); 2252 __bitmap_clear_bits(ctl, entry, ret, bytes);
@@ -2234,7 +2261,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2234 */ 2261 */
2235u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2262u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2236 struct btrfs_free_cluster *cluster, u64 bytes, 2263 struct btrfs_free_cluster *cluster, u64 bytes,
2237 u64 min_start) 2264 u64 min_start, u64 *max_extent_size)
2238{ 2265{
2239 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2266 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2240 struct btrfs_free_space *entry = NULL; 2267 struct btrfs_free_space *entry = NULL;
@@ -2253,7 +2280,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2253 goto out; 2280 goto out;
2254 2281
2255 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2282 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2256 while(1) { 2283 while (1) {
2284 if (entry->bytes < bytes && entry->bytes > *max_extent_size)
2285 *max_extent_size = entry->bytes;
2286
2257 if (entry->bytes < bytes || 2287 if (entry->bytes < bytes ||
2258 (!entry->bitmap && entry->offset < min_start)) { 2288 (!entry->bitmap && entry->offset < min_start)) {
2259 node = rb_next(&entry->offset_index); 2289 node = rb_next(&entry->offset_index);
@@ -2267,7 +2297,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2267 if (entry->bitmap) { 2297 if (entry->bitmap) {
2268 ret = btrfs_alloc_from_bitmap(block_group, 2298 ret = btrfs_alloc_from_bitmap(block_group,
2269 cluster, entry, bytes, 2299 cluster, entry, bytes,
2270 cluster->window_start); 2300 cluster->window_start,
2301 max_extent_size);
2271 if (ret == 0) { 2302 if (ret == 0) {
2272 node = rb_next(&entry->offset_index); 2303 node = rb_next(&entry->offset_index);
2273 if (!node) 2304 if (!node)
@@ -2940,19 +2971,15 @@ out:
2940 2971
2941int btrfs_write_out_ino_cache(struct btrfs_root *root, 2972int btrfs_write_out_ino_cache(struct btrfs_root *root,
2942 struct btrfs_trans_handle *trans, 2973 struct btrfs_trans_handle *trans,
2943 struct btrfs_path *path) 2974 struct btrfs_path *path,
2975 struct inode *inode)
2944{ 2976{
2945 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 2977 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2946 struct inode *inode;
2947 int ret; 2978 int ret;
2948 2979
2949 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 2980 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2950 return 0; 2981 return 0;
2951 2982
2952 inode = lookup_free_ino_inode(root, path);
2953 if (IS_ERR(inode))
2954 return 0;
2955
2956 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 2983 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2957 if (ret) { 2984 if (ret) {
2958 btrfs_delalloc_release_metadata(inode, inode->i_size); 2985 btrfs_delalloc_release_metadata(inode, inode->i_size);
@@ -2963,7 +2990,6 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2963#endif 2990#endif
2964 } 2991 }
2965 2992
2966 iput(inode);
2967 return ret; 2993 return ret;
2968} 2994}
2969 2995
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index c74904167476..0cf4977ef70d 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -58,7 +58,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
58 struct btrfs_block_rsv *rsv); 58 struct btrfs_block_rsv *rsv);
59int btrfs_truncate_free_space_cache(struct btrfs_root *root, 59int btrfs_truncate_free_space_cache(struct btrfs_root *root,
60 struct btrfs_trans_handle *trans, 60 struct btrfs_trans_handle *trans,
61 struct btrfs_path *path,
62 struct inode *inode); 61 struct inode *inode);
63int load_free_space_cache(struct btrfs_fs_info *fs_info, 62int load_free_space_cache(struct btrfs_fs_info *fs_info,
64 struct btrfs_block_group_cache *block_group); 63 struct btrfs_block_group_cache *block_group);
@@ -76,7 +75,8 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info,
76 struct btrfs_root *root); 75 struct btrfs_root *root);
77int btrfs_write_out_ino_cache(struct btrfs_root *root, 76int btrfs_write_out_ino_cache(struct btrfs_root *root,
78 struct btrfs_trans_handle *trans, 77 struct btrfs_trans_handle *trans,
79 struct btrfs_path *path); 78 struct btrfs_path *path,
79 struct inode *inode);
80 80
81void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); 81void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
82int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 82int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
@@ -94,7 +94,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
95 *block_group); 95 *block_group);
96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
97 u64 offset, u64 bytes, u64 empty_size); 97 u64 offset, u64 bytes, u64 empty_size,
98 u64 *max_extent_size);
98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 99u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 100void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
100 u64 bytes); 101 u64 bytes);
@@ -105,7 +106,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
105void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); 106void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
106u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 107u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
107 struct btrfs_free_cluster *cluster, u64 bytes, 108 struct btrfs_free_cluster *cluster, u64 bytes,
108 u64 min_start); 109 u64 min_start, u64 *max_extent_size);
109int btrfs_return_cluster_to_free_space( 110int btrfs_return_cluster_to_free_space(
110 struct btrfs_block_group_cache *block_group, 111 struct btrfs_block_group_cache *block_group,
111 struct btrfs_free_cluster *cluster); 112 struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index e0b7034d6343..ec82fae07097 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
369 goto out; 369 goto out;
370 370
371 leaf = path->nodes[0]; 371 leaf = path->nodes[0];
372 item = btrfs_item_nr(leaf, path->slots[0]); 372 item = btrfs_item_nr(path->slots[0]);
373 ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); 373 ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
374 ptr += btrfs_item_size(leaf, item) - ins_len; 374 ptr += btrfs_item_size(leaf, item) - ins_len;
375 extref = (struct btrfs_inode_extref *)ptr; 375 extref = (struct btrfs_inode_extref *)ptr;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 2c66ddbbe670..ab485e57b6fe 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -78,10 +78,8 @@ again:
78 btrfs_transaction_in_commit(fs_info)) { 78 btrfs_transaction_in_commit(fs_info)) {
79 leaf = path->nodes[0]; 79 leaf = path->nodes[0];
80 80
81 if (btrfs_header_nritems(leaf) == 0) { 81 if (WARN_ON(btrfs_header_nritems(leaf) == 0))
82 WARN_ON(1);
83 break; 82 break;
84 }
85 83
86 /* 84 /*
87 * Save the key so we can advances forward 85 * Save the key so we can advances forward
@@ -237,7 +235,7 @@ again:
237 start_caching(root); 235 start_caching(root);
238 236
239 if (objectid <= root->cache_progress || 237 if (objectid <= root->cache_progress ||
240 objectid > root->highest_objectid) 238 objectid >= root->highest_objectid)
241 __btrfs_add_free_space(ctl, objectid, 1); 239 __btrfs_add_free_space(ctl, objectid, 1);
242 else 240 else
243 __btrfs_add_free_space(pinned, objectid, 1); 241 __btrfs_add_free_space(pinned, objectid, 1);
@@ -412,8 +410,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
412 return 0; 410 return 0;
413 411
414 /* Don't save inode cache if we are deleting this root */ 412 /* Don't save inode cache if we are deleting this root */
415 if (btrfs_root_refs(&root->root_item) == 0 && 413 if (btrfs_root_refs(&root->root_item) == 0)
416 root != root->fs_info->tree_root)
417 return 0; 414 return 0;
418 415
419 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 416 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
@@ -467,7 +464,7 @@ again:
467 } 464 }
468 465
469 if (i_size_read(inode) > 0) { 466 if (i_size_read(inode) > 0) {
470 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 467 ret = btrfs_truncate_free_space_cache(root, trans, inode);
471 if (ret) { 468 if (ret) {
472 if (ret != -ENOSPC) 469 if (ret != -ENOSPC)
473 btrfs_abort_transaction(trans, root, ret); 470 btrfs_abort_transaction(trans, root, ret);
@@ -504,7 +501,7 @@ again:
504 } 501 }
505 btrfs_free_reserved_data_space(inode, prealloc); 502 btrfs_free_reserved_data_space(inode, prealloc);
506 503
507 ret = btrfs_write_out_ino_cache(root, trans, path); 504 ret = btrfs_write_out_ino_cache(root, trans, path, inode);
508out_put: 505out_put:
509 iput(inode); 506 iput(inode);
510out_release: 507out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f338c5672d58..f1a77449d032 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,7 +43,6 @@
43#include <linux/btrfs.h> 43#include <linux/btrfs.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h> 45#include <linux/posix_acl_xattr.h>
46#include "compat.h"
47#include "ctree.h" 46#include "ctree.h"
48#include "disk-io.h" 47#include "disk-io.h"
49#include "transaction.h" 48#include "transaction.h"
@@ -844,7 +843,10 @@ static noinline int cow_file_range(struct inode *inode,
844 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 843 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
845 int ret = 0; 844 int ret = 0;
846 845
847 BUG_ON(btrfs_is_free_space_inode(inode)); 846 if (btrfs_is_free_space_inode(inode)) {
847 WARN_ON_ONCE(1);
848 return -EINVAL;
849 }
848 850
849 num_bytes = ALIGN(end - start + 1, blocksize); 851 num_bytes = ALIGN(end - start + 1, blocksize);
850 num_bytes = max(blocksize, num_bytes); 852 num_bytes = max(blocksize, num_bytes);
@@ -1178,10 +1180,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1178 while (1) { 1180 while (1) {
1179 ret = btrfs_lookup_file_extent(trans, root, path, ino, 1181 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1180 cur_offset, 0); 1182 cur_offset, 0);
1181 if (ret < 0) { 1183 if (ret < 0)
1182 btrfs_abort_transaction(trans, root, ret);
1183 goto error; 1184 goto error;
1184 }
1185 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1185 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1186 leaf = path->nodes[0]; 1186 leaf = path->nodes[0];
1187 btrfs_item_key_to_cpu(leaf, &found_key, 1187 btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1195,10 +1195,8 @@ next_slot:
1195 leaf = path->nodes[0]; 1195 leaf = path->nodes[0];
1196 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 1196 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1197 ret = btrfs_next_leaf(root, path); 1197 ret = btrfs_next_leaf(root, path);
1198 if (ret < 0) { 1198 if (ret < 0)
1199 btrfs_abort_transaction(trans, root, ret);
1200 goto error; 1199 goto error;
1201 }
1202 if (ret > 0) 1200 if (ret > 0)
1203 break; 1201 break;
1204 leaf = path->nodes[0]; 1202 leaf = path->nodes[0];
@@ -1289,10 +1287,8 @@ out_check:
1289 ret = cow_file_range(inode, locked_page, 1287 ret = cow_file_range(inode, locked_page,
1290 cow_start, found_key.offset - 1, 1288 cow_start, found_key.offset - 1,
1291 page_started, nr_written, 1); 1289 page_started, nr_written, 1);
1292 if (ret) { 1290 if (ret)
1293 btrfs_abort_transaction(trans, root, ret);
1294 goto error; 1291 goto error;
1295 }
1296 cow_start = (u64)-1; 1292 cow_start = (u64)-1;
1297 } 1293 }
1298 1294
@@ -1339,10 +1335,8 @@ out_check:
1339 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1335 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1340 ret = btrfs_reloc_clone_csums(inode, cur_offset, 1336 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1341 num_bytes); 1337 num_bytes);
1342 if (ret) { 1338 if (ret)
1343 btrfs_abort_transaction(trans, root, ret);
1344 goto error; 1339 goto error;
1345 }
1346 } 1340 }
1347 1341
1348 extent_clear_unlock_delalloc(inode, cur_offset, 1342 extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1364,10 +1358,8 @@ out_check:
1364 if (cow_start != (u64)-1) { 1358 if (cow_start != (u64)-1) {
1365 ret = cow_file_range(inode, locked_page, cow_start, end, 1359 ret = cow_file_range(inode, locked_page, cow_start, end,
1366 page_started, nr_written, 1); 1360 page_started, nr_written, 1);
1367 if (ret) { 1361 if (ret)
1368 btrfs_abort_transaction(trans, root, ret);
1369 goto error; 1362 goto error;
1370 }
1371 } 1363 }
1372 1364
1373error: 1365error:
@@ -1551,7 +1543,13 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1551 spin_unlock(&BTRFS_I(inode)->lock); 1543 spin_unlock(&BTRFS_I(inode)->lock);
1552 } 1544 }
1553 1545
1554 if (*bits & EXTENT_DO_ACCOUNTING) 1546 /*
1547 * We don't reserve metadata space for space cache inodes so we
1548 * don't need to call dellalloc_release_metadata if there is an
1549 * error.
1550 */
1551 if (*bits & EXTENT_DO_ACCOUNTING &&
1552 root != root->fs_info->tree_root)
1555 btrfs_delalloc_release_metadata(inode, len); 1553 btrfs_delalloc_release_metadata(inode, len);
1556 1554
1557 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1555 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
@@ -2041,10 +2039,8 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2041 key.offset = offset; 2039 key.offset = offset;
2042 2040
2043 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2041 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2044 if (ret < 0) { 2042 if (WARN_ON(ret < 0))
2045 WARN_ON(1);
2046 return ret; 2043 return ret;
2047 }
2048 ret = 0; 2044 ret = 0;
2049 2045
2050 while (1) { 2046 while (1) {
@@ -2133,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2133 old->extent_offset, fs_info, 2129 old->extent_offset, fs_info,
2134 path, record_one_backref, 2130 path, record_one_backref,
2135 old); 2131 old);
2136 BUG_ON(ret < 0 && ret != -ENOENT); 2132 if (ret < 0 && ret != -ENOENT)
2133 return false;
2137 2134
2138 /* no backref to be processed for this extent */ 2135 /* no backref to be processed for this extent */
2139 if (!old->count) { 2136 if (!old->count) {
@@ -2367,10 +2364,23 @@ out_unlock:
2367 return ret; 2364 return ret;
2368} 2365}
2369 2366
2367static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2368{
2369 struct old_sa_defrag_extent *old, *tmp;
2370
2371 if (!new)
2372 return;
2373
2374 list_for_each_entry_safe(old, tmp, &new->head, list) {
2375 list_del(&old->list);
2376 kfree(old);
2377 }
2378 kfree(new);
2379}
2380
2370static void relink_file_extents(struct new_sa_defrag_extent *new) 2381static void relink_file_extents(struct new_sa_defrag_extent *new)
2371{ 2382{
2372 struct btrfs_path *path; 2383 struct btrfs_path *path;
2373 struct old_sa_defrag_extent *old, *tmp;
2374 struct sa_defrag_extent_backref *backref; 2384 struct sa_defrag_extent_backref *backref;
2375 struct sa_defrag_extent_backref *prev = NULL; 2385 struct sa_defrag_extent_backref *prev = NULL;
2376 struct inode *inode; 2386 struct inode *inode;
@@ -2413,16 +2423,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
2413 kfree(prev); 2423 kfree(prev);
2414 2424
2415 btrfs_free_path(path); 2425 btrfs_free_path(path);
2416
2417 list_for_each_entry_safe(old, tmp, &new->head, list) {
2418 list_del(&old->list);
2419 kfree(old);
2420 }
2421out: 2426out:
2427 free_sa_defrag_extent(new);
2428
2422 atomic_dec(&root->fs_info->defrag_running); 2429 atomic_dec(&root->fs_info->defrag_running);
2423 wake_up(&root->fs_info->transaction_wait); 2430 wake_up(&root->fs_info->transaction_wait);
2424
2425 kfree(new);
2426} 2431}
2427 2432
2428static struct new_sa_defrag_extent * 2433static struct new_sa_defrag_extent *
@@ -2432,7 +2437,7 @@ record_old_file_extents(struct inode *inode,
2432 struct btrfs_root *root = BTRFS_I(inode)->root; 2437 struct btrfs_root *root = BTRFS_I(inode)->root;
2433 struct btrfs_path *path; 2438 struct btrfs_path *path;
2434 struct btrfs_key key; 2439 struct btrfs_key key;
2435 struct old_sa_defrag_extent *old, *tmp; 2440 struct old_sa_defrag_extent *old;
2436 struct new_sa_defrag_extent *new; 2441 struct new_sa_defrag_extent *new;
2437 int ret; 2442 int ret;
2438 2443
@@ -2480,7 +2485,7 @@ record_old_file_extents(struct inode *inode,
2480 if (slot >= btrfs_header_nritems(l)) { 2485 if (slot >= btrfs_header_nritems(l)) {
2481 ret = btrfs_next_leaf(root, path); 2486 ret = btrfs_next_leaf(root, path);
2482 if (ret < 0) 2487 if (ret < 0)
2483 goto out_free_list; 2488 goto out_free_path;
2484 else if (ret > 0) 2489 else if (ret > 0)
2485 break; 2490 break;
2486 continue; 2491 continue;
@@ -2509,7 +2514,7 @@ record_old_file_extents(struct inode *inode,
2509 2514
2510 old = kmalloc(sizeof(*old), GFP_NOFS); 2515 old = kmalloc(sizeof(*old), GFP_NOFS);
2511 if (!old) 2516 if (!old)
2512 goto out_free_list; 2517 goto out_free_path;
2513 2518
2514 offset = max(new->file_pos, key.offset); 2519 offset = max(new->file_pos, key.offset);
2515 end = min(new->file_pos + new->len, key.offset + num_bytes); 2520 end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2531,15 +2536,10 @@ next:
2531 2536
2532 return new; 2537 return new;
2533 2538
2534out_free_list:
2535 list_for_each_entry_safe(old, tmp, &new->head, list) {
2536 list_del(&old->list);
2537 kfree(old);
2538 }
2539out_free_path: 2539out_free_path:
2540 btrfs_free_path(path); 2540 btrfs_free_path(path);
2541out_kfree: 2541out_kfree:
2542 kfree(new); 2542 free_sa_defrag_extent(new);
2543 return NULL; 2543 return NULL;
2544} 2544}
2545 2545
@@ -2710,8 +2710,14 @@ out:
2710 btrfs_remove_ordered_extent(inode, ordered_extent); 2710 btrfs_remove_ordered_extent(inode, ordered_extent);
2711 2711
2712 /* for snapshot-aware defrag */ 2712 /* for snapshot-aware defrag */
2713 if (new) 2713 if (new) {
2714 relink_file_extents(new); 2714 if (ret) {
2715 free_sa_defrag_extent(new);
2716 atomic_dec(&root->fs_info->defrag_running);
2717 } else {
2718 relink_file_extents(new);
2719 }
2720 }
2715 2721
2716 /* once for us */ 2722 /* once for us */
2717 btrfs_put_ordered_extent(ordered_extent); 2723 btrfs_put_ordered_extent(ordered_extent);
@@ -2969,6 +2975,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2969 if (insert >= 1) { 2975 if (insert >= 1) {
2970 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2976 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2971 if (ret) { 2977 if (ret) {
2978 atomic_dec(&root->orphan_inodes);
2972 if (reserve) { 2979 if (reserve) {
2973 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 2980 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
2974 &BTRFS_I(inode)->runtime_flags); 2981 &BTRFS_I(inode)->runtime_flags);
@@ -3018,14 +3025,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3018 release_rsv = 1; 3025 release_rsv = 1;
3019 spin_unlock(&root->orphan_lock); 3026 spin_unlock(&root->orphan_lock);
3020 3027
3021 if (trans && delete_item) 3028 if (delete_item) {
3022 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
3023
3024 if (release_rsv) {
3025 btrfs_orphan_release_metadata(inode);
3026 atomic_dec(&root->orphan_inodes); 3029 atomic_dec(&root->orphan_inodes);
3030 if (trans)
3031 ret = btrfs_del_orphan_item(trans, root,
3032 btrfs_ino(inode));
3027 } 3033 }
3028 3034
3035 if (release_rsv)
3036 btrfs_orphan_release_metadata(inode);
3037
3029 return ret; 3038 return ret;
3030} 3039}
3031 3040
@@ -3172,8 +3181,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3172 3181
3173 /* if we have links, this was a truncate, lets do that */ 3182 /* if we have links, this was a truncate, lets do that */
3174 if (inode->i_nlink) { 3183 if (inode->i_nlink) {
3175 if (!S_ISREG(inode->i_mode)) { 3184 if (WARN_ON(!S_ISREG(inode->i_mode))) {
3176 WARN_ON(1);
3177 iput(inode); 3185 iput(inode);
3178 continue; 3186 continue;
3179 } 3187 }
@@ -3636,7 +3644,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3636 int ret; 3644 int ret;
3637 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 3645 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
3638 if (!ret) { 3646 if (!ret) {
3639 btrfs_drop_nlink(inode); 3647 drop_nlink(inode);
3640 ret = btrfs_update_inode(trans, root, inode); 3648 ret = btrfs_update_inode(trans, root, inode);
3641 } 3649 }
3642 return ret; 3650 return ret;
@@ -4230,15 +4238,16 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4230 4238
4231 while (1) { 4239 while (1) {
4232 struct btrfs_ordered_extent *ordered; 4240 struct btrfs_ordered_extent *ordered;
4233 btrfs_wait_ordered_range(inode, hole_start, 4241
4234 block_end - hole_start);
4235 lock_extent_bits(io_tree, hole_start, block_end - 1, 0, 4242 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
4236 &cached_state); 4243 &cached_state);
4237 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 4244 ordered = btrfs_lookup_ordered_range(inode, hole_start,
4245 block_end - hole_start);
4238 if (!ordered) 4246 if (!ordered)
4239 break; 4247 break;
4240 unlock_extent_cached(io_tree, hole_start, block_end - 1, 4248 unlock_extent_cached(io_tree, hole_start, block_end - 1,
4241 &cached_state, GFP_NOFS); 4249 &cached_state, GFP_NOFS);
4250 btrfs_start_ordered_extent(inode, ordered, 1);
4242 btrfs_put_ordered_extent(ordered); 4251 btrfs_put_ordered_extent(ordered);
4243 } 4252 }
4244 4253
@@ -4472,8 +4481,10 @@ void btrfs_evict_inode(struct inode *inode)
4472 trace_btrfs_inode_evict(inode); 4481 trace_btrfs_inode_evict(inode);
4473 4482
4474 truncate_inode_pages(&inode->i_data, 0); 4483 truncate_inode_pages(&inode->i_data, 0);
4475 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 4484 if (inode->i_nlink &&
4476 btrfs_is_free_space_inode(inode))) 4485 ((btrfs_root_refs(&root->root_item) != 0 &&
4486 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
4487 btrfs_is_free_space_inode(inode)))
4477 goto no_delete; 4488 goto no_delete;
4478 4489
4479 if (is_bad_inode(inode)) { 4490 if (is_bad_inode(inode)) {
@@ -4490,7 +4501,8 @@ void btrfs_evict_inode(struct inode *inode)
4490 } 4501 }
4491 4502
4492 if (inode->i_nlink > 0) { 4503 if (inode->i_nlink > 0) {
4493 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 4504 BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
4505 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
4494 goto no_delete; 4506 goto no_delete;
4495 } 4507 }
4496 4508
@@ -4688,11 +4700,11 @@ static void inode_tree_add(struct inode *inode)
4688 struct btrfs_inode *entry; 4700 struct btrfs_inode *entry;
4689 struct rb_node **p; 4701 struct rb_node **p;
4690 struct rb_node *parent; 4702 struct rb_node *parent;
4703 struct rb_node *new = &BTRFS_I(inode)->rb_node;
4691 u64 ino = btrfs_ino(inode); 4704 u64 ino = btrfs_ino(inode);
4692 4705
4693 if (inode_unhashed(inode)) 4706 if (inode_unhashed(inode))
4694 return; 4707 return;
4695again:
4696 parent = NULL; 4708 parent = NULL;
4697 spin_lock(&root->inode_lock); 4709 spin_lock(&root->inode_lock);
4698 p = &root->inode_tree.rb_node; 4710 p = &root->inode_tree.rb_node;
@@ -4707,14 +4719,14 @@ again:
4707 else { 4719 else {
4708 WARN_ON(!(entry->vfs_inode.i_state & 4720 WARN_ON(!(entry->vfs_inode.i_state &
4709 (I_WILL_FREE | I_FREEING))); 4721 (I_WILL_FREE | I_FREEING)));
4710 rb_erase(parent, &root->inode_tree); 4722 rb_replace_node(parent, new, &root->inode_tree);
4711 RB_CLEAR_NODE(parent); 4723 RB_CLEAR_NODE(parent);
4712 spin_unlock(&root->inode_lock); 4724 spin_unlock(&root->inode_lock);
4713 goto again; 4725 return;
4714 } 4726 }
4715 } 4727 }
4716 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); 4728 rb_link_node(new, parent, p);
4717 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree); 4729 rb_insert_color(new, &root->inode_tree);
4718 spin_unlock(&root->inode_lock); 4730 spin_unlock(&root->inode_lock);
4719} 4731}
4720 4732
@@ -4731,14 +4743,7 @@ static void inode_tree_del(struct inode *inode)
4731 } 4743 }
4732 spin_unlock(&root->inode_lock); 4744 spin_unlock(&root->inode_lock);
4733 4745
4734 /* 4746 if (empty && btrfs_root_refs(&root->root_item) == 0) {
4735 * Free space cache has inodes in the tree root, but the tree root has a
4736 * root_refs of 0, so this could end up dropping the tree root as a
4737 * snapshot, so we need the extra !root->fs_info->tree_root check to
4738 * make sure we don't drop it.
4739 */
4740 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
4741 root != root->fs_info->tree_root) {
4742 synchronize_srcu(&root->fs_info->subvol_srcu); 4747 synchronize_srcu(&root->fs_info->subvol_srcu);
4743 spin_lock(&root->inode_lock); 4748 spin_lock(&root->inode_lock);
4744 empty = RB_EMPTY_ROOT(&root->inode_tree); 4749 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4831,10 +4836,12 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
4831{ 4836{
4832 struct inode *inode; 4837 struct inode *inode;
4833 struct btrfs_iget_args args; 4838 struct btrfs_iget_args args;
4839 unsigned long hashval = btrfs_inode_hash(objectid, root);
4840
4834 args.ino = objectid; 4841 args.ino = objectid;
4835 args.root = root; 4842 args.root = root;
4836 4843
4837 inode = iget5_locked(s, objectid, btrfs_find_actor, 4844 inode = iget5_locked(s, hashval, btrfs_find_actor,
4838 btrfs_init_locked_inode, 4845 btrfs_init_locked_inode,
4839 (void *)&args); 4846 (void *)&args);
4840 return inode; 4847 return inode;
@@ -5048,7 +5055,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5048 continue; 5055 continue;
5049 } 5056 }
5050 5057
5051 item = btrfs_item_nr(leaf, slot); 5058 item = btrfs_item_nr(slot);
5052 btrfs_item_key_to_cpu(leaf, &found_key, slot); 5059 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5053 5060
5054 if (found_key.objectid != key.objectid) 5061 if (found_key.objectid != key.objectid)
@@ -5454,7 +5461,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5454 BTRFS_INODE_NODATASUM; 5461 BTRFS_INODE_NODATASUM;
5455 } 5462 }
5456 5463
5457 insert_inode_hash(inode); 5464 btrfs_insert_inode_hash(inode);
5458 inode_tree_add(inode); 5465 inode_tree_add(inode);
5459 5466
5460 trace_btrfs_inode_new(inode); 5467 trace_btrfs_inode_new(inode);
@@ -5730,7 +5737,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
5730 goto fail; 5737 goto fail;
5731 } 5738 }
5732 5739
5733 btrfs_inc_nlink(inode); 5740 inc_nlink(inode);
5734 inode_inc_iversion(inode); 5741 inode_inc_iversion(inode);
5735 inode->i_ctime = CURRENT_TIME; 5742 inode->i_ctime = CURRENT_TIME;
5736 ihold(inode); 5743 ihold(inode);
@@ -5860,7 +5867,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
5860 compress_type = btrfs_file_extent_compression(leaf, item); 5867 compress_type = btrfs_file_extent_compression(leaf, item);
5861 max_size = btrfs_file_extent_ram_bytes(leaf, item); 5868 max_size = btrfs_file_extent_ram_bytes(leaf, item);
5862 inline_size = btrfs_file_extent_inline_item_len(leaf, 5869 inline_size = btrfs_file_extent_inline_item_len(leaf,
5863 btrfs_item_nr(leaf, path->slots[0])); 5870 btrfs_item_nr(path->slots[0]));
5864 tmp = kmalloc(inline_size, GFP_NOFS); 5871 tmp = kmalloc(inline_size, GFP_NOFS);
5865 if (!tmp) 5872 if (!tmp)
5866 return -ENOMEM; 5873 return -ENOMEM;
@@ -5974,7 +5981,14 @@ again:
5974 found_type = btrfs_key_type(&found_key); 5981 found_type = btrfs_key_type(&found_key);
5975 if (found_key.objectid != objectid || 5982 if (found_key.objectid != objectid ||
5976 found_type != BTRFS_EXTENT_DATA_KEY) { 5983 found_type != BTRFS_EXTENT_DATA_KEY) {
5977 goto not_found; 5984 /*
5985 * If we backup past the first extent we want to move forward
5986 * and see if there is an extent in front of us, otherwise we'll
5987 * say there is a hole for our whole search range which can
5988 * cause problems.
5989 */
5990 extent_end = start;
5991 goto next;
5978 } 5992 }
5979 5993
5980 found_type = btrfs_file_extent_type(leaf, item); 5994 found_type = btrfs_file_extent_type(leaf, item);
@@ -5989,7 +6003,7 @@ again:
5989 size = btrfs_file_extent_inline_len(leaf, item); 6003 size = btrfs_file_extent_inline_len(leaf, item);
5990 extent_end = ALIGN(extent_start + size, root->sectorsize); 6004 extent_end = ALIGN(extent_start + size, root->sectorsize);
5991 } 6005 }
5992 6006next:
5993 if (start >= extent_end) { 6007 if (start >= extent_end) {
5994 path->slots[0]++; 6008 path->slots[0]++;
5995 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 6009 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -6173,8 +6187,7 @@ insert:
6173 write_unlock(&em_tree->lock); 6187 write_unlock(&em_tree->lock);
6174out: 6188out:
6175 6189
6176 if (em) 6190 trace_btrfs_get_extent(root, em);
6177 trace_btrfs_get_extent(root, em);
6178 6191
6179 if (path) 6192 if (path)
6180 btrfs_free_path(path); 6193 btrfs_free_path(path);
@@ -6249,7 +6262,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
6249 /* adjust the range_start to make sure it doesn't 6262 /* adjust the range_start to make sure it doesn't
6250 * go backwards from the start they passed in 6263 * go backwards from the start they passed in
6251 */ 6264 */
6252 range_start = max(start,range_start); 6265 range_start = max(start, range_start);
6253 found = found_end - range_start; 6266 found = found_end - range_start;
6254 6267
6255 if (found > 0) { 6268 if (found > 0) {
@@ -6437,6 +6450,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6437 6450
6438 if (btrfs_extent_readonly(root, disk_bytenr)) 6451 if (btrfs_extent_readonly(root, disk_bytenr))
6439 goto out; 6452 goto out;
6453 btrfs_release_path(path);
6440 6454
6441 /* 6455 /*
6442 * look for other files referencing this extent, if we 6456 * look for other files referencing this extent, if we
@@ -7052,7 +7066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7052 } 7066 }
7053 } else { 7067 } else {
7054 submit_len += bvec->bv_len; 7068 submit_len += bvec->bv_len;
7055 nr_pages ++; 7069 nr_pages++;
7056 bvec++; 7070 bvec++;
7057 } 7071 }
7058 } 7072 }
@@ -7221,7 +7235,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7221 * outstanding dirty pages are on disk. 7235 * outstanding dirty pages are on disk.
7222 */ 7236 */
7223 count = iov_length(iov, nr_segs); 7237 count = iov_length(iov, nr_segs);
7224 btrfs_wait_ordered_range(inode, offset, count); 7238 ret = btrfs_wait_ordered_range(inode, offset, count);
7239 if (ret)
7240 return ret;
7225 7241
7226 if (rw & WRITE) { 7242 if (rw & WRITE) {
7227 /* 7243 /*
@@ -7562,7 +7578,10 @@ static int btrfs_truncate(struct inode *inode)
7562 u64 mask = root->sectorsize - 1; 7578 u64 mask = root->sectorsize - 1;
7563 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7579 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7564 7580
7565 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7581 ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
7582 (u64)-1);
7583 if (ret)
7584 return ret;
7566 7585
7567 /* 7586 /*
7568 * Yes ladies and gentelment, this is indeed ugly. The fact is we have 7587 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7786,6 +7805,14 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
7786 return inode; 7805 return inode;
7787} 7806}
7788 7807
7808#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
7809void btrfs_test_destroy_inode(struct inode *inode)
7810{
7811 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
7812 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
7813}
7814#endif
7815
7789static void btrfs_i_callback(struct rcu_head *head) 7816static void btrfs_i_callback(struct rcu_head *head)
7790{ 7817{
7791 struct inode *inode = container_of(head, struct inode, i_rcu); 7818 struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -7856,8 +7883,7 @@ int btrfs_drop_inode(struct inode *inode)
7856 return 1; 7883 return 1;
7857 7884
7858 /* the snap/subvol tree is on deleting */ 7885 /* the snap/subvol tree is on deleting */
7859 if (btrfs_root_refs(&root->root_item) == 0 && 7886 if (btrfs_root_refs(&root->root_item) == 0)
7860 root != root->fs_info->tree_root)
7861 return 1; 7887 return 1;
7862 else 7888 else
7863 return generic_drop_inode(inode); 7889 return generic_drop_inode(inode);
@@ -7986,7 +8012,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7986 8012
7987 8013
7988 /* check for collisions, even if the name isn't there */ 8014 /* check for collisions, even if the name isn't there */
7989 ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, 8015 ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
7990 new_dentry->d_name.name, 8016 new_dentry->d_name.name,
7991 new_dentry->d_name.len); 8017 new_dentry->d_name.len);
7992 8018
@@ -7994,8 +8020,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7994 if (ret == -EEXIST) { 8020 if (ret == -EEXIST) {
7995 /* we shouldn't get 8021 /* we shouldn't get
7996 * eexist without a new_inode */ 8022 * eexist without a new_inode */
7997 if (!new_inode) { 8023 if (WARN_ON(!new_inode)) {
7998 WARN_ON(1);
7999 return ret; 8024 return ret;
8000 } 8025 }
8001 } else { 8026 } else {
@@ -8143,18 +8168,24 @@ out_notrans:
8143static void btrfs_run_delalloc_work(struct btrfs_work *work) 8168static void btrfs_run_delalloc_work(struct btrfs_work *work)
8144{ 8169{
8145 struct btrfs_delalloc_work *delalloc_work; 8170 struct btrfs_delalloc_work *delalloc_work;
8171 struct inode *inode;
8146 8172
8147 delalloc_work = container_of(work, struct btrfs_delalloc_work, 8173 delalloc_work = container_of(work, struct btrfs_delalloc_work,
8148 work); 8174 work);
8149 if (delalloc_work->wait) 8175 inode = delalloc_work->inode;
8150 btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); 8176 if (delalloc_work->wait) {
8151 else 8177 btrfs_wait_ordered_range(inode, 0, (u64)-1);
8152 filemap_flush(delalloc_work->inode->i_mapping); 8178 } else {
8179 filemap_flush(inode->i_mapping);
8180 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
8181 &BTRFS_I(inode)->runtime_flags))
8182 filemap_flush(inode->i_mapping);
8183 }
8153 8184
8154 if (delalloc_work->delay_iput) 8185 if (delalloc_work->delay_iput)
8155 btrfs_add_delayed_iput(delalloc_work->inode); 8186 btrfs_add_delayed_iput(inode);
8156 else 8187 else
8157 iput(delalloc_work->inode); 8188 iput(inode);
8158 complete(&delalloc_work->completion); 8189 complete(&delalloc_work->completion);
8159} 8190}
8160 8191
@@ -8216,6 +8247,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8216 8247
8217 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 8248 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8218 if (unlikely(!work)) { 8249 if (unlikely(!work)) {
8250 if (delay_iput)
8251 btrfs_add_delayed_iput(inode);
8252 else
8253 iput(inode);
8219 ret = -ENOMEM; 8254 ret = -ENOMEM;
8220 goto out; 8255 goto out;
8221 } 8256 }
@@ -8271,8 +8306,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8271 return ret; 8306 return ret;
8272} 8307}
8273 8308
8274int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, 8309int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
8275 int delay_iput)
8276{ 8310{
8277 struct btrfs_root *root; 8311 struct btrfs_root *root;
8278 struct list_head splice; 8312 struct list_head splice;
@@ -8332,14 +8366,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8332 int err; 8366 int err;
8333 int drop_inode = 0; 8367 int drop_inode = 0;
8334 u64 objectid; 8368 u64 objectid;
8335 u64 index = 0 ; 8369 u64 index = 0;
8336 int name_len; 8370 int name_len;
8337 int datasize; 8371 int datasize;
8338 unsigned long ptr; 8372 unsigned long ptr;
8339 struct btrfs_file_extent_item *ei; 8373 struct btrfs_file_extent_item *ei;
8340 struct extent_buffer *leaf; 8374 struct extent_buffer *leaf;
8341 8375
8342 name_len = strlen(symname) + 1; 8376 name_len = strlen(symname);
8343 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 8377 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
8344 return -ENAMETOOLONG; 8378 return -ENAMETOOLONG;
8345 8379
@@ -8427,7 +8461,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8427 inode->i_mapping->a_ops = &btrfs_symlink_aops; 8461 inode->i_mapping->a_ops = &btrfs_symlink_aops;
8428 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 8462 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8429 inode_set_bytes(inode, name_len); 8463 inode_set_bytes(inode, name_len);
8430 btrfs_i_size_write(inode, name_len - 1); 8464 btrfs_i_size_write(inode, name_len);
8431 err = btrfs_update_inode(trans, root, inode); 8465 err = btrfs_update_inode(trans, root, inode);
8432 if (err) 8466 if (err)
8433 drop_inode = 1; 8467 drop_inode = 1;
@@ -8486,6 +8520,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8486 ins.offset, 0, 0, 0, 8520 ins.offset, 0, 0, 0,
8487 BTRFS_FILE_EXTENT_PREALLOC); 8521 BTRFS_FILE_EXTENT_PREALLOC);
8488 if (ret) { 8522 if (ret) {
8523 btrfs_free_reserved_extent(root, ins.objectid,
8524 ins.offset);
8489 btrfs_abort_transaction(trans, root, ret); 8525 btrfs_abort_transaction(trans, root, ret);
8490 if (own_trans) 8526 if (own_trans)
8491 btrfs_end_transaction(trans, root); 8527 btrfs_end_transaction(trans, root);
@@ -8613,11 +8649,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
8613 .removexattr = btrfs_removexattr, 8649 .removexattr = btrfs_removexattr,
8614 .permission = btrfs_permission, 8650 .permission = btrfs_permission,
8615 .get_acl = btrfs_get_acl, 8651 .get_acl = btrfs_get_acl,
8652 .update_time = btrfs_update_time,
8616}; 8653};
8617static const struct inode_operations btrfs_dir_ro_inode_operations = { 8654static const struct inode_operations btrfs_dir_ro_inode_operations = {
8618 .lookup = btrfs_lookup, 8655 .lookup = btrfs_lookup,
8619 .permission = btrfs_permission, 8656 .permission = btrfs_permission,
8620 .get_acl = btrfs_get_acl, 8657 .get_acl = btrfs_get_acl,
8658 .update_time = btrfs_update_time,
8621}; 8659};
8622 8660
8623static const struct file_operations btrfs_dir_file_operations = { 8661static const struct file_operations btrfs_dir_file_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1a5b9462dd9a..a111622598b0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -44,7 +44,6 @@
44#include <linux/uuid.h> 44#include <linux/uuid.h>
45#include <linux/btrfs.h> 45#include <linux/btrfs.h>
46#include <linux/uaccess.h> 46#include <linux/uaccess.h>
47#include "compat.h"
48#include "ctree.h" 47#include "ctree.h"
49#include "disk-io.h" 48#include "disk-io.h"
50#include "transaction.h" 49#include "transaction.h"
@@ -321,7 +320,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
321 320
322static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 321static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
323{ 322{
324 struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); 323 struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
325 struct btrfs_device *device; 324 struct btrfs_device *device;
326 struct request_queue *q; 325 struct request_queue *q;
327 struct fstrim_range range; 326 struct fstrim_range range;
@@ -369,9 +368,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
369 368
370int btrfs_is_empty_uuid(u8 *uuid) 369int btrfs_is_empty_uuid(u8 *uuid)
371{ 370{
372 static char empty_uuid[BTRFS_UUID_SIZE] = {0}; 371 int i;
373 372
374 return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); 373 for (i = 0; i < BTRFS_UUID_SIZE; i++) {
374 if (uuid[i])
375 return 0;
376 }
377 return 1;
375} 378}
376 379
377static noinline int create_subvol(struct inode *dir, 380static noinline int create_subvol(struct inode *dir,
@@ -436,7 +439,7 @@ static noinline int create_subvol(struct inode *dir,
436 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 439 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
437 btrfs_set_header_owner(leaf, objectid); 440 btrfs_set_header_owner(leaf, objectid);
438 441
439 write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), 442 write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
440 BTRFS_FSID_SIZE); 443 BTRFS_FSID_SIZE);
441 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 444 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
442 btrfs_header_chunk_tree_uuid(leaf), 445 btrfs_header_chunk_tree_uuid(leaf),
@@ -574,7 +577,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
574 if (ret) 577 if (ret)
575 return ret; 578 return ret;
576 579
577 btrfs_wait_ordered_extents(root, 0); 580 btrfs_wait_ordered_extents(root, -1);
578 581
579 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 582 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
580 if (!pending_snapshot) 583 if (!pending_snapshot)
@@ -688,7 +691,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
688 * nfs_async_unlink(). 691 * nfs_async_unlink().
689 */ 692 */
690 693
691static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) 694static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
692{ 695{
693 int error; 696 int error;
694 697
@@ -842,7 +845,6 @@ static int find_new_extents(struct btrfs_root *root,
842{ 845{
843 struct btrfs_path *path; 846 struct btrfs_path *path;
844 struct btrfs_key min_key; 847 struct btrfs_key min_key;
845 struct btrfs_key max_key;
846 struct extent_buffer *leaf; 848 struct extent_buffer *leaf;
847 struct btrfs_file_extent_item *extent; 849 struct btrfs_file_extent_item *extent;
848 int type; 850 int type;
@@ -857,15 +859,10 @@ static int find_new_extents(struct btrfs_root *root,
857 min_key.type = BTRFS_EXTENT_DATA_KEY; 859 min_key.type = BTRFS_EXTENT_DATA_KEY;
858 min_key.offset = *off; 860 min_key.offset = *off;
859 861
860 max_key.objectid = ino;
861 max_key.type = (u8)-1;
862 max_key.offset = (u64)-1;
863
864 path->keep_locks = 1; 862 path->keep_locks = 1;
865 863
866 while(1) { 864 while (1) {
867 ret = btrfs_search_forward(root, &min_key, &max_key, 865 ret = btrfs_search_forward(root, &min_key, path, newer_than);
868 path, newer_than);
869 if (ret != 0) 866 if (ret != 0)
870 goto none; 867 goto none;
871 if (min_key.objectid != ino) 868 if (min_key.objectid != ino)
@@ -1206,7 +1203,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1206 ra = &file->f_ra; 1203 ra = &file->f_ra;
1207 } 1204 }
1208 1205
1209 pages = kmalloc(sizeof(struct page *) * max_cluster, 1206 pages = kmalloc_array(max_cluster, sizeof(struct page *),
1210 GFP_NOFS); 1207 GFP_NOFS);
1211 if (!pages) { 1208 if (!pages) {
1212 ret = -ENOMEM; 1209 ret = -ENOMEM;
@@ -1893,7 +1890,6 @@ static noinline int search_ioctl(struct inode *inode,
1893{ 1890{
1894 struct btrfs_root *root; 1891 struct btrfs_root *root;
1895 struct btrfs_key key; 1892 struct btrfs_key key;
1896 struct btrfs_key max_key;
1897 struct btrfs_path *path; 1893 struct btrfs_path *path;
1898 struct btrfs_ioctl_search_key *sk = &args->key; 1894 struct btrfs_ioctl_search_key *sk = &args->key;
1899 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 1895 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
@@ -1925,15 +1921,10 @@ static noinline int search_ioctl(struct inode *inode,
1925 key.type = sk->min_type; 1921 key.type = sk->min_type;
1926 key.offset = sk->min_offset; 1922 key.offset = sk->min_offset;
1927 1923
1928 max_key.objectid = sk->max_objectid;
1929 max_key.type = sk->max_type;
1930 max_key.offset = sk->max_offset;
1931
1932 path->keep_locks = 1; 1924 path->keep_locks = 1;
1933 1925
1934 while(1) { 1926 while (1) {
1935 ret = btrfs_search_forward(root, &key, &max_key, path, 1927 ret = btrfs_search_forward(root, &key, path, sk->min_transid);
1936 sk->min_transid);
1937 if (ret != 0) { 1928 if (ret != 0) {
1938 if (ret > 0) 1929 if (ret > 0)
1939 ret = 0; 1930 ret = 0;
@@ -2018,7 +2009,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2018 key.type = BTRFS_INODE_REF_KEY; 2009 key.type = BTRFS_INODE_REF_KEY;
2019 key.offset = (u64)-1; 2010 key.offset = (u64)-1;
2020 2011
2021 while(1) { 2012 while (1) {
2022 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2013 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2023 if (ret < 0) 2014 if (ret < 0)
2024 goto out; 2015 goto out;
@@ -2047,7 +2038,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2047 } 2038 }
2048 2039
2049 *(ptr + len) = '/'; 2040 *(ptr + len) = '/';
2050 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 2041 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
2051 2042
2052 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2043 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
2053 break; 2044 break;
@@ -2058,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2058 dirid = key.objectid; 2049 dirid = key.objectid;
2059 } 2050 }
2060 memmove(name, ptr, total_len); 2051 memmove(name, ptr, total_len);
2061 name[total_len]='\0'; 2052 name[total_len] = '\0';
2062 ret = 0; 2053 ret = 0;
2063out: 2054out:
2064 btrfs_free_path(path); 2055 btrfs_free_path(path);
@@ -2098,7 +2089,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
2098static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2089static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2099 void __user *arg) 2090 void __user *arg)
2100{ 2091{
2101 struct dentry *parent = fdentry(file); 2092 struct dentry *parent = file->f_path.dentry;
2102 struct dentry *dentry; 2093 struct dentry *dentry;
2103 struct inode *dir = parent->d_inode; 2094 struct inode *dir = parent->d_inode;
2104 struct inode *inode; 2095 struct inode *inode;
@@ -2144,7 +2135,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2144 2135
2145 inode = dentry->d_inode; 2136 inode = dentry->d_inode;
2146 dest = BTRFS_I(inode)->root; 2137 dest = BTRFS_I(inode)->root;
2147 if (!capable(CAP_SYS_ADMIN)){ 2138 if (!capable(CAP_SYS_ADMIN)) {
2148 /* 2139 /*
2149 * Regular user. Only allow this with a special mount 2140 * Regular user. Only allow this with a special mount
2150 * option, when the user has write+exec access to the 2141 * option, when the user has write+exec access to the
@@ -2696,9 +2687,9 @@ out_unlock:
2696static long btrfs_ioctl_file_extent_same(struct file *file, 2687static long btrfs_ioctl_file_extent_same(struct file *file,
2697 void __user *argp) 2688 void __user *argp)
2698{ 2689{
2699 struct btrfs_ioctl_same_args *args = argp; 2690 struct btrfs_ioctl_same_args tmp;
2700 struct btrfs_ioctl_same_args same; 2691 struct btrfs_ioctl_same_args *same;
2701 struct btrfs_ioctl_same_extent_info info; 2692 struct btrfs_ioctl_same_extent_info *info;
2702 struct inode *src = file->f_dentry->d_inode; 2693 struct inode *src = file->f_dentry->d_inode;
2703 struct file *dst_file = NULL; 2694 struct file *dst_file = NULL;
2704 struct inode *dst; 2695 struct inode *dst;
@@ -2706,6 +2697,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2706 u64 len; 2697 u64 len;
2707 int i; 2698 int i;
2708 int ret; 2699 int ret;
2700 unsigned long size;
2709 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2701 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2710 bool is_admin = capable(CAP_SYS_ADMIN); 2702 bool is_admin = capable(CAP_SYS_ADMIN);
2711 2703
@@ -2716,15 +2708,25 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2716 if (ret) 2708 if (ret)
2717 return ret; 2709 return ret;
2718 2710
2719 if (copy_from_user(&same, 2711 if (copy_from_user(&tmp,
2720 (struct btrfs_ioctl_same_args __user *)argp, 2712 (struct btrfs_ioctl_same_args __user *)argp,
2721 sizeof(same))) { 2713 sizeof(tmp))) {
2722 ret = -EFAULT; 2714 ret = -EFAULT;
2723 goto out; 2715 goto out;
2724 } 2716 }
2725 2717
2726 off = same.logical_offset; 2718 size = sizeof(tmp) +
2727 len = same.length; 2719 tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
2720
2721 same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
2722
2723 if (IS_ERR(same)) {
2724 ret = PTR_ERR(same);
2725 goto out;
2726 }
2727
2728 off = same->logical_offset;
2729 len = same->length;
2728 2730
2729 /* 2731 /*
2730 * Limit the total length we will dedupe for each operation. 2732 * Limit the total length we will dedupe for each operation.
@@ -2752,27 +2754,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2752 if (!S_ISREG(src->i_mode)) 2754 if (!S_ISREG(src->i_mode))
2753 goto out; 2755 goto out;
2754 2756
2755 ret = 0; 2757 /* pre-format output fields to sane values */
2756 for (i = 0; i < same.dest_count; i++) { 2758 for (i = 0; i < same->dest_count; i++) {
2757 if (copy_from_user(&info, &args->info[i], sizeof(info))) { 2759 same->info[i].bytes_deduped = 0ULL;
2758 ret = -EFAULT; 2760 same->info[i].status = 0;
2759 goto out; 2761 }
2760 }
2761 2762
2762 info.bytes_deduped = 0; 2763 ret = 0;
2764 for (i = 0; i < same->dest_count; i++) {
2765 info = &same->info[i];
2763 2766
2764 dst_file = fget(info.fd); 2767 dst_file = fget(info->fd);
2765 if (!dst_file) { 2768 if (!dst_file) {
2766 info.status = -EBADF; 2769 info->status = -EBADF;
2767 goto next; 2770 goto next;
2768 } 2771 }
2769 2772
2770 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2773 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2771 info.status = -EINVAL; 2774 info->status = -EINVAL;
2772 goto next; 2775 goto next;
2773 } 2776 }
2774 2777
2775 info.status = -EXDEV; 2778 info->status = -EXDEV;
2776 if (file->f_path.mnt != dst_file->f_path.mnt) 2779 if (file->f_path.mnt != dst_file->f_path.mnt)
2777 goto next; 2780 goto next;
2778 2781
@@ -2781,32 +2784,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2781 goto next; 2784 goto next;
2782 2785
2783 if (S_ISDIR(dst->i_mode)) { 2786 if (S_ISDIR(dst->i_mode)) {
2784 info.status = -EISDIR; 2787 info->status = -EISDIR;
2785 goto next; 2788 goto next;
2786 } 2789 }
2787 2790
2788 if (!S_ISREG(dst->i_mode)) { 2791 if (!S_ISREG(dst->i_mode)) {
2789 info.status = -EACCES; 2792 info->status = -EACCES;
2790 goto next; 2793 goto next;
2791 } 2794 }
2792 2795
2793 info.status = btrfs_extent_same(src, off, len, dst, 2796 info->status = btrfs_extent_same(src, off, len, dst,
2794 info.logical_offset); 2797 info->logical_offset);
2795 if (info.status == 0) 2798 if (info->status == 0)
2796 info.bytes_deduped += len; 2799 info->bytes_deduped += len;
2797 2800
2798next: 2801next:
2799 if (dst_file) 2802 if (dst_file)
2800 fput(dst_file); 2803 fput(dst_file);
2801
2802 if (__put_user_unaligned(info.status, &args->info[i].status) ||
2803 __put_user_unaligned(info.bytes_deduped,
2804 &args->info[i].bytes_deduped)) {
2805 ret = -EFAULT;
2806 goto out;
2807 }
2808 } 2804 }
2809 2805
2806 ret = copy_to_user(argp, same, size);
2807 if (ret)
2808 ret = -EFAULT;
2809
2810out: 2810out:
2811 mnt_drop_write_file(file); 2811 mnt_drop_write_file(file);
2812 return ret; 2812 return ret;
@@ -3105,7 +3105,7 @@ out:
3105static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3105static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3106 u64 off, u64 olen, u64 destoff) 3106 u64 off, u64 olen, u64 destoff)
3107{ 3107{
3108 struct inode *inode = fdentry(file)->d_inode; 3108 struct inode *inode = file_inode(file);
3109 struct btrfs_root *root = BTRFS_I(inode)->root; 3109 struct btrfs_root *root = BTRFS_I(inode)->root;
3110 struct fd src_file; 3110 struct fd src_file;
3111 struct inode *src; 3111 struct inode *src;
@@ -3310,7 +3310,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
3310 } 3310 }
3311 3311
3312 if (!objectid) 3312 if (!objectid)
3313 objectid = root->root_key.objectid; 3313 objectid = BTRFS_FS_TREE_OBJECTID;
3314 3314
3315 location.objectid = objectid; 3315 location.objectid = objectid;
3316 location.type = BTRFS_ROOT_ITEM_KEY; 3316 location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3665,9 +3665,10 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
3665 3665
3666 switch (p->cmd) { 3666 switch (p->cmd) {
3667 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3667 case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3668 if (root->fs_info->sb->s_flags & MS_RDONLY) 3668 if (root->fs_info->sb->s_flags & MS_RDONLY) {
3669 return -EROFS; 3669 ret = -EROFS;
3670 3670 goto out;
3671 }
3671 if (atomic_xchg( 3672 if (atomic_xchg(
3672 &root->fs_info->mutually_exclusive_operation_running, 3673 &root->fs_info->mutually_exclusive_operation_running,
3673 1)) { 3674 1)) {
@@ -3693,7 +3694,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
3693 3694
3694 if (copy_to_user(arg, p, sizeof(*p))) 3695 if (copy_to_user(arg, p, sizeof(*p)))
3695 ret = -EFAULT; 3696 ret = -EFAULT;
3696 3697out:
3697 kfree(p); 3698 kfree(p);
3698 return ret; 3699 return ret;
3699} 3700}
@@ -4303,7 +4304,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
4303 4304
4304static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) 4305static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
4305{ 4306{
4306 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 4307 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4307 4308
4308 if (!capable(CAP_SYS_ADMIN)) 4309 if (!capable(CAP_SYS_ADMIN))
4309 return -EPERM; 4310 return -EPERM;
@@ -4543,9 +4544,15 @@ long btrfs_ioctl(struct file *file, unsigned int
4543 return btrfs_ioctl_logical_to_ino(root, argp); 4544 return btrfs_ioctl_logical_to_ino(root, argp);
4544 case BTRFS_IOC_SPACE_INFO: 4545 case BTRFS_IOC_SPACE_INFO:
4545 return btrfs_ioctl_space_info(root, argp); 4546 return btrfs_ioctl_space_info(root, argp);
4546 case BTRFS_IOC_SYNC: 4547 case BTRFS_IOC_SYNC: {
4547 btrfs_sync_fs(file->f_dentry->d_sb, 1); 4548 int ret;
4548 return 0; 4549
4550 ret = btrfs_start_delalloc_roots(root->fs_info, 0);
4551 if (ret)
4552 return ret;
4553 ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
4554 return ret;
4555 }
4549 case BTRFS_IOC_START_SYNC: 4556 case BTRFS_IOC_START_SYNC:
4550 return btrfs_ioctl_start_sync(root, argp); 4557 return btrfs_ioctl_start_sync(root, argp);
4551 case BTRFS_IOC_WAIT_SYNC: 4558 case BTRFS_IOC_WAIT_SYNC:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 966b413a33b8..69582d5b69d1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -537,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
537 */ 537 */
538 if (RB_EMPTY_ROOT(&tree->tree) && 538 if (RB_EMPTY_ROOT(&tree->tree) &&
539 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 539 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
540 spin_lock(&root->fs_info->ordered_root_lock);
540 list_del_init(&BTRFS_I(inode)->ordered_operations); 541 list_del_init(&BTRFS_I(inode)->ordered_operations);
542 spin_unlock(&root->fs_info->ordered_root_lock);
541 } 543 }
542 544
543 if (!root->nr_ordered_extents) { 545 if (!root->nr_ordered_extents) {
@@ -563,11 +565,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
563 * wait for all the ordered extents in a root. This is done when balancing 565 * wait for all the ordered extents in a root. This is done when balancing
564 * space between drives. 566 * space between drives.
565 */ 567 */
566void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) 568int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
567{ 569{
568 struct list_head splice, works; 570 struct list_head splice, works;
569 struct btrfs_ordered_extent *ordered, *next; 571 struct btrfs_ordered_extent *ordered, *next;
570 struct inode *inode; 572 int count = 0;
571 573
572 INIT_LIST_HEAD(&splice); 574 INIT_LIST_HEAD(&splice);
573 INIT_LIST_HEAD(&works); 575 INIT_LIST_HEAD(&works);
@@ -575,20 +577,11 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
575 mutex_lock(&root->fs_info->ordered_operations_mutex); 577 mutex_lock(&root->fs_info->ordered_operations_mutex);
576 spin_lock(&root->ordered_extent_lock); 578 spin_lock(&root->ordered_extent_lock);
577 list_splice_init(&root->ordered_extents, &splice); 579 list_splice_init(&root->ordered_extents, &splice);
578 while (!list_empty(&splice)) { 580 while (!list_empty(&splice) && nr) {
579 ordered = list_first_entry(&splice, struct btrfs_ordered_extent, 581 ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
580 root_extent_list); 582 root_extent_list);
581 list_move_tail(&ordered->root_extent_list, 583 list_move_tail(&ordered->root_extent_list,
582 &root->ordered_extents); 584 &root->ordered_extents);
583 /*
584 * the inode may be getting freed (in sys_unlink path).
585 */
586 inode = igrab(ordered->inode);
587 if (!inode) {
588 cond_resched_lock(&root->ordered_extent_lock);
589 continue;
590 }
591
592 atomic_inc(&ordered->refs); 585 atomic_inc(&ordered->refs);
593 spin_unlock(&root->ordered_extent_lock); 586 spin_unlock(&root->ordered_extent_lock);
594 587
@@ -599,36 +592,35 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
599 592
600 cond_resched(); 593 cond_resched();
601 spin_lock(&root->ordered_extent_lock); 594 spin_lock(&root->ordered_extent_lock);
595 if (nr != -1)
596 nr--;
597 count++;
602 } 598 }
599 list_splice_tail(&splice, &root->ordered_extents);
603 spin_unlock(&root->ordered_extent_lock); 600 spin_unlock(&root->ordered_extent_lock);
604 601
605 list_for_each_entry_safe(ordered, next, &works, work_list) { 602 list_for_each_entry_safe(ordered, next, &works, work_list) {
606 list_del_init(&ordered->work_list); 603 list_del_init(&ordered->work_list);
607 wait_for_completion(&ordered->completion); 604 wait_for_completion(&ordered->completion);
608
609 inode = ordered->inode;
610 btrfs_put_ordered_extent(ordered); 605 btrfs_put_ordered_extent(ordered);
611 if (delay_iput)
612 btrfs_add_delayed_iput(inode);
613 else
614 iput(inode);
615
616 cond_resched(); 606 cond_resched();
617 } 607 }
618 mutex_unlock(&root->fs_info->ordered_operations_mutex); 608 mutex_unlock(&root->fs_info->ordered_operations_mutex);
609
610 return count;
619} 611}
620 612
621void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 613void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
622 int delay_iput)
623{ 614{
624 struct btrfs_root *root; 615 struct btrfs_root *root;
625 struct list_head splice; 616 struct list_head splice;
617 int done;
626 618
627 INIT_LIST_HEAD(&splice); 619 INIT_LIST_HEAD(&splice);
628 620
629 spin_lock(&fs_info->ordered_root_lock); 621 spin_lock(&fs_info->ordered_root_lock);
630 list_splice_init(&fs_info->ordered_roots, &splice); 622 list_splice_init(&fs_info->ordered_roots, &splice);
631 while (!list_empty(&splice)) { 623 while (!list_empty(&splice) && nr) {
632 root = list_first_entry(&splice, struct btrfs_root, 624 root = list_first_entry(&splice, struct btrfs_root,
633 ordered_root); 625 ordered_root);
634 root = btrfs_grab_fs_root(root); 626 root = btrfs_grab_fs_root(root);
@@ -637,11 +629,16 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
637 &fs_info->ordered_roots); 629 &fs_info->ordered_roots);
638 spin_unlock(&fs_info->ordered_root_lock); 630 spin_unlock(&fs_info->ordered_root_lock);
639 631
640 btrfs_wait_ordered_extents(root, delay_iput); 632 done = btrfs_wait_ordered_extents(root, nr);
641 btrfs_put_fs_root(root); 633 btrfs_put_fs_root(root);
642 634
643 spin_lock(&fs_info->ordered_root_lock); 635 spin_lock(&fs_info->ordered_root_lock);
636 if (nr != -1) {
637 nr -= done;
638 WARN_ON(nr < 0);
639 }
644 } 640 }
641 list_splice_tail(&splice, &fs_info->ordered_roots);
645 spin_unlock(&fs_info->ordered_root_lock); 642 spin_unlock(&fs_info->ordered_root_lock);
646} 643}
647 644
@@ -752,8 +749,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
752/* 749/*
753 * Used to wait on ordered extents across a large range of bytes. 750 * Used to wait on ordered extents across a large range of bytes.
754 */ 751 */
755void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 752int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
756{ 753{
754 int ret = 0;
757 u64 end; 755 u64 end;
758 u64 orig_end; 756 u64 orig_end;
759 struct btrfs_ordered_extent *ordered; 757 struct btrfs_ordered_extent *ordered;
@@ -769,8 +767,9 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
769 /* start IO across the range first to instantiate any delalloc 767 /* start IO across the range first to instantiate any delalloc
770 * extents 768 * extents
771 */ 769 */
772 filemap_fdatawrite_range(inode->i_mapping, start, orig_end); 770 ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
773 771 if (ret)
772 return ret;
774 /* 773 /*
775 * So with compression we will find and lock a dirty page and clear the 774 * So with compression we will find and lock a dirty page and clear the
776 * first one as dirty, setup an async extent, and immediately return 775 * first one as dirty, setup an async extent, and immediately return
@@ -786,10 +785,15 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
786 * right and you are wrong. 785 * right and you are wrong.
787 */ 786 */
788 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 787 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
789 &BTRFS_I(inode)->runtime_flags)) 788 &BTRFS_I(inode)->runtime_flags)) {
790 filemap_fdatawrite_range(inode->i_mapping, start, orig_end); 789 ret = filemap_fdatawrite_range(inode->i_mapping, start,
791 790 orig_end);
792 filemap_fdatawait_range(inode->i_mapping, start, orig_end); 791 if (ret)
792 return ret;
793 }
794 ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
795 if (ret)
796 return ret;
793 797
794 end = orig_end; 798 end = orig_end;
795 while (1) { 799 while (1) {
@@ -800,17 +804,20 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
800 btrfs_put_ordered_extent(ordered); 804 btrfs_put_ordered_extent(ordered);
801 break; 805 break;
802 } 806 }
803 if (ordered->file_offset + ordered->len < start) { 807 if (ordered->file_offset + ordered->len <= start) {
804 btrfs_put_ordered_extent(ordered); 808 btrfs_put_ordered_extent(ordered);
805 break; 809 break;
806 } 810 }
807 btrfs_start_ordered_extent(inode, ordered, 1); 811 btrfs_start_ordered_extent(inode, ordered, 1);
808 end = ordered->file_offset; 812 end = ordered->file_offset;
813 if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
814 ret = -EIO;
809 btrfs_put_ordered_extent(ordered); 815 btrfs_put_ordered_extent(ordered);
810 if (end == 0 || end == start) 816 if (ret || end == 0 || end == start)
811 break; 817 break;
812 end--; 818 end--;
813 } 819 }
820 return ret;
814} 821}
815 822
816/* 823/*
@@ -1094,7 +1101,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1094 * if this file hasn't been changed since the last transaction 1101 * if this file hasn't been changed since the last transaction
1095 * commit, we can safely return without doing anything 1102 * commit, we can safely return without doing anything
1096 */ 1103 */
1097 if (last_mod < root->fs_info->last_trans_committed) 1104 if (last_mod <= root->fs_info->last_trans_committed)
1098 return; 1105 return;
1099 1106
1100 spin_lock(&root->fs_info->ordered_root_lock); 1107 spin_lock(&root->fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d9a5aa097b4f..9b0450f7ac20 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -180,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
180 u64 file_offset); 180 u64 file_offset);
181void btrfs_start_ordered_extent(struct inode *inode, 181void btrfs_start_ordered_extent(struct inode *inode,
182 struct btrfs_ordered_extent *entry, int wait); 182 struct btrfs_ordered_extent *entry, int wait);
183void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 183int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
184struct btrfs_ordered_extent * 184struct btrfs_ordered_extent *
185btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 185btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
186struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, 186struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -195,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root, 196 struct btrfs_root *root,
197 struct inode *inode); 197 struct inode *inode);
198void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); 198int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
199void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 199void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
200 int delay_iput);
201void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); 200void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
202void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); 201void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
203void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); 202void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0088bedc8631..417053b17181 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -193,7 +193,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
193 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", 193 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
194 btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); 194 btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
195 for (i = 0 ; i < nr ; i++) { 195 for (i = 0 ; i < nr ; i++) {
196 item = btrfs_item_nr(l, i); 196 item = btrfs_item_nr(i);
197 btrfs_item_key_to_cpu(l, &key, i); 197 btrfs_item_key_to_cpu(l, &key, i);
198 type = btrfs_key_type(&key); 198 type = btrfs_key_type(&key);
199 printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " 199 printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d0ecfbd9cc9f..24ac21840a9a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -33,7 +33,6 @@
33#include <linux/raid/xor.h> 33#include <linux/raid/xor.h>
34#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
35#include <asm/div64.h> 35#include <asm/div64.h>
36#include "compat.h"
37#include "ctree.h" 36#include "ctree.h"
38#include "extent_map.h" 37#include "extent_map.h"
39#include "disk-io.h" 38#include "disk-io.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index aacc2121e87c..ce459a7cb16d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
588 else 588 else
589 key.offset = (u64)-1; 589 key.offset = (u64)-1;
590 590
591 return btrfs_read_fs_root_no_name(fs_info, &key); 591 return btrfs_get_fs_root(fs_info, &key, false);
592} 592}
593 593
594#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 594#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -1383,6 +1383,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1383{ 1383{
1384 struct btrfs_root *reloc_root; 1384 struct btrfs_root *reloc_root;
1385 struct reloc_control *rc = root->fs_info->reloc_ctl; 1385 struct reloc_control *rc = root->fs_info->reloc_ctl;
1386 struct btrfs_block_rsv *rsv;
1386 int clear_rsv = 0; 1387 int clear_rsv = 0;
1387 int ret; 1388 int ret;
1388 1389
@@ -1396,13 +1397,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1396 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1397 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1397 return 0; 1398 return 0;
1398 1399
1399 if (!trans->block_rsv) { 1400 if (!trans->reloc_reserved) {
1401 rsv = trans->block_rsv;
1400 trans->block_rsv = rc->block_rsv; 1402 trans->block_rsv = rc->block_rsv;
1401 clear_rsv = 1; 1403 clear_rsv = 1;
1402 } 1404 }
1403 reloc_root = create_reloc_root(trans, root, root->root_key.objectid); 1405 reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1404 if (clear_rsv) 1406 if (clear_rsv)
1405 trans->block_rsv = NULL; 1407 trans->block_rsv = rsv;
1406 1408
1407 ret = __add_reloc_root(reloc_root); 1409 ret = __add_reloc_root(reloc_root);
1408 BUG_ON(ret < 0); 1410 BUG_ON(ret < 0);
@@ -1548,7 +1550,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1548 btrfs_file_extent_other_encoding(leaf, fi)); 1550 btrfs_file_extent_other_encoding(leaf, fi));
1549 1551
1550 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1552 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1551 ret = 1; 1553 ret = -EINVAL;
1552 goto out; 1554 goto out;
1553 } 1555 }
1554 1556
@@ -1579,7 +1581,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1579 u64 end; 1581 u64 end;
1580 u32 nritems; 1582 u32 nritems;
1581 u32 i; 1583 u32 i;
1582 int ret; 1584 int ret = 0;
1583 int first = 1; 1585 int first = 1;
1584 int dirty = 0; 1586 int dirty = 0;
1585 1587
@@ -1642,11 +1644,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1642 1644
1643 ret = get_new_location(rc->data_inode, &new_bytenr, 1645 ret = get_new_location(rc->data_inode, &new_bytenr,
1644 bytenr, num_bytes); 1646 bytenr, num_bytes);
1645 if (ret > 0) { 1647 if (ret) {
1646 WARN_ON(1); 1648 /*
1647 continue; 1649 * Don't have to abort since we've not changed anything
1650 * in the file extent yet.
1651 */
1652 break;
1648 } 1653 }
1649 BUG_ON(ret < 0);
1650 1654
1651 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1655 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1652 dirty = 1; 1656 dirty = 1;
@@ -1656,18 +1660,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1656 num_bytes, parent, 1660 num_bytes, parent,
1657 btrfs_header_owner(leaf), 1661 btrfs_header_owner(leaf),
1658 key.objectid, key.offset, 1); 1662 key.objectid, key.offset, 1);
1659 BUG_ON(ret); 1663 if (ret) {
1664 btrfs_abort_transaction(trans, root, ret);
1665 break;
1666 }
1660 1667
1661 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1668 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1662 parent, btrfs_header_owner(leaf), 1669 parent, btrfs_header_owner(leaf),
1663 key.objectid, key.offset, 1); 1670 key.objectid, key.offset, 1);
1664 BUG_ON(ret); 1671 if (ret) {
1672 btrfs_abort_transaction(trans, root, ret);
1673 break;
1674 }
1665 } 1675 }
1666 if (dirty) 1676 if (dirty)
1667 btrfs_mark_buffer_dirty(leaf); 1677 btrfs_mark_buffer_dirty(leaf);
1668 if (inode) 1678 if (inode)
1669 btrfs_add_delayed_iput(inode); 1679 btrfs_add_delayed_iput(inode);
1670 return 0; 1680 return ret;
1671} 1681}
1672 1682
1673static noinline_for_stack 1683static noinline_for_stack
@@ -1767,8 +1777,7 @@ again:
1767 new_ptr_gen = 0; 1777 new_ptr_gen = 0;
1768 } 1778 }
1769 1779
1770 if (new_bytenr > 0 && new_bytenr == old_bytenr) { 1780 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
1771 WARN_ON(1);
1772 ret = level; 1781 ret = level;
1773 break; 1782 break;
1774 } 1783 }
@@ -2050,7 +2059,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2050 LIST_HEAD(inode_list); 2059 LIST_HEAD(inode_list);
2051 struct btrfs_key key; 2060 struct btrfs_key key;
2052 struct btrfs_key next_key; 2061 struct btrfs_key next_key;
2053 struct btrfs_trans_handle *trans; 2062 struct btrfs_trans_handle *trans = NULL;
2054 struct btrfs_root *reloc_root; 2063 struct btrfs_root *reloc_root;
2055 struct btrfs_root_item *root_item; 2064 struct btrfs_root_item *root_item;
2056 struct btrfs_path *path; 2065 struct btrfs_path *path;
@@ -2099,18 +2108,19 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2099 memset(&next_key, 0, sizeof(next_key)); 2108 memset(&next_key, 0, sizeof(next_key));
2100 2109
2101 while (1) { 2110 while (1) {
2102 trans = btrfs_start_transaction(root, 0);
2103 BUG_ON(IS_ERR(trans));
2104 trans->block_rsv = rc->block_rsv;
2105
2106 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, 2111 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
2107 BTRFS_RESERVE_FLUSH_ALL); 2112 BTRFS_RESERVE_FLUSH_ALL);
2108 if (ret) { 2113 if (ret) {
2109 BUG_ON(ret != -EAGAIN); 2114 err = ret;
2110 ret = btrfs_commit_transaction(trans, root); 2115 goto out;
2111 BUG_ON(ret); 2116 }
2112 continue; 2117 trans = btrfs_start_transaction(root, 0);
2118 if (IS_ERR(trans)) {
2119 err = PTR_ERR(trans);
2120 trans = NULL;
2121 goto out;
2113 } 2122 }
2123 trans->block_rsv = rc->block_rsv;
2114 2124
2115 replaced = 0; 2125 replaced = 0;
2116 max_level = level; 2126 max_level = level;
@@ -2156,6 +2166,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2156 root_item->drop_level = level; 2166 root_item->drop_level = level;
2157 2167
2158 btrfs_end_transaction_throttle(trans, root); 2168 btrfs_end_transaction_throttle(trans, root);
2169 trans = NULL;
2159 2170
2160 btrfs_btree_balance_dirty(root); 2171 btrfs_btree_balance_dirty(root);
2161 2172
@@ -2184,7 +2195,8 @@ out:
2184 btrfs_update_reloc_root(trans, root); 2195 btrfs_update_reloc_root(trans, root);
2185 } 2196 }
2186 2197
2187 btrfs_end_transaction_throttle(trans, root); 2198 if (trans)
2199 btrfs_end_transaction_throttle(trans, root);
2188 2200
2189 btrfs_btree_balance_dirty(root); 2201 btrfs_btree_balance_dirty(root);
2190 2202
@@ -3250,7 +3262,7 @@ static int add_tree_block(struct reloc_control *rc,
3250 struct rb_node *rb_node; 3262 struct rb_node *rb_node;
3251 u32 item_size; 3263 u32 item_size;
3252 int level = -1; 3264 int level = -1;
3253 int generation; 3265 u64 generation;
3254 3266
3255 eb = path->nodes[0]; 3267 eb = path->nodes[0];
3256 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3268 item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -3399,7 +3411,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3399 struct inode *inode, u64 ino) 3411 struct inode *inode, u64 ino)
3400{ 3412{
3401 struct btrfs_key key; 3413 struct btrfs_key key;
3402 struct btrfs_path *path;
3403 struct btrfs_root *root = fs_info->tree_root; 3414 struct btrfs_root *root = fs_info->tree_root;
3404 struct btrfs_trans_handle *trans; 3415 struct btrfs_trans_handle *trans;
3405 int ret = 0; 3416 int ret = 0;
@@ -3424,22 +3435,14 @@ truncate:
3424 if (ret) 3435 if (ret)
3425 goto out; 3436 goto out;
3426 3437
3427 path = btrfs_alloc_path();
3428 if (!path) {
3429 ret = -ENOMEM;
3430 goto out;
3431 }
3432
3433 trans = btrfs_join_transaction(root); 3438 trans = btrfs_join_transaction(root);
3434 if (IS_ERR(trans)) { 3439 if (IS_ERR(trans)) {
3435 btrfs_free_path(path);
3436 ret = PTR_ERR(trans); 3440 ret = PTR_ERR(trans);
3437 goto out; 3441 goto out;
3438 } 3442 }
3439 3443
3440 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 3444 ret = btrfs_truncate_free_space_cache(root, trans, inode);
3441 3445
3442 btrfs_free_path(path);
3443 btrfs_end_transaction(trans, root); 3446 btrfs_end_transaction(trans, root);
3444 btrfs_btree_balance_dirty(root); 3447 btrfs_btree_balance_dirty(root);
3445out: 3448out:
@@ -3541,10 +3544,8 @@ static int find_data_references(struct reloc_control *rc,
3541 err = ret; 3544 err = ret;
3542 goto out; 3545 goto out;
3543 } 3546 }
3544 if (ret > 0) { 3547 if (WARN_ON(ret > 0))
3545 WARN_ON(1);
3546 goto out; 3548 goto out;
3547 }
3548 3549
3549 leaf = path->nodes[0]; 3550 leaf = path->nodes[0];
3550 nritems = btrfs_header_nritems(leaf); 3551 nritems = btrfs_header_nritems(leaf);
@@ -3564,11 +3565,9 @@ static int find_data_references(struct reloc_control *rc,
3564 } 3565 }
3565 3566
3566 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3567 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3567 if (key.objectid != ref_objectid || 3568 if (WARN_ON(key.objectid != ref_objectid ||
3568 key.type != BTRFS_EXTENT_DATA_KEY) { 3569 key.type != BTRFS_EXTENT_DATA_KEY))
3569 WARN_ON(1);
3570 break; 3570 break;
3571 }
3572 3571
3573 fi = btrfs_item_ptr(leaf, path->slots[0], 3572 fi = btrfs_item_ptr(leaf, path->slots[0],
3574 struct btrfs_file_extent_item); 3573 struct btrfs_file_extent_item);
@@ -3993,16 +3992,6 @@ restart:
3993 } 3992 }
3994 } 3993 }
3995 3994
3996 ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
3997 if (ret < 0) {
3998 if (ret != -ENOSPC) {
3999 err = ret;
4000 WARN_ON(1);
4001 break;
4002 }
4003 rc->commit_transaction = 1;
4004 }
4005
4006 if (rc->commit_transaction) { 3995 if (rc->commit_transaction) {
4007 rc->commit_transaction = 0; 3996 rc->commit_transaction = 0;
4008 ret = btrfs_commit_transaction(trans, rc->extent_root); 3997 ret = btrfs_commit_transaction(trans, rc->extent_root);
@@ -4233,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4233 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4222 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
4234 rc->block_group->key.objectid, rc->block_group->flags); 4223 rc->block_group->key.objectid, rc->block_group->flags);
4235 4224
4236 ret = btrfs_start_all_delalloc_inodes(fs_info, 0); 4225 ret = btrfs_start_delalloc_roots(fs_info, 0);
4237 if (ret < 0) { 4226 if (ret < 0) {
4238 err = ret; 4227 err = ret;
4239 goto out; 4228 goto out;
4240 } 4229 }
4241 btrfs_wait_all_ordered_extents(fs_info, 0); 4230 btrfs_wait_ordered_roots(fs_info, -1);
4242 4231
4243 while (1) { 4232 while (1) {
4244 mutex_lock(&fs_info->cleaner_mutex); 4233 mutex_lock(&fs_info->cleaner_mutex);
@@ -4256,7 +4245,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4256 rc->extents_found); 4245 rc->extents_found);
4257 4246
4258 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4247 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4259 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 4248 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4249 (u64)-1);
4250 if (ret) {
4251 err = ret;
4252 goto out;
4253 }
4260 invalidate_mapping_pages(rc->data_inode->i_mapping, 4254 invalidate_mapping_pages(rc->data_inode->i_mapping,
4261 0, -1); 4255 0, -1);
4262 rc->stage = UPDATE_DATA_PTRS; 4256 rc->stage = UPDATE_DATA_PTRS;
@@ -4473,6 +4467,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4473 struct btrfs_root *root = BTRFS_I(inode)->root; 4467 struct btrfs_root *root = BTRFS_I(inode)->root;
4474 int ret; 4468 int ret;
4475 u64 disk_bytenr; 4469 u64 disk_bytenr;
4470 u64 new_bytenr;
4476 LIST_HEAD(list); 4471 LIST_HEAD(list);
4477 4472
4478 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 4473 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4484,13 +4479,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4484 if (ret) 4479 if (ret)
4485 goto out; 4480 goto out;
4486 4481
4487 disk_bytenr = ordered->start;
4488 while (!list_empty(&list)) { 4482 while (!list_empty(&list)) {
4489 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4483 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4490 list_del_init(&sums->list); 4484 list_del_init(&sums->list);
4491 4485
4492 sums->bytenr = disk_bytenr; 4486 /*
4493 disk_bytenr += sums->len; 4487 * We need to offset the new_bytenr based on where the csum is.
4488 * We need to do this because we will read in entire prealloc
4489 * extents but we may have written to say the middle of the
4490 * prealloc extent, so we need to make sure the csum goes with
4491 * the right disk offset.
4492 *
4493 * We can do this because the data reloc inode refers strictly
4494 * to the on disk bytes, so we don't have to worry about
4495 * disk_len vs real len like with real inodes since it's all
4496 * disk length.
4497 */
4498 new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
4499 sums->bytenr = new_bytenr;
4494 4500
4495 btrfs_add_ordered_sum(inode, ordered, sums); 4501 btrfs_add_ordered_sum(inode, ordered, sums);
4496 } 4502 }
@@ -4499,19 +4505,19 @@ out:
4499 return ret; 4505 return ret;
4500} 4506}
4501 4507
4502void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4508int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4503 struct btrfs_root *root, struct extent_buffer *buf, 4509 struct btrfs_root *root, struct extent_buffer *buf,
4504 struct extent_buffer *cow) 4510 struct extent_buffer *cow)
4505{ 4511{
4506 struct reloc_control *rc; 4512 struct reloc_control *rc;
4507 struct backref_node *node; 4513 struct backref_node *node;
4508 int first_cow = 0; 4514 int first_cow = 0;
4509 int level; 4515 int level;
4510 int ret; 4516 int ret = 0;
4511 4517
4512 rc = root->fs_info->reloc_ctl; 4518 rc = root->fs_info->reloc_ctl;
4513 if (!rc) 4519 if (!rc)
4514 return; 4520 return 0;
4515 4521
4516 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4522 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4517 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4523 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -4547,10 +4553,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4547 rc->nodes_relocated += buf->len; 4553 rc->nodes_relocated += buf->len;
4548 } 4554 }
4549 4555
4550 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { 4556 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4551 ret = replace_file_extents(trans, rc, root, cow); 4557 ret = replace_file_extents(trans, rc, root, cow);
4552 BUG_ON(ret); 4558 return ret;
4553 }
4554} 4559}
4555 4560
4556/* 4561/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0b1f4ef8db98..ec71ea44d2b4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
299 continue; 299 continue;
300 } 300 }
301 301
302 if (btrfs_root_refs(&root->root_item) == 0) {
303 btrfs_add_dead_root(root);
304 continue;
305 }
306
307 err = btrfs_init_fs_root(root); 302 err = btrfs_init_fs_root(root);
308 if (err) { 303 if (err) {
309 btrfs_free_fs_root(root); 304 btrfs_free_fs_root(root);
@@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
318 btrfs_free_fs_root(root); 313 btrfs_free_fs_root(root);
319 break; 314 break;
320 } 315 }
316
317 if (btrfs_root_refs(&root->root_item) == 0)
318 btrfs_add_dead_root(root);
321 } 319 }
322 320
323 btrfs_free_path(path); 321 btrfs_free_path(path);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0afcd452fcb3..561e2f16ba3e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
158 int mirror_num; 158 int mirror_num;
159}; 159};
160 160
161struct scrub_nocow_inode {
162 u64 inum;
163 u64 offset;
164 u64 root;
165 struct list_head list;
166};
167
161struct scrub_copy_nocow_ctx { 168struct scrub_copy_nocow_ctx {
162 struct scrub_ctx *sctx; 169 struct scrub_ctx *sctx;
163 u64 logical; 170 u64 logical;
164 u64 len; 171 u64 len;
165 int mirror_num; 172 int mirror_num;
166 u64 physical_for_dev_replace; 173 u64 physical_for_dev_replace;
174 struct list_head inodes;
167 struct btrfs_work work; 175 struct btrfs_work work;
168}; 176};
169 177
@@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
245static int write_page_nocow(struct scrub_ctx *sctx, 253static int write_page_nocow(struct scrub_ctx *sctx,
246 u64 physical_for_dev_replace, struct page *page); 254 u64 physical_for_dev_replace, struct page *page);
247static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 255static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
248 void *ctx); 256 struct scrub_copy_nocow_ctx *ctx);
249static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 257static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
250 int mirror_num, u64 physical_for_dev_replace); 258 int mirror_num, u64 physical_for_dev_replace);
251static void copy_nocow_pages_worker(struct btrfs_work *work); 259static void copy_nocow_pages_worker(struct btrfs_work *work);
@@ -930,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
930 BTRFS_DEV_STAT_CORRUPTION_ERRS); 938 BTRFS_DEV_STAT_CORRUPTION_ERRS);
931 } 939 }
932 940
933 if (sctx->readonly && !sctx->is_dev_replace) 941 if (sctx->readonly) {
934 goto did_not_correct_error; 942 ASSERT(!sctx->is_dev_replace);
943 goto out;
944 }
935 945
936 if (!is_metadata && !have_csum) { 946 if (!is_metadata && !have_csum) {
937 struct scrub_fixup_nodatasum *fixup_nodatasum; 947 struct scrub_fixup_nodatasum *fixup_nodatasum;
@@ -2709,8 +2719,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2709 mutex_unlock(&fs_info->scrub_lock); 2719 mutex_unlock(&fs_info->scrub_lock);
2710 wake_up(&fs_info->scrub_pause_wait); 2720 wake_up(&fs_info->scrub_pause_wait);
2711 2721
2712 dev_replace->cursor_left = dev_replace->cursor_right;
2713 dev_replace->item_needs_writeback = 1;
2714 btrfs_put_block_group(cache); 2722 btrfs_put_block_group(cache);
2715 if (ret) 2723 if (ret)
2716 break; 2724 break;
@@ -2724,6 +2732,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2724 break; 2732 break;
2725 } 2733 }
2726 2734
2735 dev_replace->cursor_left = dev_replace->cursor_right;
2736 dev_replace->item_needs_writeback = 1;
2737
2727 key.offset = found_key.offset + length; 2738 key.offset = found_key.offset + length;
2728 btrfs_release_path(path); 2739 btrfs_release_path(path);
2729 } 2740 }
@@ -2775,7 +2786,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2775{ 2786{
2776 int ret = 0; 2787 int ret = 0;
2777 2788
2778 mutex_lock(&fs_info->scrub_lock);
2779 if (fs_info->scrub_workers_refcnt == 0) { 2789 if (fs_info->scrub_workers_refcnt == 0) {
2780 if (is_dev_replace) 2790 if (is_dev_replace)
2781 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, 2791 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
@@ -2805,21 +2815,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2805 } 2815 }
2806 ++fs_info->scrub_workers_refcnt; 2816 ++fs_info->scrub_workers_refcnt;
2807out: 2817out:
2808 mutex_unlock(&fs_info->scrub_lock);
2809
2810 return ret; 2818 return ret;
2811} 2819}
2812 2820
2813static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) 2821static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2814{ 2822{
2815 mutex_lock(&fs_info->scrub_lock);
2816 if (--fs_info->scrub_workers_refcnt == 0) { 2823 if (--fs_info->scrub_workers_refcnt == 0) {
2817 btrfs_stop_workers(&fs_info->scrub_workers); 2824 btrfs_stop_workers(&fs_info->scrub_workers);
2818 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); 2825 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
2819 btrfs_stop_workers(&fs_info->scrub_nocow_workers); 2826 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
2820 } 2827 }
2821 WARN_ON(fs_info->scrub_workers_refcnt < 0); 2828 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2822 mutex_unlock(&fs_info->scrub_lock);
2823} 2829}
2824 2830
2825int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, 2831int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
@@ -2880,23 +2886,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2880 return -EINVAL; 2886 return -EINVAL;
2881 } 2887 }
2882 2888
2883 ret = scrub_workers_get(fs_info, is_dev_replace);
2884 if (ret)
2885 return ret;
2886 2889
2887 mutex_lock(&fs_info->fs_devices->device_list_mutex); 2890 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2888 dev = btrfs_find_device(fs_info, devid, NULL, NULL); 2891 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
2889 if (!dev || (dev->missing && !is_dev_replace)) { 2892 if (!dev || (dev->missing && !is_dev_replace)) {
2890 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2893 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2891 scrub_workers_put(fs_info);
2892 return -ENODEV; 2894 return -ENODEV;
2893 } 2895 }
2894 mutex_lock(&fs_info->scrub_lock);
2895 2896
2897 mutex_lock(&fs_info->scrub_lock);
2896 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { 2898 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2897 mutex_unlock(&fs_info->scrub_lock); 2899 mutex_unlock(&fs_info->scrub_lock);
2898 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2900 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2899 scrub_workers_put(fs_info);
2900 return -EIO; 2901 return -EIO;
2901 } 2902 }
2902 2903
@@ -2907,10 +2908,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2907 btrfs_dev_replace_unlock(&fs_info->dev_replace); 2908 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2908 mutex_unlock(&fs_info->scrub_lock); 2909 mutex_unlock(&fs_info->scrub_lock);
2909 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2910 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2910 scrub_workers_put(fs_info);
2911 return -EINPROGRESS; 2911 return -EINPROGRESS;
2912 } 2912 }
2913 btrfs_dev_replace_unlock(&fs_info->dev_replace); 2913 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2914
2915 ret = scrub_workers_get(fs_info, is_dev_replace);
2916 if (ret) {
2917 mutex_unlock(&fs_info->scrub_lock);
2918 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2919 return ret;
2920 }
2921
2914 sctx = scrub_setup_ctx(dev, is_dev_replace); 2922 sctx = scrub_setup_ctx(dev, is_dev_replace);
2915 if (IS_ERR(sctx)) { 2923 if (IS_ERR(sctx)) {
2916 mutex_unlock(&fs_info->scrub_lock); 2924 mutex_unlock(&fs_info->scrub_lock);
@@ -2923,13 +2931,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2923 2931
2924 atomic_inc(&fs_info->scrubs_running); 2932 atomic_inc(&fs_info->scrubs_running);
2925 mutex_unlock(&fs_info->scrub_lock); 2933 mutex_unlock(&fs_info->scrub_lock);
2926 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2927 2934
2928 if (!is_dev_replace) { 2935 if (!is_dev_replace) {
2929 down_read(&fs_info->scrub_super_lock); 2936 /*
2937 * by holding device list mutex, we can
2938 * kick off writing super in log tree sync.
2939 */
2930 ret = scrub_supers(sctx, dev); 2940 ret = scrub_supers(sctx, dev);
2931 up_read(&fs_info->scrub_super_lock);
2932 } 2941 }
2942 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2933 2943
2934 if (!ret) 2944 if (!ret)
2935 ret = scrub_enumerate_chunks(sctx, dev, start, end, 2945 ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -2946,10 +2956,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2946 2956
2947 mutex_lock(&fs_info->scrub_lock); 2957 mutex_lock(&fs_info->scrub_lock);
2948 dev->scrub_device = NULL; 2958 dev->scrub_device = NULL;
2959 scrub_workers_put(fs_info);
2949 mutex_unlock(&fs_info->scrub_lock); 2960 mutex_unlock(&fs_info->scrub_lock);
2950 2961
2951 scrub_free_ctx(sctx); 2962 scrub_free_ctx(sctx);
2952 scrub_workers_put(fs_info);
2953 2963
2954 return ret; 2964 return ret;
2955} 2965}
@@ -2979,16 +2989,6 @@ void btrfs_scrub_continue(struct btrfs_root *root)
2979 wake_up(&fs_info->scrub_pause_wait); 2989 wake_up(&fs_info->scrub_pause_wait);
2980} 2990}
2981 2991
2982void btrfs_scrub_pause_super(struct btrfs_root *root)
2983{
2984 down_write(&root->fs_info->scrub_super_lock);
2985}
2986
2987void btrfs_scrub_continue_super(struct btrfs_root *root)
2988{
2989 up_write(&root->fs_info->scrub_super_lock);
2990}
2991
2992int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) 2992int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
2993{ 2993{
2994 mutex_lock(&fs_info->scrub_lock); 2994 mutex_lock(&fs_info->scrub_lock);
@@ -3126,12 +3126,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3126 nocow_ctx->mirror_num = mirror_num; 3126 nocow_ctx->mirror_num = mirror_num;
3127 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3127 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3128 nocow_ctx->work.func = copy_nocow_pages_worker; 3128 nocow_ctx->work.func = copy_nocow_pages_worker;
3129 INIT_LIST_HEAD(&nocow_ctx->inodes);
3129 btrfs_queue_worker(&fs_info->scrub_nocow_workers, 3130 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3130 &nocow_ctx->work); 3131 &nocow_ctx->work);
3131 3132
3132 return 0; 3133 return 0;
3133} 3134}
3134 3135
3136static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
3137{
3138 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3139 struct scrub_nocow_inode *nocow_inode;
3140
3141 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
3142 if (!nocow_inode)
3143 return -ENOMEM;
3144 nocow_inode->inum = inum;
3145 nocow_inode->offset = offset;
3146 nocow_inode->root = root;
3147 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
3148 return 0;
3149}
3150
3151#define COPY_COMPLETE 1
3152
3135static void copy_nocow_pages_worker(struct btrfs_work *work) 3153static void copy_nocow_pages_worker(struct btrfs_work *work)
3136{ 3154{
3137 struct scrub_copy_nocow_ctx *nocow_ctx = 3155 struct scrub_copy_nocow_ctx *nocow_ctx =
@@ -3167,8 +3185,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3167 } 3185 }
3168 3186
3169 ret = iterate_inodes_from_logical(logical, fs_info, path, 3187 ret = iterate_inodes_from_logical(logical, fs_info, path,
3170 copy_nocow_pages_for_inode, 3188 record_inode_for_nocow, nocow_ctx);
3171 nocow_ctx);
3172 if (ret != 0 && ret != -ENOENT) { 3189 if (ret != 0 && ret != -ENOENT) {
3173 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", 3190 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
3174 logical, physical_for_dev_replace, len, mirror_num, 3191 logical, physical_for_dev_replace, len, mirror_num,
@@ -3177,7 +3194,33 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3177 goto out; 3194 goto out;
3178 } 3195 }
3179 3196
3197 btrfs_end_transaction(trans, root);
3198 trans = NULL;
3199 while (!list_empty(&nocow_ctx->inodes)) {
3200 struct scrub_nocow_inode *entry;
3201 entry = list_first_entry(&nocow_ctx->inodes,
3202 struct scrub_nocow_inode,
3203 list);
3204 list_del_init(&entry->list);
3205 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
3206 entry->root, nocow_ctx);
3207 kfree(entry);
3208 if (ret == COPY_COMPLETE) {
3209 ret = 0;
3210 break;
3211 } else if (ret) {
3212 break;
3213 }
3214 }
3180out: 3215out:
3216 while (!list_empty(&nocow_ctx->inodes)) {
3217 struct scrub_nocow_inode *entry;
3218 entry = list_first_entry(&nocow_ctx->inodes,
3219 struct scrub_nocow_inode,
3220 list);
3221 list_del_init(&entry->list);
3222 kfree(entry);
3223 }
3181 if (trans && !IS_ERR(trans)) 3224 if (trans && !IS_ERR(trans))
3182 btrfs_end_transaction(trans, root); 3225 btrfs_end_transaction(trans, root);
3183 if (not_written) 3226 if (not_written)
@@ -3190,20 +3233,25 @@ out:
3190 scrub_pending_trans_workers_dec(sctx); 3233 scrub_pending_trans_workers_dec(sctx);
3191} 3234}
3192 3235
3193static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) 3236static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3237 struct scrub_copy_nocow_ctx *nocow_ctx)
3194{ 3238{
3195 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3196 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3239 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3197 struct btrfs_key key; 3240 struct btrfs_key key;
3198 struct inode *inode; 3241 struct inode *inode;
3199 struct page *page; 3242 struct page *page;
3200 struct btrfs_root *local_root; 3243 struct btrfs_root *local_root;
3244 struct btrfs_ordered_extent *ordered;
3245 struct extent_map *em;
3246 struct extent_state *cached_state = NULL;
3247 struct extent_io_tree *io_tree;
3201 u64 physical_for_dev_replace; 3248 u64 physical_for_dev_replace;
3202 u64 len; 3249 u64 len = nocow_ctx->len;
3250 u64 lockstart = offset, lockend = offset + len - 1;
3203 unsigned long index; 3251 unsigned long index;
3204 int srcu_index; 3252 int srcu_index;
3205 int ret; 3253 int ret = 0;
3206 int err; 3254 int err = 0;
3207 3255
3208 key.objectid = root; 3256 key.objectid = root;
3209 key.type = BTRFS_ROOT_ITEM_KEY; 3257 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -3229,9 +3277,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3229 mutex_lock(&inode->i_mutex); 3277 mutex_lock(&inode->i_mutex);
3230 inode_dio_wait(inode); 3278 inode_dio_wait(inode);
3231 3279
3232 ret = 0;
3233 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 3280 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3234 len = nocow_ctx->len; 3281 io_tree = &BTRFS_I(inode)->io_tree;
3282
3283 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
3284 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
3285 if (ordered) {
3286 btrfs_put_ordered_extent(ordered);
3287 goto out_unlock;
3288 }
3289
3290 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3291 if (IS_ERR(em)) {
3292 ret = PTR_ERR(em);
3293 goto out_unlock;
3294 }
3295
3296 /*
3297 * This extent does not actually cover the logical extent anymore,
3298 * move on to the next inode.
3299 */
3300 if (em->block_start > nocow_ctx->logical ||
3301 em->block_start + em->block_len < nocow_ctx->logical + len) {
3302 free_extent_map(em);
3303 goto out_unlock;
3304 }
3305 free_extent_map(em);
3306
3235 while (len >= PAGE_CACHE_SIZE) { 3307 while (len >= PAGE_CACHE_SIZE) {
3236 index = offset >> PAGE_CACHE_SHIFT; 3308 index = offset >> PAGE_CACHE_SHIFT;
3237again: 3309again:
@@ -3247,10 +3319,9 @@ again:
3247 goto next_page; 3319 goto next_page;
3248 } else { 3320 } else {
3249 ClearPageError(page); 3321 ClearPageError(page);
3250 err = extent_read_full_page(&BTRFS_I(inode)-> 3322 err = extent_read_full_page_nolock(io_tree, page,
3251 io_tree, 3323 btrfs_get_extent,
3252 page, btrfs_get_extent, 3324 nocow_ctx->mirror_num);
3253 nocow_ctx->mirror_num);
3254 if (err) { 3325 if (err) {
3255 ret = err; 3326 ret = err;
3256 goto next_page; 3327 goto next_page;
@@ -3264,6 +3335,7 @@ again:
3264 * page in the page cache. 3335 * page in the page cache.
3265 */ 3336 */
3266 if (page->mapping != inode->i_mapping) { 3337 if (page->mapping != inode->i_mapping) {
3338 unlock_page(page);
3267 page_cache_release(page); 3339 page_cache_release(page);
3268 goto again; 3340 goto again;
3269 } 3341 }
@@ -3287,6 +3359,10 @@ next_page:
3287 physical_for_dev_replace += PAGE_CACHE_SIZE; 3359 physical_for_dev_replace += PAGE_CACHE_SIZE;
3288 len -= PAGE_CACHE_SIZE; 3360 len -= PAGE_CACHE_SIZE;
3289 } 3361 }
3362 ret = COPY_COMPLETE;
3363out_unlock:
3364 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3365 GFP_NOFS);
3290out: 3366out:
3291 mutex_unlock(&inode->i_mutex); 3367 mutex_unlock(&inode->i_mutex);
3292 iput(inode); 3368 iput(inode);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e46e0ed74925..6837fe87f3a6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,7 +121,6 @@ struct send_ctx {
121 struct list_head name_cache_list; 121 struct list_head name_cache_list;
122 int name_cache_size; 122 int name_cache_size;
123 123
124 struct file *cur_inode_filp;
125 char *read_buf; 124 char *read_buf;
126}; 125};
127 126
@@ -565,10 +564,8 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
565{ 564{
566 struct btrfs_cmd_header *hdr; 565 struct btrfs_cmd_header *hdr;
567 566
568 if (!sctx->send_buf) { 567 if (WARN_ON(!sctx->send_buf))
569 WARN_ON(1);
570 return -EINVAL; 568 return -EINVAL;
571 }
572 569
573 BUG_ON(sctx->send_size); 570 BUG_ON(sctx->send_size);
574 571
@@ -791,7 +788,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
791 if (found_key->type == BTRFS_INODE_REF_KEY) { 788 if (found_key->type == BTRFS_INODE_REF_KEY) {
792 ptr = (unsigned long)btrfs_item_ptr(eb, slot, 789 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
793 struct btrfs_inode_ref); 790 struct btrfs_inode_ref);
794 item = btrfs_item_nr(eb, slot); 791 item = btrfs_item_nr(slot);
795 total = btrfs_item_size(eb, item); 792 total = btrfs_item_size(eb, item);
796 elem_size = sizeof(*iref); 793 elem_size = sizeof(*iref);
797 } else { 794 } else {
@@ -905,7 +902,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
905 902
906 eb = path->nodes[0]; 903 eb = path->nodes[0];
907 slot = path->slots[0]; 904 slot = path->slots[0];
908 item = btrfs_item_nr(eb, slot); 905 item = btrfs_item_nr(slot);
909 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 906 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
910 cur = 0; 907 cur = 0;
911 len = 0; 908 len = 0;
@@ -2120,77 +2117,6 @@ out:
2120} 2117}
2121 2118
2122/* 2119/*
2123 * Called for regular files when sending extents data. Opens a struct file
2124 * to read from the file.
2125 */
2126static int open_cur_inode_file(struct send_ctx *sctx)
2127{
2128 int ret = 0;
2129 struct btrfs_key key;
2130 struct path path;
2131 struct inode *inode;
2132 struct dentry *dentry;
2133 struct file *filp;
2134 int new = 0;
2135
2136 if (sctx->cur_inode_filp)
2137 goto out;
2138
2139 key.objectid = sctx->cur_ino;
2140 key.type = BTRFS_INODE_ITEM_KEY;
2141 key.offset = 0;
2142
2143 inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
2144 &new);
2145 if (IS_ERR(inode)) {
2146 ret = PTR_ERR(inode);
2147 goto out;
2148 }
2149
2150 dentry = d_obtain_alias(inode);
2151 inode = NULL;
2152 if (IS_ERR(dentry)) {
2153 ret = PTR_ERR(dentry);
2154 goto out;
2155 }
2156
2157 path.mnt = sctx->mnt;
2158 path.dentry = dentry;
2159 filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
2160 dput(dentry);
2161 dentry = NULL;
2162 if (IS_ERR(filp)) {
2163 ret = PTR_ERR(filp);
2164 goto out;
2165 }
2166 sctx->cur_inode_filp = filp;
2167
2168out:
2169 /*
2170 * no xxxput required here as every vfs op
2171 * does it by itself on failure
2172 */
2173 return ret;
2174}
2175
2176/*
2177 * Closes the struct file that was created in open_cur_inode_file
2178 */
2179static int close_cur_inode_file(struct send_ctx *sctx)
2180{
2181 int ret = 0;
2182
2183 if (!sctx->cur_inode_filp)
2184 goto out;
2185
2186 ret = filp_close(sctx->cur_inode_filp, NULL);
2187 sctx->cur_inode_filp = NULL;
2188
2189out:
2190 return ret;
2191}
2192
2193/*
2194 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2120 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2195 */ 2121 */
2196static int send_subvol_begin(struct send_ctx *sctx) 2122static int send_subvol_begin(struct send_ctx *sctx)
@@ -3622,6 +3548,72 @@ out:
3622 return ret; 3548 return ret;
3623} 3549}
3624 3550
3551static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
3552{
3553 struct btrfs_root *root = sctx->send_root;
3554 struct btrfs_fs_info *fs_info = root->fs_info;
3555 struct inode *inode;
3556 struct page *page;
3557 char *addr;
3558 struct btrfs_key key;
3559 pgoff_t index = offset >> PAGE_CACHE_SHIFT;
3560 pgoff_t last_index;
3561 unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
3562 ssize_t ret = 0;
3563
3564 key.objectid = sctx->cur_ino;
3565 key.type = BTRFS_INODE_ITEM_KEY;
3566 key.offset = 0;
3567
3568 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3569 if (IS_ERR(inode))
3570 return PTR_ERR(inode);
3571
3572 if (offset + len > i_size_read(inode)) {
3573 if (offset > i_size_read(inode))
3574 len = 0;
3575 else
3576 len = offset - i_size_read(inode);
3577 }
3578 if (len == 0)
3579 goto out;
3580
3581 last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
3582 while (index <= last_index) {
3583 unsigned cur_len = min_t(unsigned, len,
3584 PAGE_CACHE_SIZE - pg_offset);
3585 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3586 if (!page) {
3587 ret = -ENOMEM;
3588 break;
3589 }
3590
3591 if (!PageUptodate(page)) {
3592 btrfs_readpage(NULL, page);
3593 lock_page(page);
3594 if (!PageUptodate(page)) {
3595 unlock_page(page);
3596 page_cache_release(page);
3597 ret = -EIO;
3598 break;
3599 }
3600 }
3601
3602 addr = kmap(page);
3603 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
3604 kunmap(page);
3605 unlock_page(page);
3606 page_cache_release(page);
3607 index++;
3608 pg_offset = 0;
3609 len -= cur_len;
3610 ret += cur_len;
3611 }
3612out:
3613 iput(inode);
3614 return ret;
3615}
3616
3625/* 3617/*
3626 * Read some bytes from the current inode/file and send a write command to 3618 * Read some bytes from the current inode/file and send a write command to
3627 * user space. 3619 * user space.
@@ -3630,35 +3622,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3630{ 3622{
3631 int ret = 0; 3623 int ret = 0;
3632 struct fs_path *p; 3624 struct fs_path *p;
3633 loff_t pos = offset; 3625 ssize_t num_read = 0;
3634 int num_read = 0;
3635 mm_segment_t old_fs;
3636 3626
3637 p = fs_path_alloc(); 3627 p = fs_path_alloc();
3638 if (!p) 3628 if (!p)
3639 return -ENOMEM; 3629 return -ENOMEM;
3640 3630
3641 /*
3642 * vfs normally only accepts user space buffers for security reasons.
3643 * we only read from the file and also only provide the read_buf buffer
3644 * to vfs. As this buffer does not come from a user space call, it's
3645 * ok to temporary allow kernel space buffers.
3646 */
3647 old_fs = get_fs();
3648 set_fs(KERNEL_DS);
3649
3650verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 3631verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3651 3632
3652 ret = open_cur_inode_file(sctx); 3633 num_read = fill_read_buf(sctx, offset, len);
3653 if (ret < 0) 3634 if (num_read <= 0) {
3654 goto out; 3635 if (num_read < 0)
3655 3636 ret = num_read;
3656 ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
3657 if (ret < 0)
3658 goto out;
3659 num_read = ret;
3660 if (!num_read)
3661 goto out; 3637 goto out;
3638 }
3662 3639
3663 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 3640 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3664 if (ret < 0) 3641 if (ret < 0)
@@ -3677,7 +3654,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3677tlv_put_failure: 3654tlv_put_failure:
3678out: 3655out:
3679 fs_path_free(p); 3656 fs_path_free(p);
3680 set_fs(old_fs);
3681 if (ret < 0) 3657 if (ret < 0)
3682 return ret; 3658 return ret;
3683 return num_read; 3659 return num_read;
@@ -3926,16 +3902,16 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3926 while (key.offset < ekey->offset + left_len) { 3902 while (key.offset < ekey->offset + left_len) {
3927 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 3903 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3928 right_type = btrfs_file_extent_type(eb, ei); 3904 right_type = btrfs_file_extent_type(eb, ei);
3929 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3930 right_len = btrfs_file_extent_num_bytes(eb, ei);
3931 right_offset = btrfs_file_extent_offset(eb, ei);
3932 right_gen = btrfs_file_extent_generation(eb, ei);
3933
3934 if (right_type != BTRFS_FILE_EXTENT_REG) { 3905 if (right_type != BTRFS_FILE_EXTENT_REG) {
3935 ret = 0; 3906 ret = 0;
3936 goto out; 3907 goto out;
3937 } 3908 }
3938 3909
3910 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3911 right_len = btrfs_file_extent_num_bytes(eb, ei);
3912 right_offset = btrfs_file_extent_offset(eb, ei);
3913 right_gen = btrfs_file_extent_generation(eb, ei);
3914
3939 /* 3915 /*
3940 * Are we at extent 8? If yes, we know the extent is changed. 3916 * Are we at extent 8? If yes, we know the extent is changed.
3941 * This may only happen on the first iteration. 3917 * This may only happen on the first iteration.
@@ -4222,10 +4198,6 @@ static int changed_inode(struct send_ctx *sctx,
4222 u64 left_gen = 0; 4198 u64 left_gen = 0;
4223 u64 right_gen = 0; 4199 u64 right_gen = 0;
4224 4200
4225 ret = close_cur_inode_file(sctx);
4226 if (ret < 0)
4227 goto out;
4228
4229 sctx->cur_ino = key->objectid; 4201 sctx->cur_ino = key->objectid;
4230 sctx->cur_inode_new_gen = 0; 4202 sctx->cur_inode_new_gen = 0;
4231 4203
@@ -4686,11 +4658,6 @@ static int send_subvol(struct send_ctx *sctx)
4686 } 4658 }
4687 4659
4688out: 4660out:
4689 if (!ret)
4690 ret = close_cur_inode_file(sctx);
4691 else
4692 close_cur_inode_file(sctx);
4693
4694 free_recorded_refs(sctx); 4661 free_recorded_refs(sctx);
4695 return ret; 4662 return ret;
4696} 4663}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3aab10ce63e8..2d8ac1bf0cf9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,7 +42,6 @@
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/btrfs.h> 44#include <linux/btrfs.h>
45#include "compat.h"
46#include "delayed-inode.h" 45#include "delayed-inode.h"
47#include "ctree.h" 46#include "ctree.h"
48#include "disk-io.h" 47#include "disk-io.h"
@@ -921,7 +920,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
921 return 0; 920 return 0;
922 } 921 }
923 922
924 btrfs_wait_all_ordered_extents(fs_info, 1); 923 btrfs_wait_ordered_roots(fs_info, -1);
925 924
926 trans = btrfs_attach_transaction_barrier(root); 925 trans = btrfs_attach_transaction_barrier(root);
927 if (IS_ERR(trans)) { 926 if (IS_ERR(trans)) {
@@ -1330,6 +1329,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1330 * this also happens on 'umount -rf' or on shutdown, when 1329 * this also happens on 'umount -rf' or on shutdown, when
1331 * the filesystem is busy. 1330 * the filesystem is busy.
1332 */ 1331 */
1332
1333 /* wait for the uuid_scan task to finish */
1334 down(&fs_info->uuid_tree_rescan_sem);
1335 /* avoid complains from lockdep et al. */
1336 up(&fs_info->uuid_tree_rescan_sem);
1337
1333 sb->s_flags |= MS_RDONLY; 1338 sb->s_flags |= MS_RDONLY;
1334 1339
1335 btrfs_dev_replace_suspend_for_unmount(fs_info); 1340 btrfs_dev_replace_suspend_for_unmount(fs_info);
@@ -1340,6 +1345,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1340 if (ret) 1345 if (ret)
1341 goto restore; 1346 goto restore;
1342 } else { 1347 } else {
1348 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1349 btrfs_err(fs_info,
1350 "Remounting read-write after error is not allowed\n");
1351 ret = -EINVAL;
1352 goto restore;
1353 }
1343 if (fs_info->fs_devices->rw_devices == 0) { 1354 if (fs_info->fs_devices->rw_devices == 0) {
1344 ret = -EACCES; 1355 ret = -EACCES;
1345 goto restore; 1356 goto restore;
@@ -1377,6 +1388,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1377 pr_warn("btrfs: failed to resume dev_replace\n"); 1388 pr_warn("btrfs: failed to resume dev_replace\n");
1378 goto restore; 1389 goto restore;
1379 } 1390 }
1391
1392 if (!fs_info->uuid_root) {
1393 pr_info("btrfs: creating UUID tree\n");
1394 ret = btrfs_create_uuid_tree(fs_info);
1395 if (ret) {
1396 pr_warn("btrfs: failed to create the uuid tree"
1397 "%d\n", ret);
1398 goto restore;
1399 }
1400 }
1380 sb->s_flags &= ~MS_RDONLY; 1401 sb->s_flags &= ~MS_RDONLY;
1381 } 1402 }
1382out: 1403out:
@@ -1449,7 +1470,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1449 nr_devices = fs_info->fs_devices->open_devices; 1470 nr_devices = fs_info->fs_devices->open_devices;
1450 BUG_ON(!nr_devices); 1471 BUG_ON(!nr_devices);
1451 1472
1452 devices_info = kmalloc(sizeof(*devices_info) * nr_devices, 1473 devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1453 GFP_NOFS); 1474 GFP_NOFS);
1454 if (!devices_info) 1475 if (!devices_info)
1455 return -ENOMEM; 1476 return -ENOMEM;
@@ -1762,6 +1783,9 @@ static void btrfs_print_info(void)
1762#ifdef CONFIG_BTRFS_DEBUG 1783#ifdef CONFIG_BTRFS_DEBUG
1763 ", debug=on" 1784 ", debug=on"
1764#endif 1785#endif
1786#ifdef CONFIG_BTRFS_ASSERT
1787 ", assert=on"
1788#endif
1765#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1789#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1766 ", integrity-checker=on" 1790 ", integrity-checker=on"
1767#endif 1791#endif
@@ -1770,7 +1794,25 @@ static void btrfs_print_info(void)
1770 1794
1771static int btrfs_run_sanity_tests(void) 1795static int btrfs_run_sanity_tests(void)
1772{ 1796{
1773 return btrfs_test_free_space_cache(); 1797 int ret;
1798
1799 ret = btrfs_init_test_fs();
1800 if (ret)
1801 return ret;
1802
1803 ret = btrfs_test_free_space_cache();
1804 if (ret)
1805 goto out;
1806 ret = btrfs_test_extent_buffer_operations();
1807 if (ret)
1808 goto out;
1809 ret = btrfs_test_extent_io();
1810 if (ret)
1811 goto out;
1812 ret = btrfs_test_inodes();
1813out:
1814 btrfs_destroy_test_fs();
1815 return ret;
1774} 1816}
1775 1817
1776static int __init init_btrfs_fs(void) 1818static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
new file mode 100644
index 000000000000..757ef00a75a4
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/mount.h>
21#include <linux/magic.h>
22#include "btrfs-tests.h"
23#include "../ctree.h"
24
25static struct vfsmount *test_mnt = NULL;
26
27static const struct super_operations btrfs_test_super_ops = {
28 .alloc_inode = btrfs_alloc_inode,
29 .destroy_inode = btrfs_test_destroy_inode,
30};
31
32static struct dentry *btrfs_test_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name,
34 void *data)
35{
36 return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops,
37 NULL, BTRFS_TEST_MAGIC);
38}
39
40static struct file_system_type test_type = {
41 .name = "btrfs_test_fs",
42 .mount = btrfs_test_mount,
43 .kill_sb = kill_anon_super,
44};
45
46struct inode *btrfs_new_test_inode(void)
47{
48 return new_inode(test_mnt->mnt_sb);
49}
50
51int btrfs_init_test_fs(void)
52{
53 int ret;
54
55 ret = register_filesystem(&test_type);
56 if (ret) {
57 printk(KERN_ERR "btrfs: cannot register test file system\n");
58 return ret;
59 }
60
61 test_mnt = kern_mount(&test_type);
62 if (IS_ERR(test_mnt)) {
63 printk(KERN_ERR "btrfs: cannot mount test file system\n");
64 unregister_filesystem(&test_type);
65 return ret;
66 }
67 return 0;
68}
69
70void btrfs_destroy_test_fs(void)
71{
72 kern_unmount(test_mnt);
73 unregister_filesystem(&test_type);
74}
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 580877625776..b353bc806ca0 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -24,11 +24,36 @@
24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) 24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
25 25
26int btrfs_test_free_space_cache(void); 26int btrfs_test_free_space_cache(void);
27int btrfs_test_extent_buffer_operations(void);
28int btrfs_test_extent_io(void);
29int btrfs_test_inodes(void);
30int btrfs_init_test_fs(void);
31void btrfs_destroy_test_fs(void);
32struct inode *btrfs_new_test_inode(void);
27#else 33#else
28static inline int btrfs_test_free_space_cache(void) 34static inline int btrfs_test_free_space_cache(void)
29{ 35{
30 return 0; 36 return 0;
31} 37}
38static inline int btrfs_test_extent_buffer_operations(void)
39{
40 return 0;
41}
42static inline int btrfs_init_test_fs(void)
43{
44 return 0;
45}
46static inline void btrfs_destroy_test_fs(void)
47{
48}
49static inline int btrfs_test_extent_io(void)
50{
51 return 0;
52}
53static inline int btrfs_test_inodes(void)
54{
55 return 0;
56}
32#endif 57#endif
33 58
34#endif 59#endif
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
new file mode 100644
index 000000000000..cc286ce97d1e
--- /dev/null
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -0,0 +1,229 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/slab.h>
20#include "btrfs-tests.h"
21#include "../ctree.h"
22#include "../extent_io.h"
23#include "../disk-io.h"
24
25static int test_btrfs_split_item(void)
26{
27 struct btrfs_path *path;
28 struct btrfs_root *root;
29 struct extent_buffer *eb;
30 struct btrfs_item *item;
31 char *value = "mary had a little lamb";
32 char *split1 = "mary had a little";
33 char *split2 = " lamb";
34 char *split3 = "mary";
35 char *split4 = " had a little";
36 char buf[32];
37 struct btrfs_key key;
38 u32 value_len = strlen(value);
39 int ret = 0;
40
41 test_msg("Running btrfs_split_item tests\n");
42
43 root = btrfs_alloc_dummy_root();
44 if (IS_ERR(root)) {
45 test_msg("Could not allocate root\n");
46 return PTR_ERR(root);
47 }
48
49 path = btrfs_alloc_path();
50 if (!path) {
51 test_msg("Could not allocate path\n");
52 kfree(root);
53 return -ENOMEM;
54 }
55
56 path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
57 if (!eb) {
58 test_msg("Could not allocate dummy buffer\n");
59 ret = -ENOMEM;
60 goto out;
61 }
62 path->slots[0] = 0;
63
64 key.objectid = 0;
65 key.type = BTRFS_EXTENT_CSUM_KEY;
66 key.offset = 0;
67
68 setup_items_for_insert(root, path, &key, &value_len, value_len,
69 value_len + sizeof(struct btrfs_item), 1);
70 item = btrfs_item_nr(0);
71 write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
72 value_len);
73
74 key.offset = 3;
75
76 /*
77 * Passing NULL trans here should be safe because we have plenty of
78 * space in this leaf to split the item without having to split the
79 * leaf.
80 */
81 ret = btrfs_split_item(NULL, root, path, &key, 17);
82 if (ret) {
83 test_msg("Split item failed %d\n", ret);
84 goto out;
85 }
86
87 /*
88 * Read the first slot, it should have the original key and contain only
89 * 'mary had a little'
90 */
91 btrfs_item_key_to_cpu(eb, &key, 0);
92 if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
93 key.offset != 0) {
94 test_msg("Invalid key at slot 0\n");
95 ret = -EINVAL;
96 goto out;
97 }
98
99 item = btrfs_item_nr(0);
100 if (btrfs_item_size(eb, item) != strlen(split1)) {
101 test_msg("Invalid len in the first split\n");
102 ret = -EINVAL;
103 goto out;
104 }
105
106 read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
107 strlen(split1));
108 if (memcmp(buf, split1, strlen(split1))) {
109 test_msg("Data in the buffer doesn't match what it should "
110 "in the first split have='%.*s' want '%s'\n",
111 (int)strlen(split1), buf, split1);
112 ret = -EINVAL;
113 goto out;
114 }
115
116 btrfs_item_key_to_cpu(eb, &key, 1);
117 if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
118 key.offset != 3) {
119 test_msg("Invalid key at slot 1\n");
120 ret = -EINVAL;
121 goto out;
122 }
123
124 item = btrfs_item_nr(1);
125 if (btrfs_item_size(eb, item) != strlen(split2)) {
126 test_msg("Invalid len in the second split\n");
127 ret = -EINVAL;
128 goto out;
129 }
130
131 read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
132 strlen(split2));
133 if (memcmp(buf, split2, strlen(split2))) {
134 test_msg("Data in the buffer doesn't match what it should "
135 "in the second split\n");
136 ret = -EINVAL;
137 goto out;
138 }
139
140 key.offset = 1;
141 /* Do it again so we test memmoving the other items in the leaf */
142 ret = btrfs_split_item(NULL, root, path, &key, 4);
143 if (ret) {
144 test_msg("Second split item failed %d\n", ret);
145 goto out;
146 }
147
148 btrfs_item_key_to_cpu(eb, &key, 0);
149 if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
150 key.offset != 0) {
151 test_msg("Invalid key at slot 0\n");
152 ret = -EINVAL;
153 goto out;
154 }
155
156 item = btrfs_item_nr(0);
157 if (btrfs_item_size(eb, item) != strlen(split3)) {
158 test_msg("Invalid len in the first split\n");
159 ret = -EINVAL;
160 goto out;
161 }
162
163 read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
164 strlen(split3));
165 if (memcmp(buf, split3, strlen(split3))) {
166 test_msg("Data in the buffer doesn't match what it should "
167 "in the third split");
168 ret = -EINVAL;
169 goto out;
170 }
171
172 btrfs_item_key_to_cpu(eb, &key, 1);
173 if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
174 key.offset != 1) {
175 test_msg("Invalid key at slot 1\n");
176 ret = -EINVAL;
177 goto out;
178 }
179
180 item = btrfs_item_nr(1);
181 if (btrfs_item_size(eb, item) != strlen(split4)) {
182 test_msg("Invalid len in the second split\n");
183 ret = -EINVAL;
184 goto out;
185 }
186
187 read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
188 strlen(split4));
189 if (memcmp(buf, split4, strlen(split4))) {
190 test_msg("Data in the buffer doesn't match what it should "
191 "in the fourth split\n");
192 ret = -EINVAL;
193 goto out;
194 }
195
196 btrfs_item_key_to_cpu(eb, &key, 2);
197 if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
198 key.offset != 3) {
199 test_msg("Invalid key at slot 2\n");
200 ret = -EINVAL;
201 goto out;
202 }
203
204 item = btrfs_item_nr(2);
205 if (btrfs_item_size(eb, item) != strlen(split2)) {
206 test_msg("Invalid len in the second split\n");
207 ret = -EINVAL;
208 goto out;
209 }
210
211 read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
212 strlen(split2));
213 if (memcmp(buf, split2, strlen(split2))) {
214 test_msg("Data in the buffer doesn't match what it should "
215 "in the last chunk\n");
216 ret = -EINVAL;
217 goto out;
218 }
219out:
220 btrfs_free_path(path);
221 kfree(root);
222 return ret;
223}
224
/*
 * Entry point for the extent buffer self tests; currently this only runs the
 * btrfs_split_item() test.  Returns that test's result (0 on success).
 */
int btrfs_test_extent_buffer_operations(void)
{
	/* Newline added so the banner does not run into the next log line. */
	test_msg("Running extent buffer operation tests\n");
	return test_btrfs_split_item();
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
new file mode 100644
index 000000000000..7e99c2f98dd0
--- /dev/null
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -0,0 +1,276 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/pagemap.h>
20#include <linux/sched.h>
21#include "btrfs-tests.h"
22#include "../extent_io.h"
23
24#define PROCESS_UNLOCK (1 << 0)
25#define PROCESS_RELEASE (1 << 1)
26#define PROCESS_TEST_LOCKED (1 << 2)
27
28static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
29 unsigned long flags)
30{
31 int ret;
32 struct page *pages[16];
33 unsigned long index = start >> PAGE_CACHE_SHIFT;
34 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
35 unsigned long nr_pages = end_index - index + 1;
36 int i;
37 int count = 0;
38 int loops = 0;
39
40 while (nr_pages > 0) {
41 ret = find_get_pages_contig(inode->i_mapping, index,
42 min_t(unsigned long, nr_pages,
43 ARRAY_SIZE(pages)), pages);
44 for (i = 0; i < ret; i++) {
45 if (flags & PROCESS_TEST_LOCKED &&
46 !PageLocked(pages[i]))
47 count++;
48 if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
49 unlock_page(pages[i]);
50 page_cache_release(pages[i]);
51 if (flags & PROCESS_RELEASE)
52 page_cache_release(pages[i]);
53 }
54 nr_pages -= ret;
55 index += ret;
56 cond_resched();
57 loops++;
58 if (loops > 100000) {
59 printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
60 break;
61 }
62 }
63 return count;
64}
65
66static int test_find_delalloc(void)
67{
68 struct inode *inode;
69 struct extent_io_tree tmp;
70 struct page *page;
71 struct page *locked_page = NULL;
72 unsigned long index = 0;
73 u64 total_dirty = 256 * 1024 * 1024;
74 u64 max_bytes = 128 * 1024 * 1024;
75 u64 start, end, test_start;
76 u64 found;
77 int ret = -EINVAL;
78
79 inode = btrfs_new_test_inode();
80 if (!inode) {
81 test_msg("Failed to allocate test inode\n");
82 return -ENOMEM;
83 }
84
85 extent_io_tree_init(&tmp, &inode->i_data);
86
87 /*
88 * First go through and create and mark all of our pages dirty, we pin
89 * everything to make sure our pages don't get evicted and screw up our
90 * test.
91 */
92 for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
93 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
94 if (!page) {
95 test_msg("Failed to allocate test page\n");
96 ret = -ENOMEM;
97 goto out;
98 }
99 SetPageDirty(page);
100 if (index) {
101 unlock_page(page);
102 } else {
103 page_cache_get(page);
104 locked_page = page;
105 }
106 }
107
108 /* Test this scenario
109 * |--- delalloc ---|
110 * |--- search ---|
111 */
112 set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
113 start = 0;
114 end = 0;
115 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
116 &end, max_bytes);
117 if (!found) {
118 test_msg("Should have found at least one delalloc\n");
119 goto out_bits;
120 }
121 if (start != 0 || end != 4095) {
122 test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
123 start, end);
124 goto out_bits;
125 }
126 unlock_extent(&tmp, start, end);
127 unlock_page(locked_page);
128 page_cache_release(locked_page);
129
130 /*
131 * Test this scenario
132 *
133 * |--- delalloc ---|
134 * |--- search ---|
135 */
136 test_start = 64 * 1024 * 1024;
137 locked_page = find_lock_page(inode->i_mapping,
138 test_start >> PAGE_CACHE_SHIFT);
139 if (!locked_page) {
140 test_msg("Couldn't find the locked page\n");
141 goto out_bits;
142 }
143 set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
144 start = test_start;
145 end = 0;
146 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
147 &end, max_bytes);
148 if (!found) {
149 test_msg("Couldn't find delalloc in our range\n");
150 goto out_bits;
151 }
152 if (start != test_start || end != max_bytes - 1) {
153 test_msg("Expected start %Lu end %Lu, got start %Lu, end "
154 "%Lu\n", test_start, max_bytes - 1, start, end);
155 goto out_bits;
156 }
157 if (process_page_range(inode, start, end,
158 PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
159 test_msg("There were unlocked pages in the range\n");
160 goto out_bits;
161 }
162 unlock_extent(&tmp, start, end);
163 /* locked_page was unlocked above */
164 page_cache_release(locked_page);
165
166 /*
167 * Test this scenario
168 * |--- delalloc ---|
169 * |--- search ---|
170 */
171 test_start = max_bytes + 4096;
172 locked_page = find_lock_page(inode->i_mapping, test_start >>
173 PAGE_CACHE_SHIFT);
174 if (!locked_page) {
175 test_msg("Could'nt find the locked page\n");
176 goto out_bits;
177 }
178 start = test_start;
179 end = 0;
180 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
181 &end, max_bytes);
182 if (found) {
183 test_msg("Found range when we shouldn't have\n");
184 goto out_bits;
185 }
186 if (end != (u64)-1) {
187 test_msg("Did not return the proper end offset\n");
188 goto out_bits;
189 }
190
191 /*
192 * Test this scenario
193 * [------- delalloc -------|
194 * [max_bytes]|-- search--|
195 *
196 * We are re-using our test_start from above since it works out well.
197 */
198 set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
199 start = test_start;
200 end = 0;
201 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
202 &end, max_bytes);
203 if (!found) {
204 test_msg("Didn't find our range\n");
205 goto out_bits;
206 }
207 if (start != test_start || end != total_dirty - 1) {
208 test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
209 test_start, total_dirty - 1, start, end);
210 goto out_bits;
211 }
212 if (process_page_range(inode, start, end,
213 PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
214 test_msg("Pages in range were not all locked\n");
215 goto out_bits;
216 }
217 unlock_extent(&tmp, start, end);
218
219 /*
220 * Now to test where we run into a page that is no longer dirty in the
221 * range we want to find.
222 */
223 page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
224 >> PAGE_CACHE_SHIFT);
225 if (!page) {
226 test_msg("Couldn't find our page\n");
227 goto out_bits;
228 }
229 ClearPageDirty(page);
230 page_cache_release(page);
231
232 /* We unlocked it in the previous test */
233 lock_page(locked_page);
234 start = test_start;
235 end = 0;
236 /*
237 * Currently if we fail to find dirty pages in the delalloc range we
238 * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
239 * this changes at any point in the future we will need to fix this
240 * tests expected behavior.
241 */
242 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
243 &end, max_bytes);
244 if (!found) {
245 test_msg("Didn't find our range\n");
246 goto out_bits;
247 }
248 if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
249 test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
250 test_start, test_start + PAGE_CACHE_SIZE - 1, start,
251 end);
252 goto out_bits;
253 }
254 if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
255 PROCESS_UNLOCK)) {
256 test_msg("Pages in range were not all locked\n");
257 goto out_bits;
258 }
259 ret = 0;
260out_bits:
261 clear_extent_bits(&tmp, 0, total_dirty - 1,
262 (unsigned long)-1, GFP_NOFS);
263out:
264 if (locked_page)
265 page_cache_release(locked_page);
266 process_page_range(inode, 0, total_dirty - 1,
267 PROCESS_UNLOCK | PROCESS_RELEASE);
268 iput(inode);
269 return ret;
270}
271
/*
 * Entry point for the extent io self tests: announces and runs the
 * find-delalloc test and hands back its result.
 */
int btrfs_test_extent_io(void)
{
	int ret;

	test_msg("Running find delalloc tests\n");
	ret = test_find_delalloc();
	return ret;
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
new file mode 100644
index 000000000000..397d1f99a8eb
--- /dev/null
+++ b/fs/btrfs/tests/inode-tests.c
@@ -0,0 +1,955 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include "btrfs-tests.h"
20#include "../ctree.h"
21#include "../btrfs_inode.h"
22#include "../disk-io.h"
23#include "../extent_io.h"
24#include "../volumes.h"
25
26static struct btrfs_fs_info *alloc_dummy_fs_info(void)
27{
28 struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
29 GFP_NOFS);
30 if (!fs_info)
31 return fs_info;
32 fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
33 GFP_NOFS);
34 if (!fs_info->fs_devices) {
35 kfree(fs_info);
36 return NULL;
37 }
38 return fs_info;
39}
40static void free_dummy_root(struct btrfs_root *root)
41{
42 if (!root)
43 return;
44 if (root->fs_info) {
45 kfree(root->fs_info->fs_devices);
46 kfree(root->fs_info);
47 }
48 if (root->node)
49 free_extent_buffer(root->node);
50 kfree(root);
51}
52
53static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
54 u64 ram_bytes, u64 offset, u64 disk_bytenr,
55 u64 disk_len, u32 type, u8 compression, int slot)
56{
57 struct btrfs_path path;
58 struct btrfs_file_extent_item *fi;
59 struct extent_buffer *leaf = root->node;
60 struct btrfs_key key;
61 u32 value_len = sizeof(struct btrfs_file_extent_item);
62
63 if (type == BTRFS_FILE_EXTENT_INLINE)
64 value_len += len;
65 memset(&path, 0, sizeof(path));
66
67 path.nodes[0] = leaf;
68 path.slots[0] = slot;
69
70 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
71 key.type = BTRFS_EXTENT_DATA_KEY;
72 key.offset = start;
73
74 setup_items_for_insert(root, &path, &key, &value_len, value_len,
75 value_len + sizeof(struct btrfs_item), 1);
76 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
77 btrfs_set_file_extent_generation(leaf, fi, 1);
78 btrfs_set_file_extent_type(leaf, fi, type);
79 btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
80 btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len);
81 btrfs_set_file_extent_offset(leaf, fi, offset);
82 btrfs_set_file_extent_num_bytes(leaf, fi, len);
83 btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
84 btrfs_set_file_extent_compression(leaf, fi, compression);
85 btrfs_set_file_extent_encryption(leaf, fi, 0);
86 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
87}
88
89static void insert_inode_item_key(struct btrfs_root *root)
90{
91 struct btrfs_path path;
92 struct extent_buffer *leaf = root->node;
93 struct btrfs_key key;
94 u32 value_len = 0;
95
96 memset(&path, 0, sizeof(path));
97
98 path.nodes[0] = leaf;
99 path.slots[0] = 0;
100
101 key.objectid = BTRFS_INODE_ITEM_KEY;
102 key.type = BTRFS_INODE_ITEM_KEY;
103 key.offset = 0;
104
105 setup_items_for_insert(root, &path, &key, &value_len, value_len,
106 value_len + sizeof(struct btrfs_item), 1);
107}
108
109/*
110 * Build the most complicated map of extents the earth has ever seen. We want
111 * this so we can test all of the corner cases of btrfs_get_extent. Here is a
112 * diagram of how the extents will look though this may not be possible we still
113 * want to make sure everything acts normally (the last number is not inclusive)
114 *
115 * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
116 * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
117 *
118 * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
119 * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
120 *
121 * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
122 * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
123 *
124 * [81920-86016]
125 * [ regular ]
126 */
127static void setup_file_extents(struct btrfs_root *root)
128{
129 int slot = 0;
130 u64 disk_bytenr = 1 * 1024 * 1024;
131 u64 offset = 0;
132
133 /* First we want a hole */
134 insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
135 slot);
136 slot++;
137 offset += 5;
138
139 /*
140 * Now we want an inline extent, I don't think this is possible but hey
141 * why not? Also keep in mind if we have an inline extent it counts as
142 * the whole first page. If we were to expand it we would have to cow
143 * and we wouldn't have an inline extent anymore.
144 */
145 insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
146 slot);
147 slot++;
148 offset = 4096;
149
150 /* Now another hole */
151 insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
152 slot);
153 slot++;
154 offset += 4;
155
156 /* Now for a regular extent */
157 insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
158 BTRFS_FILE_EXTENT_REG, 0, slot);
159 slot++;
160 disk_bytenr += 4096;
161 offset += 4095;
162
163 /*
164 * Now for 3 extents that were split from a hole punch so we test
165 * offsets properly.
166 */
167 insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
168 BTRFS_FILE_EXTENT_REG, 0, slot);
169 slot++;
170 offset += 4096;
171 insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
172 0, slot);
173 slot++;
174 offset += 4096;
175 insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
176 BTRFS_FILE_EXTENT_REG, 0, slot);
177 slot++;
178 offset += 8192;
179 disk_bytenr += 16384;
180
181 /* Now for a unwritten prealloc extent */
182 insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
183 BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
184 slot++;
185 offset += 4096;
186
187 /*
188 * We want to jack up disk_bytenr a little more so the em stuff doesn't
189 * merge our records.
190 */
191 disk_bytenr += 8192;
192
193 /*
194 * Now for a partially written prealloc extent, basically the same as
195 * the hole punch example above. Ram_bytes never changes when you mark
196 * extents written btw.
197 */
198 insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
199 BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
200 slot++;
201 offset += 4096;
202 insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
203 BTRFS_FILE_EXTENT_REG, 0, slot);
204 slot++;
205 offset += 4096;
206 insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
207 BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
208 slot++;
209 offset += 8192;
210 disk_bytenr += 16384;
211
212 /* Now a normal compressed extent */
213 insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
214 BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
215 slot++;
216 offset += 8192;
217 /* No merges */
218 disk_bytenr += 8192;
219
220 /* Now a split compressed extent */
221 insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
222 BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
223 slot++;
224 offset += 4096;
225 insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
226 BTRFS_FILE_EXTENT_REG, 0, slot);
227 slot++;
228 offset += 4096;
229 insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
230 BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
231 slot++;
232 offset += 8192;
233 disk_bytenr += 8192;
234
235 /* Now extents that have a hole but no hole extent */
236 insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
237 BTRFS_FILE_EXTENT_REG, 0, slot);
238 slot++;
239 offset += 16384;
240 disk_bytenr += 4096;
241 insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
242 BTRFS_FILE_EXTENT_REG, 0, slot);
243}
244
/*
 * Expected values of em->flags in the tests below.  They start out as zero
 * (static storage) and btrfs_test_inodes() sets one flag bit in each before
 * any test runs.
 */
static unsigned long prealloc_only;
static unsigned long compressed_only;
static unsigned long vacancy_only;
248
249static noinline int test_btrfs_get_extent(void)
250{
251 struct inode *inode = NULL;
252 struct btrfs_root *root = NULL;
253 struct extent_map *em = NULL;
254 u64 orig_start;
255 u64 disk_bytenr;
256 u64 offset;
257 int ret = -ENOMEM;
258
259 inode = btrfs_new_test_inode();
260 if (!inode) {
261 test_msg("Couldn't allocate inode\n");
262 return ret;
263 }
264
265 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
266 BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
267 BTRFS_I(inode)->location.offset = 0;
268
269 root = btrfs_alloc_dummy_root();
270 if (IS_ERR(root)) {
271 test_msg("Couldn't allocate root\n");
272 goto out;
273 }
274
275 /*
276 * We do this since btrfs_get_extent wants to assign em->bdev to
277 * root->fs_info->fs_devices->latest_bdev.
278 */
279 root->fs_info = alloc_dummy_fs_info();
280 if (!root->fs_info) {
281 test_msg("Couldn't allocate dummy fs info\n");
282 goto out;
283 }
284
285 root->node = alloc_dummy_extent_buffer(0, 4096);
286 if (!root->node) {
287 test_msg("Couldn't allocate dummy buffer\n");
288 goto out;
289 }
290
291 /*
292 * We will just free a dummy node if it's ref count is 2 so we need an
293 * extra ref so our searches don't accidently release our page.
294 */
295 extent_buffer_get(root->node);
296 btrfs_set_header_nritems(root->node, 0);
297 btrfs_set_header_level(root->node, 0);
298 ret = -EINVAL;
299
300 /* First with no extents */
301 BTRFS_I(inode)->root = root;
302 em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
303 if (IS_ERR(em)) {
304 em = NULL;
305 test_msg("Got an error when we shouldn't have\n");
306 goto out;
307 }
308 if (em->block_start != EXTENT_MAP_HOLE) {
309 test_msg("Expected a hole, got %llu\n", em->block_start);
310 goto out;
311 }
312 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
313 test_msg("Vacancy flag wasn't set properly\n");
314 goto out;
315 }
316 free_extent_map(em);
317 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
318
319 /*
320 * All of the magic numbers are based on the mapping setup in
321 * setup_file_extents, so if you change anything there you need to
322 * update the comment and update the expected values below.
323 */
324 setup_file_extents(root);
325
326 em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
327 if (IS_ERR(em)) {
328 test_msg("Got an error when we shouldn't have\n");
329 goto out;
330 }
331 if (em->block_start != EXTENT_MAP_HOLE) {
332 test_msg("Expected a hole, got %llu\n", em->block_start);
333 goto out;
334 }
335 if (em->start != 0 || em->len != 5) {
336 test_msg("Unexpected extent wanted start 0 len 5, got start "
337 "%llu len %llu\n", em->start, em->len);
338 goto out;
339 }
340 if (em->flags != 0) {
341 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
342 goto out;
343 }
344 offset = em->start + em->len;
345 free_extent_map(em);
346
347 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
348 if (IS_ERR(em)) {
349 test_msg("Got an error when we shouldn't have\n");
350 goto out;
351 }
352 if (em->block_start != EXTENT_MAP_INLINE) {
353 test_msg("Expected an inline, got %llu\n", em->block_start);
354 goto out;
355 }
356 if (em->start != offset || em->len != 4091) {
357 test_msg("Unexpected extent wanted start %llu len 1, got start "
358 "%llu len %llu\n", offset, em->start, em->len);
359 goto out;
360 }
361 if (em->flags != 0) {
362 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
363 goto out;
364 }
365 /*
366 * We don't test anything else for inline since it doesn't get set
367 * unless we have a page for it to write into. Maybe we should change
368 * this?
369 */
370 offset = em->start + em->len;
371 free_extent_map(em);
372
373 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
374 if (IS_ERR(em)) {
375 test_msg("Got an error when we shouldn't have\n");
376 goto out;
377 }
378 if (em->block_start != EXTENT_MAP_HOLE) {
379 test_msg("Expected a hole, got %llu\n", em->block_start);
380 goto out;
381 }
382 if (em->start != offset || em->len != 4) {
383 test_msg("Unexpected extent wanted start %llu len 4, got start "
384 "%llu len %llu\n", offset, em->start, em->len);
385 goto out;
386 }
387 if (em->flags != 0) {
388 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
389 goto out;
390 }
391 offset = em->start + em->len;
392 free_extent_map(em);
393
394 /* Regular extent */
395 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
396 if (IS_ERR(em)) {
397 test_msg("Got an error when we shouldn't have\n");
398 goto out;
399 }
400 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
401 test_msg("Expected a real extent, got %llu\n", em->block_start);
402 goto out;
403 }
404 if (em->start != offset || em->len != 4095) {
405 test_msg("Unexpected extent wanted start %llu len 4095, got "
406 "start %llu len %llu\n", offset, em->start, em->len);
407 goto out;
408 }
409 if (em->flags != 0) {
410 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
411 goto out;
412 }
413 if (em->orig_start != em->start) {
414 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
415 em->orig_start);
416 goto out;
417 }
418 offset = em->start + em->len;
419 free_extent_map(em);
420
421 /* The next 3 are split extents */
422 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
423 if (IS_ERR(em)) {
424 test_msg("Got an error when we shouldn't have\n");
425 goto out;
426 }
427 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
428 test_msg("Expected a real extent, got %llu\n", em->block_start);
429 goto out;
430 }
431 if (em->start != offset || em->len != 4096) {
432 test_msg("Unexpected extent wanted start %llu len 4096, got "
433 "start %llu len %llu\n", offset, em->start, em->len);
434 goto out;
435 }
436 if (em->flags != 0) {
437 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
438 goto out;
439 }
440 if (em->orig_start != em->start) {
441 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
442 em->orig_start);
443 goto out;
444 }
445 disk_bytenr = em->block_start;
446 orig_start = em->start;
447 offset = em->start + em->len;
448 free_extent_map(em);
449
450 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
451 if (IS_ERR(em)) {
452 test_msg("Got an error when we shouldn't have\n");
453 goto out;
454 }
455 if (em->block_start != EXTENT_MAP_HOLE) {
456 test_msg("Expected a hole, got %llu\n", em->block_start);
457 goto out;
458 }
459 if (em->start != offset || em->len != 4096) {
460 test_msg("Unexpected extent wanted start %llu len 4096, got "
461 "start %llu len %llu\n", offset, em->start, em->len);
462 goto out;
463 }
464 if (em->flags != 0) {
465 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
466 goto out;
467 }
468 offset = em->start + em->len;
469 free_extent_map(em);
470
471 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
472 if (IS_ERR(em)) {
473 test_msg("Got an error when we shouldn't have\n");
474 goto out;
475 }
476 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
477 test_msg("Expected a real extent, got %llu\n", em->block_start);
478 goto out;
479 }
480 if (em->start != offset || em->len != 8192) {
481 test_msg("Unexpected extent wanted start %llu len 8192, got "
482 "start %llu len %llu\n", offset, em->start, em->len);
483 goto out;
484 }
485 if (em->flags != 0) {
486 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
487 goto out;
488 }
489 if (em->orig_start != orig_start) {
490 test_msg("Wrong orig offset, want %llu, have %llu\n",
491 orig_start, em->orig_start);
492 goto out;
493 }
494 disk_bytenr += (em->start - orig_start);
495 if (em->block_start != disk_bytenr) {
496 test_msg("Wrong block start, want %llu, have %llu\n",
497 disk_bytenr, em->block_start);
498 goto out;
499 }
500 offset = em->start + em->len;
501 free_extent_map(em);
502
503 /* Prealloc extent */
504 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
505 if (IS_ERR(em)) {
506 test_msg("Got an error when we shouldn't have\n");
507 goto out;
508 }
509 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
510 test_msg("Expected a real extent, got %llu\n", em->block_start);
511 goto out;
512 }
513 if (em->start != offset || em->len != 4096) {
514 test_msg("Unexpected extent wanted start %llu len 4096, got "
515 "start %llu len %llu\n", offset, em->start, em->len);
516 goto out;
517 }
518 if (em->flags != prealloc_only) {
519 test_msg("Unexpected flags set, want %lu have %lu\n",
520 prealloc_only, em->flags);
521 goto out;
522 }
523 if (em->orig_start != em->start) {
524 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
525 em->orig_start);
526 goto out;
527 }
528 offset = em->start + em->len;
529 free_extent_map(em);
530
531 /* The next 3 are a half written prealloc extent */
532 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
533 if (IS_ERR(em)) {
534 test_msg("Got an error when we shouldn't have\n");
535 goto out;
536 }
537 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
538 test_msg("Expected a real extent, got %llu\n", em->block_start);
539 goto out;
540 }
541 if (em->start != offset || em->len != 4096) {
542 test_msg("Unexpected extent wanted start %llu len 4096, got "
543 "start %llu len %llu\n", offset, em->start, em->len);
544 goto out;
545 }
546 if (em->flags != prealloc_only) {
547 test_msg("Unexpected flags set, want %lu have %lu\n",
548 prealloc_only, em->flags);
549 goto out;
550 }
551 if (em->orig_start != em->start) {
552 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
553 em->orig_start);
554 goto out;
555 }
556 disk_bytenr = em->block_start;
557 orig_start = em->start;
558 offset = em->start + em->len;
559 free_extent_map(em);
560
561 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
562 if (IS_ERR(em)) {
563 test_msg("Got an error when we shouldn't have\n");
564 goto out;
565 }
566 if (em->block_start >= EXTENT_MAP_HOLE) {
567 test_msg("Expected a real extent, got %llu\n", em->block_start);
568 goto out;
569 }
570 if (em->start != offset || em->len != 4096) {
571 test_msg("Unexpected extent wanted start %llu len 4096, got "
572 "start %llu len %llu\n", offset, em->start, em->len);
573 goto out;
574 }
575 if (em->flags != 0) {
576 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
577 goto out;
578 }
579 if (em->orig_start != orig_start) {
580 test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
581 orig_start, em->orig_start);
582 goto out;
583 }
584 if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
585 test_msg("Unexpected block start, wanted %llu, have %llu\n",
586 disk_bytenr + (em->start - em->orig_start),
587 em->block_start);
588 goto out;
589 }
590 offset = em->start + em->len;
591 free_extent_map(em);
592
593 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
594 if (IS_ERR(em)) {
595 test_msg("Got an error when we shouldn't have\n");
596 goto out;
597 }
598 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
599 test_msg("Expected a real extent, got %llu\n", em->block_start);
600 goto out;
601 }
602 if (em->start != offset || em->len != 8192) {
603 test_msg("Unexpected extent wanted start %llu len 8192, got "
604 "start %llu len %llu\n", offset, em->start, em->len);
605 goto out;
606 }
607 if (em->flags != prealloc_only) {
608 test_msg("Unexpected flags set, want %lu have %lu\n",
609 prealloc_only, em->flags);
610 goto out;
611 }
612 if (em->orig_start != orig_start) {
613 test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
614 em->orig_start);
615 goto out;
616 }
617 if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
618 test_msg("Unexpected block start, wanted %llu, have %llu\n",
619 disk_bytenr + (em->start - em->orig_start),
620 em->block_start);
621 goto out;
622 }
623 offset = em->start + em->len;
624 free_extent_map(em);
625
626 /* Now for the compressed extent */
627 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
628 if (IS_ERR(em)) {
629 test_msg("Got an error when we shouldn't have\n");
630 goto out;
631 }
632 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
633 test_msg("Expected a real extent, got %llu\n", em->block_start);
634 goto out;
635 }
636 if (em->start != offset || em->len != 8192) {
637 test_msg("Unexpected extent wanted start %llu len 8192, got "
638 "start %llu len %llu\n", offset, em->start, em->len);
639 goto out;
640 }
641 if (em->flags != compressed_only) {
642 test_msg("Unexpected flags set, want %lu have %lu\n",
643 compressed_only, em->flags);
644 goto out;
645 }
646 if (em->orig_start != em->start) {
647 test_msg("Wrong orig offset, want %llu, have %llu\n",
648 em->start, em->orig_start);
649 goto out;
650 }
651 if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
652 test_msg("Unexpected compress type, wanted %d, got %d\n",
653 BTRFS_COMPRESS_ZLIB, em->compress_type);
654 goto out;
655 }
656 offset = em->start + em->len;
657 free_extent_map(em);
658
659 /* Split compressed extent */
660 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
661 if (IS_ERR(em)) {
662 test_msg("Got an error when we shouldn't have\n");
663 goto out;
664 }
665 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
666 test_msg("Expected a real extent, got %llu\n", em->block_start);
667 goto out;
668 }
669 if (em->start != offset || em->len != 4096) {
670 test_msg("Unexpected extent wanted start %llu len 4096, got "
671 "start %llu len %llu\n", offset, em->start, em->len);
672 goto out;
673 }
674 if (em->flags != compressed_only) {
675 test_msg("Unexpected flags set, want %lu have %lu\n",
676 compressed_only, em->flags);
677 goto out;
678 }
679 if (em->orig_start != em->start) {
680 test_msg("Wrong orig offset, want %llu, have %llu\n",
681 em->start, em->orig_start);
682 goto out;
683 }
684 if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
685 test_msg("Unexpected compress type, wanted %d, got %d\n",
686 BTRFS_COMPRESS_ZLIB, em->compress_type);
687 goto out;
688 }
689 disk_bytenr = em->block_start;
690 orig_start = em->start;
691 offset = em->start + em->len;
692 free_extent_map(em);
693
694 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
695 if (IS_ERR(em)) {
696 test_msg("Got an error when we shouldn't have\n");
697 goto out;
698 }
699 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
700 test_msg("Expected a real extent, got %llu\n", em->block_start);
701 goto out;
702 }
703 if (em->start != offset || em->len != 4096) {
704 test_msg("Unexpected extent wanted start %llu len 4096, got "
705 "start %llu len %llu\n", offset, em->start, em->len);
706 goto out;
707 }
708 if (em->flags != 0) {
709 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
710 goto out;
711 }
712 if (em->orig_start != em->start) {
713 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
714 em->orig_start);
715 goto out;
716 }
717 offset = em->start + em->len;
718 free_extent_map(em);
719
720 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
721 if (IS_ERR(em)) {
722 test_msg("Got an error when we shouldn't have\n");
723 goto out;
724 }
725 if (em->block_start != disk_bytenr) {
726 test_msg("Block start does not match, want %llu got %llu\n",
727 disk_bytenr, em->block_start);
728 goto out;
729 }
730 if (em->start != offset || em->len != 8192) {
731 test_msg("Unexpected extent wanted start %llu len 8192, got "
732 "start %llu len %llu\n", offset, em->start, em->len);
733 goto out;
734 }
735 if (em->flags != compressed_only) {
736 test_msg("Unexpected flags set, want %lu have %lu\n",
737 compressed_only, em->flags);
738 goto out;
739 }
740 if (em->orig_start != orig_start) {
741 test_msg("Wrong orig offset, want %llu, have %llu\n",
742 em->start, orig_start);
743 goto out;
744 }
745 if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
746 test_msg("Unexpected compress type, wanted %d, got %d\n",
747 BTRFS_COMPRESS_ZLIB, em->compress_type);
748 goto out;
749 }
750 offset = em->start + em->len;
751 free_extent_map(em);
752
753 /* A hole between regular extents but no hole extent */
754 em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
755 if (IS_ERR(em)) {
756 test_msg("Got an error when we shouldn't have\n");
757 goto out;
758 }
759 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
760 test_msg("Expected a real extent, got %llu\n", em->block_start);
761 goto out;
762 }
763 if (em->start != offset || em->len != 4096) {
764 test_msg("Unexpected extent wanted start %llu len 4096, got "
765 "start %llu len %llu\n", offset, em->start, em->len);
766 goto out;
767 }
768 if (em->flags != 0) {
769 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
770 goto out;
771 }
772 if (em->orig_start != em->start) {
773 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
774 em->orig_start);
775 goto out;
776 }
777 offset = em->start + em->len;
778 free_extent_map(em);
779
780 em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
781 if (IS_ERR(em)) {
782 test_msg("Got an error when we shouldn't have\n");
783 goto out;
784 }
785 if (em->block_start != EXTENT_MAP_HOLE) {
786 test_msg("Expected a hole extent, got %llu\n", em->block_start);
787 goto out;
788 }
789 /*
790 * Currently we just return a length that we requested rather than the
791 * length of the actual hole, if this changes we'll have to change this
792 * test.
793 */
794 if (em->start != offset || em->len != 12288) {
795 test_msg("Unexpected extent wanted start %llu len 12288, got "
796 "start %llu len %llu\n", offset, em->start, em->len);
797 goto out;
798 }
799 if (em->flags != vacancy_only) {
800 test_msg("Unexpected flags set, want %lu have %lu\n",
801 vacancy_only, em->flags);
802 goto out;
803 }
804 if (em->orig_start != em->start) {
805 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
806 em->orig_start);
807 goto out;
808 }
809 offset = em->start + em->len;
810 free_extent_map(em);
811
812 em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
813 if (IS_ERR(em)) {
814 test_msg("Got an error when we shouldn't have\n");
815 goto out;
816 }
817 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
818 test_msg("Expected a real extent, got %llu\n", em->block_start);
819 goto out;
820 }
821 if (em->start != offset || em->len != 4096) {
822 test_msg("Unexpected extent wanted start %llu len 4096, got "
823 "start %llu len %llu\n", offset, em->start, em->len);
824 goto out;
825 }
826 if (em->flags != 0) {
827 test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
828 goto out;
829 }
830 if (em->orig_start != em->start) {
831 test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
832 em->orig_start);
833 goto out;
834 }
835 ret = 0;
836out:
837 if (!IS_ERR(em))
838 free_extent_map(em);
839 iput(inode);
840 free_dummy_root(root);
841 return ret;
842}
843
844static int test_hole_first(void)
845{
846 struct inode *inode = NULL;
847 struct btrfs_root *root = NULL;
848 struct extent_map *em = NULL;
849 int ret = -ENOMEM;
850
851 inode = btrfs_new_test_inode();
852 if (!inode) {
853 test_msg("Couldn't allocate inode\n");
854 return ret;
855 }
856
857 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
858 BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
859 BTRFS_I(inode)->location.offset = 0;
860
861 root = btrfs_alloc_dummy_root();
862 if (IS_ERR(root)) {
863 test_msg("Couldn't allocate root\n");
864 goto out;
865 }
866
867 root->fs_info = alloc_dummy_fs_info();
868 if (!root->fs_info) {
869 test_msg("Couldn't allocate dummy fs info\n");
870 goto out;
871 }
872
873 root->node = alloc_dummy_extent_buffer(0, 4096);
874 if (!root->node) {
875 test_msg("Couldn't allocate dummy buffer\n");
876 goto out;
877 }
878
879 extent_buffer_get(root->node);
880 btrfs_set_header_nritems(root->node, 0);
881 btrfs_set_header_level(root->node, 0);
882 BTRFS_I(inode)->root = root;
883 ret = -EINVAL;
884
885 /*
886 * Need a blank inode item here just so we don't confuse
887 * btrfs_get_extent.
888 */
889 insert_inode_item_key(root);
890 insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
891 BTRFS_FILE_EXTENT_REG, 0, 1);
892 em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
893 if (IS_ERR(em)) {
894 test_msg("Got an error when we shouldn't have\n");
895 goto out;
896 }
897 if (em->block_start != EXTENT_MAP_HOLE) {
898 test_msg("Expected a hole, got %llu\n", em->block_start);
899 goto out;
900 }
901 if (em->start != 0 || em->len != 4096) {
902 test_msg("Unexpected extent wanted start 0 len 4096, got start "
903 "%llu len %llu\n", em->start, em->len);
904 goto out;
905 }
906 if (em->flags != vacancy_only) {
907 test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
908 em->flags);
909 goto out;
910 }
911 free_extent_map(em);
912
913 em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
914 if (IS_ERR(em)) {
915 test_msg("Got an error when we shouldn't have\n");
916 goto out;
917 }
918 if (em->block_start != 4096) {
919 test_msg("Expected a real extent, got %llu\n", em->block_start);
920 goto out;
921 }
922 if (em->start != 4096 || em->len != 4096) {
923 test_msg("Unexpected extent wanted start 4096 len 4096, got "
924 "start %llu len %llu\n", em->start, em->len);
925 goto out;
926 }
927 if (em->flags != 0) {
928 test_msg("Unexpected flags set, wanted 0 got %lu\n",
929 em->flags);
930 goto out;
931 }
932 ret = 0;
933out:
934 if (!IS_ERR(em))
935 free_extent_map(em);
936 iput(inode);
937 free_dummy_root(root);
938 return ret;
939}
940
941int btrfs_test_inodes(void)
942{
943 int ret;
944
945 set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
946 set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
947 set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
948
949 test_msg("Running btrfs_get_extent tests\n");
950 ret = test_btrfs_get_extent();
951 if (ret)
952 return ret;
953 test_msg("Running hole first btrfs_get_extent test\n");
954 return test_hole_first();
955}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cac4a3f76323..c6a872a8a468 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
57 __TRANS_JOIN_NOLOCK), 57 __TRANS_JOIN_NOLOCK),
58}; 58};
59 59
60static void put_transaction(struct btrfs_transaction *transaction) 60void btrfs_put_transaction(struct btrfs_transaction *transaction)
61{ 61{
62 WARN_ON(atomic_read(&transaction->use_count) == 0); 62 WARN_ON(atomic_read(&transaction->use_count) == 0);
63 if (atomic_dec_and_test(&transaction->use_count)) { 63 if (atomic_dec_and_test(&transaction->use_count)) {
@@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root)
332 wait_event(root->fs_info->transaction_wait, 332 wait_event(root->fs_info->transaction_wait,
333 cur_trans->state >= TRANS_STATE_UNBLOCKED || 333 cur_trans->state >= TRANS_STATE_UNBLOCKED ||
334 cur_trans->aborted); 334 cur_trans->aborted);
335 put_transaction(cur_trans); 335 btrfs_put_transaction(cur_trans);
336 } else { 336 } else {
337 spin_unlock(&root->fs_info->trans_lock); 337 spin_unlock(&root->fs_info->trans_lock);
338 } 338 }
@@ -353,6 +353,17 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
353 return 0; 353 return 0;
354} 354}
355 355
356static inline bool need_reserve_reloc_root(struct btrfs_root *root)
357{
358 if (!root->fs_info->reloc_ctl ||
359 !root->ref_cows ||
360 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
361 root->reloc_root)
362 return false;
363
364 return true;
365}
366
356static struct btrfs_trans_handle * 367static struct btrfs_trans_handle *
357start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, 368start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
358 enum btrfs_reserve_flush_enum flush) 369 enum btrfs_reserve_flush_enum flush)
@@ -360,8 +371,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
360 struct btrfs_trans_handle *h; 371 struct btrfs_trans_handle *h;
361 struct btrfs_transaction *cur_trans; 372 struct btrfs_transaction *cur_trans;
362 u64 num_bytes = 0; 373 u64 num_bytes = 0;
363 int ret;
364 u64 qgroup_reserved = 0; 374 u64 qgroup_reserved = 0;
375 bool reloc_reserved = false;
376 int ret;
365 377
366 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) 378 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
367 return ERR_PTR(-EROFS); 379 return ERR_PTR(-EROFS);
@@ -390,6 +402,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
390 } 402 }
391 403
392 num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 404 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
405 /*
406 * Do the reservation for the relocation root creation
407 */
408 if (unlikely(need_reserve_reloc_root(root))) {
409 num_bytes += root->nodesize;
410 reloc_reserved = true;
411 }
412
393 ret = btrfs_block_rsv_add(root, 413 ret = btrfs_block_rsv_add(root,
394 &root->fs_info->trans_block_rsv, 414 &root->fs_info->trans_block_rsv,
395 num_bytes, flush); 415 num_bytes, flush);
@@ -451,6 +471,7 @@ again:
451 h->delayed_ref_elem.seq = 0; 471 h->delayed_ref_elem.seq = 0;
452 h->type = type; 472 h->type = type;
453 h->allocating_chunk = false; 473 h->allocating_chunk = false;
474 h->reloc_reserved = false;
454 INIT_LIST_HEAD(&h->qgroup_ref_list); 475 INIT_LIST_HEAD(&h->qgroup_ref_list);
455 INIT_LIST_HEAD(&h->new_bgs); 476 INIT_LIST_HEAD(&h->new_bgs);
456 477
@@ -466,6 +487,7 @@ again:
466 h->transid, num_bytes, 1); 487 h->transid, num_bytes, 1);
467 h->block_rsv = &root->fs_info->trans_block_rsv; 488 h->block_rsv = &root->fs_info->trans_block_rsv;
468 h->bytes_reserved = num_bytes; 489 h->bytes_reserved = num_bytes;
490 h->reloc_reserved = reloc_reserved;
469 } 491 }
470 h->qgroup_reserved = qgroup_reserved; 492 h->qgroup_reserved = qgroup_reserved;
471 493
@@ -610,7 +632,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
610 } 632 }
611 633
612 wait_for_commit(root, cur_trans); 634 wait_for_commit(root, cur_trans);
613 put_transaction(cur_trans); 635 btrfs_put_transaction(cur_trans);
614out: 636out:
615 return ret; 637 return ret;
616} 638}
@@ -735,7 +757,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
735 smp_mb(); 757 smp_mb();
736 if (waitqueue_active(&cur_trans->writer_wait)) 758 if (waitqueue_active(&cur_trans->writer_wait))
737 wake_up(&cur_trans->writer_wait); 759 wake_up(&cur_trans->writer_wait);
738 put_transaction(cur_trans); 760 btrfs_put_transaction(cur_trans);
739 761
740 if (current->journal_info == trans) 762 if (current->journal_info == trans)
741 current->journal_info = NULL; 763 current->journal_info = NULL;
@@ -744,8 +766,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
744 btrfs_run_delayed_iputs(root); 766 btrfs_run_delayed_iputs(root);
745 767
746 if (trans->aborted || 768 if (trans->aborted ||
747 test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) 769 test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
770 wake_up_process(info->transaction_kthread);
748 err = -EIO; 771 err = -EIO;
772 }
749 assert_qgroups_uptodate(trans); 773 assert_qgroups_uptodate(trans);
750 774
751 kmem_cache_free(btrfs_trans_handle_cachep, trans); 775 kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -948,16 +972,19 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
948 return ret; 972 return ret;
949 973
950 ret = btrfs_run_dev_stats(trans, root->fs_info); 974 ret = btrfs_run_dev_stats(trans, root->fs_info);
951 WARN_ON(ret); 975 if (ret)
976 return ret;
952 ret = btrfs_run_dev_replace(trans, root->fs_info); 977 ret = btrfs_run_dev_replace(trans, root->fs_info);
953 WARN_ON(ret); 978 if (ret)
954 979 return ret;
955 ret = btrfs_run_qgroups(trans, root->fs_info); 980 ret = btrfs_run_qgroups(trans, root->fs_info);
956 BUG_ON(ret); 981 if (ret)
982 return ret;
957 983
958 /* run_qgroups might have added some more refs */ 984 /* run_qgroups might have added some more refs */
959 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 985 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
960 BUG_ON(ret); 986 if (ret)
987 return ret;
961 988
962 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 989 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
963 next = fs_info->dirty_cowonly_roots.next; 990 next = fs_info->dirty_cowonly_roots.next;
@@ -1453,7 +1480,7 @@ static void do_async_commit(struct work_struct *work)
1453 * We've got freeze protection passed with the transaction. 1480 * We've got freeze protection passed with the transaction.
1454 * Tell lockdep about it. 1481 * Tell lockdep about it.
1455 */ 1482 */
1456 if (ac->newtrans->type < TRANS_JOIN_NOLOCK) 1483 if (ac->newtrans->type & __TRANS_FREEZABLE)
1457 rwsem_acquire_read( 1484 rwsem_acquire_read(
1458 &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1485 &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1459 0, 1, _THIS_IP_); 1486 0, 1, _THIS_IP_);
@@ -1494,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1494 * Tell lockdep we've released the freeze rwsem, since the 1521 * Tell lockdep we've released the freeze rwsem, since the
1495 * async commit thread will be the one to unlock it. 1522 * async commit thread will be the one to unlock it.
1496 */ 1523 */
1497 if (trans->type < TRANS_JOIN_NOLOCK) 1524 if (ac->newtrans->type & __TRANS_FREEZABLE)
1498 rwsem_release( 1525 rwsem_release(
1499 &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1526 &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1500 1, _THIS_IP_); 1527 1, _THIS_IP_);
@@ -1510,7 +1537,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1510 if (current->journal_info == trans) 1537 if (current->journal_info == trans)
1511 current->journal_info = NULL; 1538 current->journal_info = NULL;
1512 1539
1513 put_transaction(cur_trans); 1540 btrfs_put_transaction(cur_trans);
1514 return 0; 1541 return 0;
1515} 1542}
1516 1543
@@ -1552,8 +1579,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1552 root->fs_info->running_transaction = NULL; 1579 root->fs_info->running_transaction = NULL;
1553 spin_unlock(&root->fs_info->trans_lock); 1580 spin_unlock(&root->fs_info->trans_lock);
1554 1581
1555 put_transaction(cur_trans); 1582 if (trans->type & __TRANS_FREEZABLE)
1556 put_transaction(cur_trans); 1583 sb_end_intwrite(root->fs_info->sb);
1584 btrfs_put_transaction(cur_trans);
1585 btrfs_put_transaction(cur_trans);
1557 1586
1558 trace_btrfs_transaction_commit(root); 1587 trace_btrfs_transaction_commit(root);
1559 1588
@@ -1571,15 +1600,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1571 int ret; 1600 int ret;
1572 1601
1573 ret = btrfs_run_delayed_items(trans, root); 1602 ret = btrfs_run_delayed_items(trans, root);
1574 if (ret)
1575 return ret;
1576
1577 /* 1603 /*
1578 * running the delayed items may have added new refs. account 1604 * running the delayed items may have added new refs. account
1579 * them now so that they hinder processing of more delayed refs 1605 * them now so that they hinder processing of more delayed refs
1580 * as little as possible. 1606 * as little as possible.
1581 */ 1607 */
1582 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 1608 if (ret) {
1609 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1610 return ret;
1611 }
1612
1613 ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1614 if (ret)
1615 return ret;
1583 1616
1584 /* 1617 /*
1585 * rename don't use btrfs_join_transaction, so, once we 1618 * rename don't use btrfs_join_transaction, so, once we
@@ -1596,14 +1629,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1596static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1629static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1597{ 1630{
1598 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1631 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1599 return btrfs_start_all_delalloc_inodes(fs_info, 1); 1632 return btrfs_start_delalloc_roots(fs_info, 1);
1600 return 0; 1633 return 0;
1601} 1634}
1602 1635
1603static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) 1636static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1604{ 1637{
1605 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1638 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1606 btrfs_wait_all_ordered_extents(fs_info, 1); 1639 btrfs_wait_ordered_roots(fs_info, -1);
1607} 1640}
1608 1641
1609int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1642int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1669,7 +1702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1669 1702
1670 wait_for_commit(root, cur_trans); 1703 wait_for_commit(root, cur_trans);
1671 1704
1672 put_transaction(cur_trans); 1705 btrfs_put_transaction(cur_trans);
1673 1706
1674 return ret; 1707 return ret;
1675 } 1708 }
@@ -1686,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1686 1719
1687 wait_for_commit(root, prev_trans); 1720 wait_for_commit(root, prev_trans);
1688 1721
1689 put_transaction(prev_trans); 1722 btrfs_put_transaction(prev_trans);
1690 } else { 1723 } else {
1691 spin_unlock(&root->fs_info->trans_lock); 1724 spin_unlock(&root->fs_info->trans_lock);
1692 } 1725 }
@@ -1838,11 +1871,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1838 assert_qgroups_uptodate(trans); 1871 assert_qgroups_uptodate(trans);
1839 update_super_roots(root); 1872 update_super_roots(root);
1840 1873
1841 if (!root->fs_info->log_root_recovering) { 1874 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
1842 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 1875 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1843 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1844 }
1845
1846 memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, 1876 memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
1847 sizeof(*root->fs_info->super_copy)); 1877 sizeof(*root->fs_info->super_copy));
1848 1878
@@ -1888,8 +1918,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1888 list_del_init(&cur_trans->list); 1918 list_del_init(&cur_trans->list);
1889 spin_unlock(&root->fs_info->trans_lock); 1919 spin_unlock(&root->fs_info->trans_lock);
1890 1920
1891 put_transaction(cur_trans); 1921 btrfs_put_transaction(cur_trans);
1892 put_transaction(cur_trans); 1922 btrfs_put_transaction(cur_trans);
1893 1923
1894 if (trans->type & __TRANS_FREEZABLE) 1924 if (trans->type & __TRANS_FREEZABLE)
1895 sb_end_intwrite(root->fs_info->sb); 1925 sb_end_intwrite(root->fs_info->sb);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 5c2af8491621..7657d115067d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -92,6 +92,7 @@ struct btrfs_trans_handle {
92 short aborted; 92 short aborted;
93 short adding_csums; 93 short adding_csums;
94 bool allocating_chunk; 94 bool allocating_chunk;
95 bool reloc_reserved;
95 unsigned int type; 96 unsigned int type;
96 /* 97 /*
97 * this root is only needed to validate that the root passed to 98 * this root is only needed to validate that the root passed to
@@ -166,4 +167,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
166 struct extent_io_tree *dirty_pages, int mark); 167 struct extent_io_tree *dirty_pages, int mark);
167int btrfs_transaction_blocked(struct btrfs_fs_info *info); 168int btrfs_transaction_blocked(struct btrfs_fs_info *info);
168int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 169int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
170void btrfs_put_transaction(struct btrfs_transaction *transaction);
169#endif 171#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 94e05c1f118a..76928ca97741 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -37,7 +37,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
37 int ret = 0; 37 int ret = 0;
38 int wret; 38 int wret;
39 int level; 39 int level;
40 int is_extent = 0;
41 int next_key_ret = 0; 40 int next_key_ret = 0;
42 u64 last_ret = 0; 41 u64 last_ret = 0;
43 u64 min_trans = 0; 42 u64 min_trans = 0;
@@ -50,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
50 goto out; 49 goto out;
51 } 50 }
52 51
53 if (root->ref_cows == 0 && !is_extent) 52 if (root->ref_cows == 0)
54 goto out; 53 goto out;
55 54
56 if (btrfs_test_opt(root, SSD)) 55 if (btrfs_test_opt(root, SSD))
@@ -85,7 +84,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
85 84
86 path->keep_locks = 1; 85 path->keep_locks = 1;
87 86
88 ret = btrfs_search_forward(root, &key, NULL, path, min_trans); 87 ret = btrfs_search_forward(root, &key, path, min_trans);
89 if (ret < 0) 88 if (ret < 0)
90 goto out; 89 goto out;
91 if (ret > 0) { 90 if (ret > 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0d9613c3f5e5..9f7fc51ca334 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,7 +26,6 @@
26#include "locking.h" 26#include "locking.h"
27#include "print-tree.h" 27#include "print-tree.h"
28#include "backref.h" 28#include "backref.h"
29#include "compat.h"
30#include "tree-log.h" 29#include "tree-log.h"
31#include "hash.h" 30#include "hash.h"
32 31
@@ -93,7 +92,8 @@
93 */ 92 */
94#define LOG_WALK_PIN_ONLY 0 93#define LOG_WALK_PIN_ONLY 0
95#define LOG_WALK_REPLAY_INODES 1 94#define LOG_WALK_REPLAY_INODES 1
96#define LOG_WALK_REPLAY_ALL 2 95#define LOG_WALK_REPLAY_DIR_INDEX 2
96#define LOG_WALK_REPLAY_ALL 3
97 97
98static int btrfs_log_inode(struct btrfs_trans_handle *trans, 98static int btrfs_log_inode(struct btrfs_trans_handle *trans,
99 struct btrfs_root *root, struct inode *inode, 99 struct btrfs_root *root, struct inode *inode,
@@ -393,6 +393,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
393 if (inode_item) { 393 if (inode_item) {
394 struct btrfs_inode_item *item; 394 struct btrfs_inode_item *item;
395 u64 nbytes; 395 u64 nbytes;
396 u32 mode;
396 397
397 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 398 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
398 struct btrfs_inode_item); 399 struct btrfs_inode_item);
@@ -400,9 +401,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
400 item = btrfs_item_ptr(eb, slot, 401 item = btrfs_item_ptr(eb, slot,
401 struct btrfs_inode_item); 402 struct btrfs_inode_item);
402 btrfs_set_inode_nbytes(eb, item, nbytes); 403 btrfs_set_inode_nbytes(eb, item, nbytes);
404
405 /*
406 * If this is a directory we need to reset the i_size to
407 * 0 so that we can set it up properly when replaying
408 * the rest of the items in this log.
409 */
410 mode = btrfs_inode_mode(eb, item);
411 if (S_ISDIR(mode))
412 btrfs_set_inode_size(eb, item, 0);
403 } 413 }
404 } else if (inode_item) { 414 } else if (inode_item) {
405 struct btrfs_inode_item *item; 415 struct btrfs_inode_item *item;
416 u32 mode;
406 417
407 /* 418 /*
408 * New inode, set nbytes to 0 so that the nbytes comes out 419 * New inode, set nbytes to 0 so that the nbytes comes out
@@ -410,6 +421,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
410 */ 421 */
411 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 422 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
412 btrfs_set_inode_nbytes(eb, item, 0); 423 btrfs_set_inode_nbytes(eb, item, 0);
424
425 /*
426 * If this is a directory we need to reset the i_size to 0 so
427 * that we can set it up properly when replaying the rest of
428 * the items in this log.
429 */
430 mode = btrfs_inode_mode(eb, item);
431 if (S_ISDIR(mode))
432 btrfs_set_inode_size(eb, item, 0);
413 } 433 }
414insert: 434insert:
415 btrfs_release_path(path); 435 btrfs_release_path(path);
@@ -915,7 +935,7 @@ again:
915 parent_objectid, 935 parent_objectid,
916 victim_name, 936 victim_name,
917 victim_name_len)) { 937 victim_name_len)) {
918 btrfs_inc_nlink(inode); 938 inc_nlink(inode);
919 btrfs_release_path(path); 939 btrfs_release_path(path);
920 940
921 ret = btrfs_unlink_inode(trans, root, dir, 941 ret = btrfs_unlink_inode(trans, root, dir,
@@ -985,7 +1005,7 @@ again:
985 victim_parent = read_one_inode(root, 1005 victim_parent = read_one_inode(root,
986 parent_objectid); 1006 parent_objectid);
987 if (victim_parent) { 1007 if (victim_parent) {
988 btrfs_inc_nlink(inode); 1008 inc_nlink(inode);
989 btrfs_release_path(path); 1009 btrfs_release_path(path);
990 1010
991 ret = btrfs_unlink_inode(trans, root, 1011 ret = btrfs_unlink_inode(trans, root,
@@ -1092,11 +1112,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1092 struct extent_buffer *eb, int slot, 1112 struct extent_buffer *eb, int slot,
1093 struct btrfs_key *key) 1113 struct btrfs_key *key)
1094{ 1114{
1095 struct inode *dir; 1115 struct inode *dir = NULL;
1096 struct inode *inode; 1116 struct inode *inode = NULL;
1097 unsigned long ref_ptr; 1117 unsigned long ref_ptr;
1098 unsigned long ref_end; 1118 unsigned long ref_end;
1099 char *name; 1119 char *name = NULL;
1100 int namelen; 1120 int namelen;
1101 int ret; 1121 int ret;
1102 int search_done = 0; 1122 int search_done = 0;
@@ -1129,13 +1149,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1129 * care of the rest 1149 * care of the rest
1130 */ 1150 */
1131 dir = read_one_inode(root, parent_objectid); 1151 dir = read_one_inode(root, parent_objectid);
1132 if (!dir) 1152 if (!dir) {
1133 return -ENOENT; 1153 ret = -ENOENT;
1154 goto out;
1155 }
1134 1156
1135 inode = read_one_inode(root, inode_objectid); 1157 inode = read_one_inode(root, inode_objectid);
1136 if (!inode) { 1158 if (!inode) {
1137 iput(dir); 1159 ret = -EIO;
1138 return -EIO; 1160 goto out;
1139 } 1161 }
1140 1162
1141 while (ref_ptr < ref_end) { 1163 while (ref_ptr < ref_end) {
@@ -1148,14 +1170,16 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1148 */ 1170 */
1149 if (!dir) 1171 if (!dir)
1150 dir = read_one_inode(root, parent_objectid); 1172 dir = read_one_inode(root, parent_objectid);
1151 if (!dir) 1173 if (!dir) {
1152 return -ENOENT; 1174 ret = -ENOENT;
1175 goto out;
1176 }
1153 } else { 1177 } else {
1154 ret = ref_get_fields(eb, ref_ptr, &namelen, &name, 1178 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1155 &ref_index); 1179 &ref_index);
1156 } 1180 }
1157 if (ret) 1181 if (ret)
1158 return ret; 1182 goto out;
1159 1183
1160 /* if we already have a perfect match, we're done */ 1184 /* if we already have a perfect match, we're done */
1161 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1185 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
@@ -1175,12 +1199,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1175 parent_objectid, 1199 parent_objectid,
1176 ref_index, name, namelen, 1200 ref_index, name, namelen,
1177 &search_done); 1201 &search_done);
1178 if (ret == 1) { 1202 if (ret) {
1179 ret = 0; 1203 if (ret == 1)
1204 ret = 0;
1180 goto out; 1205 goto out;
1181 } 1206 }
1182 if (ret)
1183 goto out;
1184 } 1207 }
1185 1208
1186 /* insert our name */ 1209 /* insert our name */
@@ -1194,6 +1217,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1194 1217
1195 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; 1218 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
1196 kfree(name); 1219 kfree(name);
1220 name = NULL;
1197 if (log_ref_ver) { 1221 if (log_ref_ver) {
1198 iput(dir); 1222 iput(dir);
1199 dir = NULL; 1223 dir = NULL;
@@ -1204,6 +1228,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1204 ret = overwrite_item(trans, root, path, eb, slot, key); 1228 ret = overwrite_item(trans, root, path, eb, slot, key);
1205out: 1229out:
1206 btrfs_release_path(path); 1230 btrfs_release_path(path);
1231 kfree(name);
1207 iput(dir); 1232 iput(dir);
1208 iput(inode); 1233 iput(inode);
1209 return ret; 1234 return ret;
@@ -1286,6 +1311,7 @@ static int count_inode_refs(struct btrfs_root *root,
1286 break; 1311 break;
1287 path->slots[0]--; 1312 path->slots[0]--;
1288 } 1313 }
1314process_slot:
1289 btrfs_item_key_to_cpu(path->nodes[0], &key, 1315 btrfs_item_key_to_cpu(path->nodes[0], &key,
1290 path->slots[0]); 1316 path->slots[0]);
1291 if (key.objectid != ino || 1317 if (key.objectid != ino ||
@@ -1306,6 +1332,10 @@ static int count_inode_refs(struct btrfs_root *root,
1306 1332
1307 if (key.offset == 0) 1333 if (key.offset == 0)
1308 break; 1334 break;
1335 if (path->slots[0] > 0) {
1336 path->slots[0]--;
1337 goto process_slot;
1338 }
1309 key.offset--; 1339 key.offset--;
1310 btrfs_release_path(path); 1340 btrfs_release_path(path);
1311 } 1341 }
@@ -1459,7 +1489,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1459 if (!inode->i_nlink) 1489 if (!inode->i_nlink)
1460 set_nlink(inode, 1); 1490 set_nlink(inode, 1);
1461 else 1491 else
1462 btrfs_inc_nlink(inode); 1492 inc_nlink(inode);
1463 ret = btrfs_update_inode(trans, root, inode); 1493 ret = btrfs_update_inode(trans, root, inode);
1464 } else if (ret == -EEXIST) { 1494 } else if (ret == -EEXIST) {
1465 ret = 0; 1495 ret = 0;
@@ -1496,6 +1526,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1496 iput(inode); 1526 iput(inode);
1497 return -EIO; 1527 return -EIO;
1498 } 1528 }
1529
1499 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1530 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1500 1531
1501 /* FIXME, put inode into FIXUP list */ 1532 /* FIXME, put inode into FIXUP list */
@@ -1534,6 +1565,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1534 u8 log_type; 1565 u8 log_type;
1535 int exists; 1566 int exists;
1536 int ret = 0; 1567 int ret = 0;
1568 bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
1537 1569
1538 dir = read_one_inode(root, key->objectid); 1570 dir = read_one_inode(root, key->objectid);
1539 if (!dir) 1571 if (!dir)
@@ -1604,6 +1636,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1604 goto insert; 1636 goto insert;
1605out: 1637out:
1606 btrfs_release_path(path); 1638 btrfs_release_path(path);
1639 if (!ret && update_size) {
1640 btrfs_i_size_write(dir, dir->i_size + name_len * 2);
1641 ret = btrfs_update_inode(trans, root, dir);
1642 }
1607 kfree(name); 1643 kfree(name);
1608 iput(dir); 1644 iput(dir);
1609 return ret; 1645 return ret;
@@ -1614,6 +1650,7 @@ insert:
1614 name, name_len, log_type, &log_key); 1650 name, name_len, log_type, &log_key);
1615 if (ret && ret != -ENOENT) 1651 if (ret && ret != -ENOENT)
1616 goto out; 1652 goto out;
1653 update_size = false;
1617 ret = 0; 1654 ret = 0;
1618 goto out; 1655 goto out;
1619} 1656}
@@ -1795,7 +1832,7 @@ again:
1795 dir_key->offset, 1832 dir_key->offset,
1796 name, name_len, 0); 1833 name, name_len, 0);
1797 } 1834 }
1798 if (IS_ERR_OR_NULL(log_di)) { 1835 if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
1799 btrfs_dir_item_key_to_cpu(eb, di, &location); 1836 btrfs_dir_item_key_to_cpu(eb, di, &location);
1800 btrfs_release_path(path); 1837 btrfs_release_path(path);
1801 btrfs_release_path(log_path); 1838 btrfs_release_path(log_path);
@@ -1813,7 +1850,7 @@ again:
1813 goto out; 1850 goto out;
1814 } 1851 }
1815 1852
1816 btrfs_inc_nlink(inode); 1853 inc_nlink(inode);
1817 ret = btrfs_unlink_inode(trans, root, dir, inode, 1854 ret = btrfs_unlink_inode(trans, root, dir, inode,
1818 name, name_len); 1855 name, name_len);
1819 if (!ret) 1856 if (!ret)
@@ -1832,6 +1869,9 @@ again:
1832 goto again; 1869 goto again;
1833 ret = 0; 1870 ret = 0;
1834 goto out; 1871 goto out;
1872 } else if (IS_ERR(log_di)) {
1873 kfree(name);
1874 return PTR_ERR(log_di);
1835 } 1875 }
1836 btrfs_release_path(log_path); 1876 btrfs_release_path(log_path);
1837 kfree(name); 1877 kfree(name);
@@ -2027,6 +2067,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2027 if (ret) 2067 if (ret)
2028 break; 2068 break;
2029 } 2069 }
2070
2071 if (key.type == BTRFS_DIR_INDEX_KEY &&
2072 wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
2073 ret = replay_one_dir_item(wc->trans, root, path,
2074 eb, i, &key);
2075 if (ret)
2076 break;
2077 }
2078
2030 if (wc->stage < LOG_WALK_REPLAY_ALL) 2079 if (wc->stage < LOG_WALK_REPLAY_ALL)
2031 continue; 2080 continue;
2032 2081
@@ -2048,8 +2097,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2048 eb, i, &key); 2097 eb, i, &key);
2049 if (ret) 2098 if (ret)
2050 break; 2099 break;
2051 } else if (key.type == BTRFS_DIR_ITEM_KEY || 2100 } else if (key.type == BTRFS_DIR_ITEM_KEY) {
2052 key.type == BTRFS_DIR_INDEX_KEY) {
2053 ret = replay_one_dir_item(wc->trans, root, path, 2101 ret = replay_one_dir_item(wc->trans, root, path,
2054 eb, i, &key); 2102 eb, i, &key);
2055 if (ret) 2103 if (ret)
@@ -2082,8 +2130,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2082 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2130 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2083 cur = path->nodes[*level]; 2131 cur = path->nodes[*level];
2084 2132
2085 if (btrfs_header_level(cur) != *level) 2133 WARN_ON(btrfs_header_level(cur) != *level);
2086 WARN_ON(1);
2087 2134
2088 if (path->slots[*level] >= 2135 if (path->slots[*level] >=
2089 btrfs_header_nritems(cur)) 2136 btrfs_header_nritems(cur))
@@ -2115,11 +2162,13 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2115 return ret; 2162 return ret;
2116 } 2163 }
2117 2164
2118 btrfs_tree_lock(next); 2165 if (trans) {
2119 btrfs_set_lock_blocking(next); 2166 btrfs_tree_lock(next);
2120 clean_tree_block(trans, root, next); 2167 btrfs_set_lock_blocking(next);
2121 btrfs_wait_tree_block_writeback(next); 2168 clean_tree_block(trans, root, next);
2122 btrfs_tree_unlock(next); 2169 btrfs_wait_tree_block_writeback(next);
2170 btrfs_tree_unlock(next);
2171 }
2123 2172
2124 WARN_ON(root_owner != 2173 WARN_ON(root_owner !=
2125 BTRFS_TREE_LOG_OBJECTID); 2174 BTRFS_TREE_LOG_OBJECTID);
@@ -2191,11 +2240,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2191 2240
2192 next = path->nodes[*level]; 2241 next = path->nodes[*level];
2193 2242
2194 btrfs_tree_lock(next); 2243 if (trans) {
2195 btrfs_set_lock_blocking(next); 2244 btrfs_tree_lock(next);
2196 clean_tree_block(trans, root, next); 2245 btrfs_set_lock_blocking(next);
2197 btrfs_wait_tree_block_writeback(next); 2246 clean_tree_block(trans, root, next);
2198 btrfs_tree_unlock(next); 2247 btrfs_wait_tree_block_writeback(next);
2248 btrfs_tree_unlock(next);
2249 }
2199 2250
2200 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 2251 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
2201 ret = btrfs_free_and_pin_reserved_extent(root, 2252 ret = btrfs_free_and_pin_reserved_extent(root,
@@ -2265,11 +2316,13 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2265 2316
2266 next = path->nodes[orig_level]; 2317 next = path->nodes[orig_level];
2267 2318
2268 btrfs_tree_lock(next); 2319 if (trans) {
2269 btrfs_set_lock_blocking(next); 2320 btrfs_tree_lock(next);
2270 clean_tree_block(trans, log, next); 2321 btrfs_set_lock_blocking(next);
2271 btrfs_wait_tree_block_writeback(next); 2322 clean_tree_block(trans, log, next);
2272 btrfs_tree_unlock(next); 2323 btrfs_wait_tree_block_writeback(next);
2324 btrfs_tree_unlock(next);
2325 }
2273 2326
2274 WARN_ON(log->root_key.objectid != 2327 WARN_ON(log->root_key.objectid !=
2275 BTRFS_TREE_LOG_OBJECTID); 2328 BTRFS_TREE_LOG_OBJECTID);
@@ -2535,9 +2588,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2535 * the running transaction open, so a full commit can't hop 2588 * the running transaction open, so a full commit can't hop
2536 * in and cause problems either. 2589 * in and cause problems either.
2537 */ 2590 */
2538 btrfs_scrub_pause_super(root);
2539 ret = write_ctree_super(trans, root->fs_info->tree_root, 1); 2591 ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
2540 btrfs_scrub_continue_super(root);
2541 if (ret) { 2592 if (ret) {
2542 btrfs_abort_transaction(trans, root, ret); 2593 btrfs_abort_transaction(trans, root, ret);
2543 goto out_wake_log_root; 2594 goto out_wake_log_root;
@@ -2572,13 +2623,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2572 .process_func = process_one_buffer 2623 .process_func = process_one_buffer
2573 }; 2624 };
2574 2625
2575 if (trans) { 2626 ret = walk_log_tree(trans, log, &wc);
2576 ret = walk_log_tree(trans, log, &wc); 2627 /* I don't think this can happen but just in case */
2577 2628 if (ret)
2578 /* I don't think this can happen but just in case */ 2629 btrfs_abort_transaction(trans, log, ret);
2579 if (ret)
2580 btrfs_abort_transaction(trans, log, ret);
2581 }
2582 2630
2583 while (1) { 2631 while (1) {
2584 ret = find_first_extent_bit(&log->dirty_log_pages, 2632 ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2831,7 +2879,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2831 u64 min_offset, u64 *last_offset_ret) 2879 u64 min_offset, u64 *last_offset_ret)
2832{ 2880{
2833 struct btrfs_key min_key; 2881 struct btrfs_key min_key;
2834 struct btrfs_key max_key;
2835 struct btrfs_root *log = root->log_root; 2882 struct btrfs_root *log = root->log_root;
2836 struct extent_buffer *src; 2883 struct extent_buffer *src;
2837 int err = 0; 2884 int err = 0;
@@ -2843,9 +2890,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2843 u64 ino = btrfs_ino(inode); 2890 u64 ino = btrfs_ino(inode);
2844 2891
2845 log = root->log_root; 2892 log = root->log_root;
2846 max_key.objectid = ino;
2847 max_key.offset = (u64)-1;
2848 max_key.type = key_type;
2849 2893
2850 min_key.objectid = ino; 2894 min_key.objectid = ino;
2851 min_key.type = key_type; 2895 min_key.type = key_type;
@@ -2853,8 +2897,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2853 2897
2854 path->keep_locks = 1; 2898 path->keep_locks = 1;
2855 2899
2856 ret = btrfs_search_forward(root, &min_key, &max_key, 2900 ret = btrfs_search_forward(root, &min_key, path, trans->transid);
2857 path, trans->transid);
2858 2901
2859 /* 2902 /*
2860 * we didn't find anything from this transaction, see if there 2903 * we didn't find anything from this transaction, see if there
@@ -2907,10 +2950,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2907 2950
2908 /* find the first key from this transaction again */ 2951 /* find the first key from this transaction again */
2909 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2952 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2910 if (ret != 0) { 2953 if (WARN_ON(ret != 0))
2911 WARN_ON(1);
2912 goto done; 2954 goto done;
2913 }
2914 2955
2915 /* 2956 /*
2916 * we have a block from this transaction, log every item in it 2957 * we have a block from this transaction, log every item in it
@@ -3136,11 +3177,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3136 struct inode *inode) 3177 struct inode *inode)
3137{ 3178{
3138 struct btrfs_inode_item *inode_item; 3179 struct btrfs_inode_item *inode_item;
3139 struct btrfs_key key;
3140 int ret; 3180 int ret;
3141 3181
3142 memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); 3182 ret = btrfs_insert_empty_item(trans, log, path,
3143 ret = btrfs_insert_empty_item(trans, log, path, &key, 3183 &BTRFS_I(inode)->location,
3144 sizeof(*inode_item)); 3184 sizeof(*inode_item));
3145 if (ret && ret != -EEXIST) 3185 if (ret && ret != -EEXIST)
3146 return ret; 3186 return ret;
@@ -3339,7 +3379,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3339 btrfs_set_token_file_extent_type(leaf, fi, 3379 btrfs_set_token_file_extent_type(leaf, fi,
3340 BTRFS_FILE_EXTENT_REG, 3380 BTRFS_FILE_EXTENT_REG,
3341 &token); 3381 &token);
3342 if (em->block_start == 0) 3382 if (em->block_start == EXTENT_MAP_HOLE)
3343 skip_csum = true; 3383 skip_csum = true;
3344 } 3384 }
3345 3385
@@ -3381,11 +3421,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3381 if (skip_csum) 3421 if (skip_csum)
3382 return 0; 3422 return 0;
3383 3423
3384 if (em->compress_type) {
3385 csum_offset = 0;
3386 csum_len = block_len;
3387 }
3388
3389 /* 3424 /*
3390 * First check and see if our csums are on our outstanding ordered 3425 * First check and see if our csums are on our outstanding ordered
3391 * extents. 3426 * extents.
@@ -3469,8 +3504,13 @@ unlocked:
3469 if (!mod_len || ret) 3504 if (!mod_len || ret)
3470 return ret; 3505 return ret;
3471 3506
3472 csum_offset = mod_start - em->start; 3507 if (em->compress_type) {
3473 csum_len = mod_len; 3508 csum_offset = 0;
3509 csum_len = block_len;
3510 } else {
3511 csum_offset = mod_start - em->start;
3512 csum_len = mod_len;
3513 }
3474 3514
3475 /* block start is already adjusted for the file extent offset. */ 3515 /* block start is already adjusted for the file extent offset. */
3476 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3516 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
@@ -3657,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3657 ret = btrfs_truncate_inode_items(trans, log, 3697 ret = btrfs_truncate_inode_items(trans, log,
3658 inode, 0, 0); 3698 inode, 0, 0);
3659 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 3699 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
3660 &BTRFS_I(inode)->runtime_flags)) { 3700 &BTRFS_I(inode)->runtime_flags) ||
3701 inode_only == LOG_INODE_EXISTS) {
3661 if (inode_only == LOG_INODE_ALL) 3702 if (inode_only == LOG_INODE_ALL)
3662 fast_search = true; 3703 fast_search = true;
3663 max_key.type = BTRFS_XATTR_ITEM_KEY; 3704 max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3683,7 +3724,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3683 3724
3684 while (1) { 3725 while (1) {
3685 ins_nr = 0; 3726 ins_nr = 0;
3686 ret = btrfs_search_forward(root, &min_key, &max_key, 3727 ret = btrfs_search_forward(root, &min_key,
3687 path, trans->transid); 3728 path, trans->transid);
3688 if (ret != 0) 3729 if (ret != 0)
3689 break; 3730 break;
@@ -3733,14 +3774,14 @@ next_slot:
3733 } 3774 }
3734 btrfs_release_path(path); 3775 btrfs_release_path(path);
3735 3776
3736 if (min_key.offset < (u64)-1) 3777 if (min_key.offset < (u64)-1) {
3737 min_key.offset++; 3778 min_key.offset++;
3738 else if (min_key.type < (u8)-1) 3779 } else if (min_key.type < max_key.type) {
3739 min_key.type++; 3780 min_key.type++;
3740 else if (min_key.objectid < (u64)-1) 3781 min_key.offset = 0;
3741 min_key.objectid++; 3782 } else {
3742 else
3743 break; 3783 break;
3784 }
3744 } 3785 }
3745 if (ins_nr) { 3786 if (ins_nr) {
3746 ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 3787 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
@@ -3761,7 +3802,7 @@ log_extents:
3761 err = ret; 3802 err = ret;
3762 goto out_unlock; 3803 goto out_unlock;
3763 } 3804 }
3764 } else { 3805 } else if (inode_only == LOG_INODE_ALL) {
3765 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 3806 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3766 struct extent_map *em, *n; 3807 struct extent_map *em, *n;
3767 3808
@@ -3805,6 +3846,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3805 int ret = 0; 3846 int ret = 0;
3806 struct btrfs_root *root; 3847 struct btrfs_root *root;
3807 struct dentry *old_parent = NULL; 3848 struct dentry *old_parent = NULL;
3849 struct inode *orig_inode = inode;
3808 3850
3809 /* 3851 /*
3810 * for regular files, if its inode is already on disk, we don't 3852 * for regular files, if its inode is already on disk, we don't
@@ -3824,7 +3866,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3824 } 3866 }
3825 3867
3826 while (1) { 3868 while (1) {
3827 BTRFS_I(inode)->logged_trans = trans->transid; 3869 /*
3870 * If we are logging a directory then we start with our inode,
3871 * not our parent's inode, so we need to skip setting the
3872 * logged_trans so that further down in the log code we don't
3873 * think this inode has already been logged.
3874 */
3875 if (inode != orig_inode)
3876 BTRFS_I(inode)->logged_trans = trans->transid;
3828 smp_mb(); 3877 smp_mb();
3829 3878
3830 if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 3879 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index dd0dea3766f7..fbda90004fe9 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -260,7 +260,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
260{ 260{
261 struct btrfs_root *root = fs_info->uuid_root; 261 struct btrfs_root *root = fs_info->uuid_root;
262 struct btrfs_key key; 262 struct btrfs_key key;
263 struct btrfs_key max_key;
264 struct btrfs_path *path; 263 struct btrfs_path *path;
265 int ret = 0; 264 int ret = 0;
266 struct extent_buffer *leaf; 265 struct extent_buffer *leaf;
@@ -277,13 +276,10 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
277 key.objectid = 0; 276 key.objectid = 0;
278 key.type = 0; 277 key.type = 0;
279 key.offset = 0; 278 key.offset = 0;
280 max_key.objectid = (u64)-1;
281 max_key.type = (u8)-1;
282 max_key.offset = (u64)-1;
283 279
284again_search_slot: 280again_search_slot:
285 path->keep_locks = 1; 281 path->keep_locks = 1;
286 ret = btrfs_search_forward(root, &key, &max_key, path, 0); 282 ret = btrfs_search_forward(root, &key, path, 0);
287 if (ret) { 283 if (ret) {
288 if (ret > 0) 284 if (ret > 0)
289 ret = 0; 285 ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0052ca8264d9..92303f42baaa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -28,7 +28,6 @@
28#include <linux/raid/pq.h> 28#include <linux/raid/pq.h>
29#include <linux/semaphore.h> 29#include <linux/semaphore.h>
30#include <asm/div64.h> 30#include <asm/div64.h>
31#include "compat.h"
32#include "ctree.h" 31#include "ctree.h"
33#include "extent_map.h" 32#include "extent_map.h"
34#include "disk-io.h" 33#include "disk-io.h"
@@ -666,7 +665,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
666 if (device->bdev) 665 if (device->bdev)
667 fs_devices->open_devices--; 666 fs_devices->open_devices--;
668 667
669 if (device->writeable && !device->is_tgtdev_for_dev_replace) { 668 if (device->writeable &&
669 device->devid != BTRFS_DEV_REPLACE_DEVID) {
670 list_del_init(&device->dev_alloc_list); 670 list_del_init(&device->dev_alloc_list);
671 fs_devices->rw_devices--; 671 fs_devices->rw_devices--;
672 } 672 }
@@ -796,7 +796,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
796 fs_devices->rotating = 1; 796 fs_devices->rotating = 1;
797 797
798 fs_devices->open_devices++; 798 fs_devices->open_devices++;
799 if (device->writeable && !device->is_tgtdev_for_dev_replace) { 799 if (device->writeable &&
800 device->devid != BTRFS_DEV_REPLACE_DEVID) {
800 fs_devices->rw_devices++; 801 fs_devices->rw_devices++;
801 list_add(&device->dev_alloc_list, 802 list_add(&device->dev_alloc_list,
802 &fs_devices->alloc_list); 803 &fs_devices->alloc_list);
@@ -911,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
911 if (disk_super->label[0]) { 912 if (disk_super->label[0]) {
912 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 913 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
913 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
914 printk(KERN_INFO "device label %s ", disk_super->label); 915 printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
915 } else { 916 } else {
916 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 917 printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
917 } 918 }
918 919
919 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); 920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@ -1715,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1715 struct btrfs_device *srcdev) 1716 struct btrfs_device *srcdev)
1716{ 1717{
1717 WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); 1718 WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
1719
1718 list_del_rcu(&srcdev->dev_list); 1720 list_del_rcu(&srcdev->dev_list);
1719 list_del_rcu(&srcdev->dev_alloc_list); 1721 list_del_rcu(&srcdev->dev_alloc_list);
1720 fs_info->fs_devices->num_devices--; 1722 fs_info->fs_devices->num_devices--;
@@ -1724,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1724 } 1726 }
1725 if (srcdev->can_discard) 1727 if (srcdev->can_discard)
1726 fs_info->fs_devices->num_can_discard--; 1728 fs_info->fs_devices->num_can_discard--;
1727 if (srcdev->bdev) 1729 if (srcdev->bdev) {
1728 fs_info->fs_devices->open_devices--; 1730 fs_info->fs_devices->open_devices--;
1729 1731
1732 /* zero out the old super */
1733 btrfs_scratch_superblock(srcdev);
1734 }
1735
1730 call_rcu(&srcdev->rcu, free_device); 1736 call_rcu(&srcdev->rcu, free_device);
1731} 1737}
1732 1738
@@ -2035,6 +2041,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
2035 device->in_fs_metadata = 1; 2041 device->in_fs_metadata = 1;
2036 device->is_tgtdev_for_dev_replace = 0; 2042 device->is_tgtdev_for_dev_replace = 0;
2037 device->mode = FMODE_EXCL; 2043 device->mode = FMODE_EXCL;
2044 device->dev_stats_valid = 1;
2038 set_blocksize(device->bdev, 4096); 2045 set_blocksize(device->bdev, 4096);
2039 2046
2040 if (seeding_dev) { 2047 if (seeding_dev) {
@@ -2202,6 +2209,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2202 device->in_fs_metadata = 1; 2209 device->in_fs_metadata = 1;
2203 device->is_tgtdev_for_dev_replace = 1; 2210 device->is_tgtdev_for_dev_replace = 1;
2204 device->mode = FMODE_EXCL; 2211 device->mode = FMODE_EXCL;
2212 device->dev_stats_valid = 1;
2205 set_blocksize(device->bdev, 4096); 2213 set_blocksize(device->bdev, 4096);
2206 device->fs_devices = fs_info->fs_devices; 2214 device->fs_devices = fs_info->fs_devices;
2207 list_add(&device->dev_list, &fs_info->fs_devices->devices); 2215 list_add(&device->dev_list, &fs_info->fs_devices->devices);
@@ -2544,8 +2552,7 @@ again:
2544 failed = 0; 2552 failed = 0;
2545 retried = true; 2553 retried = true;
2546 goto again; 2554 goto again;
2547 } else if (failed && retried) { 2555 } else if (WARN_ON(failed && retried)) {
2548 WARN_ON(1);
2549 ret = -ENOSPC; 2556 ret = -ENOSPC;
2550 } 2557 }
2551error: 2558error:
@@ -3417,6 +3424,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
3417 3424
3418int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) 3425int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
3419{ 3426{
3427 if (fs_info->sb->s_flags & MS_RDONLY)
3428 return -EROFS;
3429
3420 mutex_lock(&fs_info->balance_mutex); 3430 mutex_lock(&fs_info->balance_mutex);
3421 if (!fs_info->balance_ctl) { 3431 if (!fs_info->balance_ctl) {
3422 mutex_unlock(&fs_info->balance_mutex); 3432 mutex_unlock(&fs_info->balance_mutex);
@@ -3482,7 +3492,7 @@ static int btrfs_uuid_scan_kthread(void *data)
3482 path->keep_locks = 1; 3492 path->keep_locks = 1;
3483 3493
3484 while (1) { 3494 while (1) {
3485 ret = btrfs_search_forward(root, &key, &max_key, path, 0); 3495 ret = btrfs_search_forward(root, &key, path, 0);
3486 if (ret) { 3496 if (ret) {
3487 if (ret > 0) 3497 if (ret > 0)
3488 ret = 0; 3498 ret = 0;
@@ -4482,6 +4492,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4482 btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " 4492 btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
4483 "%Lu-%Lu\n", logical, logical+len, em->start, 4493 "%Lu-%Lu\n", logical, logical+len, em->start,
4484 em->start + em->len); 4494 em->start + em->len);
4495 free_extent_map(em);
4485 return 1; 4496 return 1;
4486 } 4497 }
4487 4498
@@ -4662,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4662 btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " 4673 btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
4663 "found %Lu-%Lu\n", logical, em->start, 4674 "found %Lu-%Lu\n", logical, em->start,
4664 em->start + em->len); 4675 em->start + em->len);
4676 free_extent_map(em);
4665 return -EINVAL; 4677 return -EINVAL;
4666 } 4678 }
4667 4679
@@ -4889,7 +4901,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4889 num_stripes = map->num_stripes; 4901 num_stripes = map->num_stripes;
4890 max_errors = nr_parity_stripes(map); 4902 max_errors = nr_parity_stripes(map);
4891 4903
4892 raid_map = kmalloc(sizeof(u64) * num_stripes, 4904 raid_map = kmalloc_array(num_stripes, sizeof(u64),
4893 GFP_NOFS); 4905 GFP_NOFS);
4894 if (!raid_map) { 4906 if (!raid_map) {
4895 ret = -ENOMEM; 4907 ret = -ENOMEM;
@@ -5382,17 +5394,15 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
5382{ 5394{
5383 struct bio_vec *prev; 5395 struct bio_vec *prev;
5384 struct request_queue *q = bdev_get_queue(bdev); 5396 struct request_queue *q = bdev_get_queue(bdev);
5385 unsigned short max_sectors = queue_max_sectors(q); 5397 unsigned int max_sectors = queue_max_sectors(q);
5386 struct bvec_merge_data bvm = { 5398 struct bvec_merge_data bvm = {
5387 .bi_bdev = bdev, 5399 .bi_bdev = bdev,
5388 .bi_sector = sector, 5400 .bi_sector = sector,
5389 .bi_rw = bio->bi_rw, 5401 .bi_rw = bio->bi_rw,
5390 }; 5402 };
5391 5403
5392 if (bio->bi_vcnt == 0) { 5404 if (WARN_ON(bio->bi_vcnt == 0))
5393 WARN_ON(1);
5394 return 1; 5405 return 1;
5395 }
5396 5406
5397 prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; 5407 prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
5398 if (bio_sectors(bio) > max_sectors) 5408 if (bio_sectors(bio) > max_sectors)
@@ -5625,10 +5635,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5625 struct btrfs_device *dev; 5635 struct btrfs_device *dev;
5626 u64 tmp; 5636 u64 tmp;
5627 5637
5628 if (!devid && !fs_info) { 5638 if (WARN_ON(!devid && !fs_info))
5629 WARN_ON(1);
5630 return ERR_PTR(-EINVAL); 5639 return ERR_PTR(-EINVAL);
5631 }
5632 5640
5633 dev = __alloc_device(); 5641 dev = __alloc_device();
5634 if (IS_ERR(dev)) 5642 if (IS_ERR(dev))
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b72f540c8b29..8b3cd142b373 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -43,9 +43,8 @@ struct btrfs_device {
43 /* WRITE_SYNC bios */ 43 /* WRITE_SYNC bios */
44 struct btrfs_pending_bios pending_sync_bios; 44 struct btrfs_pending_bios pending_sync_bios;
45 45
46 int running_pending;
47 u64 generation; 46 u64 generation;
48 47 int running_pending;
49 int writeable; 48 int writeable;
50 int in_fs_metadata; 49 int in_fs_metadata;
51 int missing; 50 int missing;
@@ -53,11 +52,11 @@ struct btrfs_device {
53 int is_tgtdev_for_dev_replace; 52 int is_tgtdev_for_dev_replace;
54 53
55 spinlock_t io_lock; 54 spinlock_t io_lock;
55 /* the mode sent to blkdev_get */
56 fmode_t mode;
56 57
57 struct block_device *bdev; 58 struct block_device *bdev;
58 59
59 /* the mode sent to blkdev_get */
60 fmode_t mode;
61 60
62 struct rcu_string *name; 61 struct rcu_string *name;
63 62
@@ -78,16 +77,21 @@ struct btrfs_device {
78 77
79 /* optimal io width for this device */ 78 /* optimal io width for this device */
80 u32 io_width; 79 u32 io_width;
80 /* type and info about this device */
81 u64 type;
81 82
82 /* minimal io size for this device */ 83 /* minimal io size for this device */
83 u32 sector_size; 84 u32 sector_size;
84 85
85 /* type and info about this device */
86 u64 type;
87 86
88 /* physical drive uuid (or lvm uuid) */ 87 /* physical drive uuid (or lvm uuid) */
89 u8 uuid[BTRFS_UUID_SIZE]; 88 u8 uuid[BTRFS_UUID_SIZE];
90 89
90 /* for sending down flush barriers */
91 int nobarriers;
92 struct bio *flush_bio;
93 struct completion flush_wait;
94
91 /* per-device scrub information */ 95 /* per-device scrub information */
92 struct scrub_ctx *scrub_device; 96 struct scrub_ctx *scrub_device;
93 97
@@ -103,10 +107,6 @@ struct btrfs_device {
103 struct radix_tree_root reada_zones; 107 struct radix_tree_root reada_zones;
104 struct radix_tree_root reada_extents; 108 struct radix_tree_root reada_extents;
105 109
106 /* for sending down flush barriers */
107 struct bio *flush_bio;
108 struct completion flush_wait;
109 int nobarriers;
110 110
111 /* disk I/O failure stats. For detailed description refer to 111 /* disk I/O failure stats. For detailed description refer to
112 * enum btrfs_dev_stat_values in ioctl.h */ 112 * enum btrfs_dev_stat_values in ioctl.h */
@@ -132,7 +132,9 @@ struct btrfs_fs_devices {
132 132
133 /* all of the devices in the FS, protected by a mutex 133 /* all of the devices in the FS, protected by a mutex
134 * so we can safely walk it to write out the supers without 134 * so we can safely walk it to write out the supers without
135 * worrying about add/remove by the multi-device code 135 * worrying about add/remove by the multi-device code.
136 * Scrubbing super can kick off supers writing by holding
137 * this mutex lock.
136 */ 138 */
137 struct mutex device_list_mutex; 139 struct mutex device_list_mutex;
138 struct list_head devices; 140 struct list_head devices;