Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Makefile            |    2
-rw-r--r--  fs/btrfs/acl.c               |    2
-rw-r--r--  fs/btrfs/btrfs_inode.h       |   15
-rw-r--r--  fs/btrfs/compression.c       |   47
-rw-r--r--  fs/btrfs/compression.h       |    2
-rw-r--r--  fs/btrfs/ctree.c             |   51
-rw-r--r--  fs/btrfs/ctree.h             |  244
-rw-r--r--  fs/btrfs/delayed-inode.c     | 1694
-rw-r--r--  fs/btrfs/delayed-inode.h     |  141
-rw-r--r--  fs/btrfs/delayed-ref.c       |  114
-rw-r--r--  fs/btrfs/delayed-ref.h       |    6
-rw-r--r--  fs/btrfs/dir-item.c          |   39
-rw-r--r--  fs/btrfs/disk-io.c           |  210
-rw-r--r--  fs/btrfs/disk-io.h           |   19
-rw-r--r--  fs/btrfs/export.c            |   25
-rw-r--r--  fs/btrfs/extent-tree.c       | 1799
-rw-r--r--  fs/btrfs/extent_io.c         |  324
-rw-r--r--  fs/btrfs/extent_io.h         |   40
-rw-r--r--  fs/btrfs/extent_map.c        |    8
-rw-r--r--  fs/btrfs/extent_map.h        |    4
-rw-r--r--  fs/btrfs/file-item.c         |   38
-rw-r--r--  fs/btrfs/file.c              |  302
-rw-r--r--  fs/btrfs/free-space-cache.c  |  993
-rw-r--r--  fs/btrfs/free-space-cache.h  |   48
-rw-r--r--  fs/btrfs/inode-item.c        |    2
-rw-r--r--  fs/btrfs/inode-map.c         |  444
-rw-r--r--  fs/btrfs/inode-map.h         |   13
-rw-r--r--  fs/btrfs/inode.c             |  702
-rw-r--r--  fs/btrfs/ioctl.c             |  624
-rw-r--r--  fs/btrfs/ioctl.h             |  107
-rw-r--r--  fs/btrfs/locking.c           |   25
-rw-r--r--  fs/btrfs/locking.h           |    2
-rw-r--r--  fs/btrfs/ref-cache.c         |  164
-rw-r--r--  fs/btrfs/ref-cache.h         |   24
-rw-r--r--  fs/btrfs/relocation.c        |   67
-rw-r--r--  fs/btrfs/root-tree.c         |   61
-rw-r--r--  fs/btrfs/scrub.c             | 1369
-rw-r--r--  fs/btrfs/super.c             |   51
-rw-r--r--  fs/btrfs/sysfs.c             |   77
-rw-r--r--  fs/btrfs/transaction.c       |  196
-rw-r--r--  fs/btrfs/transaction.h       |    5
-rw-r--r--  fs/btrfs/tree-defrag.c       |    2
-rw-r--r--  fs/btrfs/tree-log.c          |  208
-rw-r--r--  fs/btrfs/tree-log.h          |    1
-rw-r--r--  fs/btrfs/version.sh          |   43
-rw-r--r--  fs/btrfs/volumes.c           |  657
-rw-r--r--  fs/btrfs/volumes.h           |   27
-rw-r--r--  fs/btrfs/xattr.c             |   12
48 files changed, 6488 insertions(+), 4562 deletions(-)
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 31610ea73aec..9b72dcf1cd25 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
-	compression.o delayed-ref.o relocation.o
+	compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 44ea5b92e1ba..f66fc9959733 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -288,7 +288,7 @@ int btrfs_acl_chmod(struct inode *inode)
 		return 0;
 
 	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
+	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
 	clone = posix_acl_clone(acl, GFP_KERNEL);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4bc852d3b83d..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -22,6 +22,7 @@
 #include "extent_map.h"
 #include "extent_io.h"
 #include "ordered-data.h"
+#include "delayed-inode.h"
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -149,20 +150,34 @@ struct btrfs_inode {
 	unsigned ordered_data_close:1;
 	unsigned orphan_meta_reserved:1;
 	unsigned dummy_inode:1;
+	unsigned in_defrag:1;
 
 	/*
 	 * always compress this one file
 	 */
 	unsigned force_compress:4;
 
+	struct btrfs_delayed_node *delayed_node;
+
 	struct inode vfs_inode;
 };
 
+extern unsigned char btrfs_filetype_table[];
+
 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
 {
 	return container_of(inode, struct btrfs_inode, vfs_inode);
 }
 
+static inline u64 btrfs_ino(struct inode *inode)
+{
+	u64 ino = BTRFS_I(inode)->location.objectid;
+
+	if (ino <= BTRFS_FIRST_FREE_OBJECTID)
+		ino = inode->i_ino;
+	return ino;
+}
+
 static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 {
 	i_size_write(inode, size);
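Note: the new btrfs_ino() helper above drives most of the mechanical churn in the files that follow. Inode numbers move from inode->i_ino, an unsigned long that truncates on 32-bit hosts, to the 64-bit objectid stored in the inode's location key, with a fallback to i_ino for objectids at or below BTRFS_FIRST_FREE_OBJECTID. A minimal sketch of the call-site conversion, mirroring the compression.c hunk below (the caller here is hypothetical):

	/* before: printk(KERN_INFO "... ino %lu ...", inode->i_ino); */
	printk(KERN_INFO "btrfs: ino %llu\n",
	       (unsigned long long)btrfs_ino(inode));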
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 41d1d7c70e29..bfe42b03eaf9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -125,9 +125,10 @@ static int check_compressed_csum(struct inode *inode,
 		kunmap_atomic(kaddr, KM_USER0);
 
 		if (csum != *cb_sum) {
-			printk(KERN_INFO "btrfs csum failed ino %lu "
+			printk(KERN_INFO "btrfs csum failed ino %llu "
 			       "extent %llu csum %u "
-			       "wanted %u mirror %d\n", inode->i_ino,
+			       "wanted %u mirror %d\n",
+			       (unsigned long long)btrfs_ino(inode),
 			       (unsigned long long)disk_start,
 			       csum, *cb_sum, cb->mirror_num);
 			ret = -EIO;
@@ -332,7 +333,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct compressed_bio *cb;
 	unsigned long bytes_left;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	int page_index = 0;
+	int pg_index = 0;
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
@@ -366,8 +367,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
-	for (page_index = 0; page_index < cb->nr_pages; page_index++) {
-		page = compressed_pages[page_index];
+	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_size)
 			ret = io_tree->ops->merge_bio_hook(page, 0,
@@ -432,7 +433,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 				 struct compressed_bio *cb)
 {
 	unsigned long end_index;
-	unsigned long page_index;
+	unsigned long pg_index;
 	u64 last_offset;
 	u64 isize = i_size_read(inode);
 	int ret;
@@ -456,13 +457,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
 	while (last_offset < compressed_end) {
-		page_index = last_offset >> PAGE_CACHE_SHIFT;
+		pg_index = last_offset >> PAGE_CACHE_SHIFT;
 
-		if (page_index > end_index)
+		if (pg_index > end_index)
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, page_index);
+		page = radix_tree_lookup(&mapping->page_tree, pg_index);
 		rcu_read_unlock();
 		if (page) {
 			misses++;
@@ -476,7 +477,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		if (!page)
 			break;
 
-		if (add_to_page_cache_lru(page, mapping, page_index,
+		if (add_to_page_cache_lru(page, mapping, pg_index,
 					  GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
@@ -560,7 +561,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
-	unsigned long page_index;
+	unsigned long pg_index;
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
@@ -613,10 +614,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
-		if (!cb->compressed_pages[page_index])
+		if (!cb->compressed_pages[pg_index])
 			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
@@ -634,8 +635,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		page = cb->compressed_pages[page_index];
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
@@ -702,8 +703,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	return 0;
 
 fail2:
-	for (page_index = 0; page_index < nr_pages; page_index++)
-		free_page((unsigned long)cb->compressed_pages[page_index]);
+	for (pg_index = 0; pg_index < nr_pages; pg_index++)
+		free_page((unsigned long)cb->compressed_pages[pg_index]);
 
 	kfree(cb->compressed_pages);
 fail1:
@@ -945,7 +946,7 @@ void btrfs_exit_compress(void)
 int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio_vec *bvec, int vcnt,
-			      unsigned long *page_index,
+			      unsigned long *pg_index,
 			      unsigned long *pg_offset)
 {
 	unsigned long buf_offset;
@@ -954,7 +955,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 	unsigned long working_bytes = total_out - buf_start;
 	unsigned long bytes;
 	char *kaddr;
-	struct page *page_out = bvec[*page_index].bv_page;
+	struct page *page_out = bvec[*pg_index].bv_page;
 
 	/*
 	 * start byte is the first byte of the page we're currently
@@ -995,11 +996,11 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 
 	/* check if we need to pick another page */
 	if (*pg_offset == PAGE_CACHE_SIZE) {
-		(*page_index)++;
-		if (*page_index >= vcnt)
+		(*pg_index)++;
+		if (*pg_index >= vcnt)
 			return 0;
 
-		page_out = bvec[*page_index].bv_page;
+		page_out = bvec[*pg_index].bv_page;
 		*pg_offset = 0;
 		start_byte = page_offset(page_out) - disk_start;
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 51000174b9d7..a12059f4f0fd 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -37,7 +37,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio_vec *bvec, int vcnt,
-			      unsigned long *page_index,
+			      unsigned long *pg_index,
 			      unsigned long *pg_offset);
 
 int btrfs_submit_compressed_write(struct inode *inode, u64 start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f61c16c1481a..d84089349c82 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,11 +38,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
-static int setup_items_for_insert(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *cpu_key, u32 *data_size,
-			u32 total_data, u32 total_size, int nr);
-
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -105,7 +100,7 @@ void btrfs_free_path(struct btrfs_path *p)
 {
 	if (!p)
 		return;
-	btrfs_release_path(NULL, p);
+	btrfs_release_path(p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
 
@@ -115,7 +110,7 @@ void btrfs_free_path(struct btrfs_path *p)
  *
  * It is safe to call this on paths that no locks or extent buffers held.
  */
-noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+noinline void btrfs_release_path(struct btrfs_path *p)
 {
 	int i;
 
@@ -1345,7 +1340,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 		ret = -EAGAIN;
 
 	/* release the whole path */
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	/* read the blocks */
 	if (block1)
@@ -1492,7 +1487,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 			return 0;
 		}
 		free_extent_buffer(tmp);
-		btrfs_release_path(NULL, p);
+		btrfs_release_path(p);
 		return -EIO;
 	}
 }
@@ -1511,7 +1506,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		if (p->reada)
 			reada_for_search(root, p, level, slot, key->objectid);
 
-		btrfs_release_path(NULL, p);
+		btrfs_release_path(p);
 
 		ret = -EAGAIN;
 		tmp = read_tree_block(root, blocknr, blocksize, 0);
@@ -1580,7 +1575,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		}
 		b = p->nodes[level];
 		if (!b) {
-			btrfs_release_path(NULL, p);
+			btrfs_release_path(p);
 			goto again;
 		}
 		BUG_ON(btrfs_header_nritems(b) == 1);
@@ -1767,7 +1762,7 @@ done:
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
 	if (ret < 0)
-		btrfs_release_path(root, p);
+		btrfs_release_path(p);
 	return ret;
 }
 
@@ -3040,7 +3035,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 				    struct btrfs_file_extent_item);
 		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
 	}
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	path->keep_locks = 1;
 	path->search_for_split = 1;
@@ -3230,7 +3225,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_path *path,
 			  u32 new_size, int from_end)
 {
-	int ret = 0;
 	int slot;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
@@ -3328,12 +3322,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	btrfs_set_item_size(leaf, item, new_size);
 	btrfs_mark_buffer_dirty(leaf);
 
-	ret = 0;
 	if (btrfs_leaf_free_space(root, leaf) < 0) {
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
-	return ret;
+	return 0;
 }
 
 /*
@@ -3343,7 +3336,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, struct btrfs_path *path,
 			  u32 data_size)
 {
-	int ret = 0;
 	int slot;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
@@ -3408,12 +3400,11 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 	btrfs_set_item_size(leaf, item, old_size + data_size);
 	btrfs_mark_buffer_dirty(leaf);
 
-	ret = 0;
 	if (btrfs_leaf_free_space(root, leaf) < 0) {
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
-	return ret;
+	return 0;
 }
 
 /*
@@ -3573,11 +3564,10 @@ out:
  * to save stack depth by doing the bulk of the work in a function
  * that doesn't call btrfs_search_slot
  */
-static noinline_for_stack int
-setup_items_for_insert(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *cpu_key, u32 *data_size,
-			u32 total_data, u32 total_size, int nr)
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr)
 {
 	struct btrfs_item *item;
 	int i;
@@ -3661,7 +3651,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 	if (slot == 0) {
-		struct btrfs_disk_key disk_key;
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
@@ -3963,7 +3952,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	else
 		return 1;
 
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
@@ -4087,7 +4076,7 @@ find_next_key:
 		sret = btrfs_find_next_key(root, path, min_key, level,
 					  cache_only, min_trans);
 		if (sret == 0) {
-			btrfs_release_path(root, path);
+			btrfs_release_path(path);
 			goto again;
 		} else {
 			goto out;
@@ -4166,7 +4155,7 @@ next:
 		btrfs_node_key_to_cpu(c, &cur_key, slot);
 
 		orig_lowest = path->lowest_level;
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		path->lowest_level = level;
 		ret = btrfs_search_slot(NULL, root, &cur_key, path,
 					0, 0);
@@ -4243,7 +4232,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 again:
 	level = 1;
 	next = NULL;
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	path->keep_locks = 1;
 
@@ -4299,7 +4288,7 @@ again:
 		goto again;
 
 	if (ret < 0) {
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		goto done;
 	}
 
@@ -4338,7 +4327,7 @@ again:
 		goto again;
 
 	if (ret < 0) {
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		goto done;
 	}
 
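Note: every btrfs_release_path() call in this file, and in the header and callers below, drops the old btrfs_root argument; it was never used, so call sites that passed NULL and those that passed root collapse to the same form. The path lifecycle under the new signature looks roughly like this (a hedged sketch, not taken from the patch):

	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	/* ... inspect path->nodes[0] at path->slots[0] ... */
	btrfs_release_path(path);	/* drops locks and extent buffers */
	btrfs_free_path(path);		/* itself calls btrfs_release_path() */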
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0f8c489bcc02..8f98c2005715 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
+#include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
@@ -33,6 +34,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
+#include "ioctl.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
@@ -105,6 +107,12 @@ struct btrfs_ordered_sum;
 /* For storing free space cache */
 #define BTRFS_FREE_SPACE_OBJECTID -11ULL
 
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -187,7 +195,6 @@ struct btrfs_mapping_tree {
 	struct extent_map_tree map_tree;
 };
 
-#define BTRFS_UUID_SIZE 16
 struct btrfs_dev_item {
 	/* the internal btrfs device id */
 	__le64 devid;
@@ -294,7 +301,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 		sizeof(struct btrfs_stripe) * (num_stripes - 1);
 }
 
-#define BTRFS_FSID_SIZE 16
 #define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
 #define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
 
@@ -510,6 +516,12 @@ struct btrfs_extent_item_v0 {
 /* use full backrefs for extent pointers in the block */
 #define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
 
+/*
+ * this flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions
+ */
+#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
+
 struct btrfs_tree_block_info {
 	struct btrfs_disk_key key;
 	u8 level;
@@ -740,12 +752,12 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;
 
-	int full:1;		/* indicates that we cannot allocate any more
+	unsigned int full:1;	/* indicates that we cannot allocate any more
 				   chunks for this space */
-	int chunk_alloc:1;	/* set if we are allocating a chunk */
+	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
 
-	int force_alloc;	/* set if we need to force a chunk alloc for
-				   this space */
+	unsigned int force_alloc;	/* set if we need to force a chunk
+					   alloc for this space */
 
 	struct list_head list;
 
@@ -830,9 +842,6 @@ struct btrfs_block_group_cache {
 	u64 bytes_super;
 	u64 flags;
 	u64 sectorsize;
-	int extents_thresh;
-	int free_extents;
-	int total_bitmaps;
 	unsigned int ro:1;
 	unsigned int dirty:1;
 	unsigned int iref:1;
@@ -847,9 +856,7 @@ struct btrfs_block_group_cache {
 	struct btrfs_space_info *space_info;
 
 	/* free space cache stuff */
-	spinlock_t tree_lock;
-	struct rb_root free_space_offset;
-	u64 free_space;
+	struct btrfs_free_space_ctl *free_space_ctl;
 
 	/* block group cache stuff */
 	struct rb_node cache_node;
@@ -869,6 +876,7 @@ struct btrfs_block_group_cache {
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
+struct btrfs_delayed_root;
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -895,7 +903,10 @@ struct btrfs_fs_info {
 	/* logical->physical extent mapping */
 	struct btrfs_mapping_tree mapping_tree;
 
-	/* block reservation for extent, checksum and root tree */
+	/*
+	 * block reservation for extent, checksum, root tree and
+	 * delayed dir index item
+	 */
 	struct btrfs_block_rsv global_block_rsv;
 	/* block reservation for delay allocation */
 	struct btrfs_block_rsv delalloc_block_rsv;
@@ -1022,6 +1033,7 @@ struct btrfs_fs_info {
 	 * for the sys_munmap function call path
 	 */
 	struct btrfs_workers fixup_workers;
+	struct btrfs_workers delayed_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -1062,6 +1074,11 @@ struct btrfs_fs_info {
 	/* all metadata allocations go through this cluster */
 	struct btrfs_free_cluster meta_alloc_cluster;
 
+	/* auto defrag inodes go here */
+	spinlock_t defrag_inodes_lock;
+	struct rb_root defrag_inodes;
+	atomic_t defrag_running;
+
 	spinlock_t ref_cache_lock;
 	u64 total_ref_cache_size;
 
@@ -1077,8 +1094,21 @@ struct btrfs_fs_info {
 
 	void *bdev_holder;
 
+	/* private scrub information */
+	struct mutex scrub_lock;
+	atomic_t scrubs_running;
+	atomic_t scrub_pause_req;
+	atomic_t scrubs_paused;
+	atomic_t scrub_cancel_req;
+	wait_queue_head_t scrub_pause_wait;
+	struct rw_semaphore scrub_super_lock;
+	int scrub_workers_refcnt;
+	struct btrfs_workers scrub_workers;
+
 	/* filesystem state */
 	u64 fs_state;
+
+	struct btrfs_delayed_root *delayed_root;
 };
 
 /*
@@ -1088,9 +1118,6 @@ struct btrfs_fs_info {
 struct btrfs_root {
 	struct extent_buffer *node;
 
-	/* the node lock is held while changing the node pointer */
-	spinlock_t node_lock;
-
 	struct extent_buffer *commit_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
@@ -1107,6 +1134,16 @@ struct btrfs_root {
 	spinlock_t accounting_lock;
 	struct btrfs_block_rsv *block_rsv;
 
+	/* free ino cache stuff */
+	struct mutex fs_commit_mutex;
+	struct btrfs_free_space_ctl *free_ino_ctl;
+	enum btrfs_caching_type cached;
+	spinlock_t cache_lock;
+	wait_queue_head_t cache_wait;
+	struct btrfs_free_space_ctl *free_ino_pinned;
+	u64 cache_progress;
+	struct inode *cache_inode;
+
 	struct mutex log_mutex;
 	wait_queue_head_t log_writer_wait;
 	wait_queue_head_t log_commit_wait[2];
@@ -1162,12 +1199,49 @@ struct btrfs_root {
 	struct rb_root inode_tree;
 
 	/*
+	 * radix tree that keeps track of delayed nodes of every inode,
+	 * protected by inode_lock
+	 */
+	struct radix_tree_root delayed_nodes_tree;
+	/*
 	 * right now this just gets used so that a root has its own devid
 	 * for stat. It may be used for more later
 	 */
 	struct super_block anon_super;
 };
 
+struct btrfs_ioctl_defrag_range_args {
+	/* start of the defrag operation */
+	__u64 start;
+
+	/* number of bytes to defrag, use (u64)-1 to say all */
+	__u64 len;
+
+	/*
+	 * flags for the operation, which can include turning
+	 * on compression for this one defrag
+	 */
+	__u64 flags;
+
+	/*
+	 * any extent bigger than this will be considered
+	 * already defragged. Use 0 to take the kernel default
+	 * Use 1 to say every single extent must be rewritten
+	 */
+	__u32 extent_thresh;
+
+	/*
+	 * which compression method to use if turning on compression
+	 * for this defrag operation. If unspecified, zlib will
+	 * be used
+	 */
+	__u32 compress_type;
+
+	/* spare for later */
+	__u32 unused[4];
+};
+
+
 /*
  * inode items have the data typically returned from stat and store other
  * info about object characteristics. There is one for every file and dir in
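Note: btrfs_ioctl_defrag_range_args added above is the kernel-side layout of the BTRFS_IOC_DEFRAG_RANGE argument; the ioctl number itself lives in ioctl.h, which this header now includes. A userspace caller would fill it roughly like this (a hedged sketch, not shown anywhere in the patch):

	struct btrfs_ioctl_defrag_range_args range = {
		.start = 0,
		.len = (__u64)-1,	/* defrag the whole file */
		.extent_thresh = 0,	/* take the kernel default */
	};

	/* ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range); */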
@@ -1265,6 +1339,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_CLEAR_CACHE		(1 << 13)
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
 #define BTRFS_MOUNT_ENOSPC_DEBUG	 (1 << 15)
+#define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1440,26 +1515,12 @@ static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
 	return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
 }
 
-static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
-					      struct btrfs_chunk *c, int nr,
-					      u64 val)
-{
-	btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
-}
-
 static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
 					 struct btrfs_chunk *c, int nr)
 {
 	return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
 }
 
-static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
-					     struct btrfs_chunk *c, int nr,
-					     u64 val)
-{
-	btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
-}
-
 /* struct btrfs_block_group_item */
 BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
 			 used, 64);
@@ -1517,14 +1578,6 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
 	return (struct btrfs_timespec *)ptr;
 }
 
-static inline struct btrfs_timespec *
-btrfs_inode_otime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, otime);
-	return (struct btrfs_timespec *)ptr;
-}
-
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
 
@@ -1875,33 +1928,6 @@ static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
 	return (u8 *)ptr;
 }
 
-static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
-{
-	unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
-	return (u8 *)ptr;
-}
-
-static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
-{
-	unsigned long ptr = offsetof(struct btrfs_header, csum);
-	return (u8 *)ptr;
-}
-
-static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
-static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
-static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
 static inline int btrfs_is_leaf(struct extent_buffer *eb)
 {
 	return btrfs_header_level(eb) == 0;
@@ -2055,22 +2081,6 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
-static inline int btrfs_set_root_name(struct btrfs_root *root,
-				      const char *name, int len)
-{
-	/* if we already have a name just free it */
-	kfree(root->name);
-
-	root->name = kmalloc(len+1, GFP_KERNEL);
-	if (!root->name)
-		return -ENOMEM;
-
-	memcpy(root->name, name, len);
-	root->name[len] = '\0';
-
-	return 0;
-}
-
 static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
 {
 	if (level == 0)
@@ -2099,6 +2109,13 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 }
 
 /* extent-tree.c */
+static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
+						 int num_items)
+{
+	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
+		3 * num_items;
+}
+
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
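Note: btrfs_calc_trans_metadata_size() reserves three full tree paths per item: one leaf plus one node for each remaining level up to the root, times num_items, times three. Reading the factor of three as covering the fs tree change plus the accompanying extent tree and checksum tree updates is our assumption; the patch does not spell it out. A worked example, assuming 4K leafsize and nodesize and btrfs's compiled-in BTRFS_MAX_LEVEL of 8:

	(4096 + 4096 * (8 - 1)) * 3 * 1 = 98304	/* bytes reserved for one item */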
@@ -2108,12 +2125,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset, u64 bytenr);
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
@@ -2293,10 +2307,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, struct extent_buffer *parent,
 			    int start_slot, int cache_only, u64 *last_ret,
 			    struct btrfs_key *progress);
-void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+void btrfs_release_path(struct btrfs_path *p);
 struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
+void btrfs_clear_path_blocking(struct btrfs_path *p,
+			       struct extent_buffer *held);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2308,13 +2324,12 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 	return btrfs_del_items(trans, root, path, path->slots[0], 1);
 }
 
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr);
 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_key *key, void *data, u32 data_size);
-int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    int nr);
 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path,
@@ -2360,8 +2375,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 			   *item);
 int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 			 btrfs_root_item *item, struct btrfs_key *key);
-int btrfs_search_root(struct btrfs_root *root, u64 search_start,
-		      u64 *found_objectid);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
@@ -2371,7 +2384,7 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
-			  int name_len, u64 dir,
+			  int name_len, struct inode *dir,
 			  struct btrfs_key *location, u8 type, u64 index);
 struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
 					     struct btrfs_root *root,
@@ -2416,12 +2429,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 offset);
 int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
 
-/* inode-map.c */
-int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *fs_root,
-			     u64 dirid, u64 *objectid);
-int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
-
 /* inode-item.c */
 int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
@@ -2466,8 +2473,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 		       struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
-			  u64 start, unsigned long len);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
@@ -2475,8 +2480,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
-int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
-			     u64 end, struct list_head *list);
+int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+			     struct list_head *list, int search_commit);
 /* inode.c */
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
@@ -2505,8 +2510,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
-				   int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2522,7 +2525,6 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
-void btrfs_put_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2533,10 +2535,8 @@ void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			 struct btrfs_root *root, int *was_new);
-int btrfs_commit_write(struct file *file, struct page *page,
-		       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
-				    size_t page_offset, u64 start, u64 end,
+				    size_t pg_offset, u64 start, u64 end,
 				    int create);
 int btrfs_update_inode(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root,
@@ -2568,12 +2568,16 @@ extern const struct dentry_operations btrfs_dentry_operations;
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 void btrfs_update_iflags(struct inode *inode);
 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
-
+int btrfs_defrag_file(struct inode *inode, struct file *file,
+		      struct btrfs_ioctl_defrag_range_args *range,
+		      u64 newer_than, unsigned long max_pages);
 /* file.c */
+int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+			   struct inode *inode);
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
 int btrfs_sync_file(struct file *file, int datasync);
 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			    int skip_pinned);
-int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
 extern const struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 		       u64 start, u64 end, u64 *hint_byte, int drop_cache);
@@ -2593,10 +2597,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 /* sysfs.c */
 int btrfs_init_sysfs(void);
 void btrfs_exit_sysfs(void);
-int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
-int btrfs_sysfs_add_root(struct btrfs_root *root);
-void btrfs_sysfs_del_root(struct btrfs_root *root);
-void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
 
 /* xattr.c */
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -2639,4 +2639,18 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
 			      u64 *bytes_to_reserve);
 void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending);
+
+/* scrub.c */
+int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
+		    struct btrfs_scrub_progress *progress, int readonly);
+int btrfs_scrub_pause(struct btrfs_root *root);
+int btrfs_scrub_pause_super(struct btrfs_root *root);
+int btrfs_scrub_continue(struct btrfs_root *root);
+int btrfs_scrub_continue_super(struct btrfs_root *root);
+int btrfs_scrub_cancel(struct btrfs_root *root);
+int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
+int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
+int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
+			 struct btrfs_scrub_progress *progress);
+
 #endif
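Note: the scrub prototypes above are the reason ctree.h now pulls in ioctl.h: struct btrfs_scrub_progress is the ioctl-visible block of counters that scrub fills in. A hedged sketch of a kernel-side caller driving a full scrub of one device; btrfs_scrub_dev() blocks until the scrub finishes, is cancelled, or fails, and (judging only by the names) the pause/continue pairs let other code quiesce scrub around critical sections such as super block writes:

	struct btrfs_scrub_progress progress;
	int ret;

	memset(&progress, 0, sizeof(progress));
	/* scrub devid 1 from byte 0 to the end of the device, writable */
	ret = btrfs_scrub_dev(root, 1, 0, (u64)-1, &progress, 0);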
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
new file mode 100644
index 000000000000..b46d94d1dea8
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@ -0,0 +1,1694 @@
+/*
+ * Copyright (C) 2011 Fujitsu. All rights reserved.
+ * Written by Miao Xie <miaox@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "delayed-inode.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+#define BTRFS_DELAYED_WRITEBACK		400
+#define BTRFS_DELAYED_BACKGROUND	100
+
+static struct kmem_cache *delayed_node_cache;
+
+int __init btrfs_delayed_inode_init(void)
+{
+	delayed_node_cache = kmem_cache_create("delayed_node",
+					sizeof(struct btrfs_delayed_node),
+					0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!delayed_node_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void btrfs_delayed_inode_exit(void)
+{
+	if (delayed_node_cache)
+		kmem_cache_destroy(delayed_node_cache);
+}
+
+static inline void btrfs_init_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				struct btrfs_root *root, u64 inode_id)
+{
+	delayed_node->root = root;
+	delayed_node->inode_id = inode_id;
+	atomic_set(&delayed_node->refs, 0);
+	delayed_node->count = 0;
+	delayed_node->in_list = 0;
+	delayed_node->inode_dirty = 0;
+	delayed_node->ins_root = RB_ROOT;
+	delayed_node->del_root = RB_ROOT;
+	mutex_init(&delayed_node->mutex);
+	delayed_node->index_cnt = 0;
+	INIT_LIST_HEAD(&delayed_node->n_list);
+	INIT_LIST_HEAD(&delayed_node->p_list);
+	delayed_node->bytes_reserved = 0;
+}
+
+static inline int btrfs_is_continuous_delayed_item(
+					struct btrfs_delayed_item *item1,
+					struct btrfs_delayed_item *item2)
+{
+	if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
+	    item1->key.objectid == item2->key.objectid &&
+	    item1->key.type == item2->key.type &&
+	    item1->key.offset + 1 == item2->key.offset)
+		return 1;
+	return 0;
+}
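Note: the continuity test above is deliberately narrow: only BTRFS_DIR_INDEX_KEY items qualify, and two items count as continuous only when they share objectid and type and their offsets are adjacent. As a worked example (hypothetical keys, not from the patch), (257 DIR_INDEX 5) followed by (257 DIR_INDEX 6) is continuous and can be batched into a single leaf insertion, while (257 DIR_INDEX 8) would start a new batch.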
+
+static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
+							struct btrfs_root *root)
+{
+	return root->fs_info->delayed_root;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+							struct inode *inode)
+{
+	struct btrfs_delayed_node *node;
+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+	struct btrfs_root *root = btrfs_inode->root;
+	u64 ino = btrfs_ino(inode);
+	int ret;
+
+again:
+	node = ACCESS_ONCE(btrfs_inode->delayed_node);
+	if (node) {
+		atomic_inc(&node->refs);	/* can be accessed */
+		return node;
+	}
+
+	spin_lock(&root->inode_lock);
+	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+	if (node) {
+		if (btrfs_inode->delayed_node) {
+			spin_unlock(&root->inode_lock);
+			goto again;
+		}
+		btrfs_inode->delayed_node = node;
+		atomic_inc(&node->refs);	/* can be accessed */
+		atomic_inc(&node->refs);	/* cached in the inode */
+		spin_unlock(&root->inode_lock);
+		return node;
+	}
+	spin_unlock(&root->inode_lock);
+
+	node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+	btrfs_init_delayed_node(node, root, ino);
+
+	atomic_inc(&node->refs);	/* cached in the btrfs inode */
+	atomic_inc(&node->refs);	/* can be accessed */
+
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret) {
+		kmem_cache_free(delayed_node_cache, node);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&root->inode_lock);
+	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
+	if (ret == -EEXIST) {
+		kmem_cache_free(delayed_node_cache, node);
+		spin_unlock(&root->inode_lock);
+		radix_tree_preload_end();
+		goto again;
+	}
+	btrfs_inode->delayed_node = node;
+	spin_unlock(&root->inode_lock);
+	radix_tree_preload_end();
+
+	return node;
+}
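Note: btrfs_get_or_create_delayed_node() is the usual lockless-fastpath, retry-on-race radix tree shape: read the cached pointer without locking, look the node up under inode_lock, and otherwise allocate and initialize outside the lock, preload, then insert; if radix_tree_insert() returns -EEXIST another task won the race, so the new node is freed and the whole sequence restarts from the fast path. On success two references are held, one for the pointer cached in the btrfs_inode and one handed back to the caller, which is why refs is bumped twice.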
+
+/*
+ * Call it when holding delayed_node->mutex
+ *
+ * If mod = 1, add this node into the prepared list.
+ */
+static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
+				     struct btrfs_delayed_node *node,
+				     int mod)
+{
+	spin_lock(&root->lock);
+	if (node->in_list) {
+		if (!list_empty(&node->p_list))
+			list_move_tail(&node->p_list, &root->prepare_list);
+		else if (mod)
+			list_add_tail(&node->p_list, &root->prepare_list);
+	} else {
+		list_add_tail(&node->n_list, &root->node_list);
+		list_add_tail(&node->p_list, &root->prepare_list);
+		atomic_inc(&node->refs);	/* inserted into list */
+		root->nodes++;
+		node->in_list = 1;
+	}
+	spin_unlock(&root->lock);
+}
+
+/* Call it when holding delayed_node->mutex */
+static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
+				       struct btrfs_delayed_node *node)
+{
+	spin_lock(&root->lock);
+	if (node->in_list) {
+		root->nodes--;
+		atomic_dec(&node->refs);	/* not in the list */
+		list_del_init(&node->n_list);
+		if (!list_empty(&node->p_list))
+			list_del_init(&node->p_list);
+		node->in_list = 0;
+	}
+	spin_unlock(&root->lock);
+}
+
+struct btrfs_delayed_node *btrfs_first_delayed_node(
+			struct btrfs_delayed_root *delayed_root)
+{
+	struct list_head *p;
+	struct btrfs_delayed_node *node = NULL;
+
+	spin_lock(&delayed_root->lock);
+	if (list_empty(&delayed_root->node_list))
+		goto out;
+
+	p = delayed_root->node_list.next;
+	node = list_entry(p, struct btrfs_delayed_node, n_list);
+	atomic_inc(&node->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return node;
+}
+
+struct btrfs_delayed_node *btrfs_next_delayed_node(
+						struct btrfs_delayed_node *node)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct list_head *p;
+	struct btrfs_delayed_node *next = NULL;
+
+	delayed_root = node->root->fs_info->delayed_root;
+	spin_lock(&delayed_root->lock);
+	if (!node->in_list) {	/* not in the list */
+		if (list_empty(&delayed_root->node_list))
+			goto out;
+		p = delayed_root->node_list.next;
+	} else if (list_is_last(&node->n_list, &delayed_root->node_list))
+		goto out;
+	else
+		p = node->n_list.next;
+
+	next = list_entry(p, struct btrfs_delayed_node, n_list);
+	atomic_inc(&next->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return next;
+}
+
+static void __btrfs_release_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				int mod)
+{
+	struct btrfs_delayed_root *delayed_root;
+
+	if (!delayed_node)
+		return;
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
+
+	mutex_lock(&delayed_node->mutex);
+	if (delayed_node->count)
+		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
+	else
+		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
+	mutex_unlock(&delayed_node->mutex);
+
+	if (atomic_dec_and_test(&delayed_node->refs)) {
+		struct btrfs_root *root = delayed_node->root;
+		spin_lock(&root->inode_lock);
+		if (atomic_read(&delayed_node->refs) == 0) {
+			radix_tree_delete(&root->delayed_nodes_tree,
+					  delayed_node->inode_id);
+			kmem_cache_free(delayed_node_cache, delayed_node);
+		}
+		spin_unlock(&root->inode_lock);
+	}
+}
+
+static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
+{
+	__btrfs_release_delayed_node(node, 0);
+}
+
+struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
+			struct btrfs_delayed_root *delayed_root)
+{
+	struct list_head *p;
+	struct btrfs_delayed_node *node = NULL;
+
+	spin_lock(&delayed_root->lock);
+	if (list_empty(&delayed_root->prepare_list))
+		goto out;
+
+	p = delayed_root->prepare_list.next;
+	list_del_init(p);
+	node = list_entry(p, struct btrfs_delayed_node, p_list);
+	atomic_inc(&node->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return node;
+}
+
+static inline void btrfs_release_prepared_delayed_node(
+					struct btrfs_delayed_node *node)
+{
+	__btrfs_release_delayed_node(node, 1);
+}
+
+struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
+{
+	struct btrfs_delayed_item *item;
+	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
+	if (item) {
+		item->data_len = data_len;
+		item->ins_or_del = 0;
+		item->bytes_reserved = 0;
+		item->block_rsv = NULL;
+		item->delayed_node = NULL;
+		atomic_set(&item->refs, 1);
+	}
+	return item;
+}
306
307/*
308 * __btrfs_lookup_delayed_item - look up the delayed item by key
309 * @root: the root of the rb-tree to search (insertion or deletion tree)
310 * @key: the key to look up
311 * @prev: used to store the prev item if the right item isn't found
312 * @next: used to store the next item if the right item isn't found
313 *
314 * Note: if the exact item isn't found, we return NULL and store the prev
315 * and next items in @prev and @next.
316 */
317static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
318 struct rb_root *root,
319 struct btrfs_key *key,
320 struct btrfs_delayed_item **prev,
321 struct btrfs_delayed_item **next)
322{
323 struct rb_node *node, *prev_node = NULL;
324 struct btrfs_delayed_item *delayed_item = NULL;
325 int ret = 0;
326
327 node = root->rb_node;
328
329 while (node) {
330 delayed_item = rb_entry(node, struct btrfs_delayed_item,
331 rb_node);
332 prev_node = node;
333 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
334 if (ret < 0)
335 node = node->rb_right;
336 else if (ret > 0)
337 node = node->rb_left;
338 else
339 return delayed_item;
340 }
341
342 if (prev) {
343 if (!prev_node)
344 *prev = NULL;
345 else if (ret < 0)
346 *prev = delayed_item;
347 else if ((node = rb_prev(prev_node)) != NULL) {
348 *prev = rb_entry(node, struct btrfs_delayed_item,
349 rb_node);
350 } else
351 *prev = NULL;
352 }
353
354 if (next) {
355 if (!prev_node)
356 *next = NULL;
357 else if (ret > 0)
358 *next = delayed_item;
359 else if ((node = rb_next(prev_node)) != NULL) {
360 *next = rb_entry(node, struct btrfs_delayed_item,
361 rb_node);
362 } else
363 *next = NULL;
364 }
365 return NULL;
366}
367
368struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
369 struct btrfs_delayed_node *delayed_node,
370 struct btrfs_key *key)
371{
372 struct btrfs_delayed_item *item;
373
374 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
375 NULL, NULL);
376 return item;
377}
378
379struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
380 struct btrfs_delayed_node *delayed_node,
381 struct btrfs_key *key)
382{
383 struct btrfs_delayed_item *item;
384
385 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
386 NULL, NULL);
387 return item;
388}
389
390struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
391 struct btrfs_delayed_node *delayed_node,
392 struct btrfs_key *key)
393{
394 struct btrfs_delayed_item *item, *next;
395
396 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
397 NULL, &next);
398 if (!item)
399 item = next;
400
401 return item;
402}
403
404struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
405 struct btrfs_delayed_node *delayed_node,
406 struct btrfs_key *key)
407{
408 struct btrfs_delayed_item *item, *next;
409
410 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
411 NULL, &next);
412 if (!item)
413 item = next;
414
415 return item;
416}
417
418static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
419 struct btrfs_delayed_item *ins,
420 int action)
421{
422 struct rb_node **p, *node;
423 struct rb_node *parent_node = NULL;
424 struct rb_root *root;
425 struct btrfs_delayed_item *item;
426 int cmp;
427
428 if (action == BTRFS_DELAYED_INSERTION_ITEM)
429 root = &delayed_node->ins_root;
430 else if (action == BTRFS_DELAYED_DELETION_ITEM)
431 root = &delayed_node->del_root;
432 else
433 BUG();
434 p = &root->rb_node;
435 node = &ins->rb_node;
436
437 while (*p) {
438 parent_node = *p;
439 item = rb_entry(parent_node, struct btrfs_delayed_item,
440 rb_node);
441
442 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
443 if (cmp < 0)
444 p = &(*p)->rb_right;
445 else if (cmp > 0)
446 p = &(*p)->rb_left;
447 else
448 return -EEXIST;
449 }
450
451 rb_link_node(node, parent_node, p);
452 rb_insert_color(node, root);
453 ins->delayed_node = delayed_node;
454 ins->ins_or_del = action;
455
456 if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
457 action == BTRFS_DELAYED_INSERTION_ITEM &&
458 ins->key.offset >= delayed_node->index_cnt)
459 delayed_node->index_cnt = ins->key.offset + 1;
460
461 delayed_node->count++;
462 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
463 return 0;
464}
465
466static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
467 struct btrfs_delayed_item *item)
468{
469 return __btrfs_add_delayed_item(node, item,
470 BTRFS_DELAYED_INSERTION_ITEM);
471}
472
473static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
474 struct btrfs_delayed_item *item)
475{
476 return __btrfs_add_delayed_item(node, item,
477 BTRFS_DELAYED_DELETION_ITEM);
478}
479
480static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
481{
482 struct rb_root *root;
483 struct btrfs_delayed_root *delayed_root;
484
485 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
486
487 BUG_ON(!delayed_root);
488 BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
489 delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
490
491 if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
492 root = &delayed_item->delayed_node->ins_root;
493 else
494 root = &delayed_item->delayed_node->del_root;
495
496 rb_erase(&delayed_item->rb_node, root);
497 delayed_item->delayed_node->count--;
498 atomic_dec(&delayed_root->items);
499 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
500 waitqueue_active(&delayed_root->wait))
501 wake_up(&delayed_root->wait);
502}
503
504static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
505{
506 if (item) {
507 __btrfs_remove_delayed_item(item);
508 if (atomic_dec_and_test(&item->refs))
509 kfree(item);
510 }
511}
512
513struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
514 struct btrfs_delayed_node *delayed_node)
515{
516 struct rb_node *p;
517 struct btrfs_delayed_item *item = NULL;
518
519 p = rb_first(&delayed_node->ins_root);
520 if (p)
521 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
522
523 return item;
524}
525
526struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
527 struct btrfs_delayed_node *delayed_node)
528{
529 struct rb_node *p;
530 struct btrfs_delayed_item *item = NULL;
531
532 p = rb_first(&delayed_node->del_root);
533 if (p)
534 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
535
536 return item;
537}
538
539struct btrfs_delayed_item *__btrfs_next_delayed_item(
540 struct btrfs_delayed_item *item)
541{
542 struct rb_node *p;
543 struct btrfs_delayed_item *next = NULL;
544
545 p = rb_next(&item->rb_node);
546 if (p)
547 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
548
549 return next;
550}
551
552static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
553 struct inode *inode)
554{
555 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
556 struct btrfs_delayed_node *delayed_node;
557
558 delayed_node = btrfs_inode->delayed_node;
559 if (delayed_node)
560 atomic_inc(&delayed_node->refs);
561
562 return delayed_node;
563}
564
565static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
566 u64 root_id)
567{
568 struct btrfs_key root_key;
569
570 if (root->objectid == root_id)
571 return root;
572
573 root_key.objectid = root_id;
574 root_key.type = BTRFS_ROOT_ITEM_KEY;
575 root_key.offset = (u64)-1;
576 return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
577}
578
579static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
580 struct btrfs_root *root,
581 struct btrfs_delayed_item *item)
582{
583 struct btrfs_block_rsv *src_rsv;
584 struct btrfs_block_rsv *dst_rsv;
585 u64 num_bytes;
586 int ret;
587
588 if (!trans->bytes_reserved)
589 return 0;
590
591 src_rsv = trans->block_rsv;
592 dst_rsv = &root->fs_info->global_block_rsv;
593
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) {
597 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600
601 return ret;
602}
603
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item)
606{
607 if (!item->bytes_reserved)
608 return;
609
610 btrfs_block_rsv_release(root, item->block_rsv,
611 item->bytes_reserved);
612}
613
614static int btrfs_delayed_inode_reserve_metadata(
615 struct btrfs_trans_handle *trans,
616 struct btrfs_root *root,
617 struct btrfs_delayed_node *node)
618{
619 struct btrfs_block_rsv *src_rsv;
620 struct btrfs_block_rsv *dst_rsv;
621 u64 num_bytes;
622 int ret;
623
624 if (!trans->bytes_reserved)
625 return 0;
626
627 src_rsv = trans->block_rsv;
628 dst_rsv = &root->fs_info->global_block_rsv;
629
630 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
631 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
632 if (!ret)
633 node->bytes_reserved = num_bytes;
634
635 return ret;
636}
637
638static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
639 struct btrfs_delayed_node *node)
640{
641 struct btrfs_block_rsv *rsv;
642
643 if (!node->bytes_reserved)
644 return;
645
646 rsv = &root->fs_info->global_block_rsv;
647 btrfs_block_rsv_release(root, rsv,
648 node->bytes_reserved);
649 node->bytes_reserved = 0;
650}
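The reserve/release pairs above carve one tree operation's worth of metadata
out of the running transaction's reservation and park it in the global block
reserve until the delayed item or inode is actually played back. The
expected pairing, as a sketch:

	/*
	 *	btrfs_delayed_item_reserve_metadata(trans, root, item);
	 *		... the item waits in the delayed rb-tree ...
	 *	btrfs_delayed_item_release_metadata(root, item);
	 *
	 * and likewise btrfs_delayed_inode_reserve_metadata() /
	 * btrfs_delayed_inode_release_metadata() around a dirtied
	 * delayed inode item.
	 */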
651
652/*
653 * This helper inserts as many continuous items into the same leaf as the
654 * free space of the leaf allows.
655 */
656static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 struct btrfs_delayed_item *item)
660{
661 struct btrfs_delayed_item *curr, *next;
662 int free_space;
663 int total_data_size = 0, total_size = 0;
664 struct extent_buffer *leaf;
665 char *data_ptr;
666 struct btrfs_key *keys;
667 u32 *data_size;
668 struct list_head head;
669 int slot;
670 int nitems = 0;
671 int i;
672 int ret = 0;
673
674 BUG_ON(!path->nodes[0]);
675
676 leaf = path->nodes[0];
677 free_space = btrfs_leaf_free_space(root, leaf);
678 INIT_LIST_HEAD(&head);
679
680 next = item;
681
682 /*
683 * count the number of continuous items that we can insert in one batch
684 */
685 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
686 free_space) {
687 total_data_size += next->data_len;
688 total_size += next->data_len + sizeof(struct btrfs_item);
689 list_add_tail(&next->tree_list, &head);
690 nitems++;
691
692 curr = next;
693 next = __btrfs_next_delayed_item(curr);
694 if (!next)
695 break;
696
697 if (!btrfs_is_continuous_delayed_item(curr, next))
698 break;
699 }
700
701 if (!nitems) {
702 ret = 0;
703 goto out;
704 }
705
706 /*
707 * we need to allocate some memory, and the allocation might cause the
708 * task to sleep, so we set all locked nodes in the path to blocking
709 * locks first.
710 */
711 btrfs_set_path_blocking(path);
712
713 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
714 if (!keys) {
715 ret = -ENOMEM;
716 goto out;
717 }
718
719 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
720 if (!data_size) {
721 ret = -ENOMEM;
722 goto error;
723 }
724
725 /* get keys of all the delayed items */
726 i = 0;
727 list_for_each_entry(next, &head, tree_list) {
728 keys[i] = next->key;
729 data_size[i] = next->data_len;
730 i++;
731 }
732
733 /* reset all the locked nodes in the path to spinning locks. */
734 btrfs_clear_path_blocking(path, NULL);
735
736 /* insert the keys of the items */
737 ret = setup_items_for_insert(trans, root, path, keys, data_size,
738 total_data_size, total_size, nitems);
739 if (ret)
740 goto error;
741
742 /* copy the data of the dir index items into the leaf */
743 slot = path->slots[0];
744 list_for_each_entry_safe(curr, next, &head, tree_list) {
745 data_ptr = btrfs_item_ptr(leaf, slot, char);
746 write_extent_buffer(leaf, &curr->data,
747 (unsigned long)data_ptr,
748 curr->data_len);
749 slot++;
750
751 btrfs_delayed_item_release_metadata(root, curr);
752
753 list_del(&curr->tree_list);
754 btrfs_release_delayed_item(curr);
755 }
756
757error:
758 kfree(data_size);
759 kfree(keys);
760out:
761 return ret;
762}
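For a feel of the batching arithmetic: each batched item consumes its
data_len plus sizeof(struct btrfs_item) of leaf free space, where the item
header is a 17-byte btrfs_disk_key plus two __le32 fields (offset, size),
i.e. 25 bytes. A sketch mirroring the loop's accounting (the numbers in the
trailing comment are illustrative, not taken from this commit):

	enum { ITEM_HDR = 25 };	/* mirrors sizeof(struct btrfs_item) */

	static int max_batch(int leaf_free_space, unsigned int data_len)
	{
		return leaf_free_space / (data_len + ITEM_HDR);
	}

	/* e.g. ~3900 bytes free, 40-byte dir index items -> 60 per batch */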
763
764/*
765 * This helper handles simple insertions that don't need to extend an item
766 * with new data, such as directory name index or inode insertion.
767 */
768static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
769 struct btrfs_root *root,
770 struct btrfs_path *path,
771 struct btrfs_delayed_item *delayed_item)
772{
773 struct extent_buffer *leaf;
774 struct btrfs_item *item;
775 char *ptr;
776 int ret;
777
778 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
779 delayed_item->data_len);
780 if (ret < 0 && ret != -EEXIST)
781 return ret;
782
783 leaf = path->nodes[0];
784
785 item = btrfs_item_nr(leaf, path->slots[0]);
786 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
787
788 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
789 delayed_item->data_len);
790 btrfs_mark_buffer_dirty(leaf);
791
792 btrfs_delayed_item_release_metadata(root, delayed_item);
793 return 0;
794}
795
796/*
797 * we insert one item first, then if some continuous items follow it, we
798 * try to insert those items into the same leaf.
799 */
800static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
801 struct btrfs_path *path,
802 struct btrfs_root *root,
803 struct btrfs_delayed_node *node)
804{
805 struct btrfs_delayed_item *curr, *prev;
806 int ret = 0;
807
808do_again:
809 mutex_lock(&node->mutex);
810 curr = __btrfs_first_delayed_insertion_item(node);
811 if (!curr)
812 goto insert_end;
813
814 ret = btrfs_insert_delayed_item(trans, root, path, curr);
815 if (ret < 0) {
816 btrfs_release_path(path);
817 goto insert_end;
818 }
819
820 prev = curr;
821 curr = __btrfs_next_delayed_item(prev);
822 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
823 /* insert the continuous items into the same leaf */
824 path->slots[0]++;
825 btrfs_batch_insert_items(trans, root, path, curr);
826 }
827 btrfs_release_delayed_item(prev);
828 btrfs_mark_buffer_dirty(path->nodes[0]);
829
830 btrfs_release_path(path);
831 mutex_unlock(&node->mutex);
832 goto do_again;
833
834insert_end:
835 mutex_unlock(&node->mutex);
836 return ret;
837}
838
839static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
840 struct btrfs_root *root,
841 struct btrfs_path *path,
842 struct btrfs_delayed_item *item)
843{
844 struct btrfs_delayed_item *curr, *next;
845 struct extent_buffer *leaf;
846 struct btrfs_key key;
847 struct list_head head;
848 int nitems, i, last_item;
849 int ret = 0;
850
851 BUG_ON(!path->nodes[0]);
852
853 leaf = path->nodes[0];
854
855 i = path->slots[0];
856 last_item = btrfs_header_nritems(leaf) - 1;
857 if (i > last_item)
858 return -ENOENT; /* FIXME: Is errno suitable? */
859
860 next = item;
861 INIT_LIST_HEAD(&head);
862 btrfs_item_key_to_cpu(leaf, &key, i);
863 nitems = 0;
864 /*
865 * count the number of dir index items that we can delete in one batch
866 */
867 while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
868 list_add_tail(&next->tree_list, &head);
869 nitems++;
870
871 curr = next;
872 next = __btrfs_next_delayed_item(curr);
873 if (!next)
874 break;
875
876 if (!btrfs_is_continuous_delayed_item(curr, next))
877 break;
878
879 i++;
880 if (i > last_item)
881 break;
882 btrfs_item_key_to_cpu(leaf, &key, i);
883 }
884
885 if (!nitems)
886 return 0;
887
888 ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
889 if (ret)
890 goto out;
891
892 list_for_each_entry_safe(curr, next, &head, tree_list) {
893 btrfs_delayed_item_release_metadata(root, curr);
894 list_del(&curr->tree_list);
895 btrfs_release_delayed_item(curr);
896 }
897
898out:
899 return ret;
900}
901
902static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
903 struct btrfs_path *path,
904 struct btrfs_root *root,
905 struct btrfs_delayed_node *node)
906{
907 struct btrfs_delayed_item *curr, *prev;
908 int ret = 0;
909
910do_again:
911 mutex_lock(&node->mutex);
912 curr = __btrfs_first_delayed_deletion_item(node);
913 if (!curr)
914 goto delete_fail;
915
916 ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
917 if (ret < 0)
918 goto delete_fail;
919 else if (ret > 0) {
920 /*
921 * can't find the item which this delayed item points to, so the
922 * delayed item is invalid, just drop it.
923 */
924 prev = curr;
925 curr = __btrfs_next_delayed_item(prev);
926 btrfs_release_delayed_item(prev);
927 ret = 0;
928 btrfs_release_path(path);
929 if (curr)
930 goto do_again;
931 else
932 goto delete_fail;
933 }
934
935 btrfs_batch_delete_items(trans, root, path, curr);
936 btrfs_release_path(path);
937 mutex_unlock(&node->mutex);
938 goto do_again;
939
940delete_fail:
941 btrfs_release_path(path);
942 mutex_unlock(&node->mutex);
943 return ret;
944}
945
946static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
947{
948 struct btrfs_delayed_root *delayed_root;
949
950 if (delayed_node && delayed_node->inode_dirty) {
951 BUG_ON(!delayed_node->root);
952 delayed_node->inode_dirty = 0;
953 delayed_node->count--;
954
955 delayed_root = delayed_node->root->fs_info->delayed_root;
956 atomic_dec(&delayed_root->items);
957 if (atomic_read(&delayed_root->items) <
958 BTRFS_DELAYED_BACKGROUND &&
959 waitqueue_active(&delayed_root->wait))
960 wake_up(&delayed_root->wait);
961 }
962}
963
964static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
965 struct btrfs_root *root,
966 struct btrfs_path *path,
967 struct btrfs_delayed_node *node)
968{
969 struct btrfs_key key;
970 struct btrfs_inode_item *inode_item;
971 struct extent_buffer *leaf;
972 int ret;
973
974 mutex_lock(&node->mutex);
975 if (!node->inode_dirty) {
976 mutex_unlock(&node->mutex);
977 return 0;
978 }
979
980 key.objectid = node->inode_id;
981 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
982 key.offset = 0;
983 ret = btrfs_lookup_inode(trans, root, path, &key, 1);
984 if (ret > 0) {
985 btrfs_release_path(path);
986 mutex_unlock(&node->mutex);
987 return -ENOENT;
988 } else if (ret < 0) {
989 mutex_unlock(&node->mutex);
990 return ret;
991 }
992
993 btrfs_unlock_up_safe(path, 1);
994 leaf = path->nodes[0];
995 inode_item = btrfs_item_ptr(leaf, path->slots[0],
996 struct btrfs_inode_item);
997 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
998 sizeof(struct btrfs_inode_item));
999 btrfs_mark_buffer_dirty(leaf);
1000 btrfs_release_path(path);
1001
1002 btrfs_delayed_inode_release_metadata(root, node);
1003 btrfs_release_delayed_inode(node);
1004 mutex_unlock(&node->mutex);
1005
1006 return 0;
1007}
1008
1009/* Called when committing the transaction. */
1010int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1011 struct btrfs_root *root)
1012{
1013 struct btrfs_delayed_root *delayed_root;
1014 struct btrfs_delayed_node *curr_node, *prev_node;
1015 struct btrfs_path *path;
1016 int ret = 0;
1017
1018 path = btrfs_alloc_path();
1019 if (!path)
1020 return -ENOMEM;
1021 path->leave_spinning = 1;
1022
1023 delayed_root = btrfs_get_delayed_root(root);
1024
1025 curr_node = btrfs_first_delayed_node(delayed_root);
1026 while (curr_node) {
1027 root = curr_node->root;
1028 ret = btrfs_insert_delayed_items(trans, path, root,
1029 curr_node);
1030 if (!ret)
1031 ret = btrfs_delete_delayed_items(trans, path, root,
1032 curr_node);
1033 if (!ret)
1034 ret = btrfs_update_delayed_inode(trans, root, path,
1035 curr_node);
1036 if (ret) {
1037 btrfs_release_delayed_node(curr_node);
1038 break;
1039 }
1040
1041 prev_node = curr_node;
1042 curr_node = btrfs_next_delayed_node(curr_node);
1043 btrfs_release_delayed_node(prev_node);
1044 }
1045
1046 btrfs_free_path(path);
1047 return ret;
1048}
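Note the iteration discipline in the loop above: the next node is pinned
(btrfs_next_delayed_node() takes a reference) before the current one is
released, so a concurrent release can never free the node the walker is
standing on. As a sketch:

	/*
	 *	curr = first();			takes a reference
	 *	while (curr) {
	 *		process(curr);
	 *		prev = curr;
	 *		curr = next(curr);	pin the next node first
	 *		release(prev);		only then drop the old one
	 *	}
	 */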
1049
1050static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1051 struct btrfs_delayed_node *node)
1052{
1053 struct btrfs_path *path;
1054 int ret;
1055
1056 path = btrfs_alloc_path();
1057 if (!path)
1058 return -ENOMEM;
1059 path->leave_spinning = 1;
1060
1061 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1062 if (!ret)
1063 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1064 if (!ret)
1065 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1066 btrfs_free_path(path);
1067
1068 return ret;
1069}
1070
1071int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1072 struct inode *inode)
1073{
1074 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1075 int ret;
1076
1077 if (!delayed_node)
1078 return 0;
1079
1080 mutex_lock(&delayed_node->mutex);
1081 if (!delayed_node->count) {
1082 mutex_unlock(&delayed_node->mutex);
1083 btrfs_release_delayed_node(delayed_node);
1084 return 0;
1085 }
1086 mutex_unlock(&delayed_node->mutex);
1087
1088 ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1089 btrfs_release_delayed_node(delayed_node);
1090 return ret;
1091}
1092
1093void btrfs_remove_delayed_node(struct inode *inode)
1094{
1095 struct btrfs_delayed_node *delayed_node;
1096
1097 delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1098 if (!delayed_node)
1099 return;
1100
1101 BTRFS_I(inode)->delayed_node = NULL;
1102 btrfs_release_delayed_node(delayed_node);
1103}
1104
1105struct btrfs_async_delayed_node {
1106 struct btrfs_root *root;
1107 struct btrfs_delayed_node *delayed_node;
1108 struct btrfs_work work;
1109};
1110
1111static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1112{
1113 struct btrfs_async_delayed_node *async_node;
1114 struct btrfs_trans_handle *trans;
1115 struct btrfs_path *path;
1116 struct btrfs_delayed_node *delayed_node = NULL;
1117 struct btrfs_root *root;
1118 unsigned long nr = 0;
1119 int need_requeue = 0;
1120 int ret;
1121
1122 async_node = container_of(work, struct btrfs_async_delayed_node, work);
1123
1124 path = btrfs_alloc_path();
1125 if (!path)
1126 goto out;
1127 path->leave_spinning = 1;
1128
1129 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root;
1131
1132 trans = btrfs_join_transaction(root);
1133 if (IS_ERR(trans))
1134 goto free_path;
1135
1136 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1137 if (!ret)
1138 ret = btrfs_delete_delayed_items(trans, path, root,
1139 delayed_node);
1140
1141 if (!ret)
1142 btrfs_update_delayed_inode(trans, root, path, delayed_node);
1143
1144 /*
1145 * Maybe new delayed items have been inserted, so we need to requeue
1146 * the work. Besides that, we must dequeue the empty delayed nodes
1147 * to avoid the race between delayed items balance and the worker.
1148 * The race like this:
1149 * Task1 Worker thread
1150 * count == 0, needn't requeue
1151 * also needn't insert the
1152 * delayed node into prepare
1153 * list again.
1154 * add lots of delayed items
1155 * queue the delayed node
1156 * already in the list,
1157 * and not in the prepare
1158 * list, it means the delayed
1159 * node is being dealt with
1160 * by the worker.
1161 * do delayed items balance
1162 * the delayed node is being
1163 * dealt with by the worker
1164 * now, just wait.
1165 * the worker goto idle.
1166 * Task1 will sleep until the transaction is committed.
1167 */
1168 mutex_lock(&delayed_node->mutex);
1169 if (delayed_node->count)
1170 need_requeue = 1;
1171 else
1172 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1173 delayed_node);
1174 mutex_unlock(&delayed_node->mutex);
1175
1176 nr = trans->blocks_used;
1177
1178 btrfs_end_transaction_dmeta(trans, root);
1179 __btrfs_btree_balance_dirty(root, nr);
1180free_path:
1181 btrfs_free_path(path);
1182out:
1183 if (need_requeue)
1184 btrfs_requeue_work(&async_node->work);
1185 else {
1186 btrfs_release_prepared_delayed_node(delayed_node);
1187 kfree(async_node);
1188 }
1189}
1190
1191static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1192 struct btrfs_root *root, int all)
1193{
1194 struct btrfs_async_delayed_node *async_node;
1195 struct btrfs_delayed_node *curr;
1196 int count = 0;
1197
1198again:
1199 curr = btrfs_first_prepared_delayed_node(delayed_root);
1200 if (!curr)
1201 return 0;
1202
1203 async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1204 if (!async_node) {
1205 btrfs_release_prepared_delayed_node(curr);
1206 return -ENOMEM;
1207 }
1208
1209 async_node->root = root;
1210 async_node->delayed_node = curr;
1211
1212 async_node->work.func = btrfs_async_run_delayed_node_done;
1213 async_node->work.flags = 0;
1214
1215 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1216 count++;
1217
1218 if (all || count < 4)
1219 goto again;
1220
1221 return 0;
1222}
1223
1224void btrfs_balance_delayed_items(struct btrfs_root *root)
1225{
1226 struct btrfs_delayed_root *delayed_root;
1227
1228 delayed_root = btrfs_get_delayed_root(root);
1229
1230 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1231 return;
1232
1233 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1234 int ret;
1235 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1236 if (ret)
1237 return;
1238
1239 wait_event_interruptible_timeout(
1240 delayed_root->wait,
1241 (atomic_read(&delayed_root->items) <
1242 BTRFS_DELAYED_BACKGROUND),
1243 HZ);
1244 return;
1245 }
1246
1247 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1248}
1249
1250int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1251 struct btrfs_root *root, const char *name,
1252 int name_len, struct inode *dir,
1253 struct btrfs_disk_key *disk_key, u8 type,
1254 u64 index)
1255{
1256 struct btrfs_delayed_node *delayed_node;
1257 struct btrfs_delayed_item *delayed_item;
1258 struct btrfs_dir_item *dir_item;
1259 int ret;
1260
1261 delayed_node = btrfs_get_or_create_delayed_node(dir);
1262 if (IS_ERR(delayed_node))
1263 return PTR_ERR(delayed_node);
1264
1265 delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1266 if (!delayed_item) {
1267 ret = -ENOMEM;
1268 goto release_node;
1269 }
1270
1271 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1272 /*
1273 * we reserved enough space when we started this transaction, so a
1274 * metadata reservation failure here is impossible
1275 */
1276 BUG_ON(ret);
1277
1278 delayed_item->key.objectid = btrfs_ino(dir);
1279 btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1280 delayed_item->key.offset = index;
1281
1282 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1283 dir_item->location = *disk_key;
1284 dir_item->transid = cpu_to_le64(trans->transid);
1285 dir_item->data_len = 0;
1286 dir_item->name_len = cpu_to_le16(name_len);
1287 dir_item->type = type;
1288 memcpy((char *)(dir_item + 1), name, name_len);
1289
1290 mutex_lock(&delayed_node->mutex);
1291 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1292 if (unlikely(ret)) {
1293 printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1294 "the insertion tree of the delayed node"
1295 "(root id: %llu, inode id: %llu, errno: %d)\n",
1296 name,
1297 (unsigned long long)delayed_node->root->objectid,
1298 (unsigned long long)delayed_node->inode_id,
1299 ret);
1300 BUG();
1301 }
1302 mutex_unlock(&delayed_node->mutex);
1303
1304release_node:
1305 btrfs_release_delayed_node(delayed_node);
1306 return ret;
1307}
1308
1309static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1310 struct btrfs_delayed_node *node,
1311 struct btrfs_key *key)
1312{
1313 struct btrfs_delayed_item *item;
1314
1315 mutex_lock(&node->mutex);
1316 item = __btrfs_lookup_delayed_insertion_item(node, key);
1317 if (!item) {
1318 mutex_unlock(&node->mutex);
1319 return 1;
1320 }
1321
1322 btrfs_delayed_item_release_metadata(root, item);
1323 btrfs_release_delayed_item(item);
1324 mutex_unlock(&node->mutex);
1325 return 0;
1326}
1327
1328int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1329 struct btrfs_root *root, struct inode *dir,
1330 u64 index)
1331{
1332 struct btrfs_delayed_node *node;
1333 struct btrfs_delayed_item *item;
1334 struct btrfs_key item_key;
1335 int ret;
1336
1337 node = btrfs_get_or_create_delayed_node(dir);
1338 if (IS_ERR(node))
1339 return PTR_ERR(node);
1340
1341 item_key.objectid = btrfs_ino(dir);
1342 btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1343 item_key.offset = index;
1344
1345 ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1346 if (!ret)
1347 goto end;
1348
1349 item = btrfs_alloc_delayed_item(0);
1350 if (!item) {
1351 ret = -ENOMEM;
1352 goto end;
1353 }
1354
1355 item->key = item_key;
1356
1357 ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1358 /*
1359 * we reserved enough space when we started this transaction, so a
1360 * metadata reservation failure here is impossible.
1361 */
1362 BUG_ON(ret);
1363
1364 mutex_lock(&node->mutex);
1365 ret = __btrfs_add_delayed_deletion_item(node, item);
1366 if (unlikely(ret)) {
1367 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1368 "into the deletion tree of the delayed node"
1369 "(root id: %llu, inode id: %llu, errno: %d)\n",
1370 (unsigned long long)index,
1371 (unsigned long long)node->root->objectid,
1372 (unsigned long long)node->inode_id,
1373 ret);
1374 BUG();
1375 }
1376 mutex_unlock(&node->mutex);
1377end:
1378 btrfs_release_delayed_node(node);
1379 return ret;
1380}
1381
1382int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1383{
1384 struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
1385 int ret = 0;
1386
1387 if (!delayed_node)
1388 return -ENOENT;
1389
1390 /*
1391 * Since we hold this directory's i_mutex, no new directory index
1392 * can be added to the delayed node and index_cnt can't be updated
1393 * in the meantime. So we needn't lock the delayed node.
1394 */
1395 if (!delayed_node->index_cnt)
1396 return -EINVAL;
1397
1398 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1399 return ret;
1400}
1401
1402void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1403 struct list_head *del_list)
1404{
1405 struct btrfs_delayed_node *delayed_node;
1406 struct btrfs_delayed_item *item;
1407
1408 delayed_node = btrfs_get_delayed_node(inode);
1409 if (!delayed_node)
1410 return;
1411
1412 mutex_lock(&delayed_node->mutex);
1413 item = __btrfs_first_delayed_insertion_item(delayed_node);
1414 while (item) {
1415 atomic_inc(&item->refs);
1416 list_add_tail(&item->readdir_list, ins_list);
1417 item = __btrfs_next_delayed_item(item);
1418 }
1419
1420 item = __btrfs_first_delayed_deletion_item(delayed_node);
1421 while (item) {
1422 atomic_inc(&item->refs);
1423 list_add_tail(&item->readdir_list, del_list);
1424 item = __btrfs_next_delayed_item(item);
1425 }
1426 mutex_unlock(&delayed_node->mutex);
1427 /*
1428 * This delayed node is still cached in the btrfs inode, so refs
1429 * must be > 1 now, and we needn't check whether it is going to
1430 * be freed.
1431 *
1432 * Besides that, this function is used for readdir; no delayed
1433 * items are inserted or deleted during it. So we also needn't
1434 * requeue or dequeue this delayed node.
1435 */
1436 atomic_dec(&delayed_node->refs);
1437}
1438
1439void btrfs_put_delayed_items(struct list_head *ins_list,
1440 struct list_head *del_list)
1441{
1442 struct btrfs_delayed_item *curr, *next;
1443
1444 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1445 list_del(&curr->readdir_list);
1446 if (atomic_dec_and_test(&curr->refs))
1447 kfree(curr);
1448 }
1449
1450 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1451 list_del(&curr->readdir_list);
1452 if (atomic_dec_and_test(&curr->refs))
1453 kfree(curr);
1454 }
1455}
1456
1457int btrfs_should_delete_dir_index(struct list_head *del_list,
1458 u64 index)
1459{
1460 struct btrfs_delayed_item *curr, *next;
1461 int ret;
1462
1463 if (list_empty(del_list))
1464 return 0;
1465
1466 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1467 if (curr->key.offset > index)
1468 break;
1469
1470 list_del(&curr->readdir_list);
1471 ret = (curr->key.offset == index);
1472
1473 if (atomic_dec_and_test(&curr->refs))
1474 kfree(curr);
1475
1476 if (ret)
1477 return 1;
1478 else
1479 continue;
1480 }
1481 return 0;
1482}
1483
1484/*
1485 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1486 *
1487 */
1488int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1489 filldir_t filldir,
1490 struct list_head *ins_list)
1491{
1492 struct btrfs_dir_item *di;
1493 struct btrfs_delayed_item *curr, *next;
1494 struct btrfs_key location;
1495 char *name;
1496 int name_len;
1497 int over = 0;
1498 unsigned char d_type;
1499
1500 if (list_empty(ins_list))
1501 return 0;
1502
1503 /*
1504 * The data of the delayed items can no longer change, so we
1505 * needn't lock them. And since we hold the directory's i_mutex,
1506 * nobody can delete any directory index now.
1507 */
1508 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1509 list_del(&curr->readdir_list);
1510
1511 if (curr->key.offset < filp->f_pos) {
1512 if (atomic_dec_and_test(&curr->refs))
1513 kfree(curr);
1514 continue;
1515 }
1516
1517 filp->f_pos = curr->key.offset;
1518
1519 di = (struct btrfs_dir_item *)curr->data;
1520 name = (char *)(di + 1);
1521 name_len = le16_to_cpu(di->name_len);
1522
1523 d_type = btrfs_filetype_table[di->type];
1524 btrfs_disk_key_to_cpu(&location, &di->location);
1525
1526 over = filldir(dirent, name, name_len, curr->key.offset,
1527 location.objectid, d_type);
1528
1529 if (atomic_dec_and_test(&curr->refs))
1530 kfree(curr);
1531
1532 if (over)
1533 return 1;
1534 }
1535 return 0;
1536}
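For context, the three readdir helpers above are meant to be stitched
together by the directory read path (presumably btrfs' readdir in inode.c,
which this patch series also touches); roughly, with locking and error
handling trimmed:

	/*
	 *	LIST_HEAD(ins_list);
	 *	LIST_HEAD(del_list);
	 *
	 *	btrfs_get_delayed_items(inode, &ins_list, &del_list);
	 *	for each on-disk dir index item {
	 *		if (btrfs_should_delete_dir_index(&del_list, index))
	 *			continue;	pending delete, hide it
	 *		emit the on-disk entry via filldir;
	 *	}
	 *	btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
	 *					&ins_list);
	 *	btrfs_put_delayed_items(&ins_list, &del_list);
	 */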
1537
1538BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1539 generation, 64);
1540BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1541 sequence, 64);
1542BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1543 transid, 64);
1544BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1545BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1546 nbytes, 64);
1547BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1548 block_group, 64);
1549BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1550BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1551BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1552BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1553BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1554BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1555
1556BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1557BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
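Each BTRFS_SETGET_STACK_FUNCS() invocation above generates an endian-safe
getter/setter pair for an in-memory ("stack") copy of the on-disk structure.
Roughly, the first one expands to something like the following (a sketch;
the real macro lives in ctree.h):

	static inline u64 btrfs_stack_inode_generation(
					struct btrfs_inode_item *s)
	{
		return le64_to_cpu(s->generation);
	}

	static inline void btrfs_set_stack_inode_generation(
					struct btrfs_inode_item *s, u64 val)
	{
		s->generation = cpu_to_le64(val);
	}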
1558
1559static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1560 struct btrfs_inode_item *inode_item,
1561 struct inode *inode)
1562{
1563 btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1564 btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1565 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1566 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1567 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1568 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1569 btrfs_set_stack_inode_generation(inode_item,
1570 BTRFS_I(inode)->generation);
1571 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item, 0);
1576
1577 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1578 inode->i_atime.tv_sec);
1579 btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1580 inode->i_atime.tv_nsec);
1581
1582 btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1583 inode->i_mtime.tv_sec);
1584 btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1585 inode->i_mtime.tv_nsec);
1586
1587 btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1588 inode->i_ctime.tv_sec);
1589 btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1590 inode->i_ctime.tv_nsec);
1591}
1592
1593int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1594 struct btrfs_root *root, struct inode *inode)
1595{
1596 struct btrfs_delayed_node *delayed_node;
1597 int ret;
1598
1599 delayed_node = btrfs_get_or_create_delayed_node(inode);
1600 if (IS_ERR(delayed_node))
1601 return PTR_ERR(delayed_node);
1602
1603 mutex_lock(&delayed_node->mutex);
1604 if (delayed_node->inode_dirty) {
1605 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1606 goto release_node;
1607 }
1608
1609 ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1610 /*
1611 * we reserve enough space when we start a new transaction, so a
1612 * metadata reservation failure here is impossible
1613 */
1614 BUG_ON(ret);
1615
1616 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1617 delayed_node->inode_dirty = 1;
1618 delayed_node->count++;
1619 atomic_inc(&root->fs_info->delayed_root->items);
1620release_node:
1621 mutex_unlock(&delayed_node->mutex);
1622 btrfs_release_delayed_node(delayed_node);
1623 return ret;
1624}
1625
1626static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1627{
1628 struct btrfs_root *root = delayed_node->root;
1629 struct btrfs_delayed_item *curr_item, *prev_item;
1630
1631 mutex_lock(&delayed_node->mutex);
1632 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1633 while (curr_item) {
1634 btrfs_delayed_item_release_metadata(root, curr_item);
1635 prev_item = curr_item;
1636 curr_item = __btrfs_next_delayed_item(prev_item);
1637 btrfs_release_delayed_item(prev_item);
1638 }
1639
1640 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1641 while (curr_item) {
1642 btrfs_delayed_item_release_metadata(root, curr_item);
1643 prev_item = curr_item;
1644 curr_item = __btrfs_next_delayed_item(prev_item);
1645 btrfs_release_delayed_item(prev_item);
1646 }
1647
1648 if (delayed_node->inode_dirty) {
1649 btrfs_delayed_inode_release_metadata(root, delayed_node);
1650 btrfs_release_delayed_inode(delayed_node);
1651 }
1652 mutex_unlock(&delayed_node->mutex);
1653}
1654
1655void btrfs_kill_delayed_inode_items(struct inode *inode)
1656{
1657 struct btrfs_delayed_node *delayed_node;
1658
1659 delayed_node = btrfs_get_delayed_node(inode);
1660 if (!delayed_node)
1661 return;
1662
1663 __btrfs_kill_delayed_node(delayed_node);
1664 btrfs_release_delayed_node(delayed_node);
1665}
1666
1667void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1668{
1669 u64 inode_id = 0;
1670 struct btrfs_delayed_node *delayed_nodes[8];
1671 int i, n;
1672
1673 while (1) {
1674 spin_lock(&root->inode_lock);
1675 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1676 (void **)delayed_nodes, inode_id,
1677 ARRAY_SIZE(delayed_nodes));
1678 if (!n) {
1679 spin_unlock(&root->inode_lock);
1680 break;
1681 }
1682
1683 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1684
1685 for (i = 0; i < n; i++)
1686 atomic_inc(&delayed_nodes[i]->refs);
1687 spin_unlock(&root->inode_lock);
1688
1689 for (i = 0; i < n; i++) {
1690 __btrfs_kill_delayed_node(delayed_nodes[i]);
1691 btrfs_release_delayed_node(delayed_nodes[i]);
1692 }
1693 }
1694}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
new file mode 100644
index 000000000000..eb7d240aa648
--- /dev/null
+++ b/fs/btrfs/delayed-inode.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#ifndef __DELAYED_TREE_OPERATION_H
21#define __DELAYED_TREE_OPERATION_H
22
23#include <linux/rbtree.h>
24#include <linux/spinlock.h>
25#include <linux/mutex.h>
26#include <linux/list.h>
27#include <linux/wait.h>
28#include <asm/atomic.h>
29
30#include "ctree.h"
31
32/* types of the delayed item */
33#define BTRFS_DELAYED_INSERTION_ITEM 1
34#define BTRFS_DELAYED_DELETION_ITEM 2
35
36struct btrfs_delayed_root {
37 spinlock_t lock;
38 struct list_head node_list;
39 /*
40 * Used for delayed nodes which are waiting to be dealt with by the
41 * worker. If the delayed node is inserted into the work queue, we
42 * drop it from this list.
43 */
44 struct list_head prepare_list;
45 atomic_t items; /* for delayed items */
46 int nodes; /* for delayed nodes */
47 wait_queue_head_t wait;
48};
49
50struct btrfs_delayed_node {
51 u64 inode_id;
52 u64 bytes_reserved;
53 struct btrfs_root *root;
54 /* Used to add the node into the delayed root's node list. */
55 struct list_head n_list;
56 /*
57 * Used to add the node into the prepare list; the nodes in this list
58 * are waiting to be dealt with by the async worker.
59 */
60 struct list_head p_list;
61 struct rb_root ins_root;
62 struct rb_root del_root;
63 struct mutex mutex;
64 struct btrfs_inode_item inode_item;
65 atomic_t refs;
66 u64 index_cnt;
67 bool in_list;
68 bool inode_dirty;
69 int count;
70};
71
72struct btrfs_delayed_item {
73 struct rb_node rb_node;
74 struct btrfs_key key;
75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs;
81 int ins_or_del;
82 u32 data_len;
83 char data[0];
84};
85
86static inline void btrfs_init_delayed_root(
87 struct btrfs_delayed_root *delayed_root)
88{
89 atomic_set(&delayed_root->items, 0);
90 delayed_root->nodes = 0;
91 spin_lock_init(&delayed_root->lock);
92 init_waitqueue_head(&delayed_root->wait);
93 INIT_LIST_HEAD(&delayed_root->node_list);
94 INIT_LIST_HEAD(&delayed_root->prepare_list);
95}
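For context, there is a single delayed root per filesystem; mount-time
setup would look roughly like this (a sketch with error handling trimmed;
the actual allocation presumably happens in disk-io.c's open_ctree()):

	fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
					GFP_NOFS);
	if (!fs_info->delayed_root)
		return -ENOMEM;
	btrfs_init_delayed_root(fs_info->delayed_root);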
96
97int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
98 struct btrfs_root *root, const char *name,
99 int name_len, struct inode *dir,
100 struct btrfs_disk_key *disk_key, u8 type,
101 u64 index);
102
103int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
104 struct btrfs_root *root, struct inode *dir,
105 u64 index);
106
107int btrfs_inode_delayed_dir_index_count(struct inode *inode);
108
109int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
110 struct btrfs_root *root);
111
112void btrfs_balance_delayed_items(struct btrfs_root *root);
113
114int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
115 struct inode *inode);
116/* Used for evicting the inode. */
117void btrfs_remove_delayed_node(struct inode *inode);
118void btrfs_kill_delayed_inode_items(struct inode *inode);
119
120
121int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
122 struct btrfs_root *root, struct inode *inode);
123
124/* Used for dropping a dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
126
127/* Used for readdir() */
128void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
129 struct list_head *del_list);
130void btrfs_put_delayed_items(struct list_head *ins_list,
131 struct list_head *del_list);
132int btrfs_should_delete_dir_index(struct list_head *del_list,
133 u64 index);
134int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
135 filldir_t filldir,
136 struct list_head *ins_list);
137
138/* for init */
139int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void);
141#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index bce28f653899..125cf76fcd08 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -281,44 +281,6 @@ again:
281} 281}
282 282
283/* 283/*
284 * This checks to see if there are any delayed refs in the
285 * btree for a given bytenr. It returns one if it finds any
286 * and zero otherwise.
287 *
288 * If it only finds a head node, it returns 0.
289 *
290 * The idea is to use this when deciding if you can safely delete an
291 * extent from the extent allocation tree. There may be a pending
292 * ref in the rbtree that adds or removes references, so as long as this
293 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
294 * allocation tree.
295 */
296int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
297{
298 struct btrfs_delayed_ref_node *ref;
299 struct btrfs_delayed_ref_root *delayed_refs;
300 struct rb_node *prev_node;
301 int ret = 0;
302
303 delayed_refs = &trans->transaction->delayed_refs;
304 spin_lock(&delayed_refs->lock);
305
306 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
307 if (ref) {
308 prev_node = rb_prev(&ref->rb_node);
309 if (!prev_node)
310 goto out;
311 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
312 rb_node);
313 if (ref->bytenr == bytenr)
314 ret = 1;
315 }
316out:
317 spin_unlock(&delayed_refs->lock);
318 return ret;
319}
320
321/*
322 * helper function to update an extent delayed ref in the 284 * helper function to update an extent delayed ref in the
323 * rbtree. existing and update must both have the same 285 * rbtree. existing and update must both have the same
324 * bytenr and parent 286 * bytenr and parent
@@ -747,79 +709,3 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
747 return btrfs_delayed_node_to_head(ref); 709 return btrfs_delayed_node_to_head(ref);
748 return NULL; 710 return NULL;
749} 711}
750
751/*
752 * add a delayed ref to the tree. This does all of the accounting required
753 * to make sure the delayed ref is eventually processed before this
754 * transaction commits.
755 *
756 * The main point of this call is to add and remove a backreference in a single
757 * shot, taking the lock only once, and only searching for the head node once.
758 *
759 * It is the same as doing a ref add and delete in two separate calls.
760 */
761#if 0
762int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
763 u64 bytenr, u64 num_bytes, u64 orig_parent,
764 u64 parent, u64 orig_ref_root, u64 ref_root,
765 u64 orig_ref_generation, u64 ref_generation,
766 u64 owner_objectid, int pin)
767{
768 struct btrfs_delayed_ref *ref;
769 struct btrfs_delayed_ref *old_ref;
770 struct btrfs_delayed_ref_head *head_ref;
771 struct btrfs_delayed_ref_root *delayed_refs;
772 int ret;
773
774 ref = kmalloc(sizeof(*ref), GFP_NOFS);
775 if (!ref)
776 return -ENOMEM;
777
778 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
779 if (!old_ref) {
780 kfree(ref);
781 return -ENOMEM;
782 }
783
784 /*
785 * the parent = 0 case comes from cases where we don't actually
786 * know the parent yet. It will get updated later via a add/drop
787 * pair.
788 */
789 if (parent == 0)
790 parent = bytenr;
791 if (orig_parent == 0)
792 orig_parent = bytenr;
793
794 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
795 if (!head_ref) {
796 kfree(ref);
797 kfree(old_ref);
798 return -ENOMEM;
799 }
800 delayed_refs = &trans->transaction->delayed_refs;
801 spin_lock(&delayed_refs->lock);
802
803 /*
804 * insert both the head node and the new ref without dropping
805 * the spin lock
806 */
807 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
808 (u64)-1, 0, 0, 0,
809 BTRFS_UPDATE_DELAYED_HEAD, 0);
810 BUG_ON(ret);
811
812 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
813 parent, ref_root, ref_generation,
814 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
815 BUG_ON(ret);
816
817 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
818 orig_parent, orig_ref_root,
819 orig_ref_generation, owner_objectid,
820 BTRFS_DROP_DELAYED_REF, pin);
821 BUG_ON(ret);
822 spin_unlock(&delayed_refs->lock);
823 return 0;
824}
825#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50e3cf92fbda..e287e3b0eab0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -166,12 +166,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
166 166
167struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
170int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
171 u64 bytenr, u64 num_bytes, u64 orig_parent,
172 u64 parent, u64 orig_ref_root, u64 ref_root,
173 u64 orig_ref_generation, u64 ref_generation,
174 u64 owner_objectid, int pin);
175int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, 169int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
176 struct btrfs_delayed_ref_head *head); 170 struct btrfs_delayed_ref_head *head);
177int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 171int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c62f02f6ae69..685f2593c4f0 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -50,7 +50,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
50 if (di) 50 if (di)
51 return ERR_PTR(-EEXIST); 51 return ERR_PTR(-EEXIST);
52 ret = btrfs_extend_item(trans, root, path, data_size); 52 ret = btrfs_extend_item(trans, root, path, data_size);
53 WARN_ON(ret > 0);
54 } 53 }
55 if (ret < 0) 54 if (ret < 0)
56 return ERR_PTR(ret); 55 return ERR_PTR(ret);
@@ -124,8 +123,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
124 * to use for the second index (if one is created). 123 * to use for the second index (if one is created).
125 */ 124 */
126int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 125int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
127 *root, const char *name, int name_len, u64 dir, 126 *root, const char *name, int name_len,
128 struct btrfs_key *location, u8 type, u64 index) 127 struct inode *dir, struct btrfs_key *location,
128 u8 type, u64 index)
129{ 129{
130 int ret = 0; 130 int ret = 0;
131 int ret2 = 0; 131 int ret2 = 0;
@@ -137,13 +137,17 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
137 struct btrfs_disk_key disk_key; 137 struct btrfs_disk_key disk_key;
138 u32 data_size; 138 u32 data_size;
139 139
140 key.objectid = dir; 140 key.objectid = btrfs_ino(dir);
141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
142 key.offset = btrfs_name_hash(name, name_len); 142 key.offset = btrfs_name_hash(name, name_len);
143 143
144 path = btrfs_alloc_path(); 144 path = btrfs_alloc_path();
145 if (!path)
146 return -ENOMEM;
145 path->leave_spinning = 1; 147 path->leave_spinning = 1;
146 148
149 btrfs_cpu_key_to_disk(&disk_key, location);
150
147 data_size = sizeof(*dir_item) + name_len; 151 data_size = sizeof(*dir_item) + name_len;
148 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 152 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
149 name, name_len); 153 name, name_len);
@@ -155,7 +159,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
155 } 159 }
156 160
157 leaf = path->nodes[0]; 161 leaf = path->nodes[0];
158 btrfs_cpu_key_to_disk(&disk_key, location);
159 btrfs_set_dir_item_key(leaf, dir_item, &disk_key); 162 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
160 btrfs_set_dir_type(leaf, dir_item, type); 163 btrfs_set_dir_type(leaf, dir_item, type);
161 btrfs_set_dir_data_len(leaf, dir_item, 0); 164 btrfs_set_dir_data_len(leaf, dir_item, 0);
@@ -172,29 +175,11 @@ second_insert:
172 ret = 0; 175 ret = 0;
173 goto out_free; 176 goto out_free;
174 } 177 }
175 btrfs_release_path(root, path); 178 btrfs_release_path(path);
176
177 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
178 key.offset = index;
179 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
180 name, name_len);
181 if (IS_ERR(dir_item)) {
182 ret2 = PTR_ERR(dir_item);
183 goto out_free;
184 }
185 leaf = path->nodes[0];
186 btrfs_cpu_key_to_disk(&disk_key, location);
187 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
188 btrfs_set_dir_type(leaf, dir_item, type);
189 btrfs_set_dir_data_len(leaf, dir_item, 0);
190 btrfs_set_dir_name_len(leaf, dir_item, name_len);
191 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
192 name_ptr = (unsigned long)(dir_item + 1);
193 write_extent_buffer(leaf, name, name_ptr, name_len);
194 btrfs_mark_buffer_dirty(leaf);
195 179
180 ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
181 &disk_key, type, index);
196out_free: 182out_free:
197
198 btrfs_free_path(path); 183 btrfs_free_path(path);
199 if (ret) 184 if (ret)
200 return ret; 185 return ret;
@@ -452,7 +437,7 @@ int verify_dir_item(struct btrfs_root *root,
452 namelen = XATTR_NAME_MAX; 437 namelen = XATTR_NAME_MAX;
453 438
454 if (btrfs_dir_name_len(leaf, dir_item) > namelen) { 439 if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
455 printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n", 440 printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
456 (unsigned)btrfs_dir_name_len(leaf, dir_item)); 441 (unsigned)btrfs_dir_name_len(leaf, dir_item));
457 return 1; 442 return 1;
458 } 443 }
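
The rework above splits directory-name insertion in two: the hash-keyed DIR_ITEM is still written into the tree immediately, while the sequential DIR_INDEX item is queued through the new delayed-inode machinery and flushed in batches by the delayed-meta workers. A hypothetical caller against the new prototype, sketched for illustration (the function and its key setup are invented, not part of the patch):

static int add_name_sketch(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct inode *dir,
			   struct inode *inode, const char *name,
			   int name_len, u64 index)
{
	struct btrfs_key location;

	/* location key of the inode the new name points at */
	location.objectid = btrfs_ino(inode);
	btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY);
	location.offset = 0;

	/*
	 * One call now covers both names: the DIR_ITEM goes into the
	 * b-tree here, the DIR_INDEX via btrfs_insert_delayed_dir_index().
	 */
	return btrfs_insert_dir_item(trans, root, name, name_len, dir,
				     &location, BTRFS_FT_REG_FILE, index);
}
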
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 93ef254ec432..a203d363184d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h>
32#include <asm/unaligned.h> 33#include <asm/unaligned.h>
33#include "compat.h" 34#include "compat.h"
34#include "ctree.h" 35#include "ctree.h"
@@ -41,6 +42,7 @@
41#include "locking.h" 42#include "locking.h"
42#include "tree-log.h" 43#include "tree-log.h"
43#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h"
44 46
45static struct extent_io_ops btree_extent_io_ops; 47static struct extent_io_ops btree_extent_io_ops;
46static void end_workqueue_fn(struct btrfs_work *work); 48static void end_workqueue_fn(struct btrfs_work *work);
@@ -137,7 +139,7 @@ static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
137 * that covers the entire device 139 * that covers the entire device
138 */ 140 */
139static struct extent_map *btree_get_extent(struct inode *inode, 141static struct extent_map *btree_get_extent(struct inode *inode,
140 struct page *page, size_t page_offset, u64 start, u64 len, 142 struct page *page, size_t pg_offset, u64 start, u64 len,
141 int create) 143 int create)
142{ 144{
143 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 145 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -154,7 +156,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
154 } 156 }
155 read_unlock(&em_tree->lock); 157 read_unlock(&em_tree->lock);
156 158
157 em = alloc_extent_map(GFP_NOFS); 159 em = alloc_extent_map();
158 if (!em) { 160 if (!em) {
159 em = ERR_PTR(-ENOMEM); 161 em = ERR_PTR(-ENOMEM);
160 goto out; 162 goto out;
@@ -254,14 +256,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
254 memcpy(&found, result, csum_size); 256 memcpy(&found, result, csum_size);
255 257
256 read_extent_buffer(buf, &val, 0, csum_size); 258 read_extent_buffer(buf, &val, 0, csum_size);
257 if (printk_ratelimit()) { 259 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
258 printk(KERN_INFO "btrfs: %s checksum verify "
259 "failed on %llu wanted %X found %X " 260 "failed on %llu wanted %X found %X "
260 "level %d\n", 261 "level %d\n",
261 root->fs_info->sb->s_id, 262 root->fs_info->sb->s_id,
262 (unsigned long long)buf->start, val, found, 263 (unsigned long long)buf->start, val, found,
263 btrfs_header_level(buf)); 264 btrfs_header_level(buf));
264 }
265 if (result != (char *)&inline_result) 265 if (result != (char *)&inline_result)
266 kfree(result); 266 kfree(result);
267 return 1; 267 return 1;
@@ -296,13 +296,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
296 ret = 0; 296 ret = 0;
297 goto out; 297 goto out;
298 } 298 }
299 if (printk_ratelimit()) { 299 printk_ratelimited("parent transid verify failed on %llu wanted %llu "
300 printk("parent transid verify failed on %llu wanted %llu "
301 "found %llu\n", 300 "found %llu\n",
302 (unsigned long long)eb->start, 301 (unsigned long long)eb->start,
303 (unsigned long long)parent_transid, 302 (unsigned long long)parent_transid,
304 (unsigned long long)btrfs_header_generation(eb)); 303 (unsigned long long)btrfs_header_generation(eb));
305 }
306 ret = 1; 304 ret = 1;
307 clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 305 clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
308out: 306out:
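
The two hunks above are the first instances of a conversion repeated through this file: the open-coded printk_ratelimit() test collapses into printk_ratelimited(), enabled by the new <linux/ratelimit.h> include. A minimal before/after sketch (the message and wrapper function are placeholders, not taken from the patch):

static void report_sketch(u64 bytenr)
{
	/* before: explicit test against the single global ratelimit state */
	if (printk_ratelimit())
		printk(KERN_INFO "btrfs: noisy event at %llu\n",
		       (unsigned long long)bytenr);

	/* after: one statement, with per-call-site ratelimit state */
	printk_ratelimited(KERN_INFO "btrfs: noisy event at %llu\n",
			   (unsigned long long)bytenr);
}
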
@@ -380,7 +378,7 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
380 len = page->private >> 2; 378 len = page->private >> 2;
381 WARN_ON(len == 0); 379 WARN_ON(len == 0);
382 380
383 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 381 eb = alloc_extent_buffer(tree, start, len, page);
384 if (eb == NULL) { 382 if (eb == NULL) {
385 WARN_ON(1); 383 WARN_ON(1);
386 goto out; 384 goto out;
@@ -525,7 +523,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
525 len = page->private >> 2; 523 len = page->private >> 2;
526 WARN_ON(len == 0); 524 WARN_ON(len == 0);
527 525
528 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 526 eb = alloc_extent_buffer(tree, start, len, page);
529 if (eb == NULL) { 527 if (eb == NULL) {
530 ret = -EIO; 528 ret = -EIO;
531 goto out; 529 goto out;
@@ -533,12 +531,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
533 531
534 found_start = btrfs_header_bytenr(eb); 532 found_start = btrfs_header_bytenr(eb);
535 if (found_start != start) { 533 if (found_start != start) {
536 if (printk_ratelimit()) { 534 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
537 printk(KERN_INFO "btrfs bad tree block start "
538 "%llu %llu\n", 535 "%llu %llu\n",
539 (unsigned long long)found_start, 536 (unsigned long long)found_start,
540 (unsigned long long)eb->start); 537 (unsigned long long)eb->start);
541 }
542 ret = -EIO; 538 ret = -EIO;
543 goto err; 539 goto err;
544 } 540 }
@@ -550,10 +546,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
550 goto err; 546 goto err;
551 } 547 }
552 if (check_tree_block_fsid(root, eb)) { 548 if (check_tree_block_fsid(root, eb)) {
553 if (printk_ratelimit()) { 549 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
554 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
555 (unsigned long long)eb->start); 550 (unsigned long long)eb->start);
556 }
557 ret = -EIO; 551 ret = -EIO;
558 goto err; 552 goto err;
559 } 553 }
@@ -650,12 +644,6 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
650 return 256 * limit; 644 return 256 * limit;
651} 645}
652 646
653int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
654{
655 return atomic_read(&info->nr_async_bios) >
656 btrfs_async_submit_limit(info);
657}
658
659static void run_one_async_start(struct btrfs_work *work) 647static void run_one_async_start(struct btrfs_work *work)
660{ 648{
661 struct async_submit_bio *async; 649 struct async_submit_bio *async;
@@ -963,7 +951,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
963 struct inode *btree_inode = root->fs_info->btree_inode; 951 struct inode *btree_inode = root->fs_info->btree_inode;
964 struct extent_buffer *eb; 952 struct extent_buffer *eb;
965 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 953 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
966 bytenr, blocksize, GFP_NOFS); 954 bytenr, blocksize);
967 return eb; 955 return eb;
968} 956}
969 957
@@ -974,7 +962,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
974 struct extent_buffer *eb; 962 struct extent_buffer *eb;
975 963
976 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 964 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
977 bytenr, blocksize, NULL, GFP_NOFS); 965 bytenr, blocksize, NULL);
978 return eb; 966 return eb;
979} 967}
980 968
@@ -1058,13 +1046,13 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1058 root->name = NULL; 1046 root->name = NULL;
1059 root->in_sysfs = 0; 1047 root->in_sysfs = 0;
1060 root->inode_tree = RB_ROOT; 1048 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1061 root->block_rsv = NULL; 1050 root->block_rsv = NULL;
1062 root->orphan_block_rsv = NULL; 1051 root->orphan_block_rsv = NULL;
1063 1052
1064 INIT_LIST_HEAD(&root->dirty_list); 1053 INIT_LIST_HEAD(&root->dirty_list);
1065 INIT_LIST_HEAD(&root->orphan_list); 1054 INIT_LIST_HEAD(&root->orphan_list);
1066 INIT_LIST_HEAD(&root->root_list); 1055 INIT_LIST_HEAD(&root->root_list);
1067 spin_lock_init(&root->node_lock);
1068 spin_lock_init(&root->orphan_lock); 1056 spin_lock_init(&root->orphan_lock);
1069 spin_lock_init(&root->inode_lock); 1057 spin_lock_init(&root->inode_lock);
1070 spin_lock_init(&root->accounting_lock); 1058 spin_lock_init(&root->accounting_lock);
@@ -1080,7 +1068,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1080 root->log_transid = 0; 1068 root->log_transid = 0;
1081 root->last_log_commit = 0; 1069 root->last_log_commit = 0;
1082 extent_io_tree_init(&root->dirty_log_pages, 1070 extent_io_tree_init(&root->dirty_log_pages,
1083 fs_info->btree_inode->i_mapping, GFP_NOFS); 1071 fs_info->btree_inode->i_mapping);
1084 1072
1085 memset(&root->root_key, 0, sizeof(root->root_key)); 1073 memset(&root->root_key, 0, sizeof(root->root_key));
1086 memset(&root->root_item, 0, sizeof(root->root_item)); 1074 memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1283,21 +1271,6 @@ out:
1283 return root; 1271 return root;
1284} 1272}
1285 1273
1286struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1287 u64 root_objectid)
1288{
1289 struct btrfs_root *root;
1290
1291 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
1292 return fs_info->tree_root;
1293 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
1294 return fs_info->extent_root;
1295
1296 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1297 (unsigned long)root_objectid);
1298 return root;
1299}
1300
1301struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1274struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1302 struct btrfs_key *location) 1275 struct btrfs_key *location)
1303{ 1276{
@@ -1326,6 +1299,19 @@ again:
1326 if (IS_ERR(root)) 1299 if (IS_ERR(root))
1327 return root; 1300 return root;
1328 1301
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS);
1307 if (!root->free_ino_pinned)
1308 goto fail;
1309
1310 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait);
1314
1329 set_anon_super(&root->anon_super, NULL); 1315 set_anon_super(&root->anon_super, NULL);
1330 1316
1331 if (btrfs_root_refs(&root->root_item) == 0) { 1317 if (btrfs_root_refs(&root->root_item) == 0) {
@@ -1369,41 +1355,6 @@ fail:
1369 return ERR_PTR(ret); 1355 return ERR_PTR(ret);
1370} 1356}
1371 1357
1372struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1373 struct btrfs_key *location,
1374 const char *name, int namelen)
1375{
1376 return btrfs_read_fs_root_no_name(fs_info, location);
1377#if 0
1378 struct btrfs_root *root;
1379 int ret;
1380
1381 root = btrfs_read_fs_root_no_name(fs_info, location);
1382 if (!root)
1383 return NULL;
1384
1385 if (root->in_sysfs)
1386 return root;
1387
1388 ret = btrfs_set_root_name(root, name, namelen);
1389 if (ret) {
1390 free_extent_buffer(root->node);
1391 kfree(root);
1392 return ERR_PTR(ret);
1393 }
1394
1395 ret = btrfs_sysfs_add_root(root);
1396 if (ret) {
1397 free_extent_buffer(root->node);
1398 kfree(root->name);
1399 kfree(root);
1400 return ERR_PTR(ret);
1401 }
1402 root->in_sysfs = 1;
1403 return root;
1404#endif
1405}
1406
1407static int btrfs_congested_fn(void *congested_data, int bdi_bits) 1358static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1408{ 1359{
1409 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1360 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
@@ -1411,7 +1362,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1411 struct btrfs_device *device; 1362 struct btrfs_device *device;
1412 struct backing_dev_info *bdi; 1363 struct backing_dev_info *bdi;
1413 1364
1414 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1365 rcu_read_lock();
1366 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
1415 if (!device->bdev) 1367 if (!device->bdev)
1416 continue; 1368 continue;
1417 bdi = blk_get_backing_dev_info(device->bdev); 1369 bdi = blk_get_backing_dev_info(device->bdev);
@@ -1420,6 +1372,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1420 break; 1372 break;
1421 } 1373 }
1422 } 1374 }
1375 rcu_read_unlock();
1423 return ret; 1376 return ret;
1424} 1377}
1425 1378
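
btrfs_congested_fn() now walks the device list under RCU instead of unlocked; the same list_for_each_entry_rcu() conversion appears again in write_all_supers() below, where the device_list_mutex is held rather than rcu_read_lock(). The reader-side shape as a standalone sketch (the counting helper is invented for illustration):

static int count_live_bdevs_sketch(struct btrfs_fs_info *info)
{
	struct btrfs_device *device;
	int n = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(device, &info->fs_devices->devices,
				dev_list) {
		if (device->bdev)	/* entries may have no bdev yet */
			n++;
	}
	rcu_read_unlock();
	return n;
}
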
@@ -1522,6 +1475,7 @@ static int cleaner_kthread(void *arg)
1522 btrfs_run_delayed_iputs(root); 1475 btrfs_run_delayed_iputs(root);
1523 btrfs_clean_old_snapshots(root); 1476 btrfs_clean_old_snapshots(root);
1524 mutex_unlock(&root->fs_info->cleaner_mutex); 1477 mutex_unlock(&root->fs_info->cleaner_mutex);
1478 btrfs_run_defrag_inodes(root->fs_info);
1525 } 1479 }
1526 1480
1527 if (freezing(current)) { 1481 if (freezing(current)) {
@@ -1611,7 +1565,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1611 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1565 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1612 GFP_NOFS); 1566 GFP_NOFS);
1613 struct btrfs_root *tree_root = btrfs_sb(sb); 1567 struct btrfs_root *tree_root = btrfs_sb(sb);
1614 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1568 struct btrfs_fs_info *fs_info = NULL;
1615 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1569 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1616 GFP_NOFS); 1570 GFP_NOFS);
1617 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1571 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1623,11 +1577,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1623 1577
1624 struct btrfs_super_block *disk_super; 1578 struct btrfs_super_block *disk_super;
1625 1579
1626 if (!extent_root || !tree_root || !fs_info || 1580 if (!extent_root || !tree_root || !tree_root->fs_info ||
1627 !chunk_root || !dev_root || !csum_root) { 1581 !chunk_root || !dev_root || !csum_root) {
1628 err = -ENOMEM; 1582 err = -ENOMEM;
1629 goto fail; 1583 goto fail;
1630 } 1584 }
1585 fs_info = tree_root->fs_info;
1631 1586
1632 ret = init_srcu_struct(&fs_info->subvol_srcu); 1587 ret = init_srcu_struct(&fs_info->subvol_srcu);
1633 if (ret) { 1588 if (ret) {
@@ -1662,6 +1617,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1662 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1663 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1664 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock);
1665 1621
1666 init_completion(&fs_info->kobj_unregister); 1622 init_completion(&fs_info->kobj_unregister);
1667 fs_info->tree_root = tree_root; 1623 fs_info->tree_root = tree_root;
@@ -1684,9 +1640,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1684 atomic_set(&fs_info->async_delalloc_pages, 0); 1640 atomic_set(&fs_info->async_delalloc_pages, 0);
1685 atomic_set(&fs_info->async_submit_draining, 0); 1641 atomic_set(&fs_info->async_submit_draining, 0);
1686 atomic_set(&fs_info->nr_async_bios, 0); 1642 atomic_set(&fs_info->nr_async_bios, 0);
1643 atomic_set(&fs_info->defrag_running, 0);
1687 fs_info->sb = sb; 1644 fs_info->sb = sb;
1688 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1689 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT;
1690 fs_info->trans_no_join = 0; 1648 fs_info->trans_no_join = 0;
1691 1649
1692 fs_info->thread_pool_size = min_t(unsigned long, 1650 fs_info->thread_pool_size = min_t(unsigned long,
@@ -1694,6 +1652,24 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1694 1652
1695 INIT_LIST_HEAD(&fs_info->ordered_extents); 1653 INIT_LIST_HEAD(&fs_info->ordered_extents);
1696 spin_lock_init(&fs_info->ordered_extent_lock); 1654 spin_lock_init(&fs_info->ordered_extent_lock);
1655 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
1656 GFP_NOFS);
1657 if (!fs_info->delayed_root) {
1658 err = -ENOMEM;
1659 goto fail_iput;
1660 }
1661 btrfs_init_delayed_root(fs_info->delayed_root);
1662
1663 mutex_init(&fs_info->scrub_lock);
1664 atomic_set(&fs_info->scrubs_running, 0);
1665 atomic_set(&fs_info->scrub_pause_req, 0);
1666 atomic_set(&fs_info->scrubs_paused, 0);
1667 atomic_set(&fs_info->scrub_cancel_req, 0);
1668 init_waitqueue_head(&fs_info->scrub_pause_wait);
1669 init_rwsem(&fs_info->scrub_super_lock);
1670 fs_info->scrub_workers_refcnt = 0;
1671 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1672 fs_info->thread_pool_size, &fs_info->generic_worker);
1697 1673
1698 sb->s_blocksize = 4096; 1674 sb->s_blocksize = 4096;
1699 sb->s_blocksize_bits = blksize_bits(4096); 1675 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1712,10 +1688,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1712 1688
1713 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1689 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1714 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1690 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1715 fs_info->btree_inode->i_mapping, 1691 fs_info->btree_inode->i_mapping);
1716 GFP_NOFS); 1692 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
1717 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
1718 GFP_NOFS);
1719 1693
1720 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1694 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1721 1695
@@ -1729,9 +1703,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1729 fs_info->block_group_cache_tree = RB_ROOT; 1703 fs_info->block_group_cache_tree = RB_ROOT;
1730 1704
1731 extent_io_tree_init(&fs_info->freed_extents[0], 1705 extent_io_tree_init(&fs_info->freed_extents[0],
1732 fs_info->btree_inode->i_mapping, GFP_NOFS); 1706 fs_info->btree_inode->i_mapping);
1733 extent_io_tree_init(&fs_info->freed_extents[1], 1707 extent_io_tree_init(&fs_info->freed_extents[1],
1734 fs_info->btree_inode->i_mapping, GFP_NOFS); 1708 fs_info->btree_inode->i_mapping);
1735 fs_info->pinned_extents = &fs_info->freed_extents[0]; 1709 fs_info->pinned_extents = &fs_info->freed_extents[0];
1736 fs_info->do_barriers = 1; 1710 fs_info->do_barriers = 1;
1737 1711
@@ -1760,7 +1734,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1760 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1734 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1761 if (!bh) { 1735 if (!bh) {
1762 err = -EINVAL; 1736 err = -EINVAL;
1763 goto fail_iput; 1737 goto fail_alloc;
1764 } 1738 }
1765 1739
1766 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1740 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
@@ -1772,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1772 1746
1773 disk_super = &fs_info->super_copy; 1747 disk_super = &fs_info->super_copy;
1774 if (!btrfs_super_root(disk_super)) 1748 if (!btrfs_super_root(disk_super))
1775 goto fail_iput; 1749 goto fail_alloc;
1776 1750
1777 /* check FS state, whether FS is broken. */ 1751 /* check FS state, whether FS is broken. */
1778 fs_info->fs_state |= btrfs_super_flags(disk_super); 1752 fs_info->fs_state |= btrfs_super_flags(disk_super);
@@ -1788,7 +1762,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1788 ret = btrfs_parse_options(tree_root, options); 1762 ret = btrfs_parse_options(tree_root, options);
1789 if (ret) { 1763 if (ret) {
1790 err = ret; 1764 err = ret;
1791 goto fail_iput; 1765 goto fail_alloc;
1792 } 1766 }
1793 1767
1794 features = btrfs_super_incompat_flags(disk_super) & 1768 features = btrfs_super_incompat_flags(disk_super) &
@@ -1798,7 +1772,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1798 "unsupported optional features (%Lx).\n", 1772 "unsupported optional features (%Lx).\n",
1799 (unsigned long long)features); 1773 (unsigned long long)features);
1800 err = -EINVAL; 1774 err = -EINVAL;
1801 goto fail_iput; 1775 goto fail_alloc;
1802 } 1776 }
1803 1777
1804 features = btrfs_super_incompat_flags(disk_super); 1778 features = btrfs_super_incompat_flags(disk_super);
@@ -1814,7 +1788,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1814 "unsupported option features (%Lx).\n", 1788 "unsupported option features (%Lx).\n",
1815 (unsigned long long)features); 1789 (unsigned long long)features);
1816 err = -EINVAL; 1790 err = -EINVAL;
1817 goto fail_iput; 1791 goto fail_alloc;
1818 } 1792 }
1819 1793
1820 btrfs_init_workers(&fs_info->generic_worker, 1794 btrfs_init_workers(&fs_info->generic_worker,
@@ -1861,6 +1835,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1861 &fs_info->generic_worker); 1835 &fs_info->generic_worker);
1862 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 1836 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1863 1, &fs_info->generic_worker); 1837 1, &fs_info->generic_worker);
1838 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1839 fs_info->thread_pool_size,
1840 &fs_info->generic_worker);
1864 1841
1865 /* 1842 /*
1866 * endios are largely parallel and should have a very 1843 * endios are largely parallel and should have a very
@@ -1882,6 +1859,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1882 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1859 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1883 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1860 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1884 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1861 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1862 btrfs_start_workers(&fs_info->delayed_workers, 1);
1885 1863
1886 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1864 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1887 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1865 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2138,6 +2116,9 @@ fail_sb_buffer:
2138 btrfs_stop_workers(&fs_info->endio_write_workers); 2116 btrfs_stop_workers(&fs_info->endio_write_workers);
2139 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2117 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2140 btrfs_stop_workers(&fs_info->submit_workers); 2118 btrfs_stop_workers(&fs_info->submit_workers);
2119 btrfs_stop_workers(&fs_info->delayed_workers);
2120fail_alloc:
2121 kfree(fs_info->delayed_root);
2141fail_iput: 2122fail_iput:
2142 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2123 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2143 iput(fs_info->btree_inode); 2124 iput(fs_info->btree_inode);
@@ -2165,11 +2146,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2165 if (uptodate) { 2146 if (uptodate) {
2166 set_buffer_uptodate(bh); 2147 set_buffer_uptodate(bh);
2167 } else { 2148 } else {
2168 if (printk_ratelimit()) { 2149 printk_ratelimited(KERN_WARNING "lost page write due to "
2169 printk(KERN_WARNING "lost page write due to "
2170 "I/O error on %s\n", 2150 "I/O error on %s\n",
2171 bdevname(bh->b_bdev, b)); 2151 bdevname(bh->b_bdev, b));
2172 }
2173 /* note, we don't set_buffer_write_io_error because we have 2152 /* note, we don't set_buffer_write_io_error because we have
2174 * our own ways of dealing with the IO errors 2153 * our own ways of dealing with the IO errors
2175 */ 2154 */
@@ -2333,7 +2312,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2333 2312
2334 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2313 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2335 head = &root->fs_info->fs_devices->devices; 2314 head = &root->fs_info->fs_devices->devices;
2336 list_for_each_entry(dev, head, dev_list) { 2315 list_for_each_entry_rcu(dev, head, dev_list) {
2337 if (!dev->bdev) { 2316 if (!dev->bdev) {
2338 total_errors++; 2317 total_errors++;
2339 continue; 2318 continue;
@@ -2366,7 +2345,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2366 } 2345 }
2367 2346
2368 total_errors = 0; 2347 total_errors = 0;
2369 list_for_each_entry(dev, head, dev_list) { 2348 list_for_each_entry_rcu(dev, head, dev_list) {
2370 if (!dev->bdev) 2349 if (!dev->bdev)
2371 continue; 2350 continue;
2372 if (!dev->in_fs_metadata || !dev->writeable) 2351 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2404,12 +2383,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2404 if (btrfs_root_refs(&root->root_item) == 0) 2383 if (btrfs_root_refs(&root->root_item) == 0)
2405 synchronize_srcu(&fs_info->subvol_srcu); 2384 synchronize_srcu(&fs_info->subvol_srcu);
2406 2385
2386 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2387 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2407 free_fs_root(root); 2388 free_fs_root(root);
2408 return 0; 2389 return 0;
2409} 2390}
2410 2391
2411static void free_fs_root(struct btrfs_root *root) 2392static void free_fs_root(struct btrfs_root *root)
2412{ 2393{
2394 iput(root->cache_inode);
2413 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2395 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2414 if (root->anon_super.s_dev) { 2396 if (root->anon_super.s_dev) {
2415 down_write(&root->anon_super.s_umount); 2397 down_write(&root->anon_super.s_umount);
@@ -2417,6 +2399,8 @@ static void free_fs_root(struct btrfs_root *root)
2417 } 2399 }
2418 free_extent_buffer(root->node); 2400 free_extent_buffer(root->node);
2419 free_extent_buffer(root->commit_root); 2401 free_extent_buffer(root->commit_root);
2402 kfree(root->free_ino_ctl);
2403 kfree(root->free_ino_pinned);
2420 kfree(root->name); 2404 kfree(root->name);
2421 kfree(root); 2405 kfree(root);
2422} 2406}
@@ -2520,6 +2504,15 @@ int close_ctree(struct btrfs_root *root)
2520 fs_info->closing = 1; 2504 fs_info->closing = 1;
2521 smp_mb(); 2505 smp_mb();
2522 2506
2507 btrfs_scrub_cancel(root);
2508
2509 /* wait for any defraggers to finish */
2510 wait_event(fs_info->transaction_wait,
2511 (atomic_read(&fs_info->defrag_running) == 0));
2512
2513 /* clear out the rbtree of defraggable inodes */
2514 btrfs_run_defrag_inodes(root->fs_info);
2515
2523 btrfs_put_block_group_cache(fs_info); 2516 btrfs_put_block_group_cache(fs_info);
2524 2517
2525 /* 2518 /*
@@ -2578,6 +2571,7 @@ int close_ctree(struct btrfs_root *root)
2578 del_fs_roots(fs_info); 2571 del_fs_roots(fs_info);
2579 2572
2580 iput(fs_info->btree_inode); 2573 iput(fs_info->btree_inode);
2574 kfree(fs_info->delayed_root);
2581 2575
2582 btrfs_stop_workers(&fs_info->generic_worker); 2576 btrfs_stop_workers(&fs_info->generic_worker);
2583 btrfs_stop_workers(&fs_info->fixup_workers); 2577 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2589,6 +2583,7 @@ int close_ctree(struct btrfs_root *root)
2589 btrfs_stop_workers(&fs_info->endio_write_workers); 2583 btrfs_stop_workers(&fs_info->endio_write_workers);
2590 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2584 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2591 btrfs_stop_workers(&fs_info->submit_workers); 2585 btrfs_stop_workers(&fs_info->submit_workers);
2586 btrfs_stop_workers(&fs_info->delayed_workers);
2592 2587
2593 btrfs_close_devices(fs_info->fs_devices); 2588 btrfs_close_devices(fs_info->fs_devices);
2594 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2589 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2665,6 +2660,29 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2665 if (current->flags & PF_MEMALLOC) 2660 if (current->flags & PF_MEMALLOC)
2666 return; 2661 return;
2667 2662
2663 btrfs_balance_delayed_items(root);
2664
2665 num_dirty = root->fs_info->dirty_metadata_bytes;
2666
2667 if (num_dirty > thresh) {
2668 balance_dirty_pages_ratelimited_nr(
2669 root->fs_info->btree_inode->i_mapping, 1);
2670 }
2671 return;
2672}
2673
2674void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2675{
2676 /*
2677 * looks as though older kernels can get into trouble with
2678 * this code; they end up stuck in balance_dirty_pages forever
2679 */
2680 u64 num_dirty;
2681 unsigned long thresh = 32 * 1024 * 1024;
2682
2683 if (current->flags & PF_MEMALLOC)
2684 return;
2685
2668 num_dirty = root->fs_info->dirty_metadata_bytes; 2686 num_dirty = root->fs_info->dirty_metadata_bytes;
2669 2687
2670 if (num_dirty > thresh) { 2688 if (num_dirty > thresh) {
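
The hunk above leaves btrfs_btree_balance_dirty() pushing delayed items via btrfs_balance_delayed_items() before throttling, and adds __btrfs_btree_balance_dirty() with the old behaviour. The split presumably lets code that is itself flushing delayed items throttle dirty metadata without re-entering the balancing; a usage sketch with invented caller names:

/* an ordinary write path: may also kick the delayed-meta workers */
static void ordinary_path_sketch(struct btrfs_root *root, unsigned long nr)
{
	btrfs_btree_balance_dirty(root, nr);
}

/* inside the delayed-item code itself: skip the recursive balance */
static void delayed_worker_sketch(struct btrfs_root *root, unsigned long nr)
{
	__btrfs_btree_balance_dirty(root, nr);
}
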
@@ -2697,7 +2715,7 @@ int btree_lock_page_hook(struct page *page)
2697 goto out; 2715 goto out;
2698 2716
2699 len = page->private >> 2; 2717 len = page->private >> 2;
2700 eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); 2718 eb = find_extent_buffer(io_tree, bytenr, len);
2701 if (!eb) 2719 if (!eb)
2702 goto out; 2720 goto out;
2703 2721
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 07b20dc2fd95..a0b610a67aae 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -55,35 +55,20 @@ int btrfs_commit_super(struct btrfs_root *root);
55int btrfs_error_commit_super(struct btrfs_root *root); 55int btrfs_error_commit_super(struct btrfs_root *root);
56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
57 u64 bytenr, u32 blocksize); 57 u64 bytenr, u32 blocksize);
58struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
59 u64 root_objectid);
60struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
61 struct btrfs_key *location,
62 const char *name, int namelen);
63struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 58struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
64 struct btrfs_key *location); 59 struct btrfs_key *location);
65struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 60struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
66 struct btrfs_key *location); 61 struct btrfs_key *location);
67int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 62int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
68int btrfs_insert_dev_radix(struct btrfs_root *root,
69 struct block_device *bdev,
70 u64 device_id,
71 u64 block_start,
72 u64 num_blocks);
73void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 63void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
64void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
74int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 65int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
75void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 66void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
76void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
77int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 67int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
78int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 68int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
79int wait_on_tree_block_writeback(struct btrfs_root *root,
80 struct extent_buffer *buf);
81int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 69int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
82u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 70u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
83void btrfs_csum_final(u32 crc, char *result); 71void btrfs_csum_final(u32 crc, char *result);
84int btrfs_open_device(struct btrfs_device *dev);
85int btrfs_verify_block_csum(struct btrfs_root *root,
86 struct extent_buffer *buf);
87int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 72int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
88 int metadata); 73 int metadata);
89int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 74int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
@@ -91,8 +76,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
91 unsigned long bio_flags, u64 bio_offset, 76 unsigned long bio_flags, u64 bio_offset,
92 extent_submit_bio_hook_t *submit_bio_start, 77 extent_submit_bio_hook_t *submit_bio_start,
93 extent_submit_bio_hook_t *submit_bio_done); 78 extent_submit_bio_hook_t *submit_bio_done);
94
95int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
96unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); 79unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
97int btrfs_write_tree_block(struct extent_buffer *buf); 80int btrfs_write_tree_block(struct extent_buffer *buf);
98int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 81int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index b4ffad859adb..1b8dc33778f9 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -32,7 +32,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
32 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
33 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
34 34
35 fid->objectid = inode->i_ino; 35 fid->objectid = btrfs_ino(inode);
36 fid->root_objectid = BTRFS_I(inode)->root->objectid; 36 fid->root_objectid = BTRFS_I(inode)->root->objectid;
37 fid->gen = inode->i_generation; 37 fid->gen = inode->i_generation;
38 38
@@ -178,13 +178,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
178 if (!path) 178 if (!path)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 180
181 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 181 if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
182 key.objectid = root->root_key.objectid; 182 key.objectid = root->root_key.objectid;
183 key.type = BTRFS_ROOT_BACKREF_KEY; 183 key.type = BTRFS_ROOT_BACKREF_KEY;
184 key.offset = (u64)-1; 184 key.offset = (u64)-1;
185 root = root->fs_info->tree_root; 185 root = root->fs_info->tree_root;
186 } else { 186 } else {
187 key.objectid = dir->i_ino; 187 key.objectid = btrfs_ino(dir);
188 key.type = BTRFS_INODE_REF_KEY; 188 key.type = BTRFS_INODE_REF_KEY;
189 key.offset = (u64)-1; 189 key.offset = (u64)-1;
190 } 190 }
@@ -244,6 +244,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
244 struct btrfs_key key; 244 struct btrfs_key key;
245 int name_len; 245 int name_len;
246 int ret; 246 int ret;
247 u64 ino;
247 248
248 if (!dir || !inode) 249 if (!dir || !inode)
249 return -EINVAL; 250 return -EINVAL;
@@ -251,19 +252,21 @@ static int btrfs_get_name(struct dentry *parent, char *name,
251 if (!S_ISDIR(dir->i_mode)) 252 if (!S_ISDIR(dir->i_mode))
252 return -EINVAL; 253 return -EINVAL;
253 254
255 ino = btrfs_ino(inode);
256
254 path = btrfs_alloc_path(); 257 path = btrfs_alloc_path();
255 if (!path) 258 if (!path)
256 return -ENOMEM; 259 return -ENOMEM;
257 path->leave_spinning = 1; 260 path->leave_spinning = 1;
258 261
259 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 262 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
260 key.objectid = BTRFS_I(inode)->root->root_key.objectid; 263 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
261 key.type = BTRFS_ROOT_BACKREF_KEY; 264 key.type = BTRFS_ROOT_BACKREF_KEY;
262 key.offset = (u64)-1; 265 key.offset = (u64)-1;
263 root = root->fs_info->tree_root; 266 root = root->fs_info->tree_root;
264 } else { 267 } else {
265 key.objectid = inode->i_ino; 268 key.objectid = ino;
266 key.offset = dir->i_ino; 269 key.offset = btrfs_ino(dir);
267 key.type = BTRFS_INODE_REF_KEY; 270 key.type = BTRFS_INODE_REF_KEY;
268 } 271 }
269 272
@@ -272,7 +275,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
272 btrfs_free_path(path); 275 btrfs_free_path(path);
273 return ret; 276 return ret;
274 } else if (ret > 0) { 277 } else if (ret > 0) {
275 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 278 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
276 path->slots[0]--; 279 path->slots[0]--;
277 } else { 280 } else {
278 btrfs_free_path(path); 281 btrfs_free_path(path);
@@ -281,11 +284,11 @@ static int btrfs_get_name(struct dentry *parent, char *name,
281 } 284 }
282 leaf = path->nodes[0]; 285 leaf = path->nodes[0];
283 286
284 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 287 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
285 rref = btrfs_item_ptr(leaf, path->slots[0], 288 rref = btrfs_item_ptr(leaf, path->slots[0],
286 struct btrfs_root_ref); 289 struct btrfs_root_ref);
287 name_ptr = (unsigned long)(rref + 1); 290 name_ptr = (unsigned long)(rref + 1);
288 name_len = btrfs_root_ref_name_len(leaf, rref); 291 name_len = btrfs_root_ref_name_len(leaf, rref);
289 } else { 292 } else {
290 iref = btrfs_item_ptr(leaf, path->slots[0], 293 iref = btrfs_item_ptr(leaf, path->slots[0],
291 struct btrfs_inode_ref); 294 struct btrfs_inode_ref);
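
Every i_ino use in export.c becomes btrfs_ino(): with the new inode-number cache the btrfs objectid lives in BTRFS_I(inode)->location and can differ from the VFS i_ino, which is an unsigned long and so truncates 64-bit objectids on 32-bit hosts. A paraphrase of the helper, from memory and illustrative only — the exact fallback condition in btrfs_inode.h may differ:

static inline u64 btrfs_ino_sketch(struct inode *inode)
{
	u64 ino = BTRFS_I(inode)->location.objectid;

	/*
	 * Subvolume roots carry a root key in ->location, so their
	 * objectid is not an inode number; fall back to i_ino there.
	 */
	if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
		ino = inode->i_ino;
	return ino;
}
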
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c8c318494dee..c9173a7827b0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
94 return (cache->flags & bits) == bits; 94 return (cache->flags & bits) == bits;
95} 95}
96 96
97void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 97static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
98{ 98{
99 atomic_inc(&cache->count); 99 atomic_inc(&cache->count);
100} 100}
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
105 WARN_ON(cache->pinned > 0); 105 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 106 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0); 107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl);
108 kfree(cache); 109 kfree(cache);
109 } 110 }
110} 111}
@@ -381,7 +382,7 @@ again:
381 if (need_resched() || 382 if (need_resched() ||
382 btrfs_next_leaf(extent_root, path)) { 383 btrfs_next_leaf(extent_root, path)) {
383 caching_ctl->progress = last; 384 caching_ctl->progress = last;
384 btrfs_release_path(extent_root, path); 385 btrfs_release_path(path);
385 up_read(&fs_info->extent_commit_sem); 386 up_read(&fs_info->extent_commit_sem);
386 mutex_unlock(&caching_ctl->mutex); 387 mutex_unlock(&caching_ctl->mutex);
387 cond_resched(); 388 cond_resched();
@@ -757,8 +758,12 @@ again:
757 atomic_inc(&head->node.refs); 758 atomic_inc(&head->node.refs);
758 spin_unlock(&delayed_refs->lock); 759 spin_unlock(&delayed_refs->lock);
759 760
760 btrfs_release_path(root->fs_info->extent_root, path); 761 btrfs_release_path(path);
761 762
763 /*
764 * Mutex was contended, block until it's released and try
765 * again
766 */
762 mutex_lock(&head->mutex); 767 mutex_lock(&head->mutex);
763 mutex_unlock(&head->mutex); 768 mutex_unlock(&head->mutex);
764 btrfs_put_delayed_ref(&head->node); 769 btrfs_put_delayed_ref(&head->node);
@@ -937,7 +942,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
937 break; 942 break;
938 } 943 }
939 } 944 }
940 btrfs_release_path(root, path); 945 btrfs_release_path(path);
941 946
942 if (owner < BTRFS_FIRST_FREE_OBJECTID) 947 if (owner < BTRFS_FIRST_FREE_OBJECTID)
943 new_size += sizeof(*bi); 948 new_size += sizeof(*bi);
@@ -950,7 +955,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
950 BUG_ON(ret); 955 BUG_ON(ret);
951 956
952 ret = btrfs_extend_item(trans, root, path, new_size); 957 ret = btrfs_extend_item(trans, root, path, new_size);
953 BUG_ON(ret);
954 958
955 leaf = path->nodes[0]; 959 leaf = path->nodes[0];
956 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 960 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1045,7 +1049,7 @@ again:
1045 return 0; 1049 return 0;
1046#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1050#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1047 key.type = BTRFS_EXTENT_REF_V0_KEY; 1051 key.type = BTRFS_EXTENT_REF_V0_KEY;
1048 btrfs_release_path(root, path); 1052 btrfs_release_path(path);
1049 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1053 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1050 if (ret < 0) { 1054 if (ret < 0) {
1051 err = ret; 1055 err = ret;
@@ -1083,7 +1087,7 @@ again:
1083 if (match_extent_data_ref(leaf, ref, root_objectid, 1087 if (match_extent_data_ref(leaf, ref, root_objectid,
1084 owner, offset)) { 1088 owner, offset)) {
1085 if (recow) { 1089 if (recow) {
1086 btrfs_release_path(root, path); 1090 btrfs_release_path(path);
1087 goto again; 1091 goto again;
1088 } 1092 }
1089 err = 0; 1093 err = 0;
@@ -1144,7 +1148,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1144 if (match_extent_data_ref(leaf, ref, root_objectid, 1148 if (match_extent_data_ref(leaf, ref, root_objectid,
1145 owner, offset)) 1149 owner, offset))
1146 break; 1150 break;
1147 btrfs_release_path(root, path); 1151 btrfs_release_path(path);
1148 key.offset++; 1152 key.offset++;
1149 ret = btrfs_insert_empty_item(trans, root, path, &key, 1153 ret = btrfs_insert_empty_item(trans, root, path, &key,
1150 size); 1154 size);
@@ -1170,7 +1174,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1170 btrfs_mark_buffer_dirty(leaf); 1174 btrfs_mark_buffer_dirty(leaf);
1171 ret = 0; 1175 ret = 0;
1172fail: 1176fail:
1173 btrfs_release_path(root, path); 1177 btrfs_release_path(path);
1174 return ret; 1178 return ret;
1175} 1179}
1176 1180
@@ -1296,7 +1300,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1296 ret = -ENOENT; 1300 ret = -ENOENT;
1297#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1301#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1298 if (ret == -ENOENT && parent) { 1302 if (ret == -ENOENT && parent) {
1299 btrfs_release_path(root, path); 1303 btrfs_release_path(path);
1300 key.type = BTRFS_EXTENT_REF_V0_KEY; 1304 key.type = BTRFS_EXTENT_REF_V0_KEY;
1301 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1305 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1302 if (ret > 0) 1306 if (ret > 0)
@@ -1325,7 +1329,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1325 } 1329 }
1326 1330
1327 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1331 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1328 btrfs_release_path(root, path); 1332 btrfs_release_path(path);
1329 return ret; 1333 return ret;
1330} 1334}
1331 1335
@@ -1558,7 +1562,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1558 size = btrfs_extent_inline_ref_size(type); 1562 size = btrfs_extent_inline_ref_size(type);
1559 1563
1560 ret = btrfs_extend_item(trans, root, path, size); 1564 ret = btrfs_extend_item(trans, root, path, size);
1561 BUG_ON(ret);
1562 1565
1563 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1566 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1564 refs = btrfs_extent_refs(leaf, ei); 1567 refs = btrfs_extent_refs(leaf, ei);
@@ -1611,7 +1614,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1611 if (ret != -ENOENT) 1614 if (ret != -ENOENT)
1612 return ret; 1615 return ret;
1613 1616
1614 btrfs_release_path(root, path); 1617 btrfs_release_path(path);
1615 *ref_ret = NULL; 1618 *ref_ret = NULL;
1616 1619
1617 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1620 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1687,7 +1690,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1687 end - ptr - size); 1690 end - ptr - size);
1688 item_size -= size; 1691 item_size -= size;
1689 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1692 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1690 BUG_ON(ret);
1691 } 1693 }
1692 btrfs_mark_buffer_dirty(leaf); 1694 btrfs_mark_buffer_dirty(leaf);
1693 return 0; 1695 return 0;
@@ -1865,7 +1867,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1865 __run_delayed_extent_op(extent_op, leaf, item); 1867 __run_delayed_extent_op(extent_op, leaf, item);
1866 1868
1867 btrfs_mark_buffer_dirty(leaf); 1869 btrfs_mark_buffer_dirty(leaf);
1868 btrfs_release_path(root->fs_info->extent_root, path); 1870 btrfs_release_path(path);
1869 1871
1870 path->reada = 1; 1872 path->reada = 1;
1871 path->leave_spinning = 1; 1873 path->leave_spinning = 1;
@@ -2300,6 +2302,10 @@ again:
2300 atomic_inc(&ref->refs); 2302 atomic_inc(&ref->refs);
2301 2303
2302 spin_unlock(&delayed_refs->lock); 2304 spin_unlock(&delayed_refs->lock);
2305 /*
2306 * Mutex was contended, block until it's
2307 * released and try again
2308 */
2303 mutex_lock(&head->mutex); 2309 mutex_lock(&head->mutex);
2304 mutex_unlock(&head->mutex); 2310 mutex_unlock(&head->mutex);
2305 2311
@@ -2364,8 +2370,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2364 atomic_inc(&head->node.refs); 2370 atomic_inc(&head->node.refs);
2365 spin_unlock(&delayed_refs->lock); 2371 spin_unlock(&delayed_refs->lock);
2366 2372
2367 btrfs_release_path(root->fs_info->extent_root, path); 2373 btrfs_release_path(path);
2368 2374
2375 /*
2376 * Mutex was contended, block until it's released and let
2377 * caller try again
2378 */
2369 mutex_lock(&head->mutex); 2379 mutex_lock(&head->mutex);
2370 mutex_unlock(&head->mutex); 2380 mutex_unlock(&head->mutex);
2371 btrfs_put_delayed_ref(&head->node); 2381 btrfs_put_delayed_ref(&head->node);
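
The comments added in the three hunks above all document the same idiom, shown here in isolation: pin the delayed-ref head with a reference, drop the spinlock, then take and immediately release the contended mutex purely to wait out its current holder before retrying. A sketch using the names from the surrounding code (the wrapper function itself is invented):

static void wait_for_head_sketch(struct btrfs_delayed_ref_root *delayed_refs,
				 struct btrfs_delayed_ref_head *head,
				 struct btrfs_path *path)
{
	/* keep the head alive across the unlock */
	atomic_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	btrfs_release_path(path);

	/* lock/unlock only to block until the holder is done */
	mutex_lock(&head->mutex);
	mutex_unlock(&head->mutex);

	btrfs_put_delayed_ref(&head->node);
}
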
@@ -2513,126 +2523,6 @@ out:
2513 return ret; 2523 return ret;
2514} 2524}
2515 2525
2516#if 0
2517int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2518 struct extent_buffer *buf, u32 nr_extents)
2519{
2520 struct btrfs_key key;
2521 struct btrfs_file_extent_item *fi;
2522 u64 root_gen;
2523 u32 nritems;
2524 int i;
2525 int level;
2526 int ret = 0;
2527 int shared = 0;
2528
2529 if (!root->ref_cows)
2530 return 0;
2531
2532 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2533 shared = 0;
2534 root_gen = root->root_key.offset;
2535 } else {
2536 shared = 1;
2537 root_gen = trans->transid - 1;
2538 }
2539
2540 level = btrfs_header_level(buf);
2541 nritems = btrfs_header_nritems(buf);
2542
2543 if (level == 0) {
2544 struct btrfs_leaf_ref *ref;
2545 struct btrfs_extent_info *info;
2546
2547 ref = btrfs_alloc_leaf_ref(root, nr_extents);
2548 if (!ref) {
2549 ret = -ENOMEM;
2550 goto out;
2551 }
2552
2553 ref->root_gen = root_gen;
2554 ref->bytenr = buf->start;
2555 ref->owner = btrfs_header_owner(buf);
2556 ref->generation = btrfs_header_generation(buf);
2557 ref->nritems = nr_extents;
2558 info = ref->extents;
2559
2560 for (i = 0; nr_extents > 0 && i < nritems; i++) {
2561 u64 disk_bytenr;
2562 btrfs_item_key_to_cpu(buf, &key, i);
2563 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2564 continue;
2565 fi = btrfs_item_ptr(buf, i,
2566 struct btrfs_file_extent_item);
2567 if (btrfs_file_extent_type(buf, fi) ==
2568 BTRFS_FILE_EXTENT_INLINE)
2569 continue;
2570 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2571 if (disk_bytenr == 0)
2572 continue;
2573
2574 info->bytenr = disk_bytenr;
2575 info->num_bytes =
2576 btrfs_file_extent_disk_num_bytes(buf, fi);
2577 info->objectid = key.objectid;
2578 info->offset = key.offset;
2579 info++;
2580 }
2581
2582 ret = btrfs_add_leaf_ref(root, ref, shared);
2583 if (ret == -EEXIST && shared) {
2584 struct btrfs_leaf_ref *old;
2585 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2586 BUG_ON(!old);
2587 btrfs_remove_leaf_ref(root, old);
2588 btrfs_free_leaf_ref(root, old);
2589 ret = btrfs_add_leaf_ref(root, ref, shared);
2590 }
2591 WARN_ON(ret);
2592 btrfs_free_leaf_ref(root, ref);
2593 }
2594out:
2595 return ret;
2596}
2597
2598/* when a block goes through cow, we update the reference counts of
2599 * everything that block points to. The internal pointers of the block
2600 * can be in just about any order, and it is likely to have clusters of
2601 * things that are close together and clusters of things that are not.
2602 *
2603 * To help reduce the seeks that come with updating all of these reference
2604 * counts, sort them by byte number before actual updates are done.
2605 *
2606 * struct refsort is used to match byte number to slot in the btree block.
2607 * we sort based on the byte number and then use the slot to actually
2608 * find the item.
2609 *
2610 * struct refsort is smaller than struct btrfs_item and smaller than
2611 * struct btrfs_key_ptr. Since we're currently limited to the page size
2612 * for a btree block, there's no way for a kmalloc of refsorts for a
2613 * single node to be bigger than a page.
2614 */
2615struct refsort {
2616 u64 bytenr;
2617 u32 slot;
2618};
2619
2620/*
2621 * for passing into sort()
2622 */
2623static int refsort_cmp(const void *a_void, const void *b_void)
2624{
2625 const struct refsort *a = a_void;
2626 const struct refsort *b = b_void;
2627
2628 if (a->bytenr < b->bytenr)
2629 return -1;
2630 if (a->bytenr > b->bytenr)
2631 return 1;
2632 return 0;
2633}
2634#endif
2635
2636static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2526static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2637 struct btrfs_root *root, 2527 struct btrfs_root *root,
2638 struct extent_buffer *buf, 2528 struct extent_buffer *buf,
@@ -2735,7 +2625,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2735 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2625 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2736 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2626 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2737 btrfs_mark_buffer_dirty(leaf); 2627 btrfs_mark_buffer_dirty(leaf);
2738 btrfs_release_path(extent_root, path); 2628 btrfs_release_path(path);
2739fail: 2629fail:
2740 if (ret) 2630 if (ret)
2741 return ret; 2631 return ret;
@@ -2788,7 +2678,7 @@ again:
2788 inode = lookup_free_space_inode(root, block_group, path); 2678 inode = lookup_free_space_inode(root, block_group, path);
2789 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2679 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2790 ret = PTR_ERR(inode); 2680 ret = PTR_ERR(inode);
2791 btrfs_release_path(root, path); 2681 btrfs_release_path(path);
2792 goto out; 2682 goto out;
2793 } 2683 }
2794 2684
@@ -2857,7 +2747,7 @@ again:
2857out_put: 2747out_put:
2858 iput(inode); 2748 iput(inode);
2859out_free: 2749out_free:
2860 btrfs_release_path(root, path); 2750 btrfs_release_path(path);
2861out: 2751out:
2862 spin_lock(&block_group->lock); 2752 spin_lock(&block_group->lock);
2863 block_group->disk_cache_state = dcs; 2753 block_group->disk_cache_state = dcs;
@@ -3147,7 +3037,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3147 /* make sure bytes are sectorsize aligned */ 3037 /* make sure bytes are sectorsize aligned */
3148 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3149 3039
3150 if (root == root->fs_info->tree_root) { 3040 if (root == root->fs_info->tree_root ||
3041 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3151 alloc_chunk = 0; 3042 alloc_chunk = 0;
3152 committed = 1; 3043 committed = 1;
3153 } 3044 }
@@ -3215,18 +3106,6 @@ commit_trans:
3215 goto again; 3106 goto again;
3216 } 3107 }
3217 3108
3218#if 0 /* I hope we never need this code again, just in case */
3219 printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
3220 "%llu bytes_reserved, " "%llu bytes_pinned, "
3221 "%llu bytes_readonly, %llu may use %llu total\n",
3222 (unsigned long long)bytes,
3223 (unsigned long long)data_sinfo->bytes_used,
3224 (unsigned long long)data_sinfo->bytes_reserved,
3225 (unsigned long long)data_sinfo->bytes_pinned,
3226 (unsigned long long)data_sinfo->bytes_readonly,
3227 (unsigned long long)data_sinfo->bytes_may_use,
3228 (unsigned long long)data_sinfo->total_bytes);
3229#endif
3230 return -ENOSPC; 3109 return -ENOSPC;
3231 } 3110 }
3232 data_sinfo->bytes_may_use += bytes; 3111 data_sinfo->bytes_may_use += bytes;
@@ -3429,6 +3308,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3429 if (reserved == 0) 3308 if (reserved == 0)
3430 return 0; 3309 return 0;
3431 3310
3311 /* nothing to shrink - nothing to reclaim */
3312 if (root->fs_info->delalloc_bytes == 0)
3313 return 0;
3314
3432 max_reclaim = min(reserved, to_reclaim); 3315 max_reclaim = min(reserved, to_reclaim);
3433 3316
3434 while (loops < 1024) { 3317 while (loops < 1024) {
@@ -3655,8 +3538,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3655 spin_unlock(&block_rsv->lock); 3538 spin_unlock(&block_rsv->lock);
3656} 3539}
3657 3540
3658void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3541static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3659 struct btrfs_block_rsv *dest, u64 num_bytes) 3542 struct btrfs_block_rsv *dest, u64 num_bytes)
3660{ 3543{
3661 struct btrfs_space_info *space_info = block_rsv->space_info; 3544 struct btrfs_space_info *space_info = block_rsv->space_info;
3662 3545
@@ -3859,23 +3742,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3859 u64 meta_used; 3742 u64 meta_used;
3860 u64 data_used; 3743 u64 data_used;
3861 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3744 int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3862#if 0
3863 /*
3864 * per tree used space accounting can be inaccurate, so we
3865 * can't rely on it.
3866 */
3867 spin_lock(&fs_info->extent_root->accounting_lock);
3868 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3869 spin_unlock(&fs_info->extent_root->accounting_lock);
3870
3871 spin_lock(&fs_info->csum_root->accounting_lock);
3872 num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3873 spin_unlock(&fs_info->csum_root->accounting_lock);
3874 3745
3875 spin_lock(&fs_info->tree_root->accounting_lock);
3876 num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3877 spin_unlock(&fs_info->tree_root->accounting_lock);
3878#endif
3879 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3746 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3880 spin_lock(&sinfo->lock); 3747 spin_lock(&sinfo->lock);
3881 data_used = sinfo->bytes_used; 3748 data_used = sinfo->bytes_used;
@@ -3928,10 +3795,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3928 block_rsv->reserved = block_rsv->size; 3795 block_rsv->reserved = block_rsv->size;
3929 block_rsv->full = 1; 3796 block_rsv->full = 1;
3930 } 3797 }
3931#if 0 3798
3932 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3933 block_rsv->size, block_rsv->reserved);
3934#endif
3935 spin_unlock(&sinfo->lock); 3799 spin_unlock(&sinfo->lock);
3936 spin_unlock(&block_rsv->lock); 3800 spin_unlock(&block_rsv->lock);
3937} 3801}
@@ -3977,12 +3841,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3977 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3841 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3978} 3842}
3979 3843
3980static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3981{
3982 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3983 3 * num_items;
3984}
3985
3986int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3844int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3987 struct btrfs_root *root, 3845 struct btrfs_root *root,
3988 struct btrfs_block_rsv *rsv) 3846 struct btrfs_block_rsv *rsv)
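
For scale: the helper deleted above reserved (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items bytes, and the btrfs_calc_trans_metadata_size() now used in its place appears to be the same formula promoted to a shared helper. Assuming 4KiB leaves and nodes and BTRFS_MAX_LEVEL of 8, one item costs (4096 + 4096 * 7) * 3 = 98304 bytes, i.e. 96KiB, so the five-item snapshot reservation below comes to 480KiB.
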
@@ -3996,7 +3854,7 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3996 * needs to use some space. We may want to be smarter about this in the 3854 * needs to use some space. We may want to be smarter about this in the
3997 * future. 3855 * future.
3998 */ 3856 */
3999 num_bytes = calc_trans_metadata_size(root, 2); 3857 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
4000 3858
4001 /* We already have enough bytes, just return */ 3859 /* We already have enough bytes, just return */
4002 if (rsv->reserved >= num_bytes) 3860 if (rsv->reserved >= num_bytes)
@@ -4024,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
4024 if (num_items == 0 || root->fs_info->chunk_root == root) 3882 if (num_items == 0 || root->fs_info->chunk_root == root)
4025 return 0; 3883 return 0;
4026 3884
4027 num_bytes = calc_trans_metadata_size(root, num_items); 3885 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
4028 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3886 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
4029 num_bytes); 3887 num_bytes);
4030 if (!ret) { 3888 if (!ret) {
@@ -4058,14 +3916,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4058 * added it, so this takes the reservation so we can release it later 3916 * added it, so this takes the reservation so we can release it later
4059 * when we are truly done with the orphan item. 3917 * when we are truly done with the orphan item.
4060 */ 3918 */
4061 u64 num_bytes = calc_trans_metadata_size(root, 1); 3919 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4062 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3920 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4063} 3921}
4064 3922
4065void btrfs_orphan_release_metadata(struct inode *inode) 3923void btrfs_orphan_release_metadata(struct inode *inode)
4066{ 3924{
4067 struct btrfs_root *root = BTRFS_I(inode)->root; 3925 struct btrfs_root *root = BTRFS_I(inode)->root;
4068 u64 num_bytes = calc_trans_metadata_size(root, 1); 3926 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4069 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3927 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4070} 3928}
4071 3929
@@ -4079,7 +3937,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4079 * two for root back/forward refs, two for directory entries 3937 * two for root back/forward refs, two for directory entries
4080 * and one for root of the snapshot. 3938 * and one for root of the snapshot.
4081 */ 3939 */
4082 u64 num_bytes = calc_trans_metadata_size(root, 5); 3940 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
4083 dst_rsv->space_info = src_rsv->space_info; 3941 dst_rsv->space_info = src_rsv->space_info;
4084 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3942 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4085} 3943}
@@ -4108,7 +3966,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4108 3966
4109 if (nr_extents > reserved_extents) { 3967 if (nr_extents > reserved_extents) {
4110 nr_extents -= reserved_extents; 3968 nr_extents -= reserved_extents;
4111 to_reserve = calc_trans_metadata_size(root, nr_extents); 3969 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4112 } else { 3970 } else {
4113 nr_extents = 0; 3971 nr_extents = 0;
4114 to_reserve = 0; 3972 to_reserve = 0;
@@ -4162,7 +4020,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4162 4020
4163 to_free = calc_csum_metadata_size(inode, num_bytes); 4021 to_free = calc_csum_metadata_size(inode, num_bytes);
4164 if (nr_extents > 0) 4022 if (nr_extents > 0)
4165 to_free += calc_trans_metadata_size(root, nr_extents); 4023 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
4166 4024
4167 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4025 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4168 to_free); 4026 to_free);
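
A quick standalone check of the figure the renamed helper returns (a sketch, not kernel code: the 4 KiB leaf/node sizes are assumed defaults and BTRFS_MAX_LEVEL is taken as 8 per ctree.h). Each reserved item may force a COW of one leaf plus one node per remaining level, and the factor of 3 leaves headroom for splits at every level:

	#include <stdio.h>

	#define BTRFS_MAX_LEVEL 8

	static unsigned long long trans_metadata_size(unsigned long long leafsize,
						      unsigned long long nodesize,
						      int num_items)
	{
		/* mirrors (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items */
		return (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items;
	}

	int main(void)
	{
		/* the snapshot path above reserves 5 items; with 4 KiB blocks: */
		printf("%llu bytes\n", trans_metadata_size(4096, 4096, 5));
		return 0;
	}

With 4 KiB blocks that is (4096 + 4096 * 7) * 3 * 5 = 491520 bytes, which is why even a small snapshot keeps a non-trivial reserve.
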
@@ -4571,7 +4429,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4571 NULL, refs_to_drop, 4429 NULL, refs_to_drop,
4572 is_data); 4430 is_data);
4573 BUG_ON(ret); 4431 BUG_ON(ret);
4574 btrfs_release_path(extent_root, path); 4432 btrfs_release_path(path);
4575 path->leave_spinning = 1; 4433 path->leave_spinning = 1;
4576 4434
4577 key.objectid = bytenr; 4435 key.objectid = bytenr;
@@ -4610,7 +4468,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4610 owner_objectid, 0); 4468 owner_objectid, 0);
4611 BUG_ON(ret < 0); 4469 BUG_ON(ret < 0);
4612 4470
4613 btrfs_release_path(extent_root, path); 4471 btrfs_release_path(path);
4614 path->leave_spinning = 1; 4472 path->leave_spinning = 1;
4615 4473
4616 key.objectid = bytenr; 4474 key.objectid = bytenr;
@@ -4680,7 +4538,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4680 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4538 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4681 num_to_del); 4539 num_to_del);
4682 BUG_ON(ret); 4540 BUG_ON(ret);
4683 btrfs_release_path(extent_root, path); 4541 btrfs_release_path(path);
4684 4542
4685 if (is_data) { 4543 if (is_data) {
4686 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4544 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4923,7 +4781,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4923 return 0; 4781 return 0;
4924 4782
4925 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4783 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4926 (cache->free_space >= num_bytes)); 4784 (cache->free_space_ctl->free_space >= num_bytes));
4927 4785
4928 put_caching_control(caching_ctl); 4786 put_caching_control(caching_ctl);
4929 return 0; 4787 return 0;
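
The checks above now read free space through one controller object instead of fields scattered on the block group. An abridged, userspace-compilable sketch of the shape these hunks imply (the exact field set is an assumption; only members visible in this diff are shown, and spinlock_t is stubbed out):

	#include <stdint.h>

	typedef struct { int dummy; } spinlock_t;	/* stand-in for the kernel type */

	struct btrfs_free_space_ctl {
		spinlock_t tree_lock;	/* replaces block_group->tree_lock */
		uint64_t free_space;	/* replaces block_group->free_space */
		int extents_thresh;	/* extent-to-bitmap conversion point */
		/* ... free-extent tree, bitmap counters, etc. ... */
	};

Centralizing the fields lets other users of the free-space code share the same structure instead of duplicating locks and counters per caller.
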
@@ -5159,13 +5017,14 @@ have_block_group:
5159 if (unlikely(block_group->ro)) 5017 if (unlikely(block_group->ro))
5160 goto loop; 5018 goto loop;
5161 5019
5162 spin_lock(&block_group->tree_lock); 5020 spin_lock(&block_group->free_space_ctl->tree_lock);
5163 if (cached && 5021 if (cached &&
5164 block_group->free_space < num_bytes + empty_size) { 5022 block_group->free_space_ctl->free_space <
5165 spin_unlock(&block_group->tree_lock); 5023 num_bytes + empty_size) {
5024 spin_unlock(&block_group->free_space_ctl->tree_lock);
5166 goto loop; 5025 goto loop;
5167 } 5026 }
5168 spin_unlock(&block_group->tree_lock); 5027 spin_unlock(&block_group->free_space_ctl->tree_lock);
5169 5028
5170 /* 5029 /*
5171 * Ok we want to try and use the cluster allocator, so lets look 5030 * Ok we want to try and use the cluster allocator, so lets look
@@ -6512,7 +6371,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6512 trans->block_rsv = block_rsv; 6371 trans->block_rsv = block_rsv;
6513 } 6372 }
6514 } 6373 }
6515 btrfs_release_path(root, path); 6374 btrfs_release_path(path);
6516 BUG_ON(err); 6375 BUG_ON(err);
6517 6376
6518 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6377 ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6616,1514 +6475,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6616 return ret; 6475 return ret;
6617} 6476}
6618 6477
6619#if 0
6620static unsigned long calc_ra(unsigned long start, unsigned long last,
6621 unsigned long nr)
6622{
6623 return min(last, start + nr - 1);
6624}
6625
6626static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6627 u64 len)
6628{
6629 u64 page_start;
6630 u64 page_end;
6631 unsigned long first_index;
6632 unsigned long last_index;
6633 unsigned long i;
6634 struct page *page;
6635 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6636 struct file_ra_state *ra;
6637 struct btrfs_ordered_extent *ordered;
6638 unsigned int total_read = 0;
6639 unsigned int total_dirty = 0;
6640 int ret = 0;
6641
6642 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6643 if (!ra)
6644 return -ENOMEM;
6645
6646 mutex_lock(&inode->i_mutex);
6647 first_index = start >> PAGE_CACHE_SHIFT;
6648 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6649
6650 /* make sure the dirty trick played by the caller work */
6651 ret = invalidate_inode_pages2_range(inode->i_mapping,
6652 first_index, last_index);
6653 if (ret)
6654 goto out_unlock;
6655
6656 file_ra_state_init(ra, inode->i_mapping);
6657
6658 for (i = first_index ; i <= last_index; i++) {
6659 if (total_read % ra->ra_pages == 0) {
6660 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6661 calc_ra(i, last_index, ra->ra_pages));
6662 }
6663 total_read++;
6664again:
6665 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6666 BUG_ON(1);
6667 page = grab_cache_page(inode->i_mapping, i);
6668 if (!page) {
6669 ret = -ENOMEM;
6670 goto out_unlock;
6671 }
6672 if (!PageUptodate(page)) {
6673 btrfs_readpage(NULL, page);
6674 lock_page(page);
6675 if (!PageUptodate(page)) {
6676 unlock_page(page);
6677 page_cache_release(page);
6678 ret = -EIO;
6679 goto out_unlock;
6680 }
6681 }
6682 wait_on_page_writeback(page);
6683
6684 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6685 page_end = page_start + PAGE_CACHE_SIZE - 1;
6686 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6687
6688 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6689 if (ordered) {
6690 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6691 unlock_page(page);
6692 page_cache_release(page);
6693 btrfs_start_ordered_extent(inode, ordered, 1);
6694 btrfs_put_ordered_extent(ordered);
6695 goto again;
6696 }
6697 set_page_extent_mapped(page);
6698
6699 if (i == first_index)
6700 set_extent_bits(io_tree, page_start, page_end,
6701 EXTENT_BOUNDARY, GFP_NOFS);
6702 btrfs_set_extent_delalloc(inode, page_start, page_end);
6703
6704 set_page_dirty(page);
6705 total_dirty++;
6706
6707 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6708 unlock_page(page);
6709 page_cache_release(page);
6710 }
6711
6712out_unlock:
6713 kfree(ra);
6714 mutex_unlock(&inode->i_mutex);
6715 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6716 return ret;
6717}
6718
6719static noinline int relocate_data_extent(struct inode *reloc_inode,
6720 struct btrfs_key *extent_key,
6721 u64 offset)
6722{
6723 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6724 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6725 struct extent_map *em;
6726 u64 start = extent_key->objectid - offset;
6727 u64 end = start + extent_key->offset - 1;
6728
6729 em = alloc_extent_map(GFP_NOFS);
6730 BUG_ON(!em);
6731
6732 em->start = start;
6733 em->len = extent_key->offset;
6734 em->block_len = extent_key->offset;
6735 em->block_start = extent_key->objectid;
6736 em->bdev = root->fs_info->fs_devices->latest_bdev;
6737 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6738
6739 /* setup extent map to cheat btrfs_readpage */
6740 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6741 while (1) {
6742 int ret;
6743 write_lock(&em_tree->lock);
6744 ret = add_extent_mapping(em_tree, em);
6745 write_unlock(&em_tree->lock);
6746 if (ret != -EEXIST) {
6747 free_extent_map(em);
6748 break;
6749 }
6750 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6751 }
6752 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6753
6754 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6755}
6756
6757struct btrfs_ref_path {
6758 u64 extent_start;
6759 u64 nodes[BTRFS_MAX_LEVEL];
6760 u64 root_objectid;
6761 u64 root_generation;
6762 u64 owner_objectid;
6763 u32 num_refs;
6764 int lowest_level;
6765 int current_level;
6766 int shared_level;
6767
6768 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6769 u64 new_nodes[BTRFS_MAX_LEVEL];
6770};
6771
6772struct disk_extent {
6773 u64 ram_bytes;
6774 u64 disk_bytenr;
6775 u64 disk_num_bytes;
6776 u64 offset;
6777 u64 num_bytes;
6778 u8 compression;
6779 u8 encryption;
6780 u16 other_encoding;
6781};
6782
6783static int is_cowonly_root(u64 root_objectid)
6784{
6785 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6786 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6787 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6788 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6789 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6790 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6791 return 1;
6792 return 0;
6793}
6794
6795static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6796 struct btrfs_root *extent_root,
6797 struct btrfs_ref_path *ref_path,
6798 int first_time)
6799{
6800 struct extent_buffer *leaf;
6801 struct btrfs_path *path;
6802 struct btrfs_extent_ref *ref;
6803 struct btrfs_key key;
6804 struct btrfs_key found_key;
6805 u64 bytenr;
6806 u32 nritems;
6807 int level;
6808 int ret = 1;
6809
6810 path = btrfs_alloc_path();
6811 if (!path)
6812 return -ENOMEM;
6813
6814 if (first_time) {
6815 ref_path->lowest_level = -1;
6816 ref_path->current_level = -1;
6817 ref_path->shared_level = -1;
6818 goto walk_up;
6819 }
6820walk_down:
6821 level = ref_path->current_level - 1;
6822 while (level >= -1) {
6823 u64 parent;
6824 if (level < ref_path->lowest_level)
6825 break;
6826
6827 if (level >= 0)
6828 bytenr = ref_path->nodes[level];
6829 else
6830 bytenr = ref_path->extent_start;
6831 BUG_ON(bytenr == 0);
6832
6833 parent = ref_path->nodes[level + 1];
6834 ref_path->nodes[level + 1] = 0;
6835 ref_path->current_level = level;
6836 BUG_ON(parent == 0);
6837
6838 key.objectid = bytenr;
6839 key.offset = parent + 1;
6840 key.type = BTRFS_EXTENT_REF_KEY;
6841
6842 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6843 if (ret < 0)
6844 goto out;
6845 BUG_ON(ret == 0);
6846
6847 leaf = path->nodes[0];
6848 nritems = btrfs_header_nritems(leaf);
6849 if (path->slots[0] >= nritems) {
6850 ret = btrfs_next_leaf(extent_root, path);
6851 if (ret < 0)
6852 goto out;
6853 if (ret > 0)
6854 goto next;
6855 leaf = path->nodes[0];
6856 }
6857
6858 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6859 if (found_key.objectid == bytenr &&
6860 found_key.type == BTRFS_EXTENT_REF_KEY) {
6861 if (level < ref_path->shared_level)
6862 ref_path->shared_level = level;
6863 goto found;
6864 }
6865next:
6866 level--;
6867 btrfs_release_path(extent_root, path);
6868 cond_resched();
6869 }
6870 /* reached lowest level */
6871 ret = 1;
6872 goto out;
6873walk_up:
6874 level = ref_path->current_level;
6875 while (level < BTRFS_MAX_LEVEL - 1) {
6876 u64 ref_objectid;
6877
6878 if (level >= 0)
6879 bytenr = ref_path->nodes[level];
6880 else
6881 bytenr = ref_path->extent_start;
6882
6883 BUG_ON(bytenr == 0);
6884
6885 key.objectid = bytenr;
6886 key.offset = 0;
6887 key.type = BTRFS_EXTENT_REF_KEY;
6888
6889 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6890 if (ret < 0)
6891 goto out;
6892
6893 leaf = path->nodes[0];
6894 nritems = btrfs_header_nritems(leaf);
6895 if (path->slots[0] >= nritems) {
6896 ret = btrfs_next_leaf(extent_root, path);
6897 if (ret < 0)
6898 goto out;
6899 if (ret > 0) {
6900 /* the extent was freed by someone */
6901 if (ref_path->lowest_level == level)
6902 goto out;
6903 btrfs_release_path(extent_root, path);
6904 goto walk_down;
6905 }
6906 leaf = path->nodes[0];
6907 }
6908
6909 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6910 if (found_key.objectid != bytenr ||
6911 found_key.type != BTRFS_EXTENT_REF_KEY) {
6912 /* the extent was freed by someone */
6913 if (ref_path->lowest_level == level) {
6914 ret = 1;
6915 goto out;
6916 }
6917 btrfs_release_path(extent_root, path);
6918 goto walk_down;
6919 }
6920found:
6921 ref = btrfs_item_ptr(leaf, path->slots[0],
6922 struct btrfs_extent_ref);
6923 ref_objectid = btrfs_ref_objectid(leaf, ref);
6924 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6925 if (first_time) {
6926 level = (int)ref_objectid;
6927 BUG_ON(level >= BTRFS_MAX_LEVEL);
6928 ref_path->lowest_level = level;
6929 ref_path->current_level = level;
6930 ref_path->nodes[level] = bytenr;
6931 } else {
6932 WARN_ON(ref_objectid != level);
6933 }
6934 } else {
6935 WARN_ON(level != -1);
6936 }
6937 first_time = 0;
6938
6939 if (ref_path->lowest_level == level) {
6940 ref_path->owner_objectid = ref_objectid;
6941 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6942 }
6943
6944 /*
6945 * the block is tree root or the block isn't in reference
6946 * counted tree.
6947 */
6948 if (found_key.objectid == found_key.offset ||
6949 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6950 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6951 ref_path->root_generation =
6952 btrfs_ref_generation(leaf, ref);
6953 if (level < 0) {
6954 /* special reference from the tree log */
6955 ref_path->nodes[0] = found_key.offset;
6956 ref_path->current_level = 0;
6957 }
6958 ret = 0;
6959 goto out;
6960 }
6961
6962 level++;
6963 BUG_ON(ref_path->nodes[level] != 0);
6964 ref_path->nodes[level] = found_key.offset;
6965 ref_path->current_level = level;
6966
6967 /*
6968 * the reference was created in the running transaction,
6969 * no need to continue walking up.
6970 */
6971 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6972 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6973 ref_path->root_generation =
6974 btrfs_ref_generation(leaf, ref);
6975 ret = 0;
6976 goto out;
6977 }
6978
6979 btrfs_release_path(extent_root, path);
6980 cond_resched();
6981 }
6982 /* reached max tree level, but no tree root found. */
6983 BUG();
6984out:
6985 btrfs_free_path(path);
6986 return ret;
6987}
6988
6989static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6990 struct btrfs_root *extent_root,
6991 struct btrfs_ref_path *ref_path,
6992 u64 extent_start)
6993{
6994 memset(ref_path, 0, sizeof(*ref_path));
6995 ref_path->extent_start = extent_start;
6996
6997 return __next_ref_path(trans, extent_root, ref_path, 1);
6998}
6999
7000static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
7001 struct btrfs_root *extent_root,
7002 struct btrfs_ref_path *ref_path)
7003{
7004 return __next_ref_path(trans, extent_root, ref_path, 0);
7005}
7006
7007static noinline int get_new_locations(struct inode *reloc_inode,
7008 struct btrfs_key *extent_key,
7009 u64 offset, int no_fragment,
7010 struct disk_extent **extents,
7011 int *nr_extents)
7012{
7013 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
7014 struct btrfs_path *path;
7015 struct btrfs_file_extent_item *fi;
7016 struct extent_buffer *leaf;
7017 struct disk_extent *exts = *extents;
7018 struct btrfs_key found_key;
7019 u64 cur_pos;
7020 u64 last_byte;
7021 u32 nritems;
7022 int nr = 0;
7023 int max = *nr_extents;
7024 int ret;
7025
7026 WARN_ON(!no_fragment && *extents);
7027 if (!exts) {
7028 max = 1;
7029 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
7030 if (!exts)
7031 return -ENOMEM;
7032 }
7033
7034 path = btrfs_alloc_path();
7035 if (!path) {
7036 if (exts != *extents)
7037 kfree(exts);
7038 return -ENOMEM;
7039 }
7040
7041 cur_pos = extent_key->objectid - offset;
7042 last_byte = extent_key->objectid + extent_key->offset;
7043 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
7044 cur_pos, 0);
7045 if (ret < 0)
7046 goto out;
7047 if (ret > 0) {
7048 ret = -ENOENT;
7049 goto out;
7050 }
7051
7052 while (1) {
7053 leaf = path->nodes[0];
7054 nritems = btrfs_header_nritems(leaf);
7055 if (path->slots[0] >= nritems) {
7056 ret = btrfs_next_leaf(root, path);
7057 if (ret < 0)
7058 goto out;
7059 if (ret > 0)
7060 break;
7061 leaf = path->nodes[0];
7062 }
7063
7064 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7065 if (found_key.offset != cur_pos ||
7066 found_key.type != BTRFS_EXTENT_DATA_KEY ||
7067 found_key.objectid != reloc_inode->i_ino)
7068 break;
7069
7070 fi = btrfs_item_ptr(leaf, path->slots[0],
7071 struct btrfs_file_extent_item);
7072 if (btrfs_file_extent_type(leaf, fi) !=
7073 BTRFS_FILE_EXTENT_REG ||
7074 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7075 break;
7076
7077 if (nr == max) {
7078 struct disk_extent *old = exts;
7079 max *= 2;
7080 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7081 if (!exts) {
7082 ret = -ENOMEM;
7083 goto out;
7084 }
7085 memcpy(exts, old, sizeof(*exts) * nr);
7086 if (old != *extents)
7087 kfree(old);
7088 }
7089
7090 exts[nr].disk_bytenr =
7091 btrfs_file_extent_disk_bytenr(leaf, fi);
7092 exts[nr].disk_num_bytes =
7093 btrfs_file_extent_disk_num_bytes(leaf, fi);
7094 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
7095 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7096 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7097 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
7098 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
7099 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
7100 fi);
7101 BUG_ON(exts[nr].offset > 0);
7102 BUG_ON(exts[nr].compression || exts[nr].encryption);
7103 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
7104
7105 cur_pos += exts[nr].num_bytes;
7106 nr++;
7107
7108 if (cur_pos + offset >= last_byte)
7109 break;
7110
7111 if (no_fragment) {
7112 ret = 1;
7113 goto out;
7114 }
7115 path->slots[0]++;
7116 }
7117
7118 BUG_ON(cur_pos + offset > last_byte);
7119 if (cur_pos + offset < last_byte) {
7120 ret = -ENOENT;
7121 goto out;
7122 }
7123 ret = 0;
7124out:
7125 btrfs_free_path(path);
7126 if (ret) {
7127 if (exts != *extents)
7128 kfree(exts);
7129 } else {
7130 *extents = exts;
7131 *nr_extents = nr;
7132 }
7133 return ret;
7134}
7135
7136static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
7137 struct btrfs_root *root,
7138 struct btrfs_path *path,
7139 struct btrfs_key *extent_key,
7140 struct btrfs_key *leaf_key,
7141 struct btrfs_ref_path *ref_path,
7142 struct disk_extent *new_extents,
7143 int nr_extents)
7144{
7145 struct extent_buffer *leaf;
7146 struct btrfs_file_extent_item *fi;
7147 struct inode *inode = NULL;
7148 struct btrfs_key key;
7149 u64 lock_start = 0;
7150 u64 lock_end = 0;
7151 u64 num_bytes;
7152 u64 ext_offset;
7153 u64 search_end = (u64)-1;
7154 u32 nritems;
7155 int nr_scaned = 0;
7156 int extent_locked = 0;
7157 int extent_type;
7158 int ret;
7159
7160 memcpy(&key, leaf_key, sizeof(key));
7161 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7162 if (key.objectid < ref_path->owner_objectid ||
7163 (key.objectid == ref_path->owner_objectid &&
7164 key.type < BTRFS_EXTENT_DATA_KEY)) {
7165 key.objectid = ref_path->owner_objectid;
7166 key.type = BTRFS_EXTENT_DATA_KEY;
7167 key.offset = 0;
7168 }
7169 }
7170
7171 while (1) {
7172 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7173 if (ret < 0)
7174 goto out;
7175
7176 leaf = path->nodes[0];
7177 nritems = btrfs_header_nritems(leaf);
7178next:
7179 if (extent_locked && ret > 0) {
7180 /*
7181 * the file extent item was modified by someone
7182 * before the extent got locked.
7183 */
7184 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7185 lock_end, GFP_NOFS);
7186 extent_locked = 0;
7187 }
7188
7189 if (path->slots[0] >= nritems) {
7190 if (++nr_scaned > 2)
7191 break;
7192
7193 BUG_ON(extent_locked);
7194 ret = btrfs_next_leaf(root, path);
7195 if (ret < 0)
7196 goto out;
7197 if (ret > 0)
7198 break;
7199 leaf = path->nodes[0];
7200 nritems = btrfs_header_nritems(leaf);
7201 }
7202
7203 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7204
7205 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7206 if ((key.objectid > ref_path->owner_objectid) ||
7207 (key.objectid == ref_path->owner_objectid &&
7208 key.type > BTRFS_EXTENT_DATA_KEY) ||
7209 key.offset >= search_end)
7210 break;
7211 }
7212
7213 if (inode && key.objectid != inode->i_ino) {
7214 BUG_ON(extent_locked);
7215 btrfs_release_path(root, path);
7216 mutex_unlock(&inode->i_mutex);
7217 iput(inode);
7218 inode = NULL;
7219 continue;
7220 }
7221
7222 if (key.type != BTRFS_EXTENT_DATA_KEY) {
7223 path->slots[0]++;
7224 ret = 1;
7225 goto next;
7226 }
7227 fi = btrfs_item_ptr(leaf, path->slots[0],
7228 struct btrfs_file_extent_item);
7229 extent_type = btrfs_file_extent_type(leaf, fi);
7230 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
7231 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
7232 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
7233 extent_key->objectid)) {
7234 path->slots[0]++;
7235 ret = 1;
7236 goto next;
7237 }
7238
7239 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7240 ext_offset = btrfs_file_extent_offset(leaf, fi);
7241
7242 if (search_end == (u64)-1) {
7243 search_end = key.offset - ext_offset +
7244 btrfs_file_extent_ram_bytes(leaf, fi);
7245 }
7246
7247 if (!extent_locked) {
7248 lock_start = key.offset;
7249 lock_end = lock_start + num_bytes - 1;
7250 } else {
7251 if (lock_start > key.offset ||
7252 lock_end + 1 < key.offset + num_bytes) {
7253 unlock_extent(&BTRFS_I(inode)->io_tree,
7254 lock_start, lock_end, GFP_NOFS);
7255 extent_locked = 0;
7256 }
7257 }
7258
7259 if (!inode) {
7260 btrfs_release_path(root, path);
7261
7262 inode = btrfs_iget_locked(root->fs_info->sb,
7263 key.objectid, root);
7264 if (inode->i_state & I_NEW) {
7265 BTRFS_I(inode)->root = root;
7266 BTRFS_I(inode)->location.objectid =
7267 key.objectid;
7268 BTRFS_I(inode)->location.type =
7269 BTRFS_INODE_ITEM_KEY;
7270 BTRFS_I(inode)->location.offset = 0;
7271 btrfs_read_locked_inode(inode);
7272 unlock_new_inode(inode);
7273 }
7274 /*
7275 * some code call btrfs_commit_transaction while
7276 * holding the i_mutex, so we can't use mutex_lock
7277 * here.
7278 */
7279 if (is_bad_inode(inode) ||
7280 !mutex_trylock(&inode->i_mutex)) {
7281 iput(inode);
7282 inode = NULL;
7283 key.offset = (u64)-1;
7284 goto skip;
7285 }
7286 }
7287
7288 if (!extent_locked) {
7289 struct btrfs_ordered_extent *ordered;
7290
7291 btrfs_release_path(root, path);
7292
7293 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7294 lock_end, GFP_NOFS);
7295 ordered = btrfs_lookup_first_ordered_extent(inode,
7296 lock_end);
7297 if (ordered &&
7298 ordered->file_offset <= lock_end &&
7299 ordered->file_offset + ordered->len > lock_start) {
7300 unlock_extent(&BTRFS_I(inode)->io_tree,
7301 lock_start, lock_end, GFP_NOFS);
7302 btrfs_start_ordered_extent(inode, ordered, 1);
7303 btrfs_put_ordered_extent(ordered);
7304 key.offset += num_bytes;
7305 goto skip;
7306 }
7307 if (ordered)
7308 btrfs_put_ordered_extent(ordered);
7309
7310 extent_locked = 1;
7311 continue;
7312 }
7313
7314 if (nr_extents == 1) {
7315 /* update extent pointer in place */
7316 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7317 new_extents[0].disk_bytenr);
7318 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7319 new_extents[0].disk_num_bytes);
7320 btrfs_mark_buffer_dirty(leaf);
7321
7322 btrfs_drop_extent_cache(inode, key.offset,
7323 key.offset + num_bytes - 1, 0);
7324
7325 ret = btrfs_inc_extent_ref(trans, root,
7326 new_extents[0].disk_bytenr,
7327 new_extents[0].disk_num_bytes,
7328 leaf->start,
7329 root->root_key.objectid,
7330 trans->transid,
7331 key.objectid);
7332 BUG_ON(ret);
7333
7334 ret = btrfs_free_extent(trans, root,
7335 extent_key->objectid,
7336 extent_key->offset,
7337 leaf->start,
7338 btrfs_header_owner(leaf),
7339 btrfs_header_generation(leaf),
7340 key.objectid, 0);
7341 BUG_ON(ret);
7342
7343 btrfs_release_path(root, path);
7344 key.offset += num_bytes;
7345 } else {
7346 BUG_ON(1);
7347#if 0
7348 u64 alloc_hint;
7349 u64 extent_len;
7350 int i;
7351 /*
7352 * drop old extent pointer at first, then insert the
7353 * new pointers one bye one
7354 */
7355 btrfs_release_path(root, path);
7356 ret = btrfs_drop_extents(trans, root, inode, key.offset,
7357 key.offset + num_bytes,
7358 key.offset, &alloc_hint);
7359 BUG_ON(ret);
7360
7361 for (i = 0; i < nr_extents; i++) {
7362 if (ext_offset >= new_extents[i].num_bytes) {
7363 ext_offset -= new_extents[i].num_bytes;
7364 continue;
7365 }
7366 extent_len = min(new_extents[i].num_bytes -
7367 ext_offset, num_bytes);
7368
7369 ret = btrfs_insert_empty_item(trans, root,
7370 path, &key,
7371 sizeof(*fi));
7372 BUG_ON(ret);
7373
7374 leaf = path->nodes[0];
7375 fi = btrfs_item_ptr(leaf, path->slots[0],
7376 struct btrfs_file_extent_item);
7377 btrfs_set_file_extent_generation(leaf, fi,
7378 trans->transid);
7379 btrfs_set_file_extent_type(leaf, fi,
7380 BTRFS_FILE_EXTENT_REG);
7381 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7382 new_extents[i].disk_bytenr);
7383 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7384 new_extents[i].disk_num_bytes);
7385 btrfs_set_file_extent_ram_bytes(leaf, fi,
7386 new_extents[i].ram_bytes);
7387
7388 btrfs_set_file_extent_compression(leaf, fi,
7389 new_extents[i].compression);
7390 btrfs_set_file_extent_encryption(leaf, fi,
7391 new_extents[i].encryption);
7392 btrfs_set_file_extent_other_encoding(leaf, fi,
7393 new_extents[i].other_encoding);
7394
7395 btrfs_set_file_extent_num_bytes(leaf, fi,
7396 extent_len);
7397 ext_offset += new_extents[i].offset;
7398 btrfs_set_file_extent_offset(leaf, fi,
7399 ext_offset);
7400 btrfs_mark_buffer_dirty(leaf);
7401
7402 btrfs_drop_extent_cache(inode, key.offset,
7403 key.offset + extent_len - 1, 0);
7404
7405 ret = btrfs_inc_extent_ref(trans, root,
7406 new_extents[i].disk_bytenr,
7407 new_extents[i].disk_num_bytes,
7408 leaf->start,
7409 root->root_key.objectid,
7410 trans->transid, key.objectid);
7411 BUG_ON(ret);
7412 btrfs_release_path(root, path);
7413
7414 inode_add_bytes(inode, extent_len);
7415
7416 ext_offset = 0;
7417 num_bytes -= extent_len;
7418 key.offset += extent_len;
7419
7420 if (num_bytes == 0)
7421 break;
7422 }
7423 BUG_ON(i >= nr_extents);
7424#endif
7425 }
7426
7427 if (extent_locked) {
7428 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7429 lock_end, GFP_NOFS);
7430 extent_locked = 0;
7431 }
7432skip:
7433 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
7434 key.offset >= search_end)
7435 break;
7436
7437 cond_resched();
7438 }
7439 ret = 0;
7440out:
7441 btrfs_release_path(root, path);
7442 if (inode) {
7443 mutex_unlock(&inode->i_mutex);
7444 if (extent_locked) {
7445 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7446 lock_end, GFP_NOFS);
7447 }
7448 iput(inode);
7449 }
7450 return ret;
7451}
7452
7453int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7454 struct btrfs_root *root,
7455 struct extent_buffer *buf, u64 orig_start)
7456{
7457 int level;
7458 int ret;
7459
7460 BUG_ON(btrfs_header_generation(buf) != trans->transid);
7461 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7462
7463 level = btrfs_header_level(buf);
7464 if (level == 0) {
7465 struct btrfs_leaf_ref *ref;
7466 struct btrfs_leaf_ref *orig_ref;
7467
7468 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7469 if (!orig_ref)
7470 return -ENOENT;
7471
7472 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7473 if (!ref) {
7474 btrfs_free_leaf_ref(root, orig_ref);
7475 return -ENOMEM;
7476 }
7477
7478 ref->nritems = orig_ref->nritems;
7479 memcpy(ref->extents, orig_ref->extents,
7480 sizeof(ref->extents[0]) * ref->nritems);
7481
7482 btrfs_free_leaf_ref(root, orig_ref);
7483
7484 ref->root_gen = trans->transid;
7485 ref->bytenr = buf->start;
7486 ref->owner = btrfs_header_owner(buf);
7487 ref->generation = btrfs_header_generation(buf);
7488
7489 ret = btrfs_add_leaf_ref(root, ref, 0);
7490 WARN_ON(ret);
7491 btrfs_free_leaf_ref(root, ref);
7492 }
7493 return 0;
7494}
7495
7496static noinline int invalidate_extent_cache(struct btrfs_root *root,
7497 struct extent_buffer *leaf,
7498 struct btrfs_block_group_cache *group,
7499 struct btrfs_root *target_root)
7500{
7501 struct btrfs_key key;
7502 struct inode *inode = NULL;
7503 struct btrfs_file_extent_item *fi;
7504 struct extent_state *cached_state = NULL;
7505 u64 num_bytes;
7506 u64 skip_objectid = 0;
7507 u32 nritems;
7508 u32 i;
7509
7510 nritems = btrfs_header_nritems(leaf);
7511 for (i = 0; i < nritems; i++) {
7512 btrfs_item_key_to_cpu(leaf, &key, i);
7513 if (key.objectid == skip_objectid ||
7514 key.type != BTRFS_EXTENT_DATA_KEY)
7515 continue;
7516 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7517 if (btrfs_file_extent_type(leaf, fi) ==
7518 BTRFS_FILE_EXTENT_INLINE)
7519 continue;
7520 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7521 continue;
7522 if (!inode || inode->i_ino != key.objectid) {
7523 iput(inode);
7524 inode = btrfs_ilookup(target_root->fs_info->sb,
7525 key.objectid, target_root, 1);
7526 }
7527 if (!inode) {
7528 skip_objectid = key.objectid;
7529 continue;
7530 }
7531 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7532
7533 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
7534 key.offset + num_bytes - 1, 0, &cached_state,
7535 GFP_NOFS);
7536 btrfs_drop_extent_cache(inode, key.offset,
7537 key.offset + num_bytes - 1, 1);
7538 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
7539 key.offset + num_bytes - 1, &cached_state,
7540 GFP_NOFS);
7541 cond_resched();
7542 }
7543 iput(inode);
7544 return 0;
7545}
7546
7547static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7548 struct btrfs_root *root,
7549 struct extent_buffer *leaf,
7550 struct btrfs_block_group_cache *group,
7551 struct inode *reloc_inode)
7552{
7553 struct btrfs_key key;
7554 struct btrfs_key extent_key;
7555 struct btrfs_file_extent_item *fi;
7556 struct btrfs_leaf_ref *ref;
7557 struct disk_extent *new_extent;
7558 u64 bytenr;
7559 u64 num_bytes;
7560 u32 nritems;
7561 u32 i;
7562 int ext_index;
7563 int nr_extent;
7564 int ret;
7565
7566 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7567 if (!new_extent)
7568 return -ENOMEM;
7569
7570 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7571 BUG_ON(!ref);
7572
7573 ext_index = -1;
7574 nritems = btrfs_header_nritems(leaf);
7575 for (i = 0; i < nritems; i++) {
7576 btrfs_item_key_to_cpu(leaf, &key, i);
7577 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
7578 continue;
7579 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7580 if (btrfs_file_extent_type(leaf, fi) ==
7581 BTRFS_FILE_EXTENT_INLINE)
7582 continue;
7583 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7584 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7585 if (bytenr == 0)
7586 continue;
7587
7588 ext_index++;
7589 if (bytenr >= group->key.objectid + group->key.offset ||
7590 bytenr + num_bytes <= group->key.objectid)
7591 continue;
7592
7593 extent_key.objectid = bytenr;
7594 extent_key.offset = num_bytes;
7595 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
7596 nr_extent = 1;
7597 ret = get_new_locations(reloc_inode, &extent_key,
7598 group->key.objectid, 1,
7599 &new_extent, &nr_extent);
7600 if (ret > 0)
7601 continue;
7602 BUG_ON(ret < 0);
7603
7604 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
7605 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
7606 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
7607 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
7608
7609 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7610 new_extent->disk_bytenr);
7611 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7612 new_extent->disk_num_bytes);
7613 btrfs_mark_buffer_dirty(leaf);
7614
7615 ret = btrfs_inc_extent_ref(trans, root,
7616 new_extent->disk_bytenr,
7617 new_extent->disk_num_bytes,
7618 leaf->start,
7619 root->root_key.objectid,
7620 trans->transid, key.objectid);
7621 BUG_ON(ret);
7622
7623 ret = btrfs_free_extent(trans, root,
7624 bytenr, num_bytes, leaf->start,
7625 btrfs_header_owner(leaf),
7626 btrfs_header_generation(leaf),
7627 key.objectid, 0);
7628 BUG_ON(ret);
7629 cond_resched();
7630 }
7631 kfree(new_extent);
7632 BUG_ON(ext_index + 1 != ref->nritems);
7633 btrfs_free_leaf_ref(root, ref);
7634 return 0;
7635}
7636
7637int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7638 struct btrfs_root *root)
7639{
7640 struct btrfs_root *reloc_root;
7641 int ret;
7642
7643 if (root->reloc_root) {
7644 reloc_root = root->reloc_root;
7645 root->reloc_root = NULL;
7646 list_add(&reloc_root->dead_list,
7647 &root->fs_info->dead_reloc_roots);
7648
7649 btrfs_set_root_bytenr(&reloc_root->root_item,
7650 reloc_root->node->start);
7651 btrfs_set_root_level(&root->root_item,
7652 btrfs_header_level(reloc_root->node));
7653 memset(&reloc_root->root_item.drop_progress, 0,
7654 sizeof(struct btrfs_disk_key));
7655 reloc_root->root_item.drop_level = 0;
7656
7657 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7658 &reloc_root->root_key,
7659 &reloc_root->root_item);
7660 BUG_ON(ret);
7661 }
7662 return 0;
7663}
7664
7665int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7666{
7667 struct btrfs_trans_handle *trans;
7668 struct btrfs_root *reloc_root;
7669 struct btrfs_root *prev_root = NULL;
7670 struct list_head dead_roots;
7671 int ret;
7672 unsigned long nr;
7673
7674 INIT_LIST_HEAD(&dead_roots);
7675 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
7676
7677 while (!list_empty(&dead_roots)) {
7678 reloc_root = list_entry(dead_roots.prev,
7679 struct btrfs_root, dead_list);
7680 list_del_init(&reloc_root->dead_list);
7681
7682 BUG_ON(reloc_root->commit_root != NULL);
7683 while (1) {
7684 trans = btrfs_join_transaction(root);
7685 BUG_ON(IS_ERR(trans));
7686
7687 mutex_lock(&root->fs_info->drop_mutex);
7688 ret = btrfs_drop_snapshot(trans, reloc_root);
7689 if (ret != -EAGAIN)
7690 break;
7691 mutex_unlock(&root->fs_info->drop_mutex);
7692
7693 nr = trans->blocks_used;
7694 ret = btrfs_end_transaction(trans, root);
7695 BUG_ON(ret);
7696 btrfs_btree_balance_dirty(root, nr);
7697 }
7698
7699 free_extent_buffer(reloc_root->node);
7700
7701 ret = btrfs_del_root(trans, root->fs_info->tree_root,
7702 &reloc_root->root_key);
7703 BUG_ON(ret);
7704 mutex_unlock(&root->fs_info->drop_mutex);
7705
7706 nr = trans->blocks_used;
7707 ret = btrfs_end_transaction(trans, root);
7708 BUG_ON(ret);
7709 btrfs_btree_balance_dirty(root, nr);
7710
7711 kfree(prev_root);
7712 prev_root = reloc_root;
7713 }
7714 if (prev_root) {
7715 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
7716 kfree(prev_root);
7717 }
7718 return 0;
7719}
7720
7721int btrfs_add_dead_reloc_root(struct btrfs_root *root)
7722{
7723 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
7724 return 0;
7725}
7726
7727int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7728{
7729 struct btrfs_root *reloc_root;
7730 struct btrfs_trans_handle *trans;
7731 struct btrfs_key location;
7732 int found;
7733 int ret;
7734
7735 mutex_lock(&root->fs_info->tree_reloc_mutex);
7736 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
7737 BUG_ON(ret);
7738 found = !list_empty(&root->fs_info->dead_reloc_roots);
7739 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7740
7741 if (found) {
7742 trans = btrfs_start_transaction(root, 1);
7743 BUG_ON(IS_ERR(trans));
7744 ret = btrfs_commit_transaction(trans, root);
7745 BUG_ON(ret);
7746 }
7747
7748 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7749 location.offset = (u64)-1;
7750 location.type = BTRFS_ROOT_ITEM_KEY;
7751
7752 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7753 BUG_ON(!reloc_root);
7754 ret = btrfs_orphan_cleanup(reloc_root);
7755 BUG_ON(ret);
7756 return 0;
7757}
7758
7759static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7760 struct btrfs_root *root)
7761{
7762 struct btrfs_root *reloc_root;
7763 struct extent_buffer *eb;
7764 struct btrfs_root_item *root_item;
7765 struct btrfs_key root_key;
7766 int ret;
7767
7768 BUG_ON(!root->ref_cows);
7769 if (root->reloc_root)
7770 return 0;
7771
7772 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7773 if (!root_item)
7774 return -ENOMEM;
7775
7776 ret = btrfs_copy_root(trans, root, root->commit_root,
7777 &eb, BTRFS_TREE_RELOC_OBJECTID);
7778 BUG_ON(ret);
7779
7780 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7781 root_key.offset = root->root_key.objectid;
7782 root_key.type = BTRFS_ROOT_ITEM_KEY;
7783
7784 memcpy(root_item, &root->root_item, sizeof(root_item));
7785 btrfs_set_root_refs(root_item, 0);
7786 btrfs_set_root_bytenr(root_item, eb->start);
7787 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7788 btrfs_set_root_generation(root_item, trans->transid);
7789
7790 btrfs_tree_unlock(eb);
7791 free_extent_buffer(eb);
7792
7793 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7794 &root_key, root_item);
7795 BUG_ON(ret);
7796 kfree(root_item);
7797
7798 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7799 &root_key);
7800 BUG_ON(IS_ERR(reloc_root));
7801 reloc_root->last_trans = trans->transid;
7802 reloc_root->commit_root = NULL;
7803 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7804
7805 root->reloc_root = reloc_root;
7806 return 0;
7807}
7808
7809/*
7810 * Core function of space balance.
7811 *
7812 * The idea is using reloc trees to relocate tree blocks in reference
7813 * counted roots. There is one reloc tree for each subvol, and all
7814 * reloc trees share same root key objectid. Reloc trees are snapshots
7815 * of the latest committed roots of subvols (root->commit_root).
7816 *
7817 * To relocate a tree block referenced by a subvol, there are two steps.
7818 * COW the block through subvol's reloc tree, then update block pointer
7819 * in the subvol to point to the new block. Since all reloc trees share
7820 * same root key objectid, doing special handing for tree blocks owned
7821 * by them is easy. Once a tree block has been COWed in one reloc tree,
7822 * we can use the resulting new block directly when the same block is
7823 * required to COW again through other reloc trees. By this way, relocated
7824 * tree blocks are shared between reloc trees, so they are also shared
7825 * between subvols.
7826 */
7827static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
7828 struct btrfs_root *root,
7829 struct btrfs_path *path,
7830 struct btrfs_key *first_key,
7831 struct btrfs_ref_path *ref_path,
7832 struct btrfs_block_group_cache *group,
7833 struct inode *reloc_inode)
7834{
7835 struct btrfs_root *reloc_root;
7836 struct extent_buffer *eb = NULL;
7837 struct btrfs_key *keys;
7838 u64 *nodes;
7839 int level;
7840 int shared_level;
7841 int lowest_level = 0;
7842 int ret;
7843
7844 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
7845 lowest_level = ref_path->owner_objectid;
7846
7847 if (!root->ref_cows) {
7848 path->lowest_level = lowest_level;
7849 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
7850 BUG_ON(ret < 0);
7851 path->lowest_level = 0;
7852 btrfs_release_path(root, path);
7853 return 0;
7854 }
7855
7856 mutex_lock(&root->fs_info->tree_reloc_mutex);
7857 ret = init_reloc_tree(trans, root);
7858 BUG_ON(ret);
7859 reloc_root = root->reloc_root;
7860
7861 shared_level = ref_path->shared_level;
7862 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
7863
7864 keys = ref_path->node_keys;
7865 nodes = ref_path->new_nodes;
7866 memset(&keys[shared_level + 1], 0,
7867 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
7868 memset(&nodes[shared_level + 1], 0,
7869 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
7870
7871 if (nodes[lowest_level] == 0) {
7872 path->lowest_level = lowest_level;
7873 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7874 0, 1);
7875 BUG_ON(ret);
7876 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
7877 eb = path->nodes[level];
7878 if (!eb || eb == reloc_root->node)
7879 break;
7880 nodes[level] = eb->start;
7881 if (level == 0)
7882 btrfs_item_key_to_cpu(eb, &keys[level], 0);
7883 else
7884 btrfs_node_key_to_cpu(eb, &keys[level], 0);
7885 }
7886 if (nodes[0] &&
7887 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7888 eb = path->nodes[0];
7889 ret = replace_extents_in_leaf(trans, reloc_root, eb,
7890 group, reloc_inode);
7891 BUG_ON(ret);
7892 }
7893 btrfs_release_path(reloc_root, path);
7894 } else {
7895 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
7896 lowest_level);
7897 BUG_ON(ret);
7898 }
7899
7900 /*
7901 * replace tree blocks in the fs tree with tree blocks in
7902 * the reloc tree.
7903 */
7904 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
7905 BUG_ON(ret < 0);
7906
7907 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7908 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7909 0, 0);
7910 BUG_ON(ret);
7911 extent_buffer_get(path->nodes[0]);
7912 eb = path->nodes[0];
7913 btrfs_release_path(reloc_root, path);
7914 ret = invalidate_extent_cache(reloc_root, eb, group, root);
7915 BUG_ON(ret);
7916 free_extent_buffer(eb);
7917 }
7918
7919 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7920 path->lowest_level = 0;
7921 return 0;
7922}
7923
7924static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7925 struct btrfs_root *root,
7926 struct btrfs_path *path,
7927 struct btrfs_key *first_key,
7928 struct btrfs_ref_path *ref_path)
7929{
7930 int ret;
7931
7932 ret = relocate_one_path(trans, root, path, first_key,
7933 ref_path, NULL, NULL);
7934 BUG_ON(ret);
7935
7936 return 0;
7937}
7938
7939static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7940 struct btrfs_root *extent_root,
7941 struct btrfs_path *path,
7942 struct btrfs_key *extent_key)
7943{
7944 int ret;
7945
7946 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7947 if (ret)
7948 goto out;
7949 ret = btrfs_del_item(trans, extent_root, path);
7950out:
7951 btrfs_release_path(extent_root, path);
7952 return ret;
7953}
7954
7955static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7956 struct btrfs_ref_path *ref_path)
7957{
7958 struct btrfs_key root_key;
7959
7960 root_key.objectid = ref_path->root_objectid;
7961 root_key.type = BTRFS_ROOT_ITEM_KEY;
7962 if (is_cowonly_root(ref_path->root_objectid))
7963 root_key.offset = 0;
7964 else
7965 root_key.offset = (u64)-1;
7966
7967 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7968}
7969
7970static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7971 struct btrfs_path *path,
7972 struct btrfs_key *extent_key,
7973 struct btrfs_block_group_cache *group,
7974 struct inode *reloc_inode, int pass)
7975{
7976 struct btrfs_trans_handle *trans;
7977 struct btrfs_root *found_root;
7978 struct btrfs_ref_path *ref_path = NULL;
7979 struct disk_extent *new_extents = NULL;
7980 int nr_extents = 0;
7981 int loops;
7982 int ret;
7983 int level;
7984 struct btrfs_key first_key;
7985 u64 prev_block = 0;
7986
7987
7988 trans = btrfs_start_transaction(extent_root, 1);
7989 BUG_ON(IS_ERR(trans));
7990
7991 if (extent_key->objectid == 0) {
7992 ret = del_extent_zero(trans, extent_root, path, extent_key);
7993 goto out;
7994 }
7995
7996 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7997 if (!ref_path) {
7998 ret = -ENOMEM;
7999 goto out;
8000 }
8001
8002 for (loops = 0; ; loops++) {
8003 if (loops == 0) {
8004 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
8005 extent_key->objectid);
8006 } else {
8007 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
8008 }
8009 if (ret < 0)
8010 goto out;
8011 if (ret > 0)
8012 break;
8013
8014 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
8015 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
8016 continue;
8017
8018 found_root = read_ref_root(extent_root->fs_info, ref_path);
8019 BUG_ON(!found_root);
8020 /*
8021 * for reference counted tree, only process reference paths
8022 * rooted at the latest committed root.
8023 */
8024 if (found_root->ref_cows &&
8025 ref_path->root_generation != found_root->root_key.offset)
8026 continue;
8027
8028 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8029 if (pass == 0) {
8030 /*
8031 * copy data extents to new locations
8032 */
8033 u64 group_start = group->key.objectid;
8034 ret = relocate_data_extent(reloc_inode,
8035 extent_key,
8036 group_start);
8037 if (ret < 0)
8038 goto out;
8039 break;
8040 }
8041 level = 0;
8042 } else {
8043 level = ref_path->owner_objectid;
8044 }
8045
8046 if (prev_block != ref_path->nodes[level]) {
8047 struct extent_buffer *eb;
8048 u64 block_start = ref_path->nodes[level];
8049 u64 block_size = btrfs_level_size(found_root, level);
8050
8051 eb = read_tree_block(found_root, block_start,
8052 block_size, 0);
8053 if (!eb) {
8054 ret = -EIO;
8055 goto out;
8056 }
8057 btrfs_tree_lock(eb);
8058 BUG_ON(level != btrfs_header_level(eb));
8059
8060 if (level == 0)
8061 btrfs_item_key_to_cpu(eb, &first_key, 0);
8062 else
8063 btrfs_node_key_to_cpu(eb, &first_key, 0);
8064
8065 btrfs_tree_unlock(eb);
8066 free_extent_buffer(eb);
8067 prev_block = block_start;
8068 }
8069
8070 mutex_lock(&extent_root->fs_info->trans_mutex);
8071 btrfs_record_root_in_trans(found_root);
8072 mutex_unlock(&extent_root->fs_info->trans_mutex);
8073 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8074 /*
8075 * try to update data extent references while
8076 * keeping metadata shared between snapshots.
8077 */
8078 if (pass == 1) {
8079 ret = relocate_one_path(trans, found_root,
8080 path, &first_key, ref_path,
8081 group, reloc_inode);
8082 if (ret < 0)
8083 goto out;
8084 continue;
8085 }
8086 /*
8087 * use fallback method to process the remaining
8088 * references.
8089 */
8090 if (!new_extents) {
8091 u64 group_start = group->key.objectid;
8092 new_extents = kmalloc(sizeof(*new_extents),
8093 GFP_NOFS);
8094 if (!new_extents) {
8095 ret = -ENOMEM;
8096 goto out;
8097 }
8098 nr_extents = 1;
8099 ret = get_new_locations(reloc_inode,
8100 extent_key,
8101 group_start, 1,
8102 &new_extents,
8103 &nr_extents);
8104 if (ret)
8105 goto out;
8106 }
8107 ret = replace_one_extent(trans, found_root,
8108 path, extent_key,
8109 &first_key, ref_path,
8110 new_extents, nr_extents);
8111 } else {
8112 ret = relocate_tree_block(trans, found_root, path,
8113 &first_key, ref_path);
8114 }
8115 if (ret < 0)
8116 goto out;
8117 }
8118 ret = 0;
8119out:
8120 btrfs_end_transaction(trans, extent_root);
8121 kfree(new_extents);
8122 kfree(ref_path);
8123 return ret;
8124}
8125#endif
8126
8127static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6478static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8128{ 6479{
8129 u64 num_devices; 6480 u64 num_devices;
@@ -8588,10 +6939,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8588 ret = -ENOMEM; 6939 ret = -ENOMEM;
8589 goto error; 6940 goto error;
8590 } 6941 }
6942 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6943 GFP_NOFS);
6944 if (!cache->free_space_ctl) {
6945 kfree(cache);
6946 ret = -ENOMEM;
6947 goto error;
6948 }
8591 6949
8592 atomic_set(&cache->count, 1); 6950 atomic_set(&cache->count, 1);
8593 spin_lock_init(&cache->lock); 6951 spin_lock_init(&cache->lock);
8594 spin_lock_init(&cache->tree_lock);
8595 cache->fs_info = info; 6952 cache->fs_info = info;
8596 INIT_LIST_HEAD(&cache->list); 6953 INIT_LIST_HEAD(&cache->list);
8597 INIT_LIST_HEAD(&cache->cluster_list); 6954 INIT_LIST_HEAD(&cache->cluster_list);
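
The added pair of allocations follows the usual unwind-on-failure shape: when the second allocation fails, the first must be freed before returning, or the error path leaks the cache. A minimal userspace sketch of the same pattern (names are illustrative):

	#include <stdlib.h>

	struct ctl { int unused; };
	struct cache { struct ctl *free_space_ctl; };

	static struct cache *cache_alloc(void)
	{
		struct cache *c = calloc(1, sizeof(*c));
		if (!c)
			return NULL;
		c->free_space_ctl = calloc(1, sizeof(*c->free_space_ctl));
		if (!c->free_space_ctl) {
			free(c);	/* unwind the first allocation */
			return NULL;
		}
		return c;
	}

	int main(void)
	{
		struct cache *c = cache_alloc();
		if (c) {
			free(c->free_space_ctl);
			free(c);
		}
		return 0;
	}
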
@@ -8599,24 +6956,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8599 if (need_clear) 6956 if (need_clear)
8600 cache->disk_cache_state = BTRFS_DC_CLEAR; 6957 cache->disk_cache_state = BTRFS_DC_CLEAR;
8601 6958
8602 /*
8603 * we only want to have 32k of ram per block group for keeping
8604 * track of free space, and if we pass 1/2 of that we want to
8605 * start converting things over to using bitmaps
8606 */
8607 cache->extents_thresh = ((1024 * 32) / 2) /
8608 sizeof(struct btrfs_free_space);
8609
8610 read_extent_buffer(leaf, &cache->item, 6959 read_extent_buffer(leaf, &cache->item,
8611 btrfs_item_ptr_offset(leaf, path->slots[0]), 6960 btrfs_item_ptr_offset(leaf, path->slots[0]),
8612 sizeof(cache->item)); 6961 sizeof(cache->item));
8613 memcpy(&cache->key, &found_key, sizeof(found_key)); 6962 memcpy(&cache->key, &found_key, sizeof(found_key));
8614 6963
8615 key.objectid = found_key.objectid + found_key.offset; 6964 key.objectid = found_key.objectid + found_key.offset;
8616 btrfs_release_path(root, path); 6965 btrfs_release_path(path);
8617 cache->flags = btrfs_block_group_flags(&cache->item); 6966 cache->flags = btrfs_block_group_flags(&cache->item);
8618 cache->sectorsize = root->sectorsize; 6967 cache->sectorsize = root->sectorsize;
8619 6968
6969 btrfs_init_free_space_ctl(cache);
6970
8620 /* 6971 /*
8621 * We need to exclude the super stripes now so that the space 6972 * We need to exclude the super stripes now so that the space
8622 * info has super bytes accounted for, otherwise we'll think 6973 * info has super bytes accounted for, otherwise we'll think
@@ -8703,6 +7054,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8703 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7054 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8704 if (!cache) 7055 if (!cache)
8705 return -ENOMEM; 7056 return -ENOMEM;
7057 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7058 GFP_NOFS);
7059 if (!cache->free_space_ctl) {
7060 kfree(cache);
7061 return -ENOMEM;
7062 }
8706 7063
8707 cache->key.objectid = chunk_offset; 7064 cache->key.objectid = chunk_offset;
8708 cache->key.offset = size; 7065 cache->key.offset = size;
@@ -8710,19 +7067,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8710 cache->sectorsize = root->sectorsize; 7067 cache->sectorsize = root->sectorsize;
8711 cache->fs_info = root->fs_info; 7068 cache->fs_info = root->fs_info;
8712 7069
8713 /*
8714 * we only want to have 32k of ram per block group for keeping track
8715 * of free space, and if we pass 1/2 of that we want to start
8716 * converting things over to using bitmaps
8717 */
8718 cache->extents_thresh = ((1024 * 32) / 2) /
8719 sizeof(struct btrfs_free_space);
8720 atomic_set(&cache->count, 1); 7070 atomic_set(&cache->count, 1);
8721 spin_lock_init(&cache->lock); 7071 spin_lock_init(&cache->lock);
8722 spin_lock_init(&cache->tree_lock);
8723 INIT_LIST_HEAD(&cache->list); 7072 INIT_LIST_HEAD(&cache->list);
8724 INIT_LIST_HEAD(&cache->cluster_list); 7073 INIT_LIST_HEAD(&cache->cluster_list);
8725 7074
7075 btrfs_init_free_space_ctl(cache);
7076
8726 btrfs_set_block_group_used(&cache->item, bytes_used); 7077 btrfs_set_block_group_used(&cache->item, bytes_used);
8727 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7078 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8728 cache->flags = type; 7079 cache->flags = type;
@@ -8835,12 +7186,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8835 if (ret < 0) 7186 if (ret < 0)
8836 goto out; 7187 goto out;
8837 if (ret > 0) 7188 if (ret > 0)
8838 btrfs_release_path(tree_root, path); 7189 btrfs_release_path(path);
8839 if (ret == 0) { 7190 if (ret == 0) {
8840 ret = btrfs_del_item(trans, tree_root, path); 7191 ret = btrfs_del_item(trans, tree_root, path);
8841 if (ret) 7192 if (ret)
8842 goto out; 7193 goto out;
8843 btrfs_release_path(tree_root, path); 7194 btrfs_release_path(path);
8844 } 7195 }
8845 7196
8846 spin_lock(&root->fs_info->block_group_cache_lock); 7197 spin_lock(&root->fs_info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b5f6f227a97c..b181a94a7170 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -101,7 +101,7 @@ void extent_io_exit(void)
101} 101}
102 102
103void extent_io_tree_init(struct extent_io_tree *tree, 103void extent_io_tree_init(struct extent_io_tree *tree,
104 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping)
105{ 105{
106 tree->state = RB_ROOT; 106 tree->state = RB_ROOT;
107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); 107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
@@ -439,6 +439,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
439 return ret; 439 return ret;
440} 440}
441 441
442static struct extent_state *
443alloc_extent_state_atomic(struct extent_state *prealloc)
444{
445 if (!prealloc)
446 prealloc = alloc_extent_state(GFP_ATOMIC);
447
448 return prealloc;
449}
450
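A userspace rendering of the helper just added (a sketch; malloc() stands in for alloc_extent_state(GFP_ATOMIC)): reuse the caller's preallocation when one exists, fall back to an atomic-safe allocation otherwise, and leave NULL handling to the caller's BUG_ON():

	#include <stdlib.h>

	struct extent_state { long refs; };

	static struct extent_state *
	alloc_extent_state_atomic(struct extent_state *prealloc)
	{
		if (!prealloc)
			prealloc = malloc(sizeof(*prealloc));	/* ~ the GFP_ATOMIC path */
		return prealloc;
	}

	int main(void)
	{
		struct extent_state *s = alloc_extent_state_atomic(NULL);
		if (s)
			free(s);
		return 0;
	}
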
442/* 451/*
443 * clear some bits on a range in the tree. This may require splitting 452 * clear some bits on a range in the tree. This may require splitting
444 * or inserting elements in the tree, so the gfp mask is used to 453 * or inserting elements in the tree, so the gfp mask is used to
@@ -529,8 +538,8 @@ hit_next:
529 */ 538 */
530 539
531 if (state->start < start) { 540 if (state->start < start) {
532 if (!prealloc) 541 prealloc = alloc_extent_state_atomic(prealloc);
533 prealloc = alloc_extent_state(GFP_ATOMIC); 542 BUG_ON(!prealloc);
534 err = split_state(tree, state, prealloc, start); 543 err = split_state(tree, state, prealloc, start);
535 BUG_ON(err == -EEXIST); 544 BUG_ON(err == -EEXIST);
536 prealloc = NULL; 545 prealloc = NULL;
@@ -551,8 +560,8 @@ hit_next:
551 * on the first half 560 * on the first half
552 */ 561 */
553 if (state->start <= end && state->end > end) { 562 if (state->start <= end && state->end > end) {
554 if (!prealloc) 563 prealloc = alloc_extent_state_atomic(prealloc);
555 prealloc = alloc_extent_state(GFP_ATOMIC); 564 BUG_ON(!prealloc);
556 err = split_state(tree, state, prealloc, end + 1); 565 err = split_state(tree, state, prealloc, end + 1);
557 BUG_ON(err == -EEXIST); 566 BUG_ON(err == -EEXIST);
558 if (wake) 567 if (wake)
@@ -725,8 +734,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
725again: 734again:
726 if (!prealloc && (mask & __GFP_WAIT)) { 735 if (!prealloc && (mask & __GFP_WAIT)) {
727 prealloc = alloc_extent_state(mask); 736 prealloc = alloc_extent_state(mask);
728 if (!prealloc) 737 BUG_ON(!prealloc);
729 return -ENOMEM;
730 } 738 }
731 739
732 spin_lock(&tree->lock); 740 spin_lock(&tree->lock);
@@ -743,6 +751,8 @@ again:
743 */ 751 */
744 node = tree_search(tree, start); 752 node = tree_search(tree, start);
745 if (!node) { 753 if (!node) {
754 prealloc = alloc_extent_state_atomic(prealloc);
755 BUG_ON(!prealloc);
746 err = insert_state(tree, prealloc, start, end, &bits); 756 err = insert_state(tree, prealloc, start, end, &bits);
747 prealloc = NULL; 757 prealloc = NULL;
748 BUG_ON(err == -EEXIST); 758 BUG_ON(err == -EEXIST);
@@ -771,20 +781,18 @@ hit_next:
771 if (err) 781 if (err)
772 goto out; 782 goto out;
773 783
784 next_node = rb_next(node);
774 cache_state(state, cached_state); 785 cache_state(state, cached_state);
775 merge_state(tree, state); 786 merge_state(tree, state);
776 if (last_end == (u64)-1) 787 if (last_end == (u64)-1)
777 goto out; 788 goto out;
778 789
779 start = last_end + 1; 790 start = last_end + 1;
780 if (start < end && prealloc && !need_resched()) { 791 if (next_node && start < end && prealloc && !need_resched()) {
781 next_node = rb_next(node); 792 state = rb_entry(next_node, struct extent_state,
782 if (next_node) { 793 rb_node);
783 state = rb_entry(next_node, struct extent_state, 794 if (state->start == start)
784 rb_node); 795 goto hit_next;
785 if (state->start == start)
786 goto hit_next;
787 }
788 } 796 }
789 goto search_again; 797 goto search_again;
790 } 798 }
@@ -811,6 +819,9 @@ hit_next:
811 err = -EEXIST; 819 err = -EEXIST;
812 goto out; 820 goto out;
813 } 821 }
822
823 prealloc = alloc_extent_state_atomic(prealloc);
824 BUG_ON(!prealloc);
814 err = split_state(tree, state, prealloc, start); 825 err = split_state(tree, state, prealloc, start);
815 BUG_ON(err == -EEXIST); 826 BUG_ON(err == -EEXIST);
816 prealloc = NULL; 827 prealloc = NULL;
@@ -841,14 +852,25 @@ hit_next:
841 this_end = end; 852 this_end = end;
842 else 853 else
843 this_end = last_start - 1; 854 this_end = last_start - 1;
855
856 prealloc = alloc_extent_state_atomic(prealloc);
857 BUG_ON(!prealloc);
858
859 /*
 860 * Avoid freeing 'prealloc' if it can be merged with
861 * the later extent.
862 */
863 atomic_inc(&prealloc->refs);
844 err = insert_state(tree, prealloc, start, this_end, 864 err = insert_state(tree, prealloc, start, this_end,
845 &bits); 865 &bits);
846 BUG_ON(err == -EEXIST); 866 BUG_ON(err == -EEXIST);
847 if (err) { 867 if (err) {
868 free_extent_state(prealloc);
848 prealloc = NULL; 869 prealloc = NULL;
849 goto out; 870 goto out;
850 } 871 }
851 cache_state(prealloc, cached_state); 872 cache_state(prealloc, cached_state);
873 free_extent_state(prealloc);
852 prealloc = NULL; 874 prealloc = NULL;
853 start = this_end + 1; 875 start = this_end + 1;
854 goto search_again; 876 goto search_again;
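
The atomic_inc() above is the subtle part of this hunk: insert_state() may merge the new state into a neighbour and free it, after which the old code's cache_state(prealloc, ...) would have touched freed memory. Taking an extra reference keeps 'prealloc' valid across the insert, and both the success and failure paths then drop that reference explicitly. The lifetime rule in isolation (a restatement of the lines above, not new logic):

    atomic_inc(&prealloc->refs);            /* survive a merge in insert_state() */
    err = insert_state(tree, prealloc, start, this_end, &bits);
    if (err) {
            free_extent_state(prealloc);    /* drop the reference taken above */
            prealloc = NULL;
            goto out;
    }
    cache_state(prealloc, cached_state);    /* safe: we still hold a reference */
    free_extent_state(prealloc);            /* balance the atomic_inc() */
    prealloc = NULL;
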
@@ -865,6 +887,9 @@ hit_next:
865 err = -EEXIST; 887 err = -EEXIST;
866 goto out; 888 goto out;
867 } 889 }
890
891 prealloc = alloc_extent_state_atomic(prealloc);
892 BUG_ON(!prealloc);
868 err = split_state(tree, state, prealloc, end + 1); 893 err = split_state(tree, state, prealloc, end + 1);
869 BUG_ON(err == -EEXIST); 894 BUG_ON(err == -EEXIST);
870 895
@@ -941,13 +966,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
941 NULL, mask); 966 NULL, mask);
942} 967}
943 968
944static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
945 gfp_t mask)
946{
947 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
948 NULL, mask);
949}
950
951int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 969int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
952 struct extent_state **cached_state, gfp_t mask) 970 struct extent_state **cached_state, gfp_t mask)
953{ 971{
@@ -963,11 +981,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
963 cached_state, mask); 981 cached_state, mask);
964} 982}
965 983
966int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
967{
968 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
969}
970
971/* 984/*
972 * either insert or lock state struct between start and end use mask to tell 985 * either insert or lock state struct between start and end use mask to tell
973 * us if waiting is desired. 986 * us if waiting is desired.
@@ -1028,25 +1041,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1028} 1041}
1029 1042
1030/* 1043/*
1031 * helper function to set pages and extents in the tree dirty
1032 */
1033int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
1034{
1035 unsigned long index = start >> PAGE_CACHE_SHIFT;
1036 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1037 struct page *page;
1038
1039 while (index <= end_index) {
1040 page = find_get_page(tree->mapping, index);
1041 BUG_ON(!page);
1042 __set_page_dirty_nobuffers(page);
1043 page_cache_release(page);
1044 index++;
1045 }
1046 return 0;
1047}
1048
1049/*
1050 * helper function to set both pages and extents in the tree writeback 1044 * helper function to set both pages and extents in the tree writeback
1051 */ 1045 */
1052static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1046static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1819,46 +1813,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1819 bio_put(bio); 1813 bio_put(bio);
1820} 1814}
1821 1815
1822/*
1823 * IO done from prepare_write is pretty simple, we just unlock
1824 * the structs in the extent tree when done, and set the uptodate bits
1825 * as appropriate.
1826 */
1827static void end_bio_extent_preparewrite(struct bio *bio, int err)
1828{
1829 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1830 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1831 struct extent_io_tree *tree;
1832 u64 start;
1833 u64 end;
1834
1835 do {
1836 struct page *page = bvec->bv_page;
1837 struct extent_state *cached = NULL;
1838 tree = &BTRFS_I(page->mapping->host)->io_tree;
1839
1840 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1841 bvec->bv_offset;
1842 end = start + bvec->bv_len - 1;
1843
1844 if (--bvec >= bio->bi_io_vec)
1845 prefetchw(&bvec->bv_page->flags);
1846
1847 if (uptodate) {
1848 set_extent_uptodate(tree, start, end, &cached,
1849 GFP_ATOMIC);
1850 } else {
1851 ClearPageUptodate(page);
1852 SetPageError(page);
1853 }
1854
1855 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1856
1857 } while (bvec >= bio->bi_io_vec);
1858
1859 bio_put(bio);
1860}
1861
1862struct bio * 1816struct bio *
1863btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1817btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1864 gfp_t gfp_flags) 1818 gfp_t gfp_flags)
@@ -2007,7 +1961,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2007 struct btrfs_ordered_extent *ordered; 1961 struct btrfs_ordered_extent *ordered;
2008 int ret; 1962 int ret;
2009 int nr = 0; 1963 int nr = 0;
2010 size_t page_offset = 0; 1964 size_t pg_offset = 0;
2011 size_t iosize; 1965 size_t iosize;
2012 size_t disk_io_size; 1966 size_t disk_io_size;
2013 size_t blocksize = inode->i_sb->s_blocksize; 1967 size_t blocksize = inode->i_sb->s_blocksize;
@@ -2043,9 +1997,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2043 char *userpage; 1997 char *userpage;
2044 struct extent_state *cached = NULL; 1998 struct extent_state *cached = NULL;
2045 1999
2046 iosize = PAGE_CACHE_SIZE - page_offset; 2000 iosize = PAGE_CACHE_SIZE - pg_offset;
2047 userpage = kmap_atomic(page, KM_USER0); 2001 userpage = kmap_atomic(page, KM_USER0);
2048 memset(userpage + page_offset, 0, iosize); 2002 memset(userpage + pg_offset, 0, iosize);
2049 flush_dcache_page(page); 2003 flush_dcache_page(page);
2050 kunmap_atomic(userpage, KM_USER0); 2004 kunmap_atomic(userpage, KM_USER0);
2051 set_extent_uptodate(tree, cur, cur + iosize - 1, 2005 set_extent_uptodate(tree, cur, cur + iosize - 1,
@@ -2054,9 +2008,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2054 &cached, GFP_NOFS); 2008 &cached, GFP_NOFS);
2055 break; 2009 break;
2056 } 2010 }
2057 em = get_extent(inode, page, page_offset, cur, 2011 em = get_extent(inode, page, pg_offset, cur,
2058 end - cur + 1, 0); 2012 end - cur + 1, 0);
2059 if (IS_ERR(em) || !em) { 2013 if (IS_ERR_OR_NULL(em)) {
2060 SetPageError(page); 2014 SetPageError(page);
2061 unlock_extent(tree, cur, end, GFP_NOFS); 2015 unlock_extent(tree, cur, end, GFP_NOFS);
2062 break; 2016 break;
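
IS_ERR_OR_NULL() collapses the two-branch test for lookups such as get_extent(), which can fail with either an ERR_PTR or a plain NULL. The transformed pattern, shown once in isolation:

    em = get_extent(inode, page, pg_offset, cur, end - cur + 1, 0);
    if (IS_ERR_OR_NULL(em)) {       /* was: if (IS_ERR(em) || !em) */
            SetPageError(page);
            unlock_extent(tree, cur, end, GFP_NOFS);
            break;
    }

The same substitution recurs below in __extent_writepage(), try_release_extent_mapping(), get_extent_skip_holes() and the fallocate path.
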
@@ -2094,7 +2048,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2094 struct extent_state *cached = NULL; 2048 struct extent_state *cached = NULL;
2095 2049
2096 userpage = kmap_atomic(page, KM_USER0); 2050 userpage = kmap_atomic(page, KM_USER0);
2097 memset(userpage + page_offset, 0, iosize); 2051 memset(userpage + pg_offset, 0, iosize);
2098 flush_dcache_page(page); 2052 flush_dcache_page(page);
2099 kunmap_atomic(userpage, KM_USER0); 2053 kunmap_atomic(userpage, KM_USER0);
2100 2054
@@ -2103,7 +2057,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2103 unlock_extent_cached(tree, cur, cur + iosize - 1, 2057 unlock_extent_cached(tree, cur, cur + iosize - 1,
2104 &cached, GFP_NOFS); 2058 &cached, GFP_NOFS);
2105 cur = cur + iosize; 2059 cur = cur + iosize;
2106 page_offset += iosize; 2060 pg_offset += iosize;
2107 continue; 2061 continue;
2108 } 2062 }
2109 /* the get_extent function already copied into the page */ 2063 /* the get_extent function already copied into the page */
@@ -2112,7 +2066,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2112 check_page_uptodate(tree, page); 2066 check_page_uptodate(tree, page);
2113 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2067 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2114 cur = cur + iosize; 2068 cur = cur + iosize;
2115 page_offset += iosize; 2069 pg_offset += iosize;
2116 continue; 2070 continue;
2117 } 2071 }
2118 /* we have an inline extent but it didn't get marked up 2072 /* we have an inline extent but it didn't get marked up
@@ -2122,7 +2076,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2122 SetPageError(page); 2076 SetPageError(page);
2123 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2077 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2124 cur = cur + iosize; 2078 cur = cur + iosize;
2125 page_offset += iosize; 2079 pg_offset += iosize;
2126 continue; 2080 continue;
2127 } 2081 }
2128 2082
@@ -2135,7 +2089,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2135 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2089 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2136 pnr -= page->index; 2090 pnr -= page->index;
2137 ret = submit_extent_page(READ, tree, page, 2091 ret = submit_extent_page(READ, tree, page,
2138 sector, disk_io_size, page_offset, 2092 sector, disk_io_size, pg_offset,
2139 bdev, bio, pnr, 2093 bdev, bio, pnr,
2140 end_bio_extent_readpage, mirror_num, 2094 end_bio_extent_readpage, mirror_num,
2141 *bio_flags, 2095 *bio_flags,
@@ -2146,7 +2100,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2146 if (ret) 2100 if (ret)
2147 SetPageError(page); 2101 SetPageError(page);
2148 cur = cur + iosize; 2102 cur = cur + iosize;
2149 page_offset += iosize; 2103 pg_offset += iosize;
2150 } 2104 }
2151 if (!nr) { 2105 if (!nr) {
2152 if (!PageError(page)) 2106 if (!PageError(page))
@@ -2341,7 +2295,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2341 } 2295 }
2342 em = epd->get_extent(inode, page, pg_offset, cur, 2296 em = epd->get_extent(inode, page, pg_offset, cur,
2343 end - cur + 1, 1); 2297 end - cur + 1, 1);
2344 if (IS_ERR(em) || !em) { 2298 if (IS_ERR_OR_NULL(em)) {
2345 SetPageError(page); 2299 SetPageError(page);
2346 break; 2300 break;
2347 } 2301 }
@@ -2720,128 +2674,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2720} 2674}
2721 2675
2722/* 2676/*
2723 * simple commit_write call, set_range_dirty is used to mark both
2724 * the pages and the extent records as dirty
2725 */
2726int extent_commit_write(struct extent_io_tree *tree,
2727 struct inode *inode, struct page *page,
2728 unsigned from, unsigned to)
2729{
2730 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2731
2732 set_page_extent_mapped(page);
2733 set_page_dirty(page);
2734
2735 if (pos > inode->i_size) {
2736 i_size_write(inode, pos);
2737 mark_inode_dirty(inode);
2738 }
2739 return 0;
2740}
2741
2742int extent_prepare_write(struct extent_io_tree *tree,
2743 struct inode *inode, struct page *page,
2744 unsigned from, unsigned to, get_extent_t *get_extent)
2745{
2746 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2747 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2748 u64 block_start;
2749 u64 orig_block_start;
2750 u64 block_end;
2751 u64 cur_end;
2752 struct extent_map *em;
2753 unsigned blocksize = 1 << inode->i_blkbits;
2754 size_t page_offset = 0;
2755 size_t block_off_start;
2756 size_t block_off_end;
2757 int err = 0;
2758 int iocount = 0;
2759 int ret = 0;
2760 int isnew;
2761
2762 set_page_extent_mapped(page);
2763
2764 block_start = (page_start + from) & ~((u64)blocksize - 1);
2765 block_end = (page_start + to - 1) | (blocksize - 1);
2766 orig_block_start = block_start;
2767
2768 lock_extent(tree, page_start, page_end, GFP_NOFS);
2769 while (block_start <= block_end) {
2770 em = get_extent(inode, page, page_offset, block_start,
2771 block_end - block_start + 1, 1);
2772 if (IS_ERR(em) || !em)
2773 goto err;
2774
2775 cur_end = min(block_end, extent_map_end(em) - 1);
2776 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2777 block_off_end = block_off_start + blocksize;
2778 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2779
2780 if (!PageUptodate(page) && isnew &&
2781 (block_off_end > to || block_off_start < from)) {
2782 void *kaddr;
2783
2784 kaddr = kmap_atomic(page, KM_USER0);
2785 if (block_off_end > to)
2786 memset(kaddr + to, 0, block_off_end - to);
2787 if (block_off_start < from)
2788 memset(kaddr + block_off_start, 0,
2789 from - block_off_start);
2790 flush_dcache_page(page);
2791 kunmap_atomic(kaddr, KM_USER0);
2792 }
2793 if ((em->block_start != EXTENT_MAP_HOLE &&
2794 em->block_start != EXTENT_MAP_INLINE) &&
2795 !isnew && !PageUptodate(page) &&
2796 (block_off_end > to || block_off_start < from) &&
2797 !test_range_bit(tree, block_start, cur_end,
2798 EXTENT_UPTODATE, 1, NULL)) {
2799 u64 sector;
2800 u64 extent_offset = block_start - em->start;
2801 size_t iosize;
2802 sector = (em->block_start + extent_offset) >> 9;
2803 iosize = (cur_end - block_start + blocksize) &
2804 ~((u64)blocksize - 1);
2805 /*
2806 * we've already got the extent locked, but we
2807 * need to split the state such that our end_bio
2808 * handler can clear the lock.
2809 */
2810 set_extent_bit(tree, block_start,
2811 block_start + iosize - 1,
2812 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2813 ret = submit_extent_page(READ, tree, page,
2814 sector, iosize, page_offset, em->bdev,
2815 NULL, 1,
2816 end_bio_extent_preparewrite, 0,
2817 0, 0);
2818 if (ret && !err)
2819 err = ret;
2820 iocount++;
2821 block_start = block_start + iosize;
2822 } else {
2823 struct extent_state *cached = NULL;
2824
2825 set_extent_uptodate(tree, block_start, cur_end, &cached,
2826 GFP_NOFS);
2827 unlock_extent_cached(tree, block_start, cur_end,
2828 &cached, GFP_NOFS);
2829 block_start = cur_end + 1;
2830 }
2831 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2832 free_extent_map(em);
2833 }
2834 if (iocount) {
2835 wait_extent_bit(tree, orig_block_start,
2836 block_end, EXTENT_LOCKED);
2837 }
2838 check_page_uptodate(tree, page);
2839err:
2840 /* FIXME, zero out newly allocated blocks on error */
2841 return err;
2842}
2843
2844/*
2845 * a helper for releasepage, this tests for areas of the page that 2677 * a helper for releasepage, this tests for areas of the page that
2846 * are locked or under IO and drops the related state bits if it is safe 2678 * are locked or under IO and drops the related state bits if it is safe
2847 * to drop the page. 2679 * to drop the page.
@@ -2899,7 +2731,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2899 len = end - start + 1; 2731 len = end - start + 1;
2900 write_lock(&map->lock); 2732 write_lock(&map->lock);
2901 em = lookup_extent_mapping(map, start, len); 2733 em = lookup_extent_mapping(map, start, len);
2902 if (!em || IS_ERR(em)) { 2734 if (IS_ERR_OR_NULL(em)) {
2903 write_unlock(&map->lock); 2735 write_unlock(&map->lock);
2904 break; 2736 break;
2905 } 2737 }
@@ -2927,33 +2759,6 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2927 return try_release_extent_state(map, tree, page, mask); 2759 return try_release_extent_state(map, tree, page, mask);
2928} 2760}
2929 2761
2930sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2931 get_extent_t *get_extent)
2932{
2933 struct inode *inode = mapping->host;
2934 struct extent_state *cached_state = NULL;
2935 u64 start = iblock << inode->i_blkbits;
2936 sector_t sector = 0;
2937 size_t blksize = (1 << inode->i_blkbits);
2938 struct extent_map *em;
2939
2940 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2941 0, &cached_state, GFP_NOFS);
2942 em = get_extent(inode, NULL, 0, start, blksize, 0);
2943 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2944 start + blksize - 1, &cached_state, GFP_NOFS);
2945 if (!em || IS_ERR(em))
2946 return 0;
2947
2948 if (em->block_start > EXTENT_MAP_LAST_BYTE)
2949 goto out;
2950
2951 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2952out:
2953 free_extent_map(em);
2954 return sector;
2955}
2956
2957/* 2762/*
2958 * helper function for fiemap, which doesn't want to see any holes. 2763 * helper function for fiemap, which doesn't want to see any holes.
2959 * This maps until we find something past 'last' 2764 * This maps until we find something past 'last'
@@ -2976,7 +2781,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
2976 break; 2781 break;
2977 len = (len + sectorsize - 1) & ~(sectorsize - 1); 2782 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2978 em = get_extent(inode, NULL, 0, offset, len, 0); 2783 em = get_extent(inode, NULL, 0, offset, len, 0);
2979 if (!em || IS_ERR(em)) 2784 if (IS_ERR_OR_NULL(em))
2980 return em; 2785 return em;
2981 2786
2982 /* if this isn't a hole return it */ 2787 /* if this isn't a hole return it */
@@ -3030,7 +2835,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3030 * because there might be preallocation past i_size 2835 * because there might be preallocation past i_size
3031 */ 2836 */
3032 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2837 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
3033 path, inode->i_ino, -1, 0); 2838 path, btrfs_ino(inode), -1, 0);
3034 if (ret < 0) { 2839 if (ret < 0) {
3035 btrfs_free_path(path); 2840 btrfs_free_path(path);
3036 return ret; 2841 return ret;
@@ -3043,7 +2848,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3043 found_type = btrfs_key_type(&found_key); 2848 found_type = btrfs_key_type(&found_key);
3044 2849
3045 /* No extents, but there might be delalloc bits */ 2850 /* No extents, but there might be delalloc bits */
3046 if (found_key.objectid != inode->i_ino || 2851 if (found_key.objectid != btrfs_ino(inode) ||
3047 found_type != BTRFS_EXTENT_DATA_KEY) { 2852 found_type != BTRFS_EXTENT_DATA_KEY) {
3048 /* have to trust i_size as the end */ 2853 /* have to trust i_size as the end */
3049 last = (u64)-1; 2854 last = (u64)-1;
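
The inode->i_ino references in the fiemap path become btrfs_ino(inode): with the per-root free inode number cache added in this series, the VFS-visible i_ino can no longer be assumed to equal the btrfs objectid used as a tree key. A sketch of the accessor's intent (the real definition lives in btrfs_inode.h; treat this as an approximation, not the exact code):

    static inline u64 btrfs_ino(struct inode *inode)
    {
            u64 ino = BTRFS_I(inode)->location.objectid;

            /* special inodes lack a meaningful location objectid */
            if (ino <= BTRFS_FIRST_FREE_OBJECTID)
                    ino = inode->i_ino;
            return ino;
    }
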
@@ -3266,8 +3071,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3266 3071
3267struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3072struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3268 u64 start, unsigned long len, 3073 u64 start, unsigned long len,
3269 struct page *page0, 3074 struct page *page0)
3270 gfp_t mask)
3271{ 3075{
3272 unsigned long num_pages = num_extent_pages(start, len); 3076 unsigned long num_pages = num_extent_pages(start, len);
3273 unsigned long i; 3077 unsigned long i;
@@ -3288,7 +3092,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3288 } 3092 }
3289 rcu_read_unlock(); 3093 rcu_read_unlock();
3290 3094
3291 eb = __alloc_extent_buffer(tree, start, len, mask); 3095 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
3292 if (!eb) 3096 if (!eb)
3293 return NULL; 3097 return NULL;
3294 3098
@@ -3305,7 +3109,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3305 i = 0; 3109 i = 0;
3306 } 3110 }
3307 for (; i < num_pages; i++, index++) { 3111 for (; i < num_pages; i++, index++) {
3308 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3112 p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
3309 if (!p) { 3113 if (!p) {
3310 WARN_ON(1); 3114 WARN_ON(1);
3311 goto free_eb; 3115 goto free_eb;
@@ -3377,8 +3181,7 @@ free_eb:
3377} 3181}
3378 3182
3379struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3183struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3380 u64 start, unsigned long len, 3184 u64 start, unsigned long len)
3381 gfp_t mask)
3382{ 3185{
3383 struct extent_buffer *eb; 3186 struct extent_buffer *eb;
3384 3187
@@ -3439,13 +3242,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3439 return 0; 3242 return 0;
3440} 3243}
3441 3244
3442int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3443 struct extent_buffer *eb)
3444{
3445 return wait_on_extent_writeback(tree, eb->start,
3446 eb->start + eb->len - 1);
3447}
3448
3449int set_extent_buffer_dirty(struct extent_io_tree *tree, 3245int set_extent_buffer_dirty(struct extent_io_tree *tree,
3450 struct extent_buffer *eb) 3246 struct extent_buffer *eb)
3451{ 3247{
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index af2d7179c372..4e8445a4757c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -153,23 +153,14 @@ static inline int extent_compress_type(unsigned long bio_flags)
153 153
154struct extent_map_tree; 154struct extent_map_tree;
155 155
156static inline struct extent_state *extent_state_next(struct extent_state *state)
157{
158 struct rb_node *node;
159 node = rb_next(&state->rb_node);
160 if (!node)
161 return NULL;
162 return rb_entry(node, struct extent_state, rb_node);
163}
164
165typedef struct extent_map *(get_extent_t)(struct inode *inode, 156typedef struct extent_map *(get_extent_t)(struct inode *inode,
166 struct page *page, 157 struct page *page,
167 size_t page_offset, 158 size_t pg_offset,
168 u64 start, u64 len, 159 u64 start, u64 len,
169 int create); 160 int create);
170 161
171void extent_io_tree_init(struct extent_io_tree *tree, 162void extent_io_tree_init(struct extent_io_tree *tree,
172 struct address_space *mapping, gfp_t mask); 163 struct address_space *mapping);
173int try_release_extent_mapping(struct extent_map_tree *map, 164int try_release_extent_mapping(struct extent_map_tree *map,
174 struct extent_io_tree *tree, struct page *page, 165 struct extent_io_tree *tree, struct page *page,
175 gfp_t mask); 166 gfp_t mask);
@@ -215,14 +206,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
215 gfp_t mask); 206 gfp_t mask);
216int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 207int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
217 gfp_t mask); 208 gfp_t mask);
218int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
219 gfp_t mask);
220int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
221 u64 end, gfp_t mask);
222int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 209int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
223 struct extent_state **cached_state, gfp_t mask); 210 struct extent_state **cached_state, gfp_t mask);
224int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
225 gfp_t mask);
226int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 211int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
227 u64 *start_ret, u64 *end_ret, int bits); 212 u64 *start_ret, u64 *end_ret, int bits);
228struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 213struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -243,28 +228,17 @@ int extent_readpages(struct extent_io_tree *tree,
243 struct address_space *mapping, 228 struct address_space *mapping,
244 struct list_head *pages, unsigned nr_pages, 229 struct list_head *pages, unsigned nr_pages,
245 get_extent_t get_extent); 230 get_extent_t get_extent);
246int extent_prepare_write(struct extent_io_tree *tree,
247 struct inode *inode, struct page *page,
248 unsigned from, unsigned to, get_extent_t *get_extent);
249int extent_commit_write(struct extent_io_tree *tree,
250 struct inode *inode, struct page *page,
251 unsigned from, unsigned to);
252sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
253 get_extent_t *get_extent);
254int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 231int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
255 __u64 start, __u64 len, get_extent_t *get_extent); 232 __u64 start, __u64 len, get_extent_t *get_extent);
256int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
257int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 233int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
258int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 234int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
259void set_page_extent_mapped(struct page *page); 235void set_page_extent_mapped(struct page *page);
260 236
261struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 237struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
262 u64 start, unsigned long len, 238 u64 start, unsigned long len,
263 struct page *page0, 239 struct page *page0);
264 gfp_t mask);
265struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 240struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
266 u64 start, unsigned long len, 241 u64 start, unsigned long len);
267 gfp_t mask);
268void free_extent_buffer(struct extent_buffer *eb); 242void free_extent_buffer(struct extent_buffer *eb);
269int read_extent_buffer_pages(struct extent_io_tree *tree, 243int read_extent_buffer_pages(struct extent_io_tree *tree,
270 struct extent_buffer *eb, u64 start, int wait, 244 struct extent_buffer *eb, u64 start, int wait,
@@ -292,16 +266,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
292 unsigned long src_offset, unsigned long len); 266 unsigned long src_offset, unsigned long len);
293void memset_extent_buffer(struct extent_buffer *eb, char c, 267void memset_extent_buffer(struct extent_buffer *eb, char c,
294 unsigned long start, unsigned long len); 268 unsigned long start, unsigned long len);
295int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
296 struct extent_buffer *eb);
297int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
298int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); 269int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
299int clear_extent_buffer_dirty(struct extent_io_tree *tree, 270int clear_extent_buffer_dirty(struct extent_io_tree *tree,
300 struct extent_buffer *eb); 271 struct extent_buffer *eb);
301int set_extent_buffer_dirty(struct extent_io_tree *tree, 272int set_extent_buffer_dirty(struct extent_io_tree *tree,
302 struct extent_buffer *eb); 273 struct extent_buffer *eb);
303int test_extent_buffer_dirty(struct extent_io_tree *tree,
304 struct extent_buffer *eb);
305int set_extent_buffer_uptodate(struct extent_io_tree *tree, 274int set_extent_buffer_uptodate(struct extent_io_tree *tree,
306 struct extent_buffer *eb); 275 struct extent_buffer *eb);
307int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 276int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -319,7 +288,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
319 unsigned long *map_start, 288 unsigned long *map_start,
320 unsigned long *map_len, int km); 289 unsigned long *map_len, int km);
321void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); 290void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
322int release_extent_buffer_tail_pages(struct extent_buffer *eb);
323int extent_range_uptodate(struct extent_io_tree *tree, 291int extent_range_uptodate(struct extent_io_tree *tree,
324 u64 start, u64 end); 292 u64 start, u64 end);
325int extent_clear_unlock_delalloc(struct inode *inode, 293int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a24a3f2fa13e..2d0410344ea3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -28,12 +28,11 @@ void extent_map_exit(void)
28/** 28/**
29 * extent_map_tree_init - initialize extent map tree 29 * extent_map_tree_init - initialize extent map tree
30 * @tree: tree to initialize 30 * @tree: tree to initialize
31 * @mask: flags for memory allocations during tree operations
32 * 31 *
33 * Initialize the extent tree @tree. Should be called for each new inode 32 * Initialize the extent tree @tree. Should be called for each new inode
34 * or other user of the extent_map interface. 33 * or other user of the extent_map interface.
35 */ 34 */
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 35void extent_map_tree_init(struct extent_map_tree *tree)
37{ 36{
38 tree->map = RB_ROOT; 37 tree->map = RB_ROOT;
39 rwlock_init(&tree->lock); 38 rwlock_init(&tree->lock);
@@ -41,16 +40,15 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
41 40
42/** 41/**
43 * alloc_extent_map - allocate new extent map structure 42 * alloc_extent_map - allocate new extent map structure
44 * @mask: memory allocation flags
45 * 43 *
46 * Allocate a new extent_map structure. The new structure is 44 * Allocate a new extent_map structure. The new structure is
47 * returned with a reference count of one and needs to be 45 * returned with a reference count of one and needs to be
48 * freed using free_extent_map() 46 * freed using free_extent_map()
49 */ 47 */
50struct extent_map *alloc_extent_map(gfp_t mask) 48struct extent_map *alloc_extent_map(void)
51{ 49{
52 struct extent_map *em; 50 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 51 em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
54 if (!em) 52 if (!em)
55 return NULL; 53 return NULL;
56 em->in_tree = 0; 54 em->in_tree = 0;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 28b44dbd1e35..33a7890b1f40 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -49,14 +49,14 @@ static inline u64 extent_map_block_end(struct extent_map *em)
49 return em->block_start + em->block_len; 49 return em->block_start + em->block_len;
50} 50}
51 51
52void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); 52void extent_map_tree_init(struct extent_map_tree *tree);
53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
54 u64 start, u64 len); 54 u64 start, u64 len);
55int add_extent_mapping(struct extent_map_tree *tree, 55int add_extent_mapping(struct extent_map_tree *tree,
56 struct extent_map *em); 56 struct extent_map *em);
57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
58 58
59struct extent_map *alloc_extent_map(gfp_t mask); 59struct extent_map *alloc_extent_map(void);
60void free_extent_map(struct extent_map *em); 60void free_extent_map(struct extent_map *em);
61int __init extent_map_init(void); 61int __init extent_map_init(void);
62void extent_map_exit(void); 62void extent_map_exit(void);
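
With the gfp_t parameters removed, allocation policy is decided once inside the extent_map/extent_io helpers (GFP_NOFS here, GFP_ATOMIC where a spinlock is held) rather than threaded through every caller. Call sites shrink accordingly; a sketch of the new signatures in use (the inode-setup context is illustrative):

    extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
    extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping);

    em = alloc_extent_map();        /* GFP_NOFS internally */
    if (!em)
            return -ENOMEM;
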
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a6a9d4e8b491..90d4ee52cd45 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -193,7 +193,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
193 u32 item_size; 193 u32 item_size;
194 194
195 if (item) 195 if (item)
196 btrfs_release_path(root, path); 196 btrfs_release_path(path);
197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, 197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
198 path, disk_bytenr, 0); 198 path, disk_bytenr, 0);
199 if (IS_ERR(item)) { 199 if (IS_ERR(item)) {
@@ -208,12 +208,13 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
208 EXTENT_NODATASUM, GFP_NOFS); 208 EXTENT_NODATASUM, GFP_NOFS);
209 } else { 209 } else {
210 printk(KERN_INFO "btrfs no csum found " 210 printk(KERN_INFO "btrfs no csum found "
211 "for inode %lu start %llu\n", 211 "for inode %llu start %llu\n",
212 inode->i_ino, 212 (unsigned long long)
213 btrfs_ino(inode),
213 (unsigned long long)offset); 214 (unsigned long long)offset);
214 } 215 }
215 item = NULL; 216 item = NULL;
216 btrfs_release_path(root, path); 217 btrfs_release_path(path);
217 goto found; 218 goto found;
218 } 219 }
219 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 220 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
@@ -266,7 +267,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
266} 267}
267 268
268int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 269int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
269 struct list_head *list) 270 struct list_head *list, int search_commit)
270{ 271{
271 struct btrfs_key key; 272 struct btrfs_key key;
272 struct btrfs_path *path; 273 struct btrfs_path *path;
@@ -283,6 +284,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
283 path = btrfs_alloc_path(); 284 path = btrfs_alloc_path();
284 BUG_ON(!path); 285 BUG_ON(!path);
285 286
287 if (search_commit) {
288 path->skip_locking = 1;
289 path->reada = 2;
290 path->search_commit_root = 1;
291 }
292
286 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 293 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
287 key.offset = start; 294 key.offset = start;
288 key.type = BTRFS_EXTENT_CSUM_KEY; 295 key.type = BTRFS_EXTENT_CSUM_KEY;
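
The new search_commit flag readies btrfs_lookup_csums_range() for the scrub code added elsewhere in this series, which scans the commit root without taking tree locks. Existing callers pass 0 and behave exactly as before; the two styles side by side (the scrub-side caller is inferred from the flag, not shown in this hunk):

    /* ordinary lookup: locked search of the current csum root */
    ret = btrfs_lookup_csums_range(root, start, end, &list, 0);

    /* scrub-style lookup: lockless, readahead-friendly commit-root scan */
    ret = btrfs_lookup_csums_range(root, start, end, &list, 1);
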
@@ -495,7 +502,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
495 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 502 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
496 new_size *= csum_size; 503 new_size *= csum_size;
497 ret = btrfs_truncate_item(trans, root, path, new_size, 1); 504 ret = btrfs_truncate_item(trans, root, path, new_size, 1);
498 BUG_ON(ret);
499 } else if (key->offset >= bytenr && csum_end > end_byte && 505 } else if (key->offset >= bytenr && csum_end > end_byte &&
500 end_byte > key->offset) { 506 end_byte > key->offset) {
501 /* 507 /*
@@ -508,7 +514,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
508 new_size *= csum_size; 514 new_size *= csum_size;
509 515
510 ret = btrfs_truncate_item(trans, root, path, new_size, 0); 516 ret = btrfs_truncate_item(trans, root, path, new_size, 0);
511 BUG_ON(ret);
512 517
513 key->offset = end_byte; 518 key->offset = end_byte;
514 ret = btrfs_set_item_key_safe(trans, root, path, key); 519 ret = btrfs_set_item_key_safe(trans, root, path, key);
@@ -551,10 +556,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
551 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 556 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
552 if (ret > 0) { 557 if (ret > 0) {
553 if (path->slots[0] == 0) 558 if (path->slots[0] == 0)
554 goto out; 559 break;
555 path->slots[0]--; 560 path->slots[0]--;
556 } else if (ret < 0) { 561 } else if (ret < 0) {
557 goto out; 562 break;
558 } 563 }
559 564
560 leaf = path->nodes[0]; 565 leaf = path->nodes[0];
@@ -579,7 +584,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
579 /* delete the entire item, it is inside our range */ 584 /* delete the entire item, it is inside our range */
580 if (key.offset >= bytenr && csum_end <= end_byte) { 585 if (key.offset >= bytenr && csum_end <= end_byte) {
581 ret = btrfs_del_item(trans, root, path); 586 ret = btrfs_del_item(trans, root, path);
582 BUG_ON(ret); 587 if (ret)
588 goto out;
583 if (key.offset == bytenr) 589 if (key.offset == bytenr)
584 break; 590 break;
585 } else if (key.offset < bytenr && csum_end > end_byte) { 591 } else if (key.offset < bytenr && csum_end > end_byte) {
@@ -631,11 +637,12 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
631 if (key.offset < bytenr) 637 if (key.offset < bytenr)
632 break; 638 break;
633 } 639 }
634 btrfs_release_path(root, path); 640 btrfs_release_path(path);
635 } 641 }
642 ret = 0;
636out: 643out:
637 btrfs_free_path(path); 644 btrfs_free_path(path);
638 return 0; 645 return ret;
639} 646}
640 647
641int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 648int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
@@ -722,7 +729,7 @@ again:
722 * at this point, we know the tree has an item, but it isn't big 729 * at this point, we know the tree has an item, but it isn't big
723 * enough yet to put our csum in. Grow it 730 * enough yet to put our csum in. Grow it
724 */ 731 */
725 btrfs_release_path(root, path); 732 btrfs_release_path(path);
726 ret = btrfs_search_slot(trans, root, &file_key, path, 733 ret = btrfs_search_slot(trans, root, &file_key, path,
727 csum_size, 1); 734 csum_size, 1);
728 if (ret < 0) 735 if (ret < 0)
@@ -761,12 +768,11 @@ again:
761 goto insert; 768 goto insert;
762 769
763 ret = btrfs_extend_item(trans, root, path, diff); 770 ret = btrfs_extend_item(trans, root, path, diff);
764 BUG_ON(ret);
765 goto csum; 771 goto csum;
766 } 772 }
767 773
768insert: 774insert:
769 btrfs_release_path(root, path); 775 btrfs_release_path(path);
770 csum_offset = 0; 776 csum_offset = 0;
771 if (found_next) { 777 if (found_next) {
772 u64 tmp = total_bytes + root->sectorsize; 778 u64 tmp = total_bytes + root->sectorsize;
@@ -850,7 +856,7 @@ next_sector:
850 } 856 }
851 btrfs_mark_buffer_dirty(path->nodes[0]); 857 btrfs_mark_buffer_dirty(path->nodes[0]);
852 if (total_bytes < sums->len) { 858 if (total_bytes < sums->len) {
853 btrfs_release_path(root, path); 859 btrfs_release_path(path);
854 cond_resched(); 860 cond_resched();
855 goto again; 861 goto again;
856 } 862 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index cd5e82e500cf..e3a1b0c2394c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,263 @@
40#include "locking.h" 40#include "locking.h"
41#include "compat.h" 41#include "compat.h"
42 42
43/*
44 * when auto defrag is enabled we
45 * queue up these defrag structs to remember which
46 * inodes need defragging passes
47 */
48struct inode_defrag {
49 struct rb_node rb_node;
50 /* objectid */
51 u64 ino;
52 /*
53 * transid where the defrag was added, we search for
54 * extents newer than this
55 */
56 u64 transid;
57
58 /* root objectid */
59 u64 root;
60
61 /* last offset we were able to defrag */
62 u64 last_offset;
63
64 /* if we've wrapped around back to zero once already */
65 int cycled;
66};
67
 68/* insert a record for an inode into the defrag tree. The lock
69 * must be held already
70 *
71 * If you're inserting a record for an older transid than an
72 * existing record, the transid already in the tree is lowered
73 *
 74 * If an existing record is found, the defrag item you
75 * pass in is freed
76 */
77static int __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag)
79{
80 struct btrfs_root *root = BTRFS_I(inode)->root;
81 struct inode_defrag *entry;
82 struct rb_node **p;
83 struct rb_node *parent = NULL;
84
85 p = &root->fs_info->defrag_inodes.rb_node;
86 while (*p) {
87 parent = *p;
88 entry = rb_entry(parent, struct inode_defrag, rb_node);
89
90 if (defrag->ino < entry->ino)
91 p = &parent->rb_left;
92 else if (defrag->ino > entry->ino)
93 p = &parent->rb_right;
94 else {
95 /* if we're reinserting an entry for
96 * an old defrag run, make sure to
97 * lower the transid of our existing record
98 */
99 if (defrag->transid < entry->transid)
100 entry->transid = defrag->transid;
101 if (defrag->last_offset > entry->last_offset)
102 entry->last_offset = defrag->last_offset;
103 goto exists;
104 }
105 }
106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0;
110
111exists:
112 kfree(defrag);
113 return 0;
114
115}
116
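
__btrfs_add_inode_defrag() above is a textbook rb_link_node()/rb_insert_color() insertion keyed by inode number, with one twist: a duplicate hit merges the useful fields (lowest transid, furthest last_offset) and frees the caller's record instead of failing. Stripped to the generic rbtree idiom (same logic as the function, comments added):

    struct inode_defrag *entry;
    struct rb_node **p = &root->fs_info->defrag_inodes.rb_node;
    struct rb_node *parent = NULL;

    while (*p) {                    /* walk down to the insertion point */
            parent = *p;
            entry = rb_entry(parent, struct inode_defrag, rb_node);

            if (defrag->ino < entry->ino)
                    p = &parent->rb_left;
            else if (defrag->ino > entry->ino)
                    p = &parent->rb_right;
            else
                    goto exists;    /* duplicate: merge fields, kfree(defrag) */
    }
    rb_link_node(&defrag->rb_node, parent, p);      /* splice in as a leaf */
    rb_insert_color(&defrag->rb_node,
                    &root->fs_info->defrag_inodes); /* rebalance */
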
117/*
118 * insert a defrag record for this inode if auto defrag is
119 * enabled
120 */
121int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
122 struct inode *inode)
123{
124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid;
128
129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0;
131
132 if (root->fs_info->closing)
133 return 0;
134
135 if (BTRFS_I(inode)->in_defrag)
136 return 0;
137
138 if (trans)
139 transid = trans->transid;
140 else
141 transid = BTRFS_I(inode)->root->last_trans;
142
143 defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
144 if (!defrag)
145 return -ENOMEM;
146
147 defrag->ino = inode->i_ino;
148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid;
150
151 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret;
156}
157
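
btrfs_add_inode_defrag() uses a double-checked test on in_defrag: the unlocked read filters the common already-queued case without bouncing the spinlock, and the re-check under defrag_inodes_lock makes the insertion race-free. The shape of the idiom (note that a loser of the race leaks its allocation here; the sketch mirrors the patch rather than fixing that):

    if (BTRFS_I(inode)->in_defrag)          /* cheap unlocked fast path */
            return 0;

    defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
    if (!defrag)
            return -ENOMEM;

    spin_lock(&root->fs_info->defrag_inodes_lock);
    if (!BTRFS_I(inode)->in_defrag)         /* re-check under the lock */
            ret = __btrfs_add_inode_defrag(inode, defrag);
    spin_unlock(&root->fs_info->defrag_inodes_lock);
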
158/*
159 * must be called with the defrag_inodes lock held
160 */
161struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
162 struct rb_node **next)
163{
164 struct inode_defrag *entry = NULL;
165 struct rb_node *p;
166 struct rb_node *parent = NULL;
167
168 p = info->defrag_inodes.rb_node;
169 while (p) {
170 parent = p;
171 entry = rb_entry(parent, struct inode_defrag, rb_node);
172
173 if (ino < entry->ino)
174 p = parent->rb_left;
175 else if (ino > entry->ino)
176 p = parent->rb_right;
177 else
178 return entry;
179 }
180
181 if (next) {
182 while (parent && ino > entry->ino) {
183 parent = rb_next(parent);
184 entry = rb_entry(parent, struct inode_defrag, rb_node);
185 }
186 *next = parent;
187 }
188 return NULL;
189}
190
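
btrfs_find_defrag_inode() doubles as an exact lookup and a successor search: on a miss it walks forward from the last node visited and returns the next record through *next, letting the defrag loop resume its scan from an arbitrary inode number. Its intended use, as seen in btrfs_run_defrag_inodes() below:

    struct rb_node *n = NULL;
    struct inode_defrag *defrag;

    defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
    if (!defrag && n)       /* no exact match: fall back to the successor */
            defrag = rb_entry(n, struct inode_defrag, rb_node);
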
191/*
192 * run through the list of inodes in the FS that need
193 * defragging
194 */
195int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
196{
197 struct inode_defrag *defrag;
198 struct btrfs_root *inode_root;
199 struct inode *inode;
200 struct rb_node *n;
201 struct btrfs_key key;
202 struct btrfs_ioctl_defrag_range_args range;
203 u64 first_ino = 0;
204 int num_defrag;
205 int defrag_batch = 1024;
206
207 memset(&range, 0, sizeof(range));
208 range.len = (u64)-1;
209
210 atomic_inc(&fs_info->defrag_running);
211 spin_lock(&fs_info->defrag_inodes_lock);
 212 while (1) {
213 n = NULL;
214
215 /* find an inode to defrag */
216 defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
217 if (!defrag) {
218 if (n)
219 defrag = rb_entry(n, struct inode_defrag, rb_node);
220 else if (first_ino) {
221 first_ino = 0;
222 continue;
223 } else {
224 break;
225 }
226 }
227
228 /* remove it from the rbtree */
229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231
232 if (fs_info->closing)
233 goto next_free;
234
235 spin_unlock(&fs_info->defrag_inodes_lock);
236
237 /* get the inode */
238 key.objectid = defrag->root;
239 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
240 key.offset = (u64)-1;
241 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
242 if (IS_ERR(inode_root))
243 goto next;
244
245 key.objectid = defrag->ino;
246 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
247 key.offset = 0;
248
249 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
250 if (IS_ERR(inode))
251 goto next;
252
253 /* do a chunk of defrag */
254 BTRFS_I(inode)->in_defrag = 0;
255 range.start = defrag->last_offset;
256 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
257 defrag_batch);
258 /*
259 * if we filled the whole defrag batch, there
260 * must be more work to do. Queue this defrag
261 * again
262 */
263 if (num_defrag == defrag_batch) {
264 defrag->last_offset = range.start;
265 __btrfs_add_inode_defrag(inode, defrag);
266 /*
267 * we don't want to kfree defrag, we added it back to
268 * the rbtree
269 */
270 defrag = NULL;
271 } else if (defrag->last_offset && !defrag->cycled) {
272 /*
273 * we didn't fill our defrag batch, but
274 * we didn't start at zero. Make sure we loop
275 * around to the start of the file.
276 */
277 defrag->last_offset = 0;
278 defrag->cycled = 1;
279 __btrfs_add_inode_defrag(inode, defrag);
280 defrag = NULL;
281 }
282
283 iput(inode);
284next:
285 spin_lock(&fs_info->defrag_inodes_lock);
286next_free:
287 kfree(defrag);
288 }
289 spin_unlock(&fs_info->defrag_inodes_lock);
290
291 atomic_dec(&fs_info->defrag_running);
292
293 /*
294 * during unmount, we use the transaction_wait queue to
295 * wait for the defragger to stop
296 */
297 wake_up(&fs_info->transaction_wait);
298 return 0;
299}
43 300
44/* simple helper to fault in pages and copy. This should go away 301/* simple helper to fault in pages and copy. This should go away
45 * and be replaced with calls into generic code. 302 * and be replaced with calls into generic code.
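
btrfs_run_defrag_inodes() drains the tree in ascending inode order, dropping defrag_inodes_lock around the actual defrag I/O and resuming the walk at defrag->ino + 1. The requeue logic at the bottom of its loop carries the policy: a full batch means more extents remain, while a short batch that began mid-file earns exactly one wrap back to offset zero via the cycled flag. Condensed from the function above (same fields and calls):

    if (num_defrag == defrag_batch) {
            /* batch filled: requeue and resume where we stopped */
            defrag->last_offset = range.start;
            __btrfs_add_inode_defrag(inode, defrag);
            defrag = NULL;          /* record is owned by the tree again */
    } else if (defrag->last_offset && !defrag->cycled) {
            /* short batch, but we started mid-file: wrap to 0 once */
            defrag->last_offset = 0;
            defrag->cycled = 1;
            __btrfs_add_inode_defrag(inode, defrag);
            defrag = NULL;
    }
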
@@ -191,9 +448,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
191 } 448 }
192 while (1) { 449 while (1) {
193 if (!split) 450 if (!split)
194 split = alloc_extent_map(GFP_NOFS); 451 split = alloc_extent_map();
195 if (!split2) 452 if (!split2)
196 split2 = alloc_extent_map(GFP_NOFS); 453 split2 = alloc_extent_map();
197 BUG_ON(!split || !split2); 454 BUG_ON(!split || !split2);
198 455
199 write_lock(&em_tree->lock); 456 write_lock(&em_tree->lock);
@@ -298,6 +555,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
298 struct btrfs_path *path; 555 struct btrfs_path *path;
299 struct btrfs_key key; 556 struct btrfs_key key;
300 struct btrfs_key new_key; 557 struct btrfs_key new_key;
558 u64 ino = btrfs_ino(inode);
301 u64 search_start = start; 559 u64 search_start = start;
302 u64 disk_bytenr = 0; 560 u64 disk_bytenr = 0;
303 u64 num_bytes = 0; 561 u64 num_bytes = 0;
@@ -318,14 +576,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
318 576
319 while (1) { 577 while (1) {
320 recow = 0; 578 recow = 0;
321 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 579 ret = btrfs_lookup_file_extent(trans, root, path, ino,
322 search_start, -1); 580 search_start, -1);
323 if (ret < 0) 581 if (ret < 0)
324 break; 582 break;
325 if (ret > 0 && path->slots[0] > 0 && search_start == start) { 583 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
326 leaf = path->nodes[0]; 584 leaf = path->nodes[0];
327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 585 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
328 if (key.objectid == inode->i_ino && 586 if (key.objectid == ino &&
329 key.type == BTRFS_EXTENT_DATA_KEY) 587 key.type == BTRFS_EXTENT_DATA_KEY)
330 path->slots[0]--; 588 path->slots[0]--;
331 } 589 }
@@ -346,7 +604,7 @@ next_slot:
346 } 604 }
347 605
348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 606 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
349 if (key.objectid > inode->i_ino || 607 if (key.objectid > ino ||
350 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 608 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
351 break; 609 break;
352 610
@@ -376,7 +634,7 @@ next_slot:
376 634
377 search_start = max(key.offset, start); 635 search_start = max(key.offset, start);
378 if (recow) { 636 if (recow) {
379 btrfs_release_path(root, path); 637 btrfs_release_path(path);
380 continue; 638 continue;
381 } 639 }
382 640
@@ -393,7 +651,7 @@ next_slot:
393 ret = btrfs_duplicate_item(trans, root, path, 651 ret = btrfs_duplicate_item(trans, root, path,
394 &new_key); 652 &new_key);
395 if (ret == -EAGAIN) { 653 if (ret == -EAGAIN) {
396 btrfs_release_path(root, path); 654 btrfs_release_path(path);
397 continue; 655 continue;
398 } 656 }
399 if (ret < 0) 657 if (ret < 0)
@@ -516,7 +774,7 @@ next_slot:
516 del_nr = 0; 774 del_nr = 0;
517 del_slot = 0; 775 del_slot = 0;
518 776
519 btrfs_release_path(root, path); 777 btrfs_release_path(path);
520 continue; 778 continue;
521 } 779 }
522 780
@@ -592,6 +850,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
592 int del_slot = 0; 850 int del_slot = 0;
593 int recow; 851 int recow;
594 int ret; 852 int ret;
853 u64 ino = btrfs_ino(inode);
595 854
596 btrfs_drop_extent_cache(inode, start, end - 1, 0); 855 btrfs_drop_extent_cache(inode, start, end - 1, 0);
597 856
@@ -600,7 +859,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
600again: 859again:
601 recow = 0; 860 recow = 0;
602 split = start; 861 split = start;
603 key.objectid = inode->i_ino; 862 key.objectid = ino;
604 key.type = BTRFS_EXTENT_DATA_KEY; 863 key.type = BTRFS_EXTENT_DATA_KEY;
605 key.offset = split; 864 key.offset = split;
606 865
@@ -612,8 +871,7 @@ again:
612 871
613 leaf = path->nodes[0]; 872 leaf = path->nodes[0];
614 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 873 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
615 BUG_ON(key.objectid != inode->i_ino || 874 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
616 key.type != BTRFS_EXTENT_DATA_KEY);
617 fi = btrfs_item_ptr(leaf, path->slots[0], 875 fi = btrfs_item_ptr(leaf, path->slots[0],
618 struct btrfs_file_extent_item); 876 struct btrfs_file_extent_item);
619 BUG_ON(btrfs_file_extent_type(leaf, fi) != 877 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +888,7 @@ again:
630 other_start = 0; 888 other_start = 0;
631 other_end = start; 889 other_end = start;
632 if (extent_mergeable(leaf, path->slots[0] - 1, 890 if (extent_mergeable(leaf, path->slots[0] - 1,
633 inode->i_ino, bytenr, orig_offset, 891 ino, bytenr, orig_offset,
634 &other_start, &other_end)) { 892 &other_start, &other_end)) {
635 new_key.offset = end; 893 new_key.offset = end;
636 btrfs_set_item_key_safe(trans, root, path, &new_key); 894 btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +911,7 @@ again:
653 other_start = end; 911 other_start = end;
654 other_end = 0; 912 other_end = 0;
655 if (extent_mergeable(leaf, path->slots[0] + 1, 913 if (extent_mergeable(leaf, path->slots[0] + 1,
656 inode->i_ino, bytenr, orig_offset, 914 ino, bytenr, orig_offset,
657 &other_start, &other_end)) { 915 &other_start, &other_end)) {
658 fi = btrfs_item_ptr(leaf, path->slots[0], 916 fi = btrfs_item_ptr(leaf, path->slots[0],
659 struct btrfs_file_extent_item); 917 struct btrfs_file_extent_item);
@@ -681,7 +939,7 @@ again:
681 new_key.offset = split; 939 new_key.offset = split;
682 ret = btrfs_duplicate_item(trans, root, path, &new_key); 940 ret = btrfs_duplicate_item(trans, root, path, &new_key);
683 if (ret == -EAGAIN) { 941 if (ret == -EAGAIN) {
684 btrfs_release_path(root, path); 942 btrfs_release_path(path);
685 goto again; 943 goto again;
686 } 944 }
687 BUG_ON(ret < 0); 945 BUG_ON(ret < 0);
@@ -702,7 +960,7 @@ again:
702 960
703 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 961 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
704 root->root_key.objectid, 962 root->root_key.objectid,
705 inode->i_ino, orig_offset); 963 ino, orig_offset);
706 BUG_ON(ret); 964 BUG_ON(ret);
707 965
708 if (split == start) { 966 if (split == start) {
@@ -718,10 +976,10 @@ again:
718 other_start = end; 976 other_start = end;
719 other_end = 0; 977 other_end = 0;
720 if (extent_mergeable(leaf, path->slots[0] + 1, 978 if (extent_mergeable(leaf, path->slots[0] + 1,
721 inode->i_ino, bytenr, orig_offset, 979 ino, bytenr, orig_offset,
722 &other_start, &other_end)) { 980 &other_start, &other_end)) {
723 if (recow) { 981 if (recow) {
724 btrfs_release_path(root, path); 982 btrfs_release_path(path);
725 goto again; 983 goto again;
726 } 984 }
727 extent_end = other_end; 985 extent_end = other_end;
@@ -729,16 +987,16 @@ again:
729 del_nr++; 987 del_nr++;
730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 988 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
731 0, root->root_key.objectid, 989 0, root->root_key.objectid,
732 inode->i_ino, orig_offset); 990 ino, orig_offset);
733 BUG_ON(ret); 991 BUG_ON(ret);
734 } 992 }
735 other_start = 0; 993 other_start = 0;
736 other_end = start; 994 other_end = start;
737 if (extent_mergeable(leaf, path->slots[0] - 1, 995 if (extent_mergeable(leaf, path->slots[0] - 1,
738 inode->i_ino, bytenr, orig_offset, 996 ino, bytenr, orig_offset,
739 &other_start, &other_end)) { 997 &other_start, &other_end)) {
740 if (recow) { 998 if (recow) {
741 btrfs_release_path(root, path); 999 btrfs_release_path(path);
742 goto again; 1000 goto again;
743 } 1001 }
744 key.offset = other_start; 1002 key.offset = other_start;
@@ -746,7 +1004,7 @@ again:
746 del_nr++; 1004 del_nr++;
747 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1005 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
748 0, root->root_key.objectid, 1006 0, root->root_key.objectid,
749 inode->i_ino, orig_offset); 1007 ino, orig_offset);
750 BUG_ON(ret); 1008 BUG_ON(ret);
751 } 1009 }
752 if (del_nr == 0) { 1010 if (del_nr == 0) {
@@ -1373,7 +1631,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1373 while (1) { 1631 while (1) {
1374 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1632 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1375 alloc_end - cur_offset, 0); 1633 alloc_end - cur_offset, 0);
1376 BUG_ON(IS_ERR(em) || !em); 1634 BUG_ON(IS_ERR_OR_NULL(em));
1377 last_byte = min(extent_map_end(em), alloc_end); 1635 last_byte = min(extent_map_end(em), alloc_end);
1378 last_byte = (last_byte + mask) & ~mask; 1636 last_byte = (last_byte + mask) & ~mask;
1379 if (em->block_start == EXTENT_MAP_HOLE || 1637 if (em->block_start == EXTENT_MAP_HOLE ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d634a7e42207..dd38d4c3a599 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,18 +25,17 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "inode-map.h"
28 29
29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 30#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 31#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
31 32
32static void recalculate_thresholds(struct btrfs_block_group_cache 33static int link_free_space(struct btrfs_free_space_ctl *ctl,
33 *block_group);
34static int link_free_space(struct btrfs_block_group_cache *block_group,
35 struct btrfs_free_space *info); 34 struct btrfs_free_space *info);
36 35
37struct inode *lookup_free_space_inode(struct btrfs_root *root, 36static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_block_group_cache 37 struct btrfs_path *path,
39 *block_group, struct btrfs_path *path) 38 u64 offset)
40{ 39{
41 struct btrfs_key key; 40 struct btrfs_key key;
42 struct btrfs_key location; 41 struct btrfs_key location;
@@ -46,22 +45,15 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
46 struct inode *inode = NULL; 45 struct inode *inode = NULL;
47 int ret; 46 int ret;
48 47
49 spin_lock(&block_group->lock);
50 if (block_group->inode)
51 inode = igrab(block_group->inode);
52 spin_unlock(&block_group->lock);
53 if (inode)
54 return inode;
55
56 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 48 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
57 key.offset = block_group->key.objectid; 49 key.offset = offset;
58 key.type = 0; 50 key.type = 0;
59 51
60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 52 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
61 if (ret < 0) 53 if (ret < 0)
62 return ERR_PTR(ret); 54 return ERR_PTR(ret);
63 if (ret > 0) { 55 if (ret > 0) {
64 btrfs_release_path(root, path); 56 btrfs_release_path(path);
65 return ERR_PTR(-ENOENT); 57 return ERR_PTR(-ENOENT);
66 } 58 }
67 59
@@ -70,7 +62,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
70 struct btrfs_free_space_header); 62 struct btrfs_free_space_header);
71 btrfs_free_space_key(leaf, header, &disk_key); 63 btrfs_free_space_key(leaf, header, &disk_key);
72 btrfs_disk_key_to_cpu(&location, &disk_key); 64 btrfs_disk_key_to_cpu(&location, &disk_key);
73 btrfs_release_path(root, path); 65 btrfs_release_path(path);
74 66
75 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 67 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
76 if (!inode) 68 if (!inode)
@@ -84,6 +76,27 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 76
85 inode->i_mapping->flags &= ~__GFP_FS; 77 inode->i_mapping->flags &= ~__GFP_FS;
86 78
79 return inode;
80}
81
82struct inode *lookup_free_space_inode(struct btrfs_root *root,
83 struct btrfs_block_group_cache
84 *block_group, struct btrfs_path *path)
85{
86 struct inode *inode = NULL;
87
88 spin_lock(&block_group->lock);
89 if (block_group->inode)
90 inode = igrab(block_group->inode);
91 spin_unlock(&block_group->lock);
92 if (inode)
93 return inode;
94
95 inode = __lookup_free_space_inode(root, path,
96 block_group->key.objectid);
97 if (IS_ERR(inode))
98 return inode;
99
87 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
88 if (!root->fs_info->closing) { 101 if (!root->fs_info->closing) {
89 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
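The split above separates the raw tree search (__lookup_free_space_inode) from a wrapper that first tries the inode handle cached on the block group under its spinlock, and publishes the handle after a slow lookup unless the filesystem is closing. A rough userspace model of that cache-then-publish shape (hypothetical names; a pthread mutex stands in for the spinlock and refcounting for igrab(); link with -lpthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct handle { int refs; long id; };

static struct handle *cached;           /* plays block_group->inode */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Take an extra reference, as igrab() does; NULL stays NULL. */
static struct handle *grab(struct handle *h)
{
        if (h)
                h->refs++;
        return h;
}

/* Stands in for __lookup_free_space_inode()'s btree search. */
static struct handle *slow_lookup(long id)
{
        struct handle *h = calloc(1, sizeof(*h));

        h->refs = 1;
        h->id = id;
        return h;
}

static struct handle *lookup(long id)
{
        struct handle *h;

        pthread_mutex_lock(&lock);      /* fast path: reuse cached handle */
        h = grab(cached);
        pthread_mutex_unlock(&lock);
        if (h)
                return h;

        h = slow_lookup(id);            /* slow path: do the real search */

        pthread_mutex_lock(&lock);      /* publish for later callers */
        if (!cached)                    /* kernel also skips this on unmount */
                cached = grab(h);
        pthread_mutex_unlock(&lock);
        return h;
}

int main(void)
{
        struct handle *a = lookup(1), *b = lookup(1);

        printf("same handle: %s\n", a == b ? "yes" : "no");
        return 0;
}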
@@ -94,24 +107,18 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
94 return inode; 107 return inode;
95} 108}
96 109
97int create_free_space_inode(struct btrfs_root *root, 110int __create_free_space_inode(struct btrfs_root *root,
98 struct btrfs_trans_handle *trans, 111 struct btrfs_trans_handle *trans,
99 struct btrfs_block_group_cache *block_group, 112 struct btrfs_path *path, u64 ino, u64 offset)
100 struct btrfs_path *path)
101{ 113{
102 struct btrfs_key key; 114 struct btrfs_key key;
103 struct btrfs_disk_key disk_key; 115 struct btrfs_disk_key disk_key;
104 struct btrfs_free_space_header *header; 116 struct btrfs_free_space_header *header;
105 struct btrfs_inode_item *inode_item; 117 struct btrfs_inode_item *inode_item;
106 struct extent_buffer *leaf; 118 struct extent_buffer *leaf;
107 u64 objectid;
108 int ret; 119 int ret;
109 120
110 ret = btrfs_find_free_objectid(trans, root, 0, &objectid); 121 ret = btrfs_insert_empty_inode(trans, root, path, ino);
111 if (ret < 0)
112 return ret;
113
114 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
115 if (ret) 122 if (ret)
116 return ret; 123 return ret;
117 124
@@ -131,19 +138,18 @@ int create_free_space_inode(struct btrfs_root *root,
131 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 138 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
132 btrfs_set_inode_nlink(leaf, inode_item, 1); 139 btrfs_set_inode_nlink(leaf, inode_item, 1);
133 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 140 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
134 btrfs_set_inode_block_group(leaf, inode_item, 141 btrfs_set_inode_block_group(leaf, inode_item, offset);
135 block_group->key.objectid);
136 btrfs_mark_buffer_dirty(leaf); 142 btrfs_mark_buffer_dirty(leaf);
137 btrfs_release_path(root, path); 143 btrfs_release_path(path);
138 144
139 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 145 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
140 key.offset = block_group->key.objectid; 146 key.offset = offset;
141 key.type = 0; 147 key.type = 0;
142 148
143 ret = btrfs_insert_empty_item(trans, root, path, &key, 149 ret = btrfs_insert_empty_item(trans, root, path, &key,
144 sizeof(struct btrfs_free_space_header)); 150 sizeof(struct btrfs_free_space_header));
145 if (ret < 0) { 151 if (ret < 0) {
146 btrfs_release_path(root, path); 152 btrfs_release_path(path);
147 return ret; 153 return ret;
148 } 154 }
149 leaf = path->nodes[0]; 155 leaf = path->nodes[0];
@@ -152,11 +158,27 @@ int create_free_space_inode(struct btrfs_root *root,
152 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 158 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
153 btrfs_set_free_space_key(leaf, header, &disk_key); 159 btrfs_set_free_space_key(leaf, header, &disk_key);
154 btrfs_mark_buffer_dirty(leaf); 160 btrfs_mark_buffer_dirty(leaf);
155 btrfs_release_path(root, path); 161 btrfs_release_path(path);
156 162
157 return 0; 163 return 0;
158} 164}
159 165
166int create_free_space_inode(struct btrfs_root *root,
167 struct btrfs_trans_handle *trans,
168 struct btrfs_block_group_cache *block_group,
169 struct btrfs_path *path)
170{
171 int ret;
172 u64 ino;
173
174 ret = btrfs_find_free_objectid(root, &ino);
175 if (ret < 0)
176 return ret;
177
178 return __create_free_space_inode(root, trans, path, ino,
179 block_group->key.objectid);
180}
181
160int btrfs_truncate_free_space_cache(struct btrfs_root *root, 182int btrfs_truncate_free_space_cache(struct btrfs_root *root,
161 struct btrfs_trans_handle *trans, 183 struct btrfs_trans_handle *trans,
162 struct btrfs_path *path, 184 struct btrfs_path *path,
@@ -187,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
187 return ret; 209 return ret;
188 } 210 }
189 211
190 return btrfs_update_inode(trans, root, inode); 212 ret = btrfs_update_inode(trans, root, inode);
213 return ret;
191} 214}
192 215
193static int readahead_cache(struct inode *inode) 216static int readahead_cache(struct inode *inode)
@@ -209,15 +232,13 @@ static int readahead_cache(struct inode *inode)
209 return 0; 232 return 0;
210} 233}
211 234
212int load_free_space_cache(struct btrfs_fs_info *fs_info, 235int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
213 struct btrfs_block_group_cache *block_group) 236 struct btrfs_free_space_ctl *ctl,
237 struct btrfs_path *path, u64 offset)
214{ 238{
215 struct btrfs_root *root = fs_info->tree_root;
216 struct inode *inode;
217 struct btrfs_free_space_header *header; 239 struct btrfs_free_space_header *header;
218 struct extent_buffer *leaf; 240 struct extent_buffer *leaf;
219 struct page *page; 241 struct page *page;
220 struct btrfs_path *path;
221 u32 *checksums = NULL, *crc; 242 u32 *checksums = NULL, *crc;
222 char *disk_crcs = NULL; 243 char *disk_crcs = NULL;
223 struct btrfs_key key; 244 struct btrfs_key key;
@@ -225,76 +246,47 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
225 u64 num_entries; 246 u64 num_entries;
226 u64 num_bitmaps; 247 u64 num_bitmaps;
227 u64 generation; 248 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
229 u32 cur_crc = ~(u32)0; 249 u32 cur_crc = ~(u32)0;
230 pgoff_t index = 0; 250 pgoff_t index = 0;
231 unsigned long first_page_offset; 251 unsigned long first_page_offset;
232 int num_checksums; 252 int num_checksums;
233 int ret = 0; 253 int ret = 0, ret2;
234
235 /*
236 * If we're unmounting then just return, since this does a search on the
237 * normal root and not the commit root and we could deadlock.
238 */
239 smp_mb();
240 if (fs_info->closing)
241 return 0;
242
243 /*
244 * If this block group has been marked to be cleared for one reason or
245 * another then we can't trust the on disk cache, so just return.
246 */
247 spin_lock(&block_group->lock);
248 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
249 spin_unlock(&block_group->lock);
250 return 0;
251 }
252 spin_unlock(&block_group->lock);
253 254
254 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
255 256
256 path = btrfs_alloc_path();
257 if (!path)
258 return 0;
259
260 inode = lookup_free_space_inode(root, block_group, path);
261 if (IS_ERR(inode)) {
262 btrfs_free_path(path);
263 return 0;
264 }
265
266 /* Nothing in the space cache, goodbye */ 257 /* Nothing in the space cache, goodbye */
267 if (!i_size_read(inode)) { 258 if (!i_size_read(inode))
268 btrfs_free_path(path);
269 goto out; 259 goto out;
270 }
271 260
272 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 261 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
273 key.offset = block_group->key.objectid; 262 key.offset = offset;
274 key.type = 0; 263 key.type = 0;
275 264
276 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 265 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
277 if (ret) { 266 if (ret < 0)
278 btrfs_free_path(path); 267 goto out;
268 else if (ret > 0) {
269 btrfs_release_path(path);
270 ret = 0;
279 goto out; 271 goto out;
280 } 272 }
281 273
274 ret = -1;
275
282 leaf = path->nodes[0]; 276 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0], 277 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header); 278 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header); 279 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 280 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header); 281 generation = btrfs_free_space_generation(leaf, header);
288 btrfs_free_path(path); 282 btrfs_release_path(path);
289 283
290 if (BTRFS_I(inode)->generation != generation) { 284 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 285 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292 " not match free space cache generation (%llu) for " 286 " not match free space cache generation (%llu)\n",
293 "block group %llu\n",
294 (unsigned long long)BTRFS_I(inode)->generation, 287 (unsigned long long)BTRFS_I(inode)->generation,
295 (unsigned long long)generation, 288 (unsigned long long)generation);
296 (unsigned long long)block_group->key.objectid); 289 goto out;
297 goto free_cache;
298 } 290 }
299 291
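The loader above refuses a cache whose header generation disagrees with the inode's generation and falls back to a full scan. A small sketch of that validate-before-trust step for an on-disk cache (illustrative types, not the btrfs structures):

#include <stdint.h>
#include <stdio.h>

struct cache_header {
        uint64_t generation;    /* stamped when the cache was written */
        uint64_t entries;
};

/* Returns 0 when the cache may be used, -1 when it is stale and must
 * be rebuilt, mirroring the goto out above. */
static int check_generation(const struct cache_header *hdr, uint64_t cur)
{
        if (hdr->generation != cur) {
                fprintf(stderr,
                        "cache generation (%llu) does not match (%llu)\n",
                        (unsigned long long)hdr->generation,
                        (unsigned long long)cur);
                return -1;
        }
        return 0;
}

int main(void)
{
        struct cache_header hdr = { .generation = 41, .entries = 100 };

        if (check_generation(&hdr, 42))
                puts("stale cache, falling back to a full scan");
        return 0;
}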
300 if (!num_entries) 292 if (!num_entries)
@@ -311,10 +303,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
311 goto out; 303 goto out;
312 304
313 ret = readahead_cache(inode); 305 ret = readahead_cache(inode);
314 if (ret) { 306 if (ret)
315 ret = 0;
316 goto out; 307 goto out;
317 }
318 308
319 while (1) { 309 while (1) {
320 struct btrfs_free_space_entry *entry; 310 struct btrfs_free_space_entry *entry;
@@ -333,10 +323,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
333 } 323 }
334 324
335 page = grab_cache_page(inode->i_mapping, index); 325 page = grab_cache_page(inode->i_mapping, index);
336 if (!page) { 326 if (!page)
337 ret = 0;
338 goto free_cache; 327 goto free_cache;
339 }
340 328
341 if (!PageUptodate(page)) { 329 if (!PageUptodate(page)) {
342 btrfs_readpage(NULL, page); 330 btrfs_readpage(NULL, page);
@@ -345,9 +333,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
345 unlock_page(page); 333 unlock_page(page);
346 page_cache_release(page); 334 page_cache_release(page);
347 printk(KERN_ERR "btrfs: error reading free " 335 printk(KERN_ERR "btrfs: error reading free "
348 "space cache: %llu\n", 336 "space cache\n");
349 (unsigned long long)
350 block_group->key.objectid);
351 goto free_cache; 337 goto free_cache;
352 } 338 }
353 } 339 }
@@ -360,13 +346,10 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
360 gen = addr + (sizeof(u32) * num_checksums); 346 gen = addr + (sizeof(u32) * num_checksums);
361 if (*gen != BTRFS_I(inode)->generation) { 347 if (*gen != BTRFS_I(inode)->generation) {
362 printk(KERN_ERR "btrfs: space cache generation" 348 printk(KERN_ERR "btrfs: space cache generation"
363 " (%llu) does not match inode (%llu) " 349 " (%llu) does not match inode (%llu)\n",
364 "for block group %llu\n",
365 (unsigned long long)*gen, 350 (unsigned long long)*gen,
366 (unsigned long long) 351 (unsigned long long)
367 BTRFS_I(inode)->generation, 352 BTRFS_I(inode)->generation);
368 (unsigned long long)
369 block_group->key.objectid);
370 kunmap(page); 353 kunmap(page);
371 unlock_page(page); 354 unlock_page(page);
372 page_cache_release(page); 355 page_cache_release(page);
@@ -382,9 +365,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
382 PAGE_CACHE_SIZE - start_offset); 365 PAGE_CACHE_SIZE - start_offset);
383 btrfs_csum_final(cur_crc, (char *)&cur_crc); 366 btrfs_csum_final(cur_crc, (char *)&cur_crc);
384 if (cur_crc != *crc) { 367 if (cur_crc != *crc) {
385 printk(KERN_ERR "btrfs: crc mismatch for page %lu in " 368 printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
386 "block group %llu\n", index, 369 index);
387 (unsigned long long)block_group->key.objectid);
388 kunmap(page); 370 kunmap(page);
389 unlock_page(page); 371 unlock_page(page);
390 page_cache_release(page); 372 page_cache_release(page);
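Each cache page is covered by its own checksum stored in the first page, and a single mismatch throws the whole cache away rather than risking a corrupt free-space tree. A userspace sketch of the per-page check, with zlib's crc32() standing in for the kernel's checksum helpers (build with -lz):

#include <stdio.h>
#include <string.h>
#include <zlib.h>               /* crc32() as a stand-in only */

#define PAGE_SIZE 4096

/* Verify one cache page against its stored checksum, as the loop
 * above does with btrfs_csum_data()/btrfs_csum_final(). */
static int verify_page(const unsigned char *page, size_t len,
                       unsigned long expect)
{
        unsigned long crc = crc32(0L, Z_NULL, 0);

        crc = crc32(crc, page, len);
        return crc == expect ? 0 : -1;
}

int main(void)
{
        unsigned char page[PAGE_SIZE];
        unsigned long stored;

        memset(page, 0xab, sizeof(page));
        stored = crc32(crc32(0L, Z_NULL, 0), page, sizeof(page));

        page[7] ^= 1;           /* flip one bit: the page no longer matches */
        if (verify_page(page, sizeof(page), stored))
                puts("crc mismatch, dropping the cache");
        return 0;
}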
@@ -417,9 +399,9 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
417 } 399 }
418 400
419 if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 401 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
420 spin_lock(&block_group->tree_lock); 402 spin_lock(&ctl->tree_lock);
421 ret = link_free_space(block_group, e); 403 ret = link_free_space(ctl, e);
422 spin_unlock(&block_group->tree_lock); 404 spin_unlock(&ctl->tree_lock);
423 if (ret) { 405 if (ret) {
424 printk(KERN_ERR "Duplicate entries in " 406 printk(KERN_ERR "Duplicate entries in "
425 "free space cache, dumping\n"); 407 "free space cache, dumping\n");
@@ -438,11 +420,11 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
438 page_cache_release(page); 420 page_cache_release(page);
439 goto free_cache; 421 goto free_cache;
440 } 422 }
441 spin_lock(&block_group->tree_lock); 423 spin_lock(&ctl->tree_lock);
442 ret = link_free_space(block_group, e); 424 ret2 = link_free_space(ctl, e);
443 block_group->total_bitmaps++; 425 ctl->total_bitmaps++;
444 recalculate_thresholds(block_group); 426 ctl->op->recalc_thresholds(ctl);
445 spin_unlock(&block_group->tree_lock); 427 spin_unlock(&ctl->tree_lock);
446 list_add_tail(&e->list, &bitmaps); 428 list_add_tail(&e->list, &bitmaps);
447 if (ret) { 429 if (ret) {
448 printk(KERN_ERR "Duplicate entries in " 430 printk(KERN_ERR "Duplicate entries in "
@@ -486,41 +468,97 @@ next:
486 index++; 468 index++;
487 } 469 }
488 470
489 spin_lock(&block_group->tree_lock);
490 if (block_group->free_space != (block_group->key.offset - used -
491 block_group->bytes_super)) {
492 spin_unlock(&block_group->tree_lock);
493 printk(KERN_ERR "block group %llu has an wrong amount of free "
494 "space\n", block_group->key.objectid);
495 ret = 0;
496 goto free_cache;
497 }
498 spin_unlock(&block_group->tree_lock);
499
500 ret = 1; 471 ret = 1;
501out: 472out:
502 kfree(checksums); 473 kfree(checksums);
503 kfree(disk_crcs); 474 kfree(disk_crcs);
504 iput(inode);
505 return ret; 475 return ret;
506
507free_cache: 476free_cache:
508 /* This cache is bogus, make sure it gets cleared */ 477 __btrfs_remove_free_space_cache(ctl);
478 goto out;
479}
480
481int load_free_space_cache(struct btrfs_fs_info *fs_info,
482 struct btrfs_block_group_cache *block_group)
483{
484 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
485 struct btrfs_root *root = fs_info->tree_root;
486 struct inode *inode;
487 struct btrfs_path *path;
488 int ret;
489 bool matched;
490 u64 used = btrfs_block_group_used(&block_group->item);
491
492 /*
493 * If we're unmounting then just return, since this does a search on the
494 * normal root and not the commit root and we could deadlock.
495 */
496 smp_mb();
497 if (fs_info->closing)
498 return 0;
499
500 /*
501 * If this block group has been marked to be cleared for one reason or
502 * another then we can't trust the on disk cache, so just return.
503 */
509 spin_lock(&block_group->lock); 504 spin_lock(&block_group->lock);
510 block_group->disk_cache_state = BTRFS_DC_CLEAR; 505 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
506 spin_unlock(&block_group->lock);
507 return 0;
508 }
511 spin_unlock(&block_group->lock); 509 spin_unlock(&block_group->lock);
512 btrfs_remove_free_space_cache(block_group); 510
513 goto out; 511 path = btrfs_alloc_path();
512 if (!path)
513 return 0;
514
515 inode = lookup_free_space_inode(root, block_group, path);
516 if (IS_ERR(inode)) {
517 btrfs_free_path(path);
518 return 0;
519 }
520
521 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
522 path, block_group->key.objectid);
523 btrfs_free_path(path);
524 if (ret <= 0)
525 goto out;
526
527 spin_lock(&ctl->tree_lock);
528 matched = (ctl->free_space == (block_group->key.offset - used -
529 block_group->bytes_super));
530 spin_unlock(&ctl->tree_lock);
531
532 if (!matched) {
533 __btrfs_remove_free_space_cache(ctl);
 534 printk(KERN_ERR "block group %llu has the wrong amount of free "
 535 "space\n", block_group->key.objectid);
536 ret = -1;
537 }
538out:
539 if (ret < 0) {
540 /* This cache is bogus, make sure it gets cleared */
541 spin_lock(&block_group->lock);
542 block_group->disk_cache_state = BTRFS_DC_CLEAR;
543 spin_unlock(&block_group->lock);
544 ret = 0;
545
546 printk(KERN_ERR "btrfs: failed to load free space cache "
547 "for block group %llu\n", block_group->key.objectid);
548 }
549
550 iput(inode);
551 return ret;
514} 552}
515 553
516int btrfs_write_out_cache(struct btrfs_root *root, 554int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
517 struct btrfs_trans_handle *trans, 555 struct btrfs_free_space_ctl *ctl,
518 struct btrfs_block_group_cache *block_group, 556 struct btrfs_block_group_cache *block_group,
519 struct btrfs_path *path) 557 struct btrfs_trans_handle *trans,
558 struct btrfs_path *path, u64 offset)
520{ 559{
521 struct btrfs_free_space_header *header; 560 struct btrfs_free_space_header *header;
522 struct extent_buffer *leaf; 561 struct extent_buffer *leaf;
523 struct inode *inode;
524 struct rb_node *node; 562 struct rb_node *node;
525 struct list_head *pos, *n; 563 struct list_head *pos, *n;
526 struct page **pages; 564 struct page **pages;
@@ -537,35 +575,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
537 int index = 0, num_pages = 0; 575 int index = 0, num_pages = 0;
538 int entries = 0; 576 int entries = 0;
539 int bitmaps = 0; 577 int bitmaps = 0;
540 int ret = 0; 578 int ret = -1;
541 bool next_page = false; 579 bool next_page = false;
542 bool out_of_space = false; 580 bool out_of_space = false;
543 581
544 root = root->fs_info->tree_root;
545
546 INIT_LIST_HEAD(&bitmap_list); 582 INIT_LIST_HEAD(&bitmap_list);
547 583
548 spin_lock(&block_group->lock); 584 node = rb_first(&ctl->free_space_offset);
549 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 585 if (!node)
550 spin_unlock(&block_group->lock);
551 return 0;
552 }
553 spin_unlock(&block_group->lock);
554
555 inode = lookup_free_space_inode(root, block_group, path);
556 if (IS_ERR(inode))
557 return 0;
558
559 if (!i_size_read(inode)) {
560 iput(inode);
561 return 0; 586 return 0;
562 }
563 587
564 node = rb_first(&block_group->free_space_offset); 588 if (!i_size_read(inode))
565 if (!node) { 589 return -1;
566 iput(inode);
567 return 0;
568 }
569 590
570 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 591 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
571 PAGE_CACHE_SHIFT; 592 PAGE_CACHE_SHIFT;
@@ -575,16 +596,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
575 596
576 /* We need a checksum per page. */ 597 /* We need a checksum per page. */
577 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 598 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
578 if (!crc) { 599 if (!crc)
579 iput(inode); 600 return -1;
580 return 0;
581 }
582 601
583 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 602 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
584 if (!pages) { 603 if (!pages) {
585 kfree(crc); 604 kfree(crc);
586 iput(inode); 605 return -1;
587 return 0;
588 } 606 }
589 607
590 /* Since the first page has all of our checksums and our generation we 608 /* Since the first page has all of our checksums and our generation we
@@ -594,7 +612,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
594 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); 612 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
595 613
596 /* Get the cluster for this block_group if it exists */ 614 /* Get the cluster for this block_group if it exists */
597 if (!list_empty(&block_group->cluster_list)) 615 if (block_group && !list_empty(&block_group->cluster_list))
598 cluster = list_entry(block_group->cluster_list.next, 616 cluster = list_entry(block_group->cluster_list.next,
599 struct btrfs_free_cluster, 617 struct btrfs_free_cluster,
600 block_group_list); 618 block_group_list);
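first_page_offset above reserves the head of page 0 for one u32 checksum per page plus the u64 generation stamp; entry data starts right after. The layout arithmetic, spelled out with example numbers (my arithmetic, not kernel output):

#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096

int main(void)
{
        uint64_t i_size = 5 * PAGE_CACHE_SIZE;  /* example cache inode size */
        int num_pages = (i_size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;

        /* One u32 crc per page, then the u64 generation, then entries. */
        unsigned long first_page_offset =
                sizeof(uint32_t) * num_pages + sizeof(uint64_t);

        printf("%d pages -> entries start at byte %lu of page 0\n",
               num_pages, first_page_offset);   /* 5 pages -> byte 28 */
        return 0;
}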
@@ -636,7 +654,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
636 * When searching for pinned extents, we need to start at our start 654 * When searching for pinned extents, we need to start at our start
637 * offset. 655 * offset.
638 */ 656 */
639 start = block_group->key.objectid; 657 if (block_group)
658 start = block_group->key.objectid;
640 659
641 /* Write out the extent entries */ 660 /* Write out the extent entries */
642 do { 661 do {
@@ -694,8 +713,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
694 * We want to add any pinned extents to our free space cache 713 * We want to add any pinned extents to our free space cache
695 * so we don't leak the space 714 * so we don't leak the space
696 */ 715 */
697 while (!next_page && (start < block_group->key.objectid + 716 while (block_group && !next_page &&
698 block_group->key.offset)) { 717 (start < block_group->key.objectid +
718 block_group->key.offset)) {
699 ret = find_first_extent_bit(unpin, start, &start, &end, 719 ret = find_first_extent_bit(unpin, start, &start, &end,
700 EXTENT_DIRTY); 720 EXTENT_DIRTY);
701 if (ret) { 721 if (ret) {
@@ -813,12 +833,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
813 filemap_write_and_wait(inode->i_mapping); 833 filemap_write_and_wait(inode->i_mapping);
814 834
815 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 835 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
816 key.offset = block_group->key.objectid; 836 key.offset = offset;
817 key.type = 0; 837 key.type = 0;
818 838
819 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 839 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
820 if (ret < 0) { 840 if (ret < 0) {
821 ret = 0; 841 ret = -1;
822 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 842 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
823 EXTENT_DIRTY | EXTENT_DELALLOC | 843 EXTENT_DIRTY | EXTENT_DELALLOC |
824 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 844 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
@@ -831,13 +851,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
831 path->slots[0]--; 851 path->slots[0]--;
832 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 852 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
833 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 853 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
834 found_key.offset != block_group->key.objectid) { 854 found_key.offset != offset) {
835 ret = 0; 855 ret = -1;
836 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 856 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
837 EXTENT_DIRTY | EXTENT_DELALLOC | 857 EXTENT_DIRTY | EXTENT_DELALLOC |
838 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 858 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
839 GFP_NOFS); 859 GFP_NOFS);
840 btrfs_release_path(root, path); 860 btrfs_release_path(path);
841 goto out_free; 861 goto out_free;
842 } 862 }
843 } 863 }
@@ -847,49 +867,83 @@ int btrfs_write_out_cache(struct btrfs_root *root,
847 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 867 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
848 btrfs_set_free_space_generation(leaf, header, trans->transid); 868 btrfs_set_free_space_generation(leaf, header, trans->transid);
849 btrfs_mark_buffer_dirty(leaf); 869 btrfs_mark_buffer_dirty(leaf);
850 btrfs_release_path(root, path); 870 btrfs_release_path(path);
851 871
852 ret = 1; 872 ret = 1;
853 873
854out_free: 874out_free:
855 if (ret == 0) { 875 if (ret != 1) {
856 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 876 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
857 spin_lock(&block_group->lock);
858 block_group->disk_cache_state = BTRFS_DC_ERROR;
859 spin_unlock(&block_group->lock);
860 BTRFS_I(inode)->generation = 0; 877 BTRFS_I(inode)->generation = 0;
861 } 878 }
862 kfree(checksums); 879 kfree(checksums);
863 kfree(pages); 880 kfree(pages);
864 btrfs_update_inode(trans, root, inode); 881 btrfs_update_inode(trans, root, inode);
882 return ret;
883}
884
885int btrfs_write_out_cache(struct btrfs_root *root,
886 struct btrfs_trans_handle *trans,
887 struct btrfs_block_group_cache *block_group,
888 struct btrfs_path *path)
889{
890 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
891 struct inode *inode;
892 int ret = 0;
893
894 root = root->fs_info->tree_root;
895
896 spin_lock(&block_group->lock);
897 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
898 spin_unlock(&block_group->lock);
899 return 0;
900 }
901 spin_unlock(&block_group->lock);
902
903 inode = lookup_free_space_inode(root, block_group, path);
904 if (IS_ERR(inode))
905 return 0;
906
907 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
908 path, block_group->key.objectid);
909 if (ret < 0) {
910 spin_lock(&block_group->lock);
911 block_group->disk_cache_state = BTRFS_DC_ERROR;
912 spin_unlock(&block_group->lock);
913 ret = 0;
914
 915 printk(KERN_ERR "btrfs: failed to write free space cache "
 916 "for block group %llu\n", block_group->key.objectid);
917 }
918
865 iput(inode); 919 iput(inode);
866 return ret; 920 return ret;
867} 921}
868 922
869static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 923static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
870 u64 offset) 924 u64 offset)
871{ 925{
872 BUG_ON(offset < bitmap_start); 926 BUG_ON(offset < bitmap_start);
873 offset -= bitmap_start; 927 offset -= bitmap_start;
874 return (unsigned long)(div64_u64(offset, sectorsize)); 928 return (unsigned long)(div_u64(offset, unit));
875} 929}
876 930
877static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 931static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
878{ 932{
879 return (unsigned long)(div64_u64(bytes, sectorsize)); 933 return (unsigned long)(div_u64(bytes, unit));
880} 934}
881 935
882static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 936static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
883 u64 offset) 937 u64 offset)
884{ 938{
885 u64 bitmap_start; 939 u64 bitmap_start;
886 u64 bytes_per_bitmap; 940 u64 bytes_per_bitmap;
887 941
888 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 942 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
889 bitmap_start = offset - block_group->key.objectid; 943 bitmap_start = offset - ctl->start;
890 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 944 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
891 bitmap_start *= bytes_per_bitmap; 945 bitmap_start *= bytes_per_bitmap;
892 bitmap_start += block_group->key.objectid; 946 bitmap_start += ctl->start;
893 947
894 return bitmap_start; 948 return bitmap_start;
895} 949}
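With the move to struct btrfs_free_space_ctl, the bitmap helpers divide by the 32-bit ctl->unit rather than a u64 sectorsize, which is why div64_u64() relaxes to div_u64(). Worked numbers for the bitmap geometry under 4K sectors (a standalone sketch of the same math):

#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)  /* 32768 bits per bitmap */

int main(void)
{
        uint32_t unit = 4096;                   /* ctl->unit = sectorsize */
        uint64_t start = 1024ULL << 20;         /* ctl->start: bg at 1GiB */
        uint64_t bytes_per_bitmap = (uint64_t)BITS_PER_BITMAP * unit;

        /* offset_to_bitmap(): round an offset down to its bitmap's base */
        uint64_t offset = start + 300ULL * 1024 * 1024;   /* +300MiB */
        uint64_t base = (offset - start) / bytes_per_bitmap
                        * bytes_per_bitmap + start;

        /* offset_to_bit(): index of that offset inside its bitmap */
        unsigned long bit = (offset - base) / unit;

        printf("one bitmap covers %llu MiB; offset sits at bit %lu\n",
               (unsigned long long)(bytes_per_bitmap >> 20), bit);
        return 0;                               /* 128 MiB, bit 11264 */
}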
@@ -953,10 +1007,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
953 * offset. 1007 * offset.
954 */ 1008 */
955static struct btrfs_free_space * 1009static struct btrfs_free_space *
956tree_search_offset(struct btrfs_block_group_cache *block_group, 1010tree_search_offset(struct btrfs_free_space_ctl *ctl,
957 u64 offset, int bitmap_only, int fuzzy) 1011 u64 offset, int bitmap_only, int fuzzy)
958{ 1012{
959 struct rb_node *n = block_group->free_space_offset.rb_node; 1013 struct rb_node *n = ctl->free_space_offset.rb_node;
960 struct btrfs_free_space *entry, *prev = NULL; 1014 struct btrfs_free_space *entry, *prev = NULL;
961 1015
962 /* find entry that is closest to the 'offset' */ 1016 /* find entry that is closest to the 'offset' */
@@ -1052,8 +1106,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1052 break; 1106 break;
1053 } 1107 }
1054 } 1108 }
1055 if (entry->offset + BITS_PER_BITMAP * 1109 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1056 block_group->sectorsize > offset)
1057 return entry; 1110 return entry;
1058 } else if (entry->offset + entry->bytes > offset) 1111 } else if (entry->offset + entry->bytes > offset)
1059 return entry; 1112 return entry;
@@ -1064,7 +1117,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1064 while (1) { 1117 while (1) {
1065 if (entry->bitmap) { 1118 if (entry->bitmap) {
1066 if (entry->offset + BITS_PER_BITMAP * 1119 if (entry->offset + BITS_PER_BITMAP *
1067 block_group->sectorsize > offset) 1120 ctl->unit > offset)
1068 break; 1121 break;
1069 } else { 1122 } else {
1070 if (entry->offset + entry->bytes > offset) 1123 if (entry->offset + entry->bytes > offset)
@@ -1080,42 +1133,47 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1080} 1133}
1081 1134
1082static inline void 1135static inline void
1083__unlink_free_space(struct btrfs_block_group_cache *block_group, 1136__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1084 struct btrfs_free_space *info) 1137 struct btrfs_free_space *info)
1085{ 1138{
1086 rb_erase(&info->offset_index, &block_group->free_space_offset); 1139 rb_erase(&info->offset_index, &ctl->free_space_offset);
1087 block_group->free_extents--; 1140 ctl->free_extents--;
1088} 1141}
1089 1142
1090static void unlink_free_space(struct btrfs_block_group_cache *block_group, 1143static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1091 struct btrfs_free_space *info) 1144 struct btrfs_free_space *info)
1092{ 1145{
1093 __unlink_free_space(block_group, info); 1146 __unlink_free_space(ctl, info);
1094 block_group->free_space -= info->bytes; 1147 ctl->free_space -= info->bytes;
1095} 1148}
1096 1149
1097static int link_free_space(struct btrfs_block_group_cache *block_group, 1150static int link_free_space(struct btrfs_free_space_ctl *ctl,
1098 struct btrfs_free_space *info) 1151 struct btrfs_free_space *info)
1099{ 1152{
1100 int ret = 0; 1153 int ret = 0;
1101 1154
1102 BUG_ON(!info->bitmap && !info->bytes); 1155 BUG_ON(!info->bitmap && !info->bytes);
1103 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 1156 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1104 &info->offset_index, (info->bitmap != NULL)); 1157 &info->offset_index, (info->bitmap != NULL));
1105 if (ret) 1158 if (ret)
1106 return ret; 1159 return ret;
1107 1160
1108 block_group->free_space += info->bytes; 1161 ctl->free_space += info->bytes;
1109 block_group->free_extents++; 1162 ctl->free_extents++;
1110 return ret; 1163 return ret;
1111} 1164}
1112 1165
1113static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 1166static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1114{ 1167{
1168 struct btrfs_block_group_cache *block_group = ctl->private;
1115 u64 max_bytes; 1169 u64 max_bytes;
1116 u64 bitmap_bytes; 1170 u64 bitmap_bytes;
1117 u64 extent_bytes; 1171 u64 extent_bytes;
1118 u64 size = block_group->key.offset; 1172 u64 size = block_group->key.offset;
1173 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1174 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1175
1176 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1119 1177
1120 /* 1178 /*
1121 * The goal is to keep the total amount of memory used per 1gb of space 1179 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1133,10 +1191,10 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1133 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1191 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1134 * we add more bitmaps. 1192 * we add more bitmaps.
1135 */ 1193 */
1136 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1194 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1137 1195
1138 if (bitmap_bytes >= max_bytes) { 1196 if (bitmap_bytes >= max_bytes) {
1139 block_group->extents_thresh = 0; 1197 ctl->extents_thresh = 0;
1140 return; 1198 return;
1141 } 1199 }
1142 1200
@@ -1147,47 +1205,43 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1147 extent_bytes = max_bytes - bitmap_bytes; 1205 extent_bytes = max_bytes - bitmap_bytes;
1148 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1206 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1149 1207
1150 block_group->extents_thresh = 1208 ctl->extents_thresh =
1151 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1209 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1152} 1210}
1153 1211
1154static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 1212static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1155 struct btrfs_free_space *info, u64 offset, 1213 struct btrfs_free_space *info, u64 offset,
1156 u64 bytes) 1214 u64 bytes)
1157{ 1215{
1158 unsigned long start, end; 1216 unsigned long start, count;
1159 unsigned long i;
1160 1217
1161 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1218 start = offset_to_bit(info->offset, ctl->unit, offset);
1162 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1219 count = bytes_to_bits(bytes, ctl->unit);
1163 BUG_ON(end > BITS_PER_BITMAP); 1220 BUG_ON(start + count > BITS_PER_BITMAP);
1164 1221
1165 for (i = start; i < end; i++) 1222 bitmap_clear(info->bitmap, start, count);
1166 clear_bit(i, info->bitmap);
1167 1223
1168 info->bytes -= bytes; 1224 info->bytes -= bytes;
1169 block_group->free_space -= bytes; 1225 ctl->free_space -= bytes;
1170} 1226}
1171 1227
1172static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 1228static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1173 struct btrfs_free_space *info, u64 offset, 1229 struct btrfs_free_space *info, u64 offset,
1174 u64 bytes) 1230 u64 bytes)
1175{ 1231{
1176 unsigned long start, end; 1232 unsigned long start, count;
1177 unsigned long i;
1178 1233
1179 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1234 start = offset_to_bit(info->offset, ctl->unit, offset);
1180 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1235 count = bytes_to_bits(bytes, ctl->unit);
1181 BUG_ON(end > BITS_PER_BITMAP); 1236 BUG_ON(start + count > BITS_PER_BITMAP);
1182 1237
1183 for (i = start; i < end; i++) 1238 bitmap_set(info->bitmap, start, count);
1184 set_bit(i, info->bitmap);
1185 1239
1186 info->bytes += bytes; 1240 info->bytes += bytes;
1187 block_group->free_space += bytes; 1241 ctl->free_space += bytes;
1188} 1242}
1189 1243
1190static int search_bitmap(struct btrfs_block_group_cache *block_group, 1244static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1191 struct btrfs_free_space *bitmap_info, u64 *offset, 1245 struct btrfs_free_space *bitmap_info, u64 *offset,
1192 u64 *bytes) 1246 u64 *bytes)
1193{ 1247{
@@ -1195,9 +1249,9 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1195 unsigned long bits, i; 1249 unsigned long bits, i;
1196 unsigned long next_zero; 1250 unsigned long next_zero;
1197 1251
1198 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 1252 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1199 max_t(u64, *offset, bitmap_info->offset)); 1253 max_t(u64, *offset, bitmap_info->offset));
1200 bits = bytes_to_bits(*bytes, block_group->sectorsize); 1254 bits = bytes_to_bits(*bytes, ctl->unit);
1201 1255
1202 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 1256 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1203 i < BITS_PER_BITMAP; 1257 i < BITS_PER_BITMAP;
@@ -1212,29 +1266,25 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1212 } 1266 }
1213 1267
1214 if (found_bits) { 1268 if (found_bits) {
1215 *offset = (u64)(i * block_group->sectorsize) + 1269 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1216 bitmap_info->offset; 1270 *bytes = (u64)(found_bits) * ctl->unit;
1217 *bytes = (u64)(found_bits) * block_group->sectorsize;
1218 return 0; 1271 return 0;
1219 } 1272 }
1220 1273
1221 return -1; 1274 return -1;
1222} 1275}
1223 1276
1224static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 1277static struct btrfs_free_space *
1225 *block_group, u64 *offset, 1278find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1226 u64 *bytes, int debug)
1227{ 1279{
1228 struct btrfs_free_space *entry; 1280 struct btrfs_free_space *entry;
1229 struct rb_node *node; 1281 struct rb_node *node;
1230 int ret; 1282 int ret;
1231 1283
1232 if (!block_group->free_space_offset.rb_node) 1284 if (!ctl->free_space_offset.rb_node)
1233 return NULL; 1285 return NULL;
1234 1286
1235 entry = tree_search_offset(block_group, 1287 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1236 offset_to_bitmap(block_group, *offset),
1237 0, 1);
1238 if (!entry) 1288 if (!entry)
1239 return NULL; 1289 return NULL;
1240 1290
@@ -1244,7 +1294,7 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1244 continue; 1294 continue;
1245 1295
1246 if (entry->bitmap) { 1296 if (entry->bitmap) {
1247 ret = search_bitmap(block_group, entry, offset, bytes); 1297 ret = search_bitmap(ctl, entry, offset, bytes);
1248 if (!ret) 1298 if (!ret)
1249 return entry; 1299 return entry;
1250 continue; 1300 continue;
@@ -1258,33 +1308,28 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1258 return NULL; 1308 return NULL;
1259} 1309}
1260 1310
1261static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 1311static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1262 struct btrfs_free_space *info, u64 offset) 1312 struct btrfs_free_space *info, u64 offset)
1263{ 1313{
1264 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1314 info->offset = offset_to_bitmap(ctl, offset);
1265 int max_bitmaps = (int)div64_u64(block_group->key.offset +
1266 bytes_per_bg - 1, bytes_per_bg);
1267 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1268
1269 info->offset = offset_to_bitmap(block_group, offset);
1270 info->bytes = 0; 1315 info->bytes = 0;
1271 link_free_space(block_group, info); 1316 link_free_space(ctl, info);
1272 block_group->total_bitmaps++; 1317 ctl->total_bitmaps++;
1273 1318
1274 recalculate_thresholds(block_group); 1319 ctl->op->recalc_thresholds(ctl);
1275} 1320}
1276 1321
1277static void free_bitmap(struct btrfs_block_group_cache *block_group, 1322static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1278 struct btrfs_free_space *bitmap_info) 1323 struct btrfs_free_space *bitmap_info)
1279{ 1324{
1280 unlink_free_space(block_group, bitmap_info); 1325 unlink_free_space(ctl, bitmap_info);
1281 kfree(bitmap_info->bitmap); 1326 kfree(bitmap_info->bitmap);
1282 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1327 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1283 block_group->total_bitmaps--; 1328 ctl->total_bitmaps--;
1284 recalculate_thresholds(block_group); 1329 ctl->op->recalc_thresholds(ctl);
1285} 1330}
1286 1331
1287static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1332static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1288 struct btrfs_free_space *bitmap_info, 1333 struct btrfs_free_space *bitmap_info,
1289 u64 *offset, u64 *bytes) 1334 u64 *offset, u64 *bytes)
1290{ 1335{
@@ -1293,8 +1338,7 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
1293 int ret; 1338 int ret;
1294 1339
1295again: 1340again:
1296 end = bitmap_info->offset + 1341 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1297 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
1298 1342
1299 /* 1343 /*
1300 * XXX - this can go away after a few releases. 1344 * XXX - this can go away after a few releases.
@@ -1309,24 +1353,22 @@ again:
1309 search_start = *offset; 1353 search_start = *offset;
1310 search_bytes = *bytes; 1354 search_bytes = *bytes;
1311 search_bytes = min(search_bytes, end - search_start + 1); 1355 search_bytes = min(search_bytes, end - search_start + 1);
1312 ret = search_bitmap(block_group, bitmap_info, &search_start, 1356 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1313 &search_bytes);
1314 BUG_ON(ret < 0 || search_start != *offset); 1357 BUG_ON(ret < 0 || search_start != *offset);
1315 1358
1316 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1359 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1317 bitmap_clear_bits(block_group, bitmap_info, *offset, 1360 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1318 end - *offset + 1);
1319 *bytes -= end - *offset + 1; 1361 *bytes -= end - *offset + 1;
1320 *offset = end + 1; 1362 *offset = end + 1;
1321 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1363 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1322 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 1364 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1323 *bytes = 0; 1365 *bytes = 0;
1324 } 1366 }
1325 1367
1326 if (*bytes) { 1368 if (*bytes) {
1327 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1369 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1328 if (!bitmap_info->bytes) 1370 if (!bitmap_info->bytes)
1329 free_bitmap(block_group, bitmap_info); 1371 free_bitmap(ctl, bitmap_info);
1330 1372
1331 /* 1373 /*
1332 * no entry after this bitmap, but we still have bytes to 1374 * no entry after this bitmap, but we still have bytes to
@@ -1353,31 +1395,28 @@ again:
1353 */ 1395 */
1354 search_start = *offset; 1396 search_start = *offset;
1355 search_bytes = *bytes; 1397 search_bytes = *bytes;
1356 ret = search_bitmap(block_group, bitmap_info, &search_start, 1398 ret = search_bitmap(ctl, bitmap_info, &search_start,
1357 &search_bytes); 1399 &search_bytes);
1358 if (ret < 0 || search_start != *offset) 1400 if (ret < 0 || search_start != *offset)
1359 return -EAGAIN; 1401 return -EAGAIN;
1360 1402
1361 goto again; 1403 goto again;
1362 } else if (!bitmap_info->bytes) 1404 } else if (!bitmap_info->bytes)
1363 free_bitmap(block_group, bitmap_info); 1405 free_bitmap(ctl, bitmap_info);
1364 1406
1365 return 0; 1407 return 0;
1366} 1408}
1367 1409
1368static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 1410static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1369 struct btrfs_free_space *info) 1411 struct btrfs_free_space *info)
1370{ 1412{
1371 struct btrfs_free_space *bitmap_info; 1413 struct btrfs_block_group_cache *block_group = ctl->private;
1372 int added = 0;
1373 u64 bytes, offset, end;
1374 int ret;
1375 1414
1376 /* 1415 /*
1377 * If we are below the extents threshold then we can add this as an 1416 * If we are below the extents threshold then we can add this as an
1378 * extent, and don't have to deal with the bitmap 1417 * extent, and don't have to deal with the bitmap
1379 */ 1418 */
1380 if (block_group->free_extents < block_group->extents_thresh) { 1419 if (ctl->free_extents < ctl->extents_thresh) {
1381 /* 1420 /*
1382 * If this block group has some small extents we don't want to 1421 * If this block group has some small extents we don't want to
1383 * use up all of our free slots in the cache with them, we want 1422 * use up all of our free slots in the cache with them, we want
@@ -1386,11 +1425,10 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1386 * the overhead of a bitmap if we don't have to. 1425 * the overhead of a bitmap if we don't have to.
1387 */ 1426 */
1388 if (info->bytes <= block_group->sectorsize * 4) { 1427 if (info->bytes <= block_group->sectorsize * 4) {
1389 if (block_group->free_extents * 2 <= 1428 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1390 block_group->extents_thresh) 1429 return false;
1391 return 0;
1392 } else { 1430 } else {
1393 return 0; 1431 return false;
1394 } 1432 }
1395 } 1433 }
1396 1434
@@ -1400,31 +1438,42 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1400 */ 1438 */
1401 if (BITS_PER_BITMAP * block_group->sectorsize > 1439 if (BITS_PER_BITMAP * block_group->sectorsize >
1402 block_group->key.offset) 1440 block_group->key.offset)
1403 return 0; 1441 return false;
1442
1443 return true;
1444}
1445
1446static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1447 struct btrfs_free_space *info)
1448{
1449 struct btrfs_free_space *bitmap_info;
1450 int added = 0;
1451 u64 bytes, offset, end;
1452 int ret;
1404 1453
1405 bytes = info->bytes; 1454 bytes = info->bytes;
1406 offset = info->offset; 1455 offset = info->offset;
1407 1456
1457 if (!ctl->op->use_bitmap(ctl, info))
1458 return 0;
1459
1408again: 1460again:
1409 bitmap_info = tree_search_offset(block_group, 1461 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1410 offset_to_bitmap(block_group, offset),
1411 1, 0); 1462 1, 0);
1412 if (!bitmap_info) { 1463 if (!bitmap_info) {
1413 BUG_ON(added); 1464 BUG_ON(added);
1414 goto new_bitmap; 1465 goto new_bitmap;
1415 } 1466 }
1416 1467
1417 end = bitmap_info->offset + 1468 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1418 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
1419 1469
1420 if (offset >= bitmap_info->offset && offset + bytes > end) { 1470 if (offset >= bitmap_info->offset && offset + bytes > end) {
1421 bitmap_set_bits(block_group, bitmap_info, offset, 1471 bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
1422 end - offset);
1423 bytes -= end - offset; 1472 bytes -= end - offset;
1424 offset = end; 1473 offset = end;
1425 added = 0; 1474 added = 0;
1426 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 1475 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1427 bitmap_set_bits(block_group, bitmap_info, offset, bytes); 1476 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1428 bytes = 0; 1477 bytes = 0;
1429 } else { 1478 } else {
1430 BUG(); 1479 BUG();
@@ -1438,19 +1487,19 @@ again:
1438 1487
1439new_bitmap: 1488new_bitmap:
1440 if (info && info->bitmap) { 1489 if (info && info->bitmap) {
1441 add_new_bitmap(block_group, info, offset); 1490 add_new_bitmap(ctl, info, offset);
1442 added = 1; 1491 added = 1;
1443 info = NULL; 1492 info = NULL;
1444 goto again; 1493 goto again;
1445 } else { 1494 } else {
1446 spin_unlock(&block_group->tree_lock); 1495 spin_unlock(&ctl->tree_lock);
1447 1496
1448 /* no pre-allocated info, allocate a new one */ 1497 /* no pre-allocated info, allocate a new one */
1449 if (!info) { 1498 if (!info) {
1450 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1499 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1451 GFP_NOFS); 1500 GFP_NOFS);
1452 if (!info) { 1501 if (!info) {
1453 spin_lock(&block_group->tree_lock); 1502 spin_lock(&ctl->tree_lock);
1454 ret = -ENOMEM; 1503 ret = -ENOMEM;
1455 goto out; 1504 goto out;
1456 } 1505 }
@@ -1458,7 +1507,7 @@ new_bitmap:
1458 1507
1459 /* allocate the bitmap */ 1508 /* allocate the bitmap */
1460 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1509 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1461 spin_lock(&block_group->tree_lock); 1510 spin_lock(&ctl->tree_lock);
1462 if (!info->bitmap) { 1511 if (!info->bitmap) {
1463 ret = -ENOMEM; 1512 ret = -ENOMEM;
1464 goto out; 1513 goto out;
@@ -1476,7 +1525,7 @@ out:
1476 return ret; 1525 return ret;
1477} 1526}
1478 1527
1479bool try_merge_free_space(struct btrfs_block_group_cache *block_group, 1528static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1480 struct btrfs_free_space *info, bool update_stat) 1529 struct btrfs_free_space *info, bool update_stat)
1481{ 1530{
1482 struct btrfs_free_space *left_info; 1531 struct btrfs_free_space *left_info;
@@ -1490,18 +1539,18 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1490 * are adding, if there is remove that struct and add a new one to 1539 * are adding, if there is remove that struct and add a new one to
1491 * cover the entire range 1540 * cover the entire range
1492 */ 1541 */
1493 right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 1542 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1494 if (right_info && rb_prev(&right_info->offset_index)) 1543 if (right_info && rb_prev(&right_info->offset_index))
1495 left_info = rb_entry(rb_prev(&right_info->offset_index), 1544 left_info = rb_entry(rb_prev(&right_info->offset_index),
1496 struct btrfs_free_space, offset_index); 1545 struct btrfs_free_space, offset_index);
1497 else 1546 else
1498 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1547 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1499 1548
1500 if (right_info && !right_info->bitmap) { 1549 if (right_info && !right_info->bitmap) {
1501 if (update_stat) 1550 if (update_stat)
1502 unlink_free_space(block_group, right_info); 1551 unlink_free_space(ctl, right_info);
1503 else 1552 else
1504 __unlink_free_space(block_group, right_info); 1553 __unlink_free_space(ctl, right_info);
1505 info->bytes += right_info->bytes; 1554 info->bytes += right_info->bytes;
1506 kmem_cache_free(btrfs_free_space_cachep, right_info); 1555 kmem_cache_free(btrfs_free_space_cachep, right_info);
1507 merged = true; 1556 merged = true;
@@ -1510,9 +1559,9 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1510 if (left_info && !left_info->bitmap && 1559 if (left_info && !left_info->bitmap &&
1511 left_info->offset + left_info->bytes == offset) { 1560 left_info->offset + left_info->bytes == offset) {
1512 if (update_stat) 1561 if (update_stat)
1513 unlink_free_space(block_group, left_info); 1562 unlink_free_space(ctl, left_info);
1514 else 1563 else
1515 __unlink_free_space(block_group, left_info); 1564 __unlink_free_space(ctl, left_info);
1516 info->offset = left_info->offset; 1565 info->offset = left_info->offset;
1517 info->bytes += left_info->bytes; 1566 info->bytes += left_info->bytes;
1518 kmem_cache_free(btrfs_free_space_cachep, left_info); 1567 kmem_cache_free(btrfs_free_space_cachep, left_info);
@@ -1522,8 +1571,8 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1522 return merged; 1571 return merged;
1523} 1572}
1524 1573
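try_merge_free_space() absorbs any non-bitmap neighbour that touches the new range on either side, so the offset tree always stores maximal extents. A compact model of the two absorb branches on plain intervals (hypothetical helper, not the rbtree version):

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t offset, bytes; };

/* Fold 'left' and 'right' into 'info' when they are exactly adjacent,
 * mirroring the two unlink-and-absorb branches above. */
static void try_merge(struct range *info, struct range *left,
                      struct range *right)
{
        if (right && info->offset + info->bytes == right->offset) {
                info->bytes += right->bytes;    /* absorb right neighbour */
                right->bytes = 0;
        }
        if (left && left->offset + left->bytes == info->offset) {
                info->offset = left->offset;    /* absorb left neighbour */
                info->bytes += left->bytes;
                left->bytes = 0;
        }
}

int main(void)
{
        struct range left  = { 0,     4096 };
        struct range info  = { 4096,  8192 };
        struct range right = { 12288, 4096 };

        try_merge(&info, &left, &right);
        printf("merged extent: offset %llu, bytes %llu\n",
               (unsigned long long)info.offset,
               (unsigned long long)info.bytes); /* 0, 16384 */
        return 0;
}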
1525int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1574int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1526 u64 offset, u64 bytes) 1575 u64 offset, u64 bytes)
1527{ 1576{
1528 struct btrfs_free_space *info; 1577 struct btrfs_free_space *info;
1529 int ret = 0; 1578 int ret = 0;
@@ -1535,9 +1584,9 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1535 info->offset = offset; 1584 info->offset = offset;
1536 info->bytes = bytes; 1585 info->bytes = bytes;
1537 1586
1538 spin_lock(&block_group->tree_lock); 1587 spin_lock(&ctl->tree_lock);
1539 1588
1540 if (try_merge_free_space(block_group, info, true)) 1589 if (try_merge_free_space(ctl, info, true))
1541 goto link; 1590 goto link;
1542 1591
1543 /* 1592 /*
@@ -1545,7 +1594,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1545 * extent then we know we're going to have to allocate a new extent, so 1594 * extent then we know we're going to have to allocate a new extent, so
1546 * before we do that see if we need to drop this into a bitmap 1595 * before we do that see if we need to drop this into a bitmap
1547 */ 1596 */
1548 ret = insert_into_bitmap(block_group, info); 1597 ret = insert_into_bitmap(ctl, info);
1549 if (ret < 0) { 1598 if (ret < 0) {
1550 goto out; 1599 goto out;
1551 } else if (ret) { 1600 } else if (ret) {
@@ -1553,11 +1602,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1553 goto out; 1602 goto out;
1554 } 1603 }
1555link: 1604link:
1556 ret = link_free_space(block_group, info); 1605 ret = link_free_space(ctl, info);
1557 if (ret) 1606 if (ret)
1558 kmem_cache_free(btrfs_free_space_cachep, info); 1607 kmem_cache_free(btrfs_free_space_cachep, info);
1559out: 1608out:
1560 spin_unlock(&block_group->tree_lock); 1609 spin_unlock(&ctl->tree_lock);
1561 1610
1562 if (ret) { 1611 if (ret) {
1563 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1612 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
@@ -1570,21 +1619,21 @@ out:
1570int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1619int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1571 u64 offset, u64 bytes) 1620 u64 offset, u64 bytes)
1572{ 1621{
1622 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1573 struct btrfs_free_space *info; 1623 struct btrfs_free_space *info;
1574 struct btrfs_free_space *next_info = NULL; 1624 struct btrfs_free_space *next_info = NULL;
1575 int ret = 0; 1625 int ret = 0;
1576 1626
1577 spin_lock(&block_group->tree_lock); 1627 spin_lock(&ctl->tree_lock);
1578 1628
1579again: 1629again:
1580 info = tree_search_offset(block_group, offset, 0, 0); 1630 info = tree_search_offset(ctl, offset, 0, 0);
1581 if (!info) { 1631 if (!info) {
1582 /* 1632 /*
1583 * oops didn't find an extent that matched the space we wanted 1633 * oops didn't find an extent that matched the space we wanted
1584 * to remove, look for a bitmap instead 1634 * to remove, look for a bitmap instead
1585 */ 1635 */
1586 info = tree_search_offset(block_group, 1636 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1587 offset_to_bitmap(block_group, offset),
1588 1, 0); 1637 1, 0);
1589 if (!info) { 1638 if (!info) {
1590 WARN_ON(1); 1639 WARN_ON(1);
@@ -1599,8 +1648,8 @@ again:
1599 offset_index); 1648 offset_index);
1600 1649
1601 if (next_info->bitmap) 1650 if (next_info->bitmap)
1602 end = next_info->offset + BITS_PER_BITMAP * 1651 end = next_info->offset +
1603 block_group->sectorsize - 1; 1652 BITS_PER_BITMAP * ctl->unit - 1;
1604 else 1653 else
1605 end = next_info->offset + next_info->bytes; 1654 end = next_info->offset + next_info->bytes;
1606 1655
@@ -1620,20 +1669,20 @@ again:
1620 } 1669 }
1621 1670
1622 if (info->bytes == bytes) { 1671 if (info->bytes == bytes) {
1623 unlink_free_space(block_group, info); 1672 unlink_free_space(ctl, info);
1624 if (info->bitmap) { 1673 if (info->bitmap) {
1625 kfree(info->bitmap); 1674 kfree(info->bitmap);
1626 block_group->total_bitmaps--; 1675 ctl->total_bitmaps--;
1627 } 1676 }
1628 kmem_cache_free(btrfs_free_space_cachep, info); 1677 kmem_cache_free(btrfs_free_space_cachep, info);
1629 goto out_lock; 1678 goto out_lock;
1630 } 1679 }
1631 1680
1632 if (!info->bitmap && info->offset == offset) { 1681 if (!info->bitmap && info->offset == offset) {
1633 unlink_free_space(block_group, info); 1682 unlink_free_space(ctl, info);
1634 info->offset += bytes; 1683 info->offset += bytes;
1635 info->bytes -= bytes; 1684 info->bytes -= bytes;
1636 link_free_space(block_group, info); 1685 link_free_space(ctl, info);
1637 goto out_lock; 1686 goto out_lock;
1638 } 1687 }
1639 1688
@@ -1647,13 +1696,13 @@ again:
1647 * first unlink the old info and then 1696 * first unlink the old info and then
1648 * insert it again after the hole we're creating 1697 * insert it again after the hole we're creating
1649 */ 1698 */
1650 unlink_free_space(block_group, info); 1699 unlink_free_space(ctl, info);
1651 if (offset + bytes < info->offset + info->bytes) { 1700 if (offset + bytes < info->offset + info->bytes) {
1652 u64 old_end = info->offset + info->bytes; 1701 u64 old_end = info->offset + info->bytes;
1653 1702
1654 info->offset = offset + bytes; 1703 info->offset = offset + bytes;
1655 info->bytes = old_end - info->offset; 1704 info->bytes = old_end - info->offset;
1656 ret = link_free_space(block_group, info); 1705 ret = link_free_space(ctl, info);
1657 WARN_ON(ret); 1706 WARN_ON(ret);
1658 if (ret) 1707 if (ret)
1659 goto out_lock; 1708 goto out_lock;
@@ -1663,7 +1712,7 @@ again:
1663 */ 1712 */
1664 kmem_cache_free(btrfs_free_space_cachep, info); 1713 kmem_cache_free(btrfs_free_space_cachep, info);
1665 } 1714 }
1666 spin_unlock(&block_group->tree_lock); 1715 spin_unlock(&ctl->tree_lock);
1667 1716
1668 /* step two, insert a new info struct to cover 1717 /* step two, insert a new info struct to cover
1669 * anything before the hole 1718 * anything before the hole
@@ -1674,12 +1723,12 @@ again:
1674 goto out; 1723 goto out;
1675 } 1724 }
1676 1725
1677 ret = remove_from_bitmap(block_group, info, &offset, &bytes); 1726 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1678 if (ret == -EAGAIN) 1727 if (ret == -EAGAIN)
1679 goto again; 1728 goto again;
1680 BUG_ON(ret); 1729 BUG_ON(ret);
1681out_lock: 1730out_lock:
1682 spin_unlock(&block_group->tree_lock); 1731 spin_unlock(&ctl->tree_lock);
1683out: 1732out:
1684 return ret; 1733 return ret;
1685} 1734}
@@ -1687,11 +1736,12 @@ out:
1687void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1736void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1688 u64 bytes) 1737 u64 bytes)
1689{ 1738{
1739 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1690 struct btrfs_free_space *info; 1740 struct btrfs_free_space *info;
1691 struct rb_node *n; 1741 struct rb_node *n;
1692 int count = 0; 1742 int count = 0;
1693 1743
1694 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { 1744 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1695 info = rb_entry(n, struct btrfs_free_space, offset_index); 1745 info = rb_entry(n, struct btrfs_free_space, offset_index);
1696 if (info->bytes >= bytes) 1746 if (info->bytes >= bytes)
1697 count++; 1747 count++;
@@ -1706,19 +1756,28 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1706 "\n", count); 1756 "\n", count);
1707} 1757}
1708 1758
1709u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) 1759static struct btrfs_free_space_op free_space_op = {
1760 .recalc_thresholds = recalculate_thresholds,
1761 .use_bitmap = use_bitmap,
1762};
1763
1764void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1710{ 1765{
1711 struct btrfs_free_space *info; 1766 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1712 struct rb_node *n;
1713 u64 ret = 0;
1714 1767
1715 for (n = rb_first(&block_group->free_space_offset); n; 1768 spin_lock_init(&ctl->tree_lock);
1716 n = rb_next(n)) { 1769 ctl->unit = block_group->sectorsize;
1717 info = rb_entry(n, struct btrfs_free_space, offset_index); 1770 ctl->start = block_group->key.objectid;
1718 ret += info->bytes; 1771 ctl->private = block_group;
1719 } 1772 ctl->op = &free_space_op;
1720 1773
1721 return ret; 1774 /*
1775 * we only want to have 32k of ram per block group for keeping
1776 * track of free space, and if we pass 1/2 of that we want to
1777 * start converting things over to using bitmaps
1778 */
1779 ctl->extents_thresh = ((1024 * 32) / 2) /
1780 sizeof(struct btrfs_free_space);
1722} 1781}
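
The 32k budget in the comment above pins down the initial extents_thresh. A minimal userspace sketch of that arithmetic, assuming a 64-bit kernel where struct btrfs_free_space works out to roughly 64 bytes (an rb_node, two u64s, the bitmap pointer, and a list_head; the exact size is config-dependent):

#include <stdio.h>

/* Simplified 64-bit model of struct btrfs_free_space (an assumption). */
struct model_free_space {
        void *rb_parent, *rb_left, *rb_right;  /* struct rb_node */
        unsigned long long offset;
        unsigned long long bytes;
        void *bitmap;
        void *list_next, *list_prev;           /* struct list_head */
};

int main(void)
{
        size_t budget = 1024 * 32;  /* 32k of ram per block group */
        size_t thresh = (budget / 2) / sizeof(struct model_free_space);

        /* With a 64-byte entry this prints 256: past ~256 extent
         * entries the cache starts converting runs to bitmaps. */
        printf("extents_thresh = %zu\n", thresh);
        return 0;
}
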
1723 1782
1724/* 1783/*
@@ -1732,6 +1791,7 @@ __btrfs_return_cluster_to_free_space(
1732 struct btrfs_block_group_cache *block_group, 1791 struct btrfs_block_group_cache *block_group,
1733 struct btrfs_free_cluster *cluster) 1792 struct btrfs_free_cluster *cluster)
1734{ 1793{
1794 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1735 struct btrfs_free_space *entry; 1795 struct btrfs_free_space *entry;
1736 struct rb_node *node; 1796 struct rb_node *node;
1737 1797
@@ -1753,8 +1813,8 @@ __btrfs_return_cluster_to_free_space(
1753 1813
1754 bitmap = (entry->bitmap != NULL); 1814 bitmap = (entry->bitmap != NULL);
1755 if (!bitmap) 1815 if (!bitmap)
1756 try_merge_free_space(block_group, entry, false); 1816 try_merge_free_space(ctl, entry, false);
1757 tree_insert_offset(&block_group->free_space_offset, 1817 tree_insert_offset(&ctl->free_space_offset,
1758 entry->offset, &entry->offset_index, bitmap); 1818 entry->offset, &entry->offset_index, bitmap);
1759 } 1819 }
1760 cluster->root = RB_ROOT; 1820 cluster->root = RB_ROOT;
@@ -1765,14 +1825,38 @@ out:
1765 return 0; 1825 return 0;
1766} 1826}
1767 1827
1768void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 1828void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1769{ 1829{
1770 struct btrfs_free_space *info; 1830 struct btrfs_free_space *info;
1771 struct rb_node *node; 1831 struct rb_node *node;
1832
1833 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1834 info = rb_entry(node, struct btrfs_free_space, offset_index);
1835 unlink_free_space(ctl, info);
1836 kfree(info->bitmap);
1837 kmem_cache_free(btrfs_free_space_cachep, info);
1838 if (need_resched()) {
1839 spin_unlock(&ctl->tree_lock);
1840 cond_resched();
1841 spin_lock(&ctl->tree_lock);
1842 }
1843 }
1844}
1845
1846void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1847{
1848 spin_lock(&ctl->tree_lock);
1849 __btrfs_remove_free_space_cache_locked(ctl);
1850 spin_unlock(&ctl->tree_lock);
1851}
1852
1853void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1854{
1855 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1772 struct btrfs_free_cluster *cluster; 1856 struct btrfs_free_cluster *cluster;
1773 struct list_head *head; 1857 struct list_head *head;
1774 1858
1775 spin_lock(&block_group->tree_lock); 1859 spin_lock(&ctl->tree_lock);
1776 while ((head = block_group->cluster_list.next) != 1860 while ((head = block_group->cluster_list.next) !=
1777 &block_group->cluster_list) { 1861 &block_group->cluster_list) {
1778 cluster = list_entry(head, struct btrfs_free_cluster, 1862 cluster = list_entry(head, struct btrfs_free_cluster,
@@ -1781,60 +1865,46 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1781 WARN_ON(cluster->block_group != block_group); 1865 WARN_ON(cluster->block_group != block_group);
1782 __btrfs_return_cluster_to_free_space(block_group, cluster); 1866 __btrfs_return_cluster_to_free_space(block_group, cluster);
1783 if (need_resched()) { 1867 if (need_resched()) {
1784 spin_unlock(&block_group->tree_lock); 1868 spin_unlock(&ctl->tree_lock);
1785 cond_resched(); 1869 cond_resched();
1786 spin_lock(&block_group->tree_lock); 1870 spin_lock(&ctl->tree_lock);
1787 } 1871 }
1788 } 1872 }
1873 __btrfs_remove_free_space_cache_locked(ctl);
1874 spin_unlock(&ctl->tree_lock);
1789 1875
1790 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1791 info = rb_entry(node, struct btrfs_free_space, offset_index);
1792 if (!info->bitmap) {
1793 unlink_free_space(block_group, info);
1794 kmem_cache_free(btrfs_free_space_cachep, info);
1795 } else {
1796 free_bitmap(block_group, info);
1797 }
1798
1799 if (need_resched()) {
1800 spin_unlock(&block_group->tree_lock);
1801 cond_resched();
1802 spin_lock(&block_group->tree_lock);
1803 }
1804 }
1805
1806 spin_unlock(&block_group->tree_lock);
1807} 1876}
1808 1877
1809u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 1878u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1810 u64 offset, u64 bytes, u64 empty_size) 1879 u64 offset, u64 bytes, u64 empty_size)
1811{ 1880{
1881 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1812 struct btrfs_free_space *entry = NULL; 1882 struct btrfs_free_space *entry = NULL;
1813 u64 bytes_search = bytes + empty_size; 1883 u64 bytes_search = bytes + empty_size;
1814 u64 ret = 0; 1884 u64 ret = 0;
1815 1885
1816 spin_lock(&block_group->tree_lock); 1886 spin_lock(&ctl->tree_lock);
1817 entry = find_free_space(block_group, &offset, &bytes_search, 0); 1887 entry = find_free_space(ctl, &offset, &bytes_search);
1818 if (!entry) 1888 if (!entry)
1819 goto out; 1889 goto out;
1820 1890
1821 ret = offset; 1891 ret = offset;
1822 if (entry->bitmap) { 1892 if (entry->bitmap) {
1823 bitmap_clear_bits(block_group, entry, offset, bytes); 1893 bitmap_clear_bits(ctl, entry, offset, bytes);
1824 if (!entry->bytes) 1894 if (!entry->bytes)
1825 free_bitmap(block_group, entry); 1895 free_bitmap(ctl, entry);
1826 } else { 1896 } else {
1827 unlink_free_space(block_group, entry); 1897 unlink_free_space(ctl, entry);
1828 entry->offset += bytes; 1898 entry->offset += bytes;
1829 entry->bytes -= bytes; 1899 entry->bytes -= bytes;
1830 if (!entry->bytes) 1900 if (!entry->bytes)
1831 kmem_cache_free(btrfs_free_space_cachep, entry); 1901 kmem_cache_free(btrfs_free_space_cachep, entry);
1832 else 1902 else
1833 link_free_space(block_group, entry); 1903 link_free_space(ctl, entry);
1834 } 1904 }
1835 1905
1836out: 1906out:
1837 spin_unlock(&block_group->tree_lock); 1907 spin_unlock(&ctl->tree_lock);
1838 1908
1839 return ret; 1909 return ret;
1840} 1910}
@@ -1851,6 +1921,7 @@ int btrfs_return_cluster_to_free_space(
1851 struct btrfs_block_group_cache *block_group, 1921 struct btrfs_block_group_cache *block_group,
1852 struct btrfs_free_cluster *cluster) 1922 struct btrfs_free_cluster *cluster)
1853{ 1923{
1924 struct btrfs_free_space_ctl *ctl;
1854 int ret; 1925 int ret;
1855 1926
1856 /* first, get a safe pointer to the block group */ 1927 /* first, get a safe pointer to the block group */
@@ -1869,10 +1940,12 @@ int btrfs_return_cluster_to_free_space(
1869 atomic_inc(&block_group->count); 1940 atomic_inc(&block_group->count);
1870 spin_unlock(&cluster->lock); 1941 spin_unlock(&cluster->lock);
1871 1942
1943 ctl = block_group->free_space_ctl;
1944
1872 /* now return any extents the cluster had on it */ 1945 /* now return any extents the cluster had on it */
1873 spin_lock(&block_group->tree_lock); 1946 spin_lock(&ctl->tree_lock);
1874 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 1947 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1875 spin_unlock(&block_group->tree_lock); 1948 spin_unlock(&ctl->tree_lock);
1876 1949
1877 /* finally drop our ref */ 1950 /* finally drop our ref */
1878 btrfs_put_block_group(block_group); 1951 btrfs_put_block_group(block_group);
@@ -1884,6 +1957,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1884 struct btrfs_free_space *entry, 1957 struct btrfs_free_space *entry,
1885 u64 bytes, u64 min_start) 1958 u64 bytes, u64 min_start)
1886{ 1959{
1960 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1887 int err; 1961 int err;
1888 u64 search_start = cluster->window_start; 1962 u64 search_start = cluster->window_start;
1889 u64 search_bytes = bytes; 1963 u64 search_bytes = bytes;
@@ -1892,13 +1966,12 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1892 search_start = min_start; 1966 search_start = min_start;
1893 search_bytes = bytes; 1967 search_bytes = bytes;
1894 1968
1895 err = search_bitmap(block_group, entry, &search_start, 1969 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1896 &search_bytes);
1897 if (err) 1970 if (err)
1898 return 0; 1971 return 0;
1899 1972
1900 ret = search_start; 1973 ret = search_start;
1901 bitmap_clear_bits(block_group, entry, ret, bytes); 1974 bitmap_clear_bits(ctl, entry, ret, bytes);
1902 1975
1903 return ret; 1976 return ret;
1904} 1977}
@@ -1912,6 +1985,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1912 struct btrfs_free_cluster *cluster, u64 bytes, 1985 struct btrfs_free_cluster *cluster, u64 bytes,
1913 u64 min_start) 1986 u64 min_start)
1914{ 1987{
1988 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1915 struct btrfs_free_space *entry = NULL; 1989 struct btrfs_free_space *entry = NULL;
1916 struct rb_node *node; 1990 struct rb_node *node;
1917 u64 ret = 0; 1991 u64 ret = 0;
@@ -1931,8 +2005,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1931 while(1) { 2005 while(1) {
1932 if (entry->bytes < bytes || 2006 if (entry->bytes < bytes ||
1933 (!entry->bitmap && entry->offset < min_start)) { 2007 (!entry->bitmap && entry->offset < min_start)) {
1934 struct rb_node *node;
1935
1936 node = rb_next(&entry->offset_index); 2008 node = rb_next(&entry->offset_index);
1937 if (!node) 2009 if (!node)
1938 break; 2010 break;
@@ -1946,7 +2018,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1946 cluster, entry, bytes, 2018 cluster, entry, bytes,
1947 min_start); 2019 min_start);
1948 if (ret == 0) { 2020 if (ret == 0) {
1949 struct rb_node *node;
1950 node = rb_next(&entry->offset_index); 2021 node = rb_next(&entry->offset_index);
1951 if (!node) 2022 if (!node)
1952 break; 2023 break;
@@ -1972,20 +2043,20 @@ out:
1972 if (!ret) 2043 if (!ret)
1973 return 0; 2044 return 0;
1974 2045
1975 spin_lock(&block_group->tree_lock); 2046 spin_lock(&ctl->tree_lock);
1976 2047
1977 block_group->free_space -= bytes; 2048 ctl->free_space -= bytes;
1978 if (entry->bytes == 0) { 2049 if (entry->bytes == 0) {
1979 block_group->free_extents--; 2050 ctl->free_extents--;
1980 if (entry->bitmap) { 2051 if (entry->bitmap) {
1981 kfree(entry->bitmap); 2052 kfree(entry->bitmap);
1982 block_group->total_bitmaps--; 2053 ctl->total_bitmaps--;
1983 recalculate_thresholds(block_group); 2054 ctl->op->recalc_thresholds(ctl);
1984 } 2055 }
1985 kmem_cache_free(btrfs_free_space_cachep, entry); 2056 kmem_cache_free(btrfs_free_space_cachep, entry);
1986 } 2057 }
1987 2058
1988 spin_unlock(&block_group->tree_lock); 2059 spin_unlock(&ctl->tree_lock);
1989 2060
1990 return ret; 2061 return ret;
1991} 2062}
@@ -1995,6 +2066,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1995 struct btrfs_free_cluster *cluster, 2066 struct btrfs_free_cluster *cluster,
1996 u64 offset, u64 bytes, u64 min_bytes) 2067 u64 offset, u64 bytes, u64 min_bytes)
1997{ 2068{
2069 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1998 unsigned long next_zero; 2070 unsigned long next_zero;
1999 unsigned long i; 2071 unsigned long i;
2000 unsigned long search_bits; 2072 unsigned long search_bits;
@@ -2049,7 +2121,7 @@ again:
2049 2121
2050 cluster->window_start = start * block_group->sectorsize + 2122 cluster->window_start = start * block_group->sectorsize +
2051 entry->offset; 2123 entry->offset;
2052 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2124 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2053 ret = tree_insert_offset(&cluster->root, entry->offset, 2125 ret = tree_insert_offset(&cluster->root, entry->offset,
2054 &entry->offset_index, 1); 2126 &entry->offset_index, 1);
2055 BUG_ON(ret); 2127 BUG_ON(ret);
@@ -2064,6 +2136,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2064 struct btrfs_free_cluster *cluster, 2136 struct btrfs_free_cluster *cluster,
2065 u64 offset, u64 bytes, u64 min_bytes) 2137 u64 offset, u64 bytes, u64 min_bytes)
2066{ 2138{
2139 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2067 struct btrfs_free_space *first = NULL; 2140 struct btrfs_free_space *first = NULL;
2068 struct btrfs_free_space *entry = NULL; 2141 struct btrfs_free_space *entry = NULL;
2069 struct btrfs_free_space *prev = NULL; 2142 struct btrfs_free_space *prev = NULL;
@@ -2074,7 +2147,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2074 u64 max_extent; 2147 u64 max_extent;
2075 u64 max_gap = 128 * 1024; 2148 u64 max_gap = 128 * 1024;
2076 2149
2077 entry = tree_search_offset(block_group, offset, 0, 1); 2150 entry = tree_search_offset(ctl, offset, 0, 1);
2078 if (!entry) 2151 if (!entry)
2079 return -ENOSPC; 2152 return -ENOSPC;
2080 2153
@@ -2140,7 +2213,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2140 if (entry->bitmap) 2213 if (entry->bitmap)
2141 continue; 2214 continue;
2142 2215
2143 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2216 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2144 ret = tree_insert_offset(&cluster->root, entry->offset, 2217 ret = tree_insert_offset(&cluster->root, entry->offset,
2145 &entry->offset_index, 0); 2218 &entry->offset_index, 0);
2146 BUG_ON(ret); 2219 BUG_ON(ret);
@@ -2159,16 +2232,15 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2159 struct btrfs_free_cluster *cluster, 2232 struct btrfs_free_cluster *cluster,
2160 u64 offset, u64 bytes, u64 min_bytes) 2233 u64 offset, u64 bytes, u64 min_bytes)
2161{ 2234{
2235 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2162 struct btrfs_free_space *entry; 2236 struct btrfs_free_space *entry;
2163 struct rb_node *node; 2237 struct rb_node *node;
2164 int ret = -ENOSPC; 2238 int ret = -ENOSPC;
2165 2239
2166 if (block_group->total_bitmaps == 0) 2240 if (ctl->total_bitmaps == 0)
2167 return -ENOSPC; 2241 return -ENOSPC;
2168 2242
2169 entry = tree_search_offset(block_group, 2243 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2170 offset_to_bitmap(block_group, offset),
2171 0, 1);
2172 if (!entry) 2244 if (!entry)
2173 return -ENOSPC; 2245 return -ENOSPC;
2174 2246
@@ -2201,6 +2273,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2201 struct btrfs_free_cluster *cluster, 2273 struct btrfs_free_cluster *cluster,
2202 u64 offset, u64 bytes, u64 empty_size) 2274 u64 offset, u64 bytes, u64 empty_size)
2203{ 2275{
2276 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2204 u64 min_bytes; 2277 u64 min_bytes;
2205 int ret; 2278 int ret;
2206 2279
@@ -2220,14 +2293,14 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2220 } else 2293 } else
2221 min_bytes = max(bytes, (bytes + empty_size) >> 2); 2294 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2222 2295
2223 spin_lock(&block_group->tree_lock); 2296 spin_lock(&ctl->tree_lock);
2224 2297
2225 /* 2298 /*
2226 * If we know we don't have enough space to make a cluster don't even 2299 * If we know we don't have enough space to make a cluster don't even
2227 * bother doing all the work to try and find one. 2300 * bother doing all the work to try and find one.
2228 */ 2301 */
2229 if (block_group->free_space < min_bytes) { 2302 if (ctl->free_space < min_bytes) {
2230 spin_unlock(&block_group->tree_lock); 2303 spin_unlock(&ctl->tree_lock);
2231 return -ENOSPC; 2304 return -ENOSPC;
2232 } 2305 }
2233 2306
@@ -2253,7 +2326,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2253 } 2326 }
2254out: 2327out:
2255 spin_unlock(&cluster->lock); 2328 spin_unlock(&cluster->lock);
2256 spin_unlock(&block_group->tree_lock); 2329 spin_unlock(&ctl->tree_lock);
2257 2330
2258 return ret; 2331 return ret;
2259} 2332}
@@ -2274,6 +2347,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2274int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2347int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2275 u64 *trimmed, u64 start, u64 end, u64 minlen) 2348 u64 *trimmed, u64 start, u64 end, u64 minlen)
2276{ 2349{
2350 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2277 struct btrfs_free_space *entry = NULL; 2351 struct btrfs_free_space *entry = NULL;
2278 struct btrfs_fs_info *fs_info = block_group->fs_info; 2352 struct btrfs_fs_info *fs_info = block_group->fs_info;
2279 u64 bytes = 0; 2353 u64 bytes = 0;
@@ -2283,52 +2357,50 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2283 *trimmed = 0; 2357 *trimmed = 0;
2284 2358
2285 while (start < end) { 2359 while (start < end) {
2286 spin_lock(&block_group->tree_lock); 2360 spin_lock(&ctl->tree_lock);
2287 2361
2288 if (block_group->free_space < minlen) { 2362 if (ctl->free_space < minlen) {
2289 spin_unlock(&block_group->tree_lock); 2363 spin_unlock(&ctl->tree_lock);
2290 break; 2364 break;
2291 } 2365 }
2292 2366
2293 entry = tree_search_offset(block_group, start, 0, 1); 2367 entry = tree_search_offset(ctl, start, 0, 1);
2294 if (!entry) 2368 if (!entry)
2295 entry = tree_search_offset(block_group, 2369 entry = tree_search_offset(ctl,
2296 offset_to_bitmap(block_group, 2370 offset_to_bitmap(ctl, start),
2297 start),
2298 1, 1); 2371 1, 1);
2299 2372
2300 if (!entry || entry->offset >= end) { 2373 if (!entry || entry->offset >= end) {
2301 spin_unlock(&block_group->tree_lock); 2374 spin_unlock(&ctl->tree_lock);
2302 break; 2375 break;
2303 } 2376 }
2304 2377
2305 if (entry->bitmap) { 2378 if (entry->bitmap) {
2306 ret = search_bitmap(block_group, entry, &start, &bytes); 2379 ret = search_bitmap(ctl, entry, &start, &bytes);
2307 if (!ret) { 2380 if (!ret) {
2308 if (start >= end) { 2381 if (start >= end) {
2309 spin_unlock(&block_group->tree_lock); 2382 spin_unlock(&ctl->tree_lock);
2310 break; 2383 break;
2311 } 2384 }
2312 bytes = min(bytes, end - start); 2385 bytes = min(bytes, end - start);
2313 bitmap_clear_bits(block_group, entry, 2386 bitmap_clear_bits(ctl, entry, start, bytes);
2314 start, bytes);
2315 if (entry->bytes == 0) 2387 if (entry->bytes == 0)
2316 free_bitmap(block_group, entry); 2388 free_bitmap(ctl, entry);
2317 } else { 2389 } else {
2318 start = entry->offset + BITS_PER_BITMAP * 2390 start = entry->offset + BITS_PER_BITMAP *
2319 block_group->sectorsize; 2391 block_group->sectorsize;
2320 spin_unlock(&block_group->tree_lock); 2392 spin_unlock(&ctl->tree_lock);
2321 ret = 0; 2393 ret = 0;
2322 continue; 2394 continue;
2323 } 2395 }
2324 } else { 2396 } else {
2325 start = entry->offset; 2397 start = entry->offset;
2326 bytes = min(entry->bytes, end - start); 2398 bytes = min(entry->bytes, end - start);
2327 unlink_free_space(block_group, entry); 2399 unlink_free_space(ctl, entry);
2328 kmem_cache_free(btrfs_free_space_cachep, entry); 2400 kmem_cache_free(btrfs_free_space_cachep, entry);
2329 } 2401 }
2330 2402
2331 spin_unlock(&block_group->tree_lock); 2403 spin_unlock(&ctl->tree_lock);
2332 2404
2333 if (bytes >= minlen) { 2405 if (bytes >= minlen) {
2334 int update_ret; 2406 int update_ret;
@@ -2340,8 +2412,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2340 bytes, 2412 bytes,
2341 &actually_trimmed); 2413 &actually_trimmed);
2342 2414
2343 btrfs_add_free_space(block_group, 2415 btrfs_add_free_space(block_group, start, bytes);
2344 start, bytes);
2345 if (!update_ret) 2416 if (!update_ret)
2346 btrfs_update_reserved_bytes(block_group, 2417 btrfs_update_reserved_bytes(block_group,
2347 bytes, 0, 1); 2418 bytes, 0, 1);
@@ -2363,3 +2434,145 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2363 2434
2364 return ret; 2435 return ret;
2365} 2436}
2437
2438/*
2439 * Find the left-most item in the cache tree, and then return the
2440 * smallest inode number in the item.
2441 *
2442 * Note: the returned inode number may not be the smallest one in
2443 * the tree, if the left-most item is a bitmap.
2444 */
2445u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2446{
2447 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2448 struct btrfs_free_space *entry = NULL;
2449 u64 ino = 0;
2450
2451 spin_lock(&ctl->tree_lock);
2452
2453 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2454 goto out;
2455
2456 entry = rb_entry(rb_first(&ctl->free_space_offset),
2457 struct btrfs_free_space, offset_index);
2458
2459 if (!entry->bitmap) {
2460 ino = entry->offset;
2461
2462 unlink_free_space(ctl, entry);
2463 entry->offset++;
2464 entry->bytes--;
2465 if (!entry->bytes)
2466 kmem_cache_free(btrfs_free_space_cachep, entry);
2467 else
2468 link_free_space(ctl, entry);
2469 } else {
2470 u64 offset = 0;
2471 u64 count = 1;
2472 int ret;
2473
2474 ret = search_bitmap(ctl, entry, &offset, &count);
2475 BUG_ON(ret);
2476
2477 ino = offset;
2478 bitmap_clear_bits(ctl, entry, offset, 1);
2479 if (entry->bytes == 0)
2480 free_bitmap(ctl, entry);
2481 }
2482out:
2483 spin_unlock(&ctl->tree_lock);
2484
2485 return ino;
2486}
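
Because ctl->unit is 1 for the inode-number tree, taking the smallest ino from an extent entry above just means shaving one unit off its front. A toy model of that step, with a hypothetical ino_extent type standing in for btrfs_free_space:

#include <stdbool.h>
#include <stdio.h>

struct ino_extent {            /* hypothetical stand-in for btrfs_free_space */
        unsigned long long offset; /* first free inode number in the run */
        unsigned long long bytes;  /* number of free inodes in the run */
};

/* Take the smallest ino from the run; returns false once the run is empty. */
static bool take_front(struct ino_extent *e, unsigned long long *ino)
{
        if (!e->bytes)
                return false;
        *ino = e->offset;
        e->offset++;   /* the run now starts one ino later */
        e->bytes--;    /* and is one ino shorter */
        return true;
}

int main(void)
{
        struct ino_extent e = { .offset = 257, .bytes = 3 };
        unsigned long long ino;

        while (take_front(&e, &ino))
                printf("allocated ino %llu\n", ino);  /* 257, 258, 259 */
        return 0;
}
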
2487
2488struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2489 struct btrfs_path *path)
2490{
2491 struct inode *inode = NULL;
2492
2493 spin_lock(&root->cache_lock);
2494 if (root->cache_inode)
2495 inode = igrab(root->cache_inode);
2496 spin_unlock(&root->cache_lock);
2497 if (inode)
2498 return inode;
2499
2500 inode = __lookup_free_space_inode(root, path, 0);
2501 if (IS_ERR(inode))
2502 return inode;
2503
2504 spin_lock(&root->cache_lock);
2505 if (!root->fs_info->closing)
2506 root->cache_inode = igrab(inode);
2507 spin_unlock(&root->cache_lock);
2508
2509 return inode;
2510}
2511
2512int create_free_ino_inode(struct btrfs_root *root,
2513 struct btrfs_trans_handle *trans,
2514 struct btrfs_path *path)
2515{
2516 return __create_free_space_inode(root, trans, path,
2517 BTRFS_FREE_INO_OBJECTID, 0);
2518}
2519
2520int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2521{
2522 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2523 struct btrfs_path *path;
2524 struct inode *inode;
2525 int ret = 0;
2526 u64 root_gen = btrfs_root_generation(&root->root_item);
2527
2528 /*
2529	 * If we're unmounting, just return: this searches the normal root
2530	 * rather than the commit root, and we could deadlock.
2531 */
2532 smp_mb();
2533 if (fs_info->closing)
2534 return 0;
2535
2536 path = btrfs_alloc_path();
2537 if (!path)
2538 return 0;
2539
2540 inode = lookup_free_ino_inode(root, path);
2541 if (IS_ERR(inode))
2542 goto out;
2543
2544 if (root_gen != BTRFS_I(inode)->generation)
2545 goto out_put;
2546
2547 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2548
2549 if (ret < 0)
2550 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2551 "root %llu\n", root->root_key.objectid);
2552out_put:
2553 iput(inode);
2554out:
2555 btrfs_free_path(path);
2556 return ret;
2557}
2558
2559int btrfs_write_out_ino_cache(struct btrfs_root *root,
2560 struct btrfs_trans_handle *trans,
2561 struct btrfs_path *path)
2562{
2563 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2564 struct inode *inode;
2565 int ret;
2566
2567 inode = lookup_free_ino_inode(root, path);
2568 if (IS_ERR(inode))
2569 return 0;
2570
2571 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2572 if (ret < 0)
2573 printk(KERN_ERR "btrfs: failed to write free ino cache "
2574 "for root %llu\n", root->root_key.objectid);
2575
2576 iput(inode);
2577 return ret;
2578}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 65c3b935289f..8f2613f779ed 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,25 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct btrfs_free_space_ctl {
31 spinlock_t tree_lock;
32 struct rb_root free_space_offset;
33 u64 free_space;
34 int extents_thresh;
35 int free_extents;
36 int total_bitmaps;
37 int unit;
38 u64 start;
39 struct btrfs_free_space_op *op;
40 void *private;
41};
42
43struct btrfs_free_space_op {
44 void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
45 bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
46 struct btrfs_free_space *info);
47};
48
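The ctl/op split introduced here is what lets one rb-tree implementation serve both byte-granularity block groups (unit = sectorsize) and the inode-number caches (unit = 1): each user supplies its own threshold and bitmap policy through the op table. A hedged miniature of the pattern, with illustrative names that are not the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct policy {
        bool (*use_bitmap)(unsigned long long run_len);
};

static bool block_group_use_bitmap(unsigned long long run_len)
{
        return run_len < 1024;  /* small byte runs go to bitmaps */
}

static bool pinned_use_bitmap(unsigned long long run_len)
{
        return false;           /* the pinned ino tree never uses bitmaps */
}

static void add_run(const struct policy *p, unsigned long long len)
{
        printf("run of %llu -> %s\n", len,
               p->use_bitmap(len) ? "bitmap" : "extent");
}

int main(void)
{
        const struct policy bg  = { block_group_use_bitmap };
        const struct policy pin = { pinned_use_bitmap };

        add_run(&bg, 512);   /* bitmap */
        add_run(&pin, 512);  /* extent */
        return 0;
}
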
30struct inode *lookup_free_space_inode(struct btrfs_root *root, 49struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache 50 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path); 51 *block_group, struct btrfs_path *path);
@@ -45,17 +64,38 @@ int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group, 65 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path); 66 struct btrfs_path *path);
48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 67
49 u64 bytenr, u64 size); 68struct inode *lookup_free_ino_inode(struct btrfs_root *root,
69 struct btrfs_path *path);
70int create_free_ino_inode(struct btrfs_root *root,
71 struct btrfs_trans_handle *trans,
72 struct btrfs_path *path);
73int load_free_ino_cache(struct btrfs_fs_info *fs_info,
74 struct btrfs_root *root);
75int btrfs_write_out_ino_cache(struct btrfs_root *root,
76 struct btrfs_trans_handle *trans,
77 struct btrfs_path *path);
78
79void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
80int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
81 u64 bytenr, u64 size);
82static inline int
83btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
84 u64 bytenr, u64 size)
85{
86 return __btrfs_add_free_space(block_group->free_space_ctl,
87 bytenr, size);
88}
50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 89int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
51 u64 bytenr, u64 size); 90 u64 bytenr, u64 size);
91void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
52void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 92void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
53 *block_group); 93 *block_group);
54u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 94u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
55 u64 offset, u64 bytes, u64 empty_size); 95 u64 offset, u64 bytes, u64 empty_size);
96u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
56void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 97void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
57 u64 bytes); 98 u64 bytes);
58u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
59int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 99int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 100 struct btrfs_root *root,
61 struct btrfs_block_group_cache *block_group, 101 struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 64f1150bb48d..baa74f3db691 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -130,7 +130,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
130 item_size - (ptr + sub_item_len - item_start)); 130 item_size - (ptr + sub_item_len - item_start));
131 ret = btrfs_truncate_item(trans, root, path, 131 ret = btrfs_truncate_item(trans, root, path,
132 item_size - sub_item_len, 1); 132 item_size - sub_item_len, 1);
133 BUG_ON(ret);
134out: 133out:
135 btrfs_free_path(path); 134 btrfs_free_path(path);
136 return ret; 135 return ret;
@@ -167,7 +166,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
167 166
168 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 167 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
169 ret = btrfs_extend_item(trans, root, path, ins_len); 168 ret = btrfs_extend_item(trans, root, path, ins_len);
170 BUG_ON(ret);
171 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 169 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
172 struct btrfs_inode_ref); 170 struct btrfs_inode_ref);
173 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 171 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c05a08f4c411..3262cd17a12f 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -16,11 +16,446 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/delay.h>
20#include <linux/kthread.h>
21#include <linux/pagemap.h>
22
19#include "ctree.h" 23#include "ctree.h"
20#include "disk-io.h" 24#include "disk-io.h"
25#include "free-space-cache.h"
26#include "inode-map.h"
21#include "transaction.h" 27#include "transaction.h"
22 28
23int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) 29static int caching_kthread(void *data)
30{
31 struct btrfs_root *root = data;
32 struct btrfs_fs_info *fs_info = root->fs_info;
33 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
34 struct btrfs_key key;
35 struct btrfs_path *path;
36 struct extent_buffer *leaf;
37 u64 last = (u64)-1;
38 int slot;
39 int ret;
40
41 path = btrfs_alloc_path();
42 if (!path)
43 return -ENOMEM;
44
45 /* Since the commit root is read-only, we can safely skip locking. */
46 path->skip_locking = 1;
47 path->search_commit_root = 1;
48 path->reada = 2;
49
50 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
51 key.offset = 0;
52 key.type = BTRFS_INODE_ITEM_KEY;
53again:
54 /* need to make sure the commit_root doesn't disappear */
55 mutex_lock(&root->fs_commit_mutex);
56
57 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
58 if (ret < 0)
59 goto out;
60
61 while (1) {
62 smp_mb();
63 if (fs_info->closing)
64 goto out;
65
66 leaf = path->nodes[0];
67 slot = path->slots[0];
68 if (slot >= btrfs_header_nritems(leaf)) {
69 ret = btrfs_next_leaf(root, path);
70 if (ret < 0)
71 goto out;
72 else if (ret > 0)
73 break;
74
75 if (need_resched() ||
76 btrfs_transaction_in_commit(fs_info)) {
77 leaf = path->nodes[0];
78
79 if (btrfs_header_nritems(leaf) == 0) {
80 WARN_ON(1);
81 break;
82 }
83
84 /*
 85			 * Save the key so we can advance forward
86 * in the next search.
87 */
88 btrfs_item_key_to_cpu(leaf, &key, 0);
89 btrfs_release_path(path);
90 root->cache_progress = last;
91 mutex_unlock(&root->fs_commit_mutex);
92 schedule_timeout(1);
93 goto again;
94 } else
95 continue;
96 }
97
98 btrfs_item_key_to_cpu(leaf, &key, slot);
99
100 if (key.type != BTRFS_INODE_ITEM_KEY)
101 goto next;
102
103 if (key.objectid >= root->highest_objectid)
104 break;
105
106 if (last != (u64)-1 && last + 1 != key.objectid) {
107 __btrfs_add_free_space(ctl, last + 1,
108 key.objectid - last - 1);
109 wake_up(&root->cache_wait);
110 }
111
112 last = key.objectid;
113next:
114 path->slots[0]++;
115 }
116
117 if (last < root->highest_objectid - 1) {
118 __btrfs_add_free_space(ctl, last + 1,
119 root->highest_objectid - last - 1);
120 }
121
122 spin_lock(&root->cache_lock);
123 root->cached = BTRFS_CACHE_FINISHED;
124 spin_unlock(&root->cache_lock);
125
126 root->cache_progress = (u64)-1;
127 btrfs_unpin_free_ino(root);
128out:
129 wake_up(&root->cache_wait);
130 mutex_unlock(&root->fs_commit_mutex);
131
132 btrfs_free_path(path);
133
134 return ret;
135}
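
The kthread's free-space detection reduces to gap-finding over the sorted inode items: two consecutive objectids `last` and `key.objectid` leave the run [last + 1, key.objectid - 1] free. A self-contained sketch of that loop over made-up inode numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long inos[] = { 256, 257, 260, 264 };
        unsigned long long last = (unsigned long long)-1;

        for (int i = 0; i < 4; i++) {
                unsigned long long cur = inos[i];
                /* mirrors __btrfs_add_free_space(ctl, last + 1,
                 * key.objectid - last - 1) in the kthread */
                if (last != (unsigned long long)-1 && last + 1 != cur)
                        printf("free run: %llu..%llu (%llu inos)\n",
                               last + 1, cur - 1, cur - last - 1);
                last = cur;
        }
        return 0;
}
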
136
137static void start_caching(struct btrfs_root *root)
138{
139 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
140 struct task_struct *tsk;
141 int ret;
142 u64 objectid;
143
144 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock);
147 return;
148 }
149
150 root->cached = BTRFS_CACHE_STARTED;
151 spin_unlock(&root->cache_lock);
152
153 ret = load_free_ino_cache(root->fs_info, root);
154 if (ret == 1) {
155 spin_lock(&root->cache_lock);
156 root->cached = BTRFS_CACHE_FINISHED;
157 spin_unlock(&root->cache_lock);
158 return;
159 }
160
161 /*
162 * It can be quite time-consuming to fill the cache by searching
 163	 * through the extent tree, and this can keep the ino allocation
 164	 * path waiting. Therefore, at start we quickly find out the highest
 165	 * inode number, since we know we can use any inode number that
 166	 * falls in [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
167 */
168 ret = btrfs_find_free_objectid(root, &objectid);
169 if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
170 __btrfs_add_free_space(ctl, objectid,
171 BTRFS_LAST_FREE_OBJECTID - objectid + 1);
172 }
173
174 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
175 root->root_key.objectid);
176 BUG_ON(IS_ERR(tsk));
177}
178
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{
181again:
182 *objectid = btrfs_find_ino_for_alloc(root);
183
184 if (*objectid != 0)
185 return 0;
186
187 start_caching(root);
188
189 wait_event(root->cache_wait,
190 root->cached == BTRFS_CACHE_FINISHED ||
191 root->free_ino_ctl->free_space > 0);
192
193 if (root->cached == BTRFS_CACHE_FINISHED &&
194 root->free_ino_ctl->free_space == 0)
195 return -ENOSPC;
196 else
197 goto again;
198}
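
The wait_event() above blocks until the cache either produces a free number or finishes empty, in which case -ENOSPC is final. A rough userspace model of that rendezvous using pthreads (compile with -lpthread; all names and timings are illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_cv = PTHREAD_COND_INITIALIZER;
static bool finished;
static int  avail;

static void *cacher(void *arg)     /* plays the caching kthread */
{
        (void)arg;
        usleep(1000);
        pthread_mutex_lock(&lock);
        avail = 5;                 /* found a run of five free inos */
        finished = true;
        pthread_cond_broadcast(&wait_cv);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, cacher, NULL);

        pthread_mutex_lock(&lock);
        while (!finished && avail == 0)     /* the wait_event() condition */
                pthread_cond_wait(&wait_cv, &lock);
        if (avail)
                printf("got an ino, %d left in cache\n", --avail);
        else
                printf("cache finished empty: ENOSPC\n");
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
}
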
199
200void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
204again:
205 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1);
207 } else {
208 /*
 209		 * If we are in the process of caching free ino chunks,
 210		 * then to avoid adding the same inode number to the
 211		 * free_ino tree twice across transactions, we leave it
 212		 * in the pinned tree until a transaction is committed
 213		 * or the caching work is done.
214 */
215
216 mutex_lock(&root->fs_commit_mutex);
217 spin_lock(&root->cache_lock);
218 if (root->cached == BTRFS_CACHE_FINISHED) {
219 spin_unlock(&root->cache_lock);
220 mutex_unlock(&root->fs_commit_mutex);
221 goto again;
222 }
223 spin_unlock(&root->cache_lock);
224
225 start_caching(root);
226
227 if (objectid <= root->cache_progress ||
228 objectid > root->highest_objectid)
229 __btrfs_add_free_space(ctl, objectid, 1);
230 else
231 __btrfs_add_free_space(pinned, objectid, 1);
232
233 mutex_unlock(&root->fs_commit_mutex);
234 }
235}
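
The routing test above is the heart of the pinned tree: an objectid at or below cache_progress has already been scanned (the kthread cannot re-add it), and one above highest_objectid will never be scanned, so both are safe to free immediately; everything in between must wait. A small sketch of that decision with made-up values:

#include <stdbool.h>
#include <stdio.h>

static bool goes_to_free_tree(unsigned long long objectid,
                              unsigned long long cache_progress,
                              unsigned long long highest_objectid)
{
        /* Already scanned past it, or the scanner will never reach it:
         * either way it cannot be re-added, so free it right away. */
        return objectid <= cache_progress || objectid > highest_objectid;
}

int main(void)
{
        printf("%d %d\n",
               goes_to_free_tree(50, 100, 1000),   /* 1: free tree */
               goes_to_free_tree(500, 100, 1000)); /* 0: pinned tree */
        return 0;
}
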
236
237/*
238 * When a transaction is committed, we move the inode numbers that are
239 * smaller than root->cache_progress from the pinned tree to the
240 * free_ino tree, and drop the rest, because the commit root we were
241 * searching has changed.
242 *
243 * Must be called with root->fs_commit_mutex held
244 */
245void btrfs_unpin_free_ino(struct btrfs_root *root)
246{
247 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
248 struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
249 struct btrfs_free_space *info;
250 struct rb_node *n;
251 u64 count;
252
253 while (1) {
254 n = rb_first(rbroot);
255 if (!n)
256 break;
257
258 info = rb_entry(n, struct btrfs_free_space, offset_index);
259 BUG_ON(info->bitmap);
260
261 if (info->offset > root->cache_progress)
262 goto free;
263 else if (info->offset + info->bytes > root->cache_progress)
264 count = root->cache_progress - info->offset + 1;
265 else
266 count = info->bytes;
267
268 __btrfs_add_free_space(ctl, info->offset, count);
269free:
270 rb_erase(&info->offset_index, rbroot);
271 kfree(info);
272 }
273}
274
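The count arithmetic in the loop above trims a pinned run to the part the scanner has already passed. A worked example, assuming a run of inos 95..104 with cache_progress at 100:

#include <stdio.h>

int main(void)
{
        unsigned long long progress = 100;          /* root->cache_progress */
        unsigned long long offset = 95, bytes = 10; /* pinned run: 95..104 */
        unsigned long long count;

        if (offset > progress)
                count = 0;                          /* dropped entirely */
        else if (offset + bytes > progress)
                count = progress - offset + 1;      /* keep 95..100 -> 6 inos */
        else
                count = bytes;                      /* keep the whole run */

        printf("unpinned %llu of %llu inos\n", count, bytes);
        return 0;
}
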
275#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
276#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
277
278/*
279 * The goal is to keep the memory used by the free_ino tree from
280 * exceeding what we would use if we tracked it with bitmaps only.
281 */
282static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
283{
284 struct btrfs_free_space *info;
285 struct rb_node *n;
286 int max_ino;
287 int max_bitmaps;
288
289 n = rb_last(&ctl->free_space_offset);
290 if (!n) {
291 ctl->extents_thresh = INIT_THRESHOLD;
292 return;
293 }
294 info = rb_entry(n, struct btrfs_free_space, offset_index);
295
296 /*
297 * Find the maximum inode number in the filesystem. Note we
298	 * ignore the fact that this can be a bitmap, because we are
299	 * not doing a precise calculation.
300 */
301 max_ino = info->bytes - 1;
302
303 max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
304 if (max_bitmaps <= ctl->total_bitmaps) {
305 ctl->extents_thresh = 0;
306 return;
307 }
308
309 ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
310 PAGE_CACHE_SIZE / sizeof(*info);
311}
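
Plugging sample numbers into the formula above makes the sizing concrete. A sketch assuming 4K pages (so INODES_PER_BITMAP = 32768) and a 64-byte btrfs_free_space entry; both are assumptions, not fixed by this code:

#include <stdio.h>

int main(void)
{
        unsigned long page = 4096;
        unsigned long inodes_per_bitmap = page * 8;        /* 32768 */
        unsigned long entry = 64;                          /* sizeof(*info) */
        unsigned long long max_ino = 1000000;              /* highest ino seen */
        unsigned long total_bitmaps = 2;                   /* already in use */

        /* ALIGN(max_ino, inodes_per_bitmap) / inodes_per_bitmap */
        unsigned long max_bitmaps =
                (max_ino + inodes_per_bitmap - 1) / inodes_per_bitmap; /* 31 */

        unsigned long thresh = (max_bitmaps - total_bitmaps) * page / entry;
        printf("extents_thresh = %lu\n", thresh);   /* (31 - 2) * 64 = 1856 */
        return 0;
}
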
312
313/*
314 * We don't fall back to bitmaps if we are below the extents threshold
315 * or if this chunk of inode numbers is a big one.
316 */
317static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
318 struct btrfs_free_space *info)
319{
320 if (ctl->free_extents < ctl->extents_thresh ||
321 info->bytes > INODES_PER_BITMAP / 10)
322 return false;
323
324 return true;
325}
326
327static struct btrfs_free_space_op free_ino_op = {
328 .recalc_thresholds = recalculate_thresholds,
329 .use_bitmap = use_bitmap,
330};
331
332static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
333{
334}
335
336static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
337 struct btrfs_free_space *info)
338{
339 /*
340 * We always use extents for two reasons:
341 *
342	 * - The pinned tree is only used while the caching work is in
343	 *   progress.
344	 * - It keeps the code simpler. See btrfs_unpin_free_ino().
345 */
346 return false;
347}
348
349static struct btrfs_free_space_op pinned_free_ino_op = {
350 .recalc_thresholds = pinned_recalc_thresholds,
351 .use_bitmap = pinned_use_bitmap,
352};
353
354void btrfs_init_free_ino_ctl(struct btrfs_root *root)
355{
356 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
357 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
358
359 spin_lock_init(&ctl->tree_lock);
360 ctl->unit = 1;
361 ctl->start = 0;
362 ctl->private = NULL;
363 ctl->op = &free_ino_op;
364
365 /*
366	 * Initially we allow the use of 16K of ram to cache chunks of
367	 * inode numbers before we resort to bitmaps. This is somewhat
368	 * arbitrary, but it will be adjusted at runtime.
369 */
370 ctl->extents_thresh = INIT_THRESHOLD;
371
372 spin_lock_init(&pinned->tree_lock);
373 pinned->unit = 1;
374 pinned->start = 0;
375 pinned->private = NULL;
376 pinned->extents_thresh = 0;
377 pinned->op = &pinned_free_ino_op;
378}
379
380int btrfs_save_ino_cache(struct btrfs_root *root,
381 struct btrfs_trans_handle *trans)
382{
383 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
384 struct btrfs_path *path;
385 struct inode *inode;
386 u64 alloc_hint = 0;
387 int ret;
388 int prealloc;
389 bool retry = false;
390
391 path = btrfs_alloc_path();
392 if (!path)
393 return -ENOMEM;
394again:
395 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
397 ret = PTR_ERR(inode);
398 goto out;
399 }
400
401 if (IS_ERR(inode)) {
402 BUG_ON(retry);
403 retry = true;
404
405 ret = create_free_ino_inode(root, trans, path);
406 if (ret)
407 goto out;
408 goto again;
409 }
410
411 BTRFS_I(inode)->generation = 0;
412 ret = btrfs_update_inode(trans, root, inode);
413 WARN_ON(ret);
414
415 if (i_size_read(inode) > 0) {
416 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
417 if (ret)
418 goto out_put;
419 }
420
421 spin_lock(&root->cache_lock);
422 if (root->cached != BTRFS_CACHE_FINISHED) {
423 ret = -1;
424 spin_unlock(&root->cache_lock);
425 goto out_put;
426 }
427 spin_unlock(&root->cache_lock);
428
429 spin_lock(&ctl->tree_lock);
430 prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
431 prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
432 prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
433 spin_unlock(&ctl->tree_lock);
434
435 /* Just to make sure we have enough space */
436 prealloc += 8 * PAGE_CACHE_SIZE;
437
438 ret = btrfs_check_data_free_space(inode, prealloc);
439 if (ret)
440 goto out_put;
441
442 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
443 prealloc, prealloc, &alloc_hint);
444 if (ret)
445 goto out_put;
446 btrfs_free_reserved_data_space(inode, prealloc);
447
448out_put:
449 iput(inode);
450out:
451 if (ret == 0)
452 ret = btrfs_write_out_ino_cache(root, trans, path);
453
454 btrfs_free_path(path);
455 return ret;
456}
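
The prealloc sizing above budgets one page-aligned slab for extent entries, one page per bitmap, plus eight pages of slack. A worked example under the same assumed 4K page and 64-byte entry sizes:

#include <stdio.h>

/* Rough model of the preallocation sizing in btrfs_save_ino_cache();
 * the entry size and counts are made-up illustrative values. */
int main(void)
{
        unsigned long page = 4096, entry = 64;
        unsigned long free_extents = 100, total_bitmaps = 2;

        unsigned long prealloc = free_extents * entry;      /* 6400 */
        prealloc = (prealloc + page - 1) & ~(page - 1);     /* ALIGN -> 8192 */
        prealloc += total_bitmaps * page;                   /* +8192 -> 16384 */
        prealloc += 8 * page;                               /* slack -> 49152 */

        printf("prealloc = %lu bytes\n", prealloc);
        return 0;
}
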
457
458static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
24{ 459{
25 struct btrfs_path *path; 460 struct btrfs_path *path;
26 int ret; 461 int ret;
@@ -55,15 +490,14 @@ error:
55 return ret; 490 return ret;
56} 491}
57 492
58int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 493int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
59 struct btrfs_root *root,
60 u64 dirid, u64 *objectid)
61{ 494{
62 int ret; 495 int ret;
63 mutex_lock(&root->objectid_mutex); 496 mutex_lock(&root->objectid_mutex);
64 497
65 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { 498 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
66 ret = btrfs_find_highest_inode(root, &root->highest_objectid); 499 ret = btrfs_find_highest_objectid(root,
500 &root->highest_objectid);
67 if (ret) 501 if (ret)
68 goto out; 502 goto out;
69 } 503 }
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h
new file mode 100644
index 000000000000..ddb347bfee23
--- /dev/null
+++ b/fs/btrfs/inode-map.h
@@ -0,0 +1,13 @@
1#ifndef __BTRFS_INODE_MAP
2#define __BTRFS_INODE_MAP
3
4void btrfs_init_free_ino_ctl(struct btrfs_root *root);
5void btrfs_unpin_free_ino(struct btrfs_root *root);
6void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
7int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
8int btrfs_save_ino_cache(struct btrfs_root *root,
9 struct btrfs_trans_handle *trans);
10
11int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
12
13#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dc8fb2b3a145..a83e44bf3206 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h>
40#include "compat.h" 41#include "compat.h"
41#include "ctree.h" 42#include "ctree.h"
42#include "disk-io.h" 43#include "disk-io.h"
@@ -51,6 +52,7 @@
51#include "compression.h" 52#include "compression.h"
52#include "locking.h" 53#include "locking.h"
53#include "free-space-cache.h" 54#include "free-space-cache.h"
55#include "inode-map.h"
54 56
55struct btrfs_iget_args { 57struct btrfs_iget_args {
56 u64 ino; 58 u64 ino;
@@ -137,7 +139,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
137 139
138 path->leave_spinning = 1; 140 path->leave_spinning = 1;
139 141
140 key.objectid = inode->i_ino; 142 key.objectid = btrfs_ino(inode);
141 key.offset = start; 143 key.offset = start;
142 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 144 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
143 datasize = btrfs_file_extent_calc_inline_size(cur_size); 145 datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -339,6 +341,10 @@ static noinline int compress_file_range(struct inode *inode,
339 int will_compress; 341 int will_compress;
340 int compress_type = root->fs_info->compress_type; 342 int compress_type = root->fs_info->compress_type;
341 343
344 /* if this is a small write inside eof, kick off a defragbot */
345 if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
346 btrfs_add_inode_defrag(NULL, inode);
347
342 actual_end = min_t(u64, isize, end + 1); 348 actual_end = min_t(u64, isize, end + 1);
343again: 349again:
344 will_compress = 0; 350 will_compress = 0;
@@ -648,7 +654,7 @@ retry:
648 async_extent->start + 654 async_extent->start +
649 async_extent->ram_size - 1, 0); 655 async_extent->ram_size - 1, 0);
650 656
651 em = alloc_extent_map(GFP_NOFS); 657 em = alloc_extent_map();
652 BUG_ON(!em); 658 BUG_ON(!em);
653 em->start = async_extent->start; 659 em->start = async_extent->start;
654 em->len = async_extent->ram_size; 660 em->len = async_extent->ram_size;
@@ -744,6 +750,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
744 return alloc_hint; 750 return alloc_hint;
745} 751}
746 752
753static inline bool is_free_space_inode(struct btrfs_root *root,
754 struct inode *inode)
755{
756 if (root == root->fs_info->tree_root ||
757 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
758 return true;
759 return false;
760}
761
747/* 762/*
748 * when extent_io.c finds a delayed allocation range in the file, 763 * when extent_io.c finds a delayed allocation range in the file,
749 * the call backs end up in this code. The basic idea is to 764 * the call backs end up in this code. The basic idea is to
@@ -776,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
776 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
777 int ret = 0; 792 int ret = 0;
778 793
779 BUG_ON(root == root->fs_info->tree_root); 794 BUG_ON(is_free_space_inode(root, inode));
780 trans = btrfs_join_transaction(root); 795 trans = btrfs_join_transaction(root);
781 BUG_ON(IS_ERR(trans)); 796 BUG_ON(IS_ERR(trans));
782 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 797 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -786,6 +801,10 @@ static noinline int cow_file_range(struct inode *inode,
786 disk_num_bytes = num_bytes; 801 disk_num_bytes = num_bytes;
787 ret = 0; 802 ret = 0;
788 803
804 /* if this is a small write inside eof, kick off defrag */
805 if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024)
806 btrfs_add_inode_defrag(trans, inode);
807
789 if (start == 0) { 808 if (start == 0) {
790 /* lets try to make an inline extent */ 809 /* lets try to make an inline extent */
791 ret = cow_file_range_inline(trans, root, inode, 810 ret = cow_file_range_inline(trans, root, inode,
@@ -824,7 +843,7 @@ static noinline int cow_file_range(struct inode *inode,
824 (u64)-1, &ins, 1); 843 (u64)-1, &ins, 1);
825 BUG_ON(ret); 844 BUG_ON(ret);
826 845
827 em = alloc_extent_map(GFP_NOFS); 846 em = alloc_extent_map();
828 BUG_ON(!em); 847 BUG_ON(!em);
829 em->start = start; 848 em->start = start;
830 em->orig_start = em->start; 849 em->orig_start = em->start;
@@ -1006,7 +1025,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
1006 LIST_HEAD(list); 1025 LIST_HEAD(list);
1007 1026
1008 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, 1027 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1009 bytenr + num_bytes - 1, &list); 1028 bytenr + num_bytes - 1, &list, 0);
1010 if (ret == 0 && list_empty(&list)) 1029 if (ret == 0 && list_empty(&list))
1011 return 0; 1030 return 0;
1012 1031
@@ -1047,30 +1066,33 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1047 int type; 1066 int type;
1048 int nocow; 1067 int nocow;
1049 int check_prev = 1; 1068 int check_prev = 1;
1050 bool nolock = false; 1069 bool nolock;
1070 u64 ino = btrfs_ino(inode);
1051 1071
1052 path = btrfs_alloc_path(); 1072 path = btrfs_alloc_path();
1053 BUG_ON(!path); 1073 BUG_ON(!path);
1054 if (root == root->fs_info->tree_root) { 1074
1055 nolock = true; 1075 nolock = is_free_space_inode(root, inode);
1076
1077 if (nolock)
1056 trans = btrfs_join_transaction_nolock(root); 1078 trans = btrfs_join_transaction_nolock(root);
1057 } else { 1079 else
1058 trans = btrfs_join_transaction(root); 1080 trans = btrfs_join_transaction(root);
1059 } 1081
1060 BUG_ON(IS_ERR(trans)); 1082 BUG_ON(IS_ERR(trans));
1061 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1083 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1062 1084
1063 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1064 cur_offset = start; 1086 cur_offset = start;
1065 while (1) { 1087 while (1) {
1066 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 1088 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1067 cur_offset, 0); 1089 cur_offset, 0);
1068 BUG_ON(ret < 0); 1090 BUG_ON(ret < 0);
1069 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1091 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1070 leaf = path->nodes[0]; 1092 leaf = path->nodes[0];
1071 btrfs_item_key_to_cpu(leaf, &found_key, 1093 btrfs_item_key_to_cpu(leaf, &found_key,
1072 path->slots[0] - 1); 1094 path->slots[0] - 1);
1073 if (found_key.objectid == inode->i_ino && 1095 if (found_key.objectid == ino &&
1074 found_key.type == BTRFS_EXTENT_DATA_KEY) 1096 found_key.type == BTRFS_EXTENT_DATA_KEY)
1075 path->slots[0]--; 1097 path->slots[0]--;
1076 } 1098 }
@@ -1091,7 +1113,7 @@ next_slot:
1091 num_bytes = 0; 1113 num_bytes = 0;
1092 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1114 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1093 1115
1094 if (found_key.objectid > inode->i_ino || 1116 if (found_key.objectid > ino ||
1095 found_key.type > BTRFS_EXTENT_DATA_KEY || 1117 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1096 found_key.offset > end) 1118 found_key.offset > end)
1097 break; 1119 break;
@@ -1126,7 +1148,7 @@ next_slot:
1126 goto out_check; 1148 goto out_check;
1127 if (btrfs_extent_readonly(root, disk_bytenr)) 1149 if (btrfs_extent_readonly(root, disk_bytenr))
1128 goto out_check; 1150 goto out_check;
1129 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1151 if (btrfs_cross_ref_exist(trans, root, ino,
1130 found_key.offset - 1152 found_key.offset -
1131 extent_offset, disk_bytenr)) 1153 extent_offset, disk_bytenr))
1132 goto out_check; 1154 goto out_check;
@@ -1163,7 +1185,7 @@ out_check:
1163 goto next_slot; 1185 goto next_slot;
1164 } 1186 }
1165 1187
1166 btrfs_release_path(root, path); 1188 btrfs_release_path(path);
1167 if (cow_start != (u64)-1) { 1189 if (cow_start != (u64)-1) {
1168 ret = cow_file_range(inode, locked_page, cow_start, 1190 ret = cow_file_range(inode, locked_page, cow_start,
1169 found_key.offset - 1, page_started, 1191 found_key.offset - 1, page_started,
@@ -1176,7 +1198,7 @@ out_check:
1176 struct extent_map *em; 1198 struct extent_map *em;
1177 struct extent_map_tree *em_tree; 1199 struct extent_map_tree *em_tree;
1178 em_tree = &BTRFS_I(inode)->extent_tree; 1200 em_tree = &BTRFS_I(inode)->extent_tree;
1179 em = alloc_extent_map(GFP_NOFS); 1201 em = alloc_extent_map();
1180 BUG_ON(!em); 1202 BUG_ON(!em);
1181 em->start = cur_offset; 1203 em->start = cur_offset;
1182 em->orig_start = em->start; 1204 em->orig_start = em->start;
@@ -1221,7 +1243,7 @@ out_check:
1221 if (cur_offset > end) 1243 if (cur_offset > end)
1222 break; 1244 break;
1223 } 1245 }
1224 btrfs_release_path(root, path); 1246 btrfs_release_path(path);
1225 1247
1226 if (cur_offset <= end && cow_start == (u64)-1) 1248 if (cur_offset <= end && cow_start == (u64)-1)
1227 cow_start = cur_offset; 1249 cow_start = cur_offset;
@@ -1309,14 +1331,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1309 1331
1310 /* 1332 /*
1311 * set_bit and clear bit hooks normally require _irqsave/restore 1333 * set_bit and clear bit hooks normally require _irqsave/restore
1312 * but in this case, we are only testeing for the DELALLOC 1334 * but in this case, we are only testing for the DELALLOC
1313 * bit, which is only set or cleared with irqs on 1335 * bit, which is only set or cleared with irqs on
1314 */ 1336 */
1315 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1337 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1316 struct btrfs_root *root = BTRFS_I(inode)->root; 1338 struct btrfs_root *root = BTRFS_I(inode)->root;
1317 u64 len = state->end + 1 - state->start; 1339 u64 len = state->end + 1 - state->start;
1318 int do_list = (root->root_key.objectid != 1340 bool do_list = !is_free_space_inode(root, inode);
1319 BTRFS_ROOT_TREE_OBJECTID);
1320 1341
1321 if (*bits & EXTENT_FIRST_DELALLOC) 1342 if (*bits & EXTENT_FIRST_DELALLOC)
1322 *bits &= ~EXTENT_FIRST_DELALLOC; 1343 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1343,14 +1364,13 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1343{ 1364{
1344 /* 1365 /*
1345 * set_bit and clear bit hooks normally require _irqsave/restore 1366 * set_bit and clear bit hooks normally require _irqsave/restore
1346 * but in this case, we are only testeing for the DELALLOC 1367 * but in this case, we are only testing for the DELALLOC
1347 * bit, which is only set or cleared with irqs on 1368 * bit, which is only set or cleared with irqs on
1348 */ 1369 */
1349 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1370 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1350 struct btrfs_root *root = BTRFS_I(inode)->root; 1371 struct btrfs_root *root = BTRFS_I(inode)->root;
1351 u64 len = state->end + 1 - state->start; 1372 u64 len = state->end + 1 - state->start;
1352 int do_list = (root->root_key.objectid != 1373 bool do_list = !is_free_space_inode(root, inode);
1353 BTRFS_ROOT_TREE_OBJECTID);
1354 1374
1355 if (*bits & EXTENT_FIRST_DELALLOC) 1375 if (*bits & EXTENT_FIRST_DELALLOC)
1356 *bits &= ~EXTENT_FIRST_DELALLOC; 1376 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1457,7 +1477,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1457 1477
1458 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1478 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1459 1479
1460 if (root == root->fs_info->tree_root) 1480 if (is_free_space_inode(root, inode))
1461 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1481 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1462 else 1482 else
1463 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1483 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1641,7 +1661,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1641 &hint, 0); 1661 &hint, 0);
1642 BUG_ON(ret); 1662 BUG_ON(ret);
1643 1663
1644 ins.objectid = inode->i_ino; 1664 ins.objectid = btrfs_ino(inode);
1645 ins.offset = file_pos; 1665 ins.offset = file_pos;
1646 ins.type = BTRFS_EXTENT_DATA_KEY; 1666 ins.type = BTRFS_EXTENT_DATA_KEY;
1647 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1667 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
@@ -1672,7 +1692,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1672 ins.type = BTRFS_EXTENT_ITEM_KEY; 1692 ins.type = BTRFS_EXTENT_ITEM_KEY;
1673 ret = btrfs_alloc_reserved_file_extent(trans, root, 1693 ret = btrfs_alloc_reserved_file_extent(trans, root,
1674 root->root_key.objectid, 1694 root->root_key.objectid,
1675 inode->i_ino, file_pos, &ins); 1695 btrfs_ino(inode), file_pos, &ins);
1676 BUG_ON(ret); 1696 BUG_ON(ret);
1677 btrfs_free_path(path); 1697 btrfs_free_path(path);
1678 1698
@@ -1698,7 +1718,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1698 struct extent_state *cached_state = NULL; 1718 struct extent_state *cached_state = NULL;
1699 int compress_type = 0; 1719 int compress_type = 0;
1700 int ret; 1720 int ret;
1701 bool nolock = false; 1721 bool nolock;
1702 1722
1703 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1723 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1704 end - start + 1); 1724 end - start + 1);
@@ -1706,7 +1726,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1706 return 0; 1726 return 0;
1707 BUG_ON(!ordered_extent); 1727 BUG_ON(!ordered_extent);
1708 1728
1709 nolock = (root == root->fs_info->tree_root); 1729 nolock = is_free_space_inode(root, inode);
1710 1730
1711 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1731 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1712 BUG_ON(!list_empty(&ordered_extent->list)); 1732 BUG_ON(!list_empty(&ordered_extent->list));
@@ -1850,7 +1870,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1850 } 1870 }
1851 read_unlock(&em_tree->lock); 1871 read_unlock(&em_tree->lock);
1852 1872
1853 if (!em || IS_ERR(em)) { 1873 if (IS_ERR_OR_NULL(em)) {
1854 kfree(failrec); 1874 kfree(failrec);
1855 return -EIO; 1875 return -EIO;
1856 } 1876 }
@@ -1999,12 +2019,11 @@ good:
1999 return 0; 2019 return 0;
2000 2020
2001zeroit: 2021zeroit:
2002 if (printk_ratelimit()) { 2022 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
2003 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 2023 "private %llu\n",
2004 "private %llu\n", page->mapping->host->i_ino, 2024 (unsigned long long)btrfs_ino(page->mapping->host),
2005 (unsigned long long)start, csum, 2025 (unsigned long long)start, csum,
2006 (unsigned long long)private); 2026 (unsigned long long)private);
2007 }
2008 memset(kaddr + offset, 1, end - start + 1); 2027 memset(kaddr + offset, 1, end - start + 1);
2009 flush_dcache_page(page); 2028 flush_dcache_page(page);
2010 kunmap_atomic(kaddr, KM_USER0); 2029 kunmap_atomic(kaddr, KM_USER0);
@@ -2239,7 +2258,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2239 2258
2240 /* insert an orphan item to track this unlinked/truncated file */ 2259 /* insert an orphan item to track this unlinked/truncated file */
2241 if (insert >= 1) { 2260 if (insert >= 1) {
2242 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2261 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2243 BUG_ON(ret); 2262 BUG_ON(ret);
2244 } 2263 }
2245 2264
@@ -2276,7 +2295,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2276 spin_unlock(&root->orphan_lock); 2295 spin_unlock(&root->orphan_lock);
2277 2296
2278 if (trans && delete_item) { 2297 if (trans && delete_item) {
2279 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2298 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2280 BUG_ON(ret); 2299 BUG_ON(ret);
2281 } 2300 }
2282 2301
@@ -2341,7 +2360,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2341 break; 2360 break;
2342 2361
2343 /* release the path since we're done with it */ 2362 /* release the path since we're done with it */
2344 btrfs_release_path(root, path); 2363 btrfs_release_path(path);
2345 2364
2346 /* 2365 /*
2347 * this is where we are basically btrfs_lookup, without the 2366 * this is where we are basically btrfs_lookup, without the
@@ -2542,7 +2561,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2542 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2543 * any xattrs or acls 2562 * any xattrs or acls
2544 */ 2563 */
2545 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2564 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
2565 btrfs_ino(inode));
2546 if (!maybe_acls) 2566 if (!maybe_acls)
2547 cache_no_acl(inode); 2567 cache_no_acl(inode);
2548 2568
@@ -2649,11 +2669,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2649 struct extent_buffer *leaf; 2669 struct extent_buffer *leaf;
2650 int ret; 2670 int ret;
2651 2671
2672 /*
2673 * If the root is the tree root, this inode stores free space
2674 * information. Such inodes are updated while the transaction is
2675 * committed, so their updates must not be delayed here, or a
2676 * deadlock will occur at commit time.
2677 */
2678 if (!is_free_space_inode(root, inode)) {
2679 ret = btrfs_delayed_update_inode(trans, root, inode);
2680 if (!ret)
2681 btrfs_set_inode_last_trans(trans, inode);
2682 return ret;
2683 }
2684
2652 path = btrfs_alloc_path(); 2685 path = btrfs_alloc_path();
2653 BUG_ON(!path); 2686 if (!path)
2687 return -ENOMEM;
2688
2654 path->leave_spinning = 1; 2689 path->leave_spinning = 1;
2655 ret = btrfs_lookup_inode(trans, root, path, 2690 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
2656 &BTRFS_I(inode)->location, 1); 2691 1);
2657 if (ret) { 2692 if (ret) {
2658 if (ret > 0) 2693 if (ret > 0)
2659 ret = -ENOENT; 2694 ret = -ENOENT;
@@ -2663,7 +2698,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2663 btrfs_unlock_up_safe(path, 1); 2698 btrfs_unlock_up_safe(path, 1);
2664 leaf = path->nodes[0]; 2699 leaf = path->nodes[0];
2665 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2700 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2666 struct btrfs_inode_item); 2701 struct btrfs_inode_item);
2667 2702
2668 fill_inode_item(trans, leaf, inode_item, inode); 2703 fill_inode_item(trans, leaf, inode_item, inode);
2669 btrfs_mark_buffer_dirty(leaf); 2704 btrfs_mark_buffer_dirty(leaf);
@@ -2674,7 +2709,6 @@ failed:
2674 return ret; 2709 return ret;
2675} 2710}
2676 2711
2677
2678/* 2712/*
2679 * unlink helper that gets used here in inode.c and in the tree logging 2713 * unlink helper that gets used here in inode.c and in the tree logging
2680 * recovery code. It removes a link in a directory with a given name, and 2714
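
After this change, ordinary inodes are updated through the delayed-item
machinery while free-space inodes keep the synchronous b-tree path, but
callers see the same btrfs_update_inode() contract as before. A usage
sketch under that assumption (error handling elided):

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * For a normal inode this queues the update in its delayed node;
	 * for a free-space inode it writes the inode item directly.
	 */
	ret = btrfs_update_inode(trans, root, inode);
	btrfs_end_transaction(trans, root);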
@@ -2691,6 +2725,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2691 struct btrfs_dir_item *di; 2725 struct btrfs_dir_item *di;
2692 struct btrfs_key key; 2726 struct btrfs_key key;
2693 u64 index; 2727 u64 index;
2728 u64 ino = btrfs_ino(inode);
2729 u64 dir_ino = btrfs_ino(dir);
2694 2730
2695 path = btrfs_alloc_path(); 2731 path = btrfs_alloc_path();
2696 if (!path) { 2732 if (!path) {
@@ -2699,7 +2735,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2699 } 2735 }
2700 2736
2701 path->leave_spinning = 1; 2737 path->leave_spinning = 1;
2702 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2738 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2703 name, name_len, -1); 2739 name, name_len, -1);
2704 if (IS_ERR(di)) { 2740 if (IS_ERR(di)) {
2705 ret = PTR_ERR(di); 2741 ret = PTR_ERR(di);
@@ -2714,33 +2750,23 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2714 ret = btrfs_delete_one_dir_name(trans, root, path, di); 2750 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2715 if (ret) 2751 if (ret)
2716 goto err; 2752 goto err;
2717 btrfs_release_path(root, path); 2753 btrfs_release_path(path);
2718 2754
2719 ret = btrfs_del_inode_ref(trans, root, name, name_len, 2755 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
2720 inode->i_ino, 2756 dir_ino, &index);
2721 dir->i_ino, &index);
2722 if (ret) { 2757 if (ret) {
2723 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2758 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2724 "inode %lu parent %lu\n", name_len, name, 2759 "inode %llu parent %llu\n", name_len, name,
2725 inode->i_ino, dir->i_ino); 2760 (unsigned long long)ino, (unsigned long long)dir_ino);
2726 goto err; 2761 goto err;
2727 } 2762 }
2728 2763
2729 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 2764 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2730 index, name, name_len, -1); 2765 if (ret)
2731 if (IS_ERR(di)) {
2732 ret = PTR_ERR(di);
2733 goto err;
2734 }
2735 if (!di) {
2736 ret = -ENOENT;
2737 goto err; 2766 goto err;
2738 }
2739 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2740 btrfs_release_path(root, path);
2741 2767
2742 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2768 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2743 inode, dir->i_ino); 2769 inode, dir_ino);
2744 BUG_ON(ret != 0 && ret != -ENOENT); 2770 BUG_ON(ret != 0 && ret != -ENOENT);
2745 2771
2746 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2772 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
@@ -2818,12 +2844,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2818 int check_link = 1; 2844 int check_link = 1;
2819 int err = -ENOSPC; 2845 int err = -ENOSPC;
2820 int ret; 2846 int ret;
2847 u64 ino = btrfs_ino(inode);
2848 u64 dir_ino = btrfs_ino(dir);
2821 2849
2822 trans = btrfs_start_transaction(root, 10); 2850 trans = btrfs_start_transaction(root, 10);
2823 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 2851 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2824 return trans; 2852 return trans;
2825 2853
2826 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 2854 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2827 return ERR_PTR(-ENOSPC); 2855 return ERR_PTR(-ENOSPC);
2828 2856
2829 /* check if someone else holds a reference */ 2857
@@ -2864,7 +2892,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2864 } else { 2892 } else {
2865 check_link = 0; 2893 check_link = 0;
2866 } 2894 }
2867 btrfs_release_path(root, path); 2895 btrfs_release_path(path);
2868 2896
2869 ret = btrfs_lookup_inode(trans, root, path, 2897 ret = btrfs_lookup_inode(trans, root, path,
2870 &BTRFS_I(inode)->location, 0); 2898 &BTRFS_I(inode)->location, 0);
@@ -2878,11 +2906,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2878 } else { 2906 } else {
2879 check_link = 0; 2907 check_link = 0;
2880 } 2908 }
2881 btrfs_release_path(root, path); 2909 btrfs_release_path(path);
2882 2910
2883 if (ret == 0 && S_ISREG(inode->i_mode)) { 2911 if (ret == 0 && S_ISREG(inode->i_mode)) {
2884 ret = btrfs_lookup_file_extent(trans, root, path, 2912 ret = btrfs_lookup_file_extent(trans, root, path,
2885 inode->i_ino, (u64)-1, 0); 2913 ino, (u64)-1, 0);
2886 if (ret < 0) { 2914 if (ret < 0) {
2887 err = ret; 2915 err = ret;
2888 goto out; 2916 goto out;
@@ -2890,7 +2918,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2890 BUG_ON(ret == 0); 2918 BUG_ON(ret == 0);
2891 if (check_path_shared(root, path)) 2919 if (check_path_shared(root, path))
2892 goto out; 2920 goto out;
2893 btrfs_release_path(root, path); 2921 btrfs_release_path(path);
2894 } 2922 }
2895 2923
2896 if (!check_link) { 2924 if (!check_link) {
@@ -2898,7 +2926,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2898 goto out; 2926 goto out;
2899 } 2927 }
2900 2928
2901 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2929 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2902 dentry->d_name.name, dentry->d_name.len, 0); 2930 dentry->d_name.name, dentry->d_name.len, 0);
2903 if (IS_ERR(di)) { 2931 if (IS_ERR(di)) {
2904 err = PTR_ERR(di); 2932 err = PTR_ERR(di);
@@ -2911,11 +2939,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2911 err = 0; 2939 err = 0;
2912 goto out; 2940 goto out;
2913 } 2941 }
2914 btrfs_release_path(root, path); 2942 btrfs_release_path(path);
2915 2943
2916 ref = btrfs_lookup_inode_ref(trans, root, path, 2944 ref = btrfs_lookup_inode_ref(trans, root, path,
2917 dentry->d_name.name, dentry->d_name.len, 2945 dentry->d_name.name, dentry->d_name.len,
2918 inode->i_ino, dir->i_ino, 0); 2946 ino, dir_ino, 0);
2919 if (IS_ERR(ref)) { 2947 if (IS_ERR(ref)) {
2920 err = PTR_ERR(ref); 2948 err = PTR_ERR(ref);
2921 goto out; 2949 goto out;
@@ -2924,9 +2952,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2924 if (check_path_shared(root, path)) 2952 if (check_path_shared(root, path))
2925 goto out; 2953 goto out;
2926 index = btrfs_inode_ref_index(path->nodes[0], ref); 2954 index = btrfs_inode_ref_index(path->nodes[0], ref);
2927 btrfs_release_path(root, path); 2955 btrfs_release_path(path);
2928 2956
2929 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, 2957 /*
2958 * This is a commit root search; if we can look up the inode item
2959 * and the other related items in the commit root, the transaction
2960 * that created the dir/file has been committed, and the dir index
2961 * item whose insertion we delayed has been inserted into the
2962 * commit root as well. So we needn't worry about the delayed
2963 * insertion of the dir index item here.
2964 */
2965 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
2930 dentry->d_name.name, dentry->d_name.len, 0); 2966 dentry->d_name.name, dentry->d_name.len, 0);
2931 if (IS_ERR(di)) { 2967 if (IS_ERR(di)) {
2932 err = PTR_ERR(di); 2968 err = PTR_ERR(di);
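
Since __unlink_start_trans() searches the commit root, any dir index
item it finds was committed before the search began, so it cannot race
with a pending delayed insertion. The expected semantics of the delayed
deletion used in __btrfs_unlink_inode() above (assumed; the real code
is in delayed-inode.c):

	/*
	 * If the index item is still queued for insertion, cancel it;
	 * otherwise record a delayed deletion item for it.
	 */
	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
	if (ret)
		goto err;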
@@ -2999,54 +3035,47 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2999 struct btrfs_key key; 3035 struct btrfs_key key;
3000 u64 index; 3036 u64 index;
3001 int ret; 3037 int ret;
3038 u64 dir_ino = btrfs_ino(dir);
3002 3039
3003 path = btrfs_alloc_path(); 3040 path = btrfs_alloc_path();
3004 if (!path) 3041 if (!path)
3005 return -ENOMEM; 3042 return -ENOMEM;
3006 3043
3007 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 3044 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3008 name, name_len, -1); 3045 name, name_len, -1);
3009 BUG_ON(!di || IS_ERR(di)); 3046 BUG_ON(IS_ERR_OR_NULL(di));
3010 3047
3011 leaf = path->nodes[0]; 3048 leaf = path->nodes[0];
3012 btrfs_dir_item_key_to_cpu(leaf, di, &key); 3049 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3013 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 3050 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3014 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3051 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3015 BUG_ON(ret); 3052 BUG_ON(ret);
3016 btrfs_release_path(root, path); 3053 btrfs_release_path(path);
3017 3054
3018 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3055 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3019 objectid, root->root_key.objectid, 3056 objectid, root->root_key.objectid,
3020 dir->i_ino, &index, name, name_len); 3057 dir_ino, &index, name, name_len);
3021 if (ret < 0) { 3058 if (ret < 0) {
3022 BUG_ON(ret != -ENOENT); 3059 BUG_ON(ret != -ENOENT);
3023 di = btrfs_search_dir_index_item(root, path, dir->i_ino, 3060 di = btrfs_search_dir_index_item(root, path, dir_ino,
3024 name, name_len); 3061 name, name_len);
3025 BUG_ON(!di || IS_ERR(di)); 3062 BUG_ON(IS_ERR_OR_NULL(di));
3026 3063
3027 leaf = path->nodes[0]; 3064 leaf = path->nodes[0];
3028 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3065 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3029 btrfs_release_path(root, path); 3066 btrfs_release_path(path);
3030 index = key.offset; 3067 index = key.offset;
3031 } 3068 }
3069 btrfs_release_path(path);
3032 3070
3033 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 3071 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3034 index, name, name_len, -1);
3035 BUG_ON(!di || IS_ERR(di));
3036
3037 leaf = path->nodes[0];
3038 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3039 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3040 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3041 BUG_ON(ret); 3072 BUG_ON(ret);
3042 btrfs_release_path(root, path);
3043 3073
3044 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3074 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3045 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3075 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3046 ret = btrfs_update_inode(trans, root, dir); 3076 ret = btrfs_update_inode(trans, root, dir);
3047 BUG_ON(ret); 3077 BUG_ON(ret);
3048 3078
3049 btrfs_free_path(path);
3050 return 0; 3079 return 0;
3051} 3080}
3052 3081
@@ -3059,14 +3088,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3059 unsigned long nr = 0; 3088 unsigned long nr = 0;
3060 3089
3061 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || 3090 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
3062 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 3091 btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3063 return -ENOTEMPTY; 3092 return -ENOTEMPTY;
3064 3093
3065 trans = __unlink_start_trans(dir, dentry); 3094 trans = __unlink_start_trans(dir, dentry);
3066 if (IS_ERR(trans)) 3095 if (IS_ERR(trans))
3067 return PTR_ERR(trans); 3096 return PTR_ERR(trans);
3068 3097
3069 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3098 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3070 err = btrfs_unlink_subvol(trans, root, dir, 3099 err = btrfs_unlink_subvol(trans, root, dir,
3071 BTRFS_I(inode)->location.objectid, 3100 BTRFS_I(inode)->location.objectid,
3072 dentry->d_name.name, 3101 dentry->d_name.name,
@@ -3091,178 +3120,6 @@ out:
3091 return err; 3120 return err;
3092} 3121}
3093 3122
3094#if 0
3095/*
3096 * when truncating bytes in a file, it is possible to avoid reading
3097 * the leaves that contain only checksum items. This can be the
3098 * majority of the IO required to delete a large file, but it must
3099 * be done carefully.
3100 *
3101 * The keys in the level just above the leaves are checked to make sure
3102 * the lowest key in a given leaf is a csum key, and starts at an offset
3103 * after the new size.
3104 *
3105 * Then the key for the next leaf is checked to make sure it also has
3106 * a checksum item for the same file. If it does, we know our target leaf
3107 * contains only checksum items, and it can be safely freed without reading
3108 * it.
3109 *
3110 * This is just an optimization targeted at large files. It may do
3111 * nothing. It will return 0 unless things went badly.
3112 */
3113static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
3114 struct btrfs_root *root,
3115 struct btrfs_path *path,
3116 struct inode *inode, u64 new_size)
3117{
3118 struct btrfs_key key;
3119 int ret;
3120 int nritems;
3121 struct btrfs_key found_key;
3122 struct btrfs_key other_key;
3123 struct btrfs_leaf_ref *ref;
3124 u64 leaf_gen;
3125 u64 leaf_start;
3126
3127 path->lowest_level = 1;
3128 key.objectid = inode->i_ino;
3129 key.type = BTRFS_CSUM_ITEM_KEY;
3130 key.offset = new_size;
3131again:
3132 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3133 if (ret < 0)
3134 goto out;
3135
3136 if (path->nodes[1] == NULL) {
3137 ret = 0;
3138 goto out;
3139 }
3140 ret = 0;
3141 btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
3142 nritems = btrfs_header_nritems(path->nodes[1]);
3143
3144 if (!nritems)
3145 goto out;
3146
3147 if (path->slots[1] >= nritems)
3148 goto next_node;
3149
3150 /* did we find a key greater than anything we want to delete? */
3151 if (found_key.objectid > inode->i_ino ||
3152 (found_key.objectid == inode->i_ino && found_key.type > key.type))
3153 goto out;
3154
3155 /* we check the next key in the node to make sure the leaf contains
3156 * only checksum items. This comparison doesn't work if our
3157 * leaf is the last one in the node
3158 */
3159 if (path->slots[1] + 1 >= nritems) {
3160next_node:
3161 /* search forward from the last key in the node, this
3162 * will bring us into the next node in the tree
3163 */
3164 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
3165
3166 /* unlikely, but we inc below, so check to be safe */
3167 if (found_key.offset == (u64)-1)
3168 goto out;
3169
3170 /* search_forward needs a path with locks held, do the
3171 * search again for the original key. It is possible
3172 * this will race with a balance and return a path that
3173 * we could modify, but this drop is just an optimization
3174 * and is allowed to miss some leaves.
3175 */
3176 btrfs_release_path(root, path);
3177 found_key.offset++;
3178
3179 /* setup a max key for search_forward */
3180 other_key.offset = (u64)-1;
3181 other_key.type = key.type;
3182 other_key.objectid = key.objectid;
3183
3184 path->keep_locks = 1;
3185 ret = btrfs_search_forward(root, &found_key, &other_key,
3186 path, 0, 0);
3187 path->keep_locks = 0;
3188 if (ret || found_key.objectid != key.objectid ||
3189 found_key.type != key.type) {
3190 ret = 0;
3191 goto out;
3192 }
3193
3194 key.offset = found_key.offset;
3195 btrfs_release_path(root, path);
3196 cond_resched();
3197 goto again;
3198 }
3199
3200 /* we know there's one more slot after us in the tree,
3201 * read that key so we can verify it is also a checksum item
3202 */
3203 btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
3204
3205 if (found_key.objectid < inode->i_ino)
3206 goto next_key;
3207
3208 if (found_key.type != key.type || found_key.offset < new_size)
3209 goto next_key;
3210
3211 /*
3212 * if the key for the next leaf isn't a csum key from this objectid,
3213 * we can't be sure there aren't good items inside this leaf.
3214 * Bail out
3215 */
3216 if (other_key.objectid != inode->i_ino || other_key.type != key.type)
3217 goto out;
3218
3219 leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
3220 leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
3221 /*
3222 * it is safe to delete this leaf, it contains only
3223 * csum items from this inode at an offset >= new_size
3224 */
3225 ret = btrfs_del_leaf(trans, root, path, leaf_start);
3226 BUG_ON(ret);
3227
3228 if (root->ref_cows && leaf_gen < trans->transid) {
3229 ref = btrfs_alloc_leaf_ref(root, 0);
3230 if (ref) {
3231 ref->root_gen = root->root_key.offset;
3232 ref->bytenr = leaf_start;
3233 ref->owner = 0;
3234 ref->generation = leaf_gen;
3235 ref->nritems = 0;
3236
3237 btrfs_sort_leaf_ref(ref);
3238
3239 ret = btrfs_add_leaf_ref(root, ref, 0);
3240 WARN_ON(ret);
3241 btrfs_free_leaf_ref(root, ref);
3242 } else {
3243 WARN_ON(1);
3244 }
3245 }
3246next_key:
3247 btrfs_release_path(root, path);
3248
3249 if (other_key.objectid == inode->i_ino &&
3250 other_key.type == key.type && other_key.offset > key.offset) {
3251 key.offset = other_key.offset;
3252 cond_resched();
3253 goto again;
3254 }
3255 ret = 0;
3256out:
3257 /* fixup any changes we've made to the path */
3258 path->lowest_level = 0;
3259 path->keep_locks = 0;
3260 btrfs_release_path(root, path);
3261 return ret;
3262}
3263
3264#endif
3265
3266/* 3123/*
3267 * this can truncate away extent items, csum items and directory items. 3124 * this can truncate away extent items, csum items and directory items.
3268 * It starts at a high offset and removes keys until it can't find 3125 * It starts at a high offset and removes keys until it can't find
@@ -3298,17 +3155,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3298 int encoding; 3155 int encoding;
3299 int ret; 3156 int ret;
3300 int err = 0; 3157 int err = 0;
3158 u64 ino = btrfs_ino(inode);
3301 3159
3302 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3160 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3303 3161
3304 if (root->ref_cows || root == root->fs_info->tree_root) 3162 if (root->ref_cows || root == root->fs_info->tree_root)
3305 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3163 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3306 3164
3165 /*
3166 * This function is also used to drop the items in the log tree before
3167 * we relog the inode, so if root != BTRFS_I(inode)->root, it means
3168 * it is used to drop the logged items. So we shouldn't kill the delayed
3169 * items.
3170 */
3171 if (min_type == 0 && root == BTRFS_I(inode)->root)
3172 btrfs_kill_delayed_inode_items(inode);
3173
3307 path = btrfs_alloc_path(); 3174 path = btrfs_alloc_path();
3308 BUG_ON(!path); 3175 BUG_ON(!path);
3309 path->reada = -1; 3176 path->reada = -1;
3310 3177
3311 key.objectid = inode->i_ino; 3178 key.objectid = ino;
3312 key.offset = (u64)-1; 3179 key.offset = (u64)-1;
3313 key.type = (u8)-1; 3180 key.type = (u8)-1;
3314 3181
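
The new guard only discards delayed items when the whole inode is being
dropped from its own root; when the function runs against the log tree
(root != BTRFS_I(inode)->root) the pending items must survive. The
predicate, restated with its intent spelled out:

	/*
	 * min_type == 0 means "remove every item type". Only then, and
	 * only when truncating in the inode's own root rather than the
	 * log tree, is it safe to throw away the delayed items.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(inode);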
@@ -3336,7 +3203,7 @@ search_again:
3336 found_type = btrfs_key_type(&found_key); 3203 found_type = btrfs_key_type(&found_key);
3337 encoding = 0; 3204 encoding = 0;
3338 3205
3339 if (found_key.objectid != inode->i_ino) 3206 if (found_key.objectid != ino)
3340 break; 3207 break;
3341 3208
3342 if (found_type < min_type) 3209 if (found_type < min_type)
@@ -3426,7 +3293,6 @@ search_again:
3426 btrfs_file_extent_calc_inline_size(size); 3293 btrfs_file_extent_calc_inline_size(size);
3427 ret = btrfs_truncate_item(trans, root, path, 3294 ret = btrfs_truncate_item(trans, root, path,
3428 size, 1); 3295 size, 1);
3429 BUG_ON(ret);
3430 } else if (root->ref_cows) { 3296 } else if (root->ref_cows) {
3431 inode_sub_bytes(inode, item_end + 1 - 3297 inode_sub_bytes(inode, item_end + 1 -
3432 found_key.offset); 3298 found_key.offset);
@@ -3455,7 +3321,7 @@ delete:
3455 ret = btrfs_free_extent(trans, root, extent_start, 3321 ret = btrfs_free_extent(trans, root, extent_start,
3456 extent_num_bytes, 0, 3322 extent_num_bytes, 0,
3457 btrfs_header_owner(leaf), 3323 btrfs_header_owner(leaf),
3458 inode->i_ino, extent_offset); 3324 ino, extent_offset);
3459 BUG_ON(ret); 3325 BUG_ON(ret);
3460 } 3326 }
3461 3327
@@ -3464,7 +3330,9 @@ delete:
3464 3330
3465 if (path->slots[0] == 0 || 3331 if (path->slots[0] == 0 ||
3466 path->slots[0] != pending_del_slot) { 3332 path->slots[0] != pending_del_slot) {
3467 if (root->ref_cows) { 3333 if (root->ref_cows &&
3334 BTRFS_I(inode)->location.objectid !=
3335 BTRFS_FREE_INO_OBJECTID) {
3468 err = -EAGAIN; 3336 err = -EAGAIN;
3469 goto out; 3337 goto out;
3470 } 3338 }
@@ -3475,7 +3343,7 @@ delete:
3475 BUG_ON(ret); 3343 BUG_ON(ret);
3476 pending_del_nr = 0; 3344 pending_del_nr = 0;
3477 } 3345 }
3478 btrfs_release_path(root, path); 3346 btrfs_release_path(path);
3479 goto search_again; 3347 goto search_again;
3480 } else { 3348 } else {
3481 path->slots[0]--; 3349 path->slots[0]--;
@@ -3633,7 +3501,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3633 while (1) { 3501 while (1) {
3634 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3502 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
3635 block_end - cur_offset, 0); 3503 block_end - cur_offset, 0);
3636 BUG_ON(IS_ERR(em) || !em); 3504 BUG_ON(IS_ERR_OR_NULL(em));
3637 last_byte = min(extent_map_end(em), block_end); 3505 last_byte = min(extent_map_end(em), block_end);
3638 last_byte = (last_byte + mask) & ~mask; 3506 last_byte = (last_byte + mask) & ~mask;
3639 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3507 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3653,7 +3521,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3653 break; 3521 break;
3654 3522
3655 err = btrfs_insert_file_extent(trans, root, 3523 err = btrfs_insert_file_extent(trans, root,
3656 inode->i_ino, cur_offset, 0, 3524 btrfs_ino(inode), cur_offset, 0,
3657 0, hole_size, 0, hole_size, 3525 0, hole_size, 0, hole_size,
3658 0, 0, 0); 3526 0, 0, 0);
3659 if (err) 3527 if (err)
@@ -3755,7 +3623,7 @@ void btrfs_evict_inode(struct inode *inode)
3755 3623
3756 truncate_inode_pages(&inode->i_data, 0); 3624 truncate_inode_pages(&inode->i_data, 0);
3757 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3625 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3758 root == root->fs_info->tree_root)) 3626 is_free_space_inode(root, inode)))
3759 goto no_delete; 3627 goto no_delete;
3760 3628
3761 if (is_bad_inode(inode)) { 3629 if (is_bad_inode(inode)) {
@@ -3807,6 +3675,10 @@ void btrfs_evict_inode(struct inode *inode)
3807 BUG_ON(ret); 3675 BUG_ON(ret);
3808 } 3676 }
3809 3677
3678 if (!(root == root->fs_info->tree_root ||
3679 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
3680 btrfs_return_ino(root, btrfs_ino(inode));
3681
3810 nr = trans->blocks_used; 3682 nr = trans->blocks_used;
3811 btrfs_end_transaction(trans, root); 3683 btrfs_end_transaction(trans, root);
3812 btrfs_btree_balance_dirty(root, nr); 3684 btrfs_btree_balance_dirty(root, nr);
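
Returning the objectid here feeds the per-root free-ino cache, so
btrfs_find_free_ino() can hand the number out again; the tree root and
the relocation root are excluded because their inodes are not allocated
from that cache. The assumed allocate/return pairing (the two calls live
in different functions):

	/* Creation side: take an objectid from the cache. */
	err = btrfs_find_free_ino(root, &objectid);

	/* Eviction side: return it once the inode is fully gone. */
	if (!err)
		btrfs_return_ino(root, objectid);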
@@ -3832,12 +3704,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3832 path = btrfs_alloc_path(); 3704 path = btrfs_alloc_path();
3833 BUG_ON(!path); 3705 BUG_ON(!path);
3834 3706
3835 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, 3707 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3836 namelen, 0); 3708 namelen, 0);
3837 if (IS_ERR(di)) 3709 if (IS_ERR(di))
3838 ret = PTR_ERR(di); 3710 ret = PTR_ERR(di);
3839 3711
3840 if (!di || IS_ERR(di)) 3712 if (IS_ERR_OR_NULL(di))
3841 goto out_err; 3713 goto out_err;
3842 3714
3843 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); 3715 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
@@ -3885,7 +3757,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3885 3757
3886 leaf = path->nodes[0]; 3758 leaf = path->nodes[0];
3887 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 3759 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
3888 if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || 3760 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
3889 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) 3761 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
3890 goto out; 3762 goto out;
3891 3763
@@ -3895,7 +3767,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3895 if (ret) 3767 if (ret)
3896 goto out; 3768 goto out;
3897 3769
3898 btrfs_release_path(root->fs_info->tree_root, path); 3770 btrfs_release_path(path);
3899 3771
3900 new_root = btrfs_read_fs_root_no_name(root->fs_info, location); 3772 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
3901 if (IS_ERR(new_root)) { 3773 if (IS_ERR(new_root)) {
@@ -3924,6 +3796,7 @@ static void inode_tree_add(struct inode *inode)
3924 struct btrfs_inode *entry; 3796 struct btrfs_inode *entry;
3925 struct rb_node **p; 3797 struct rb_node **p;
3926 struct rb_node *parent; 3798 struct rb_node *parent;
3799 u64 ino = btrfs_ino(inode);
3927again: 3800again:
3928 p = &root->inode_tree.rb_node; 3801 p = &root->inode_tree.rb_node;
3929 parent = NULL; 3802 parent = NULL;
@@ -3936,9 +3809,9 @@ again:
3936 parent = *p; 3809 parent = *p;
3937 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3810 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3938 3811
3939 if (inode->i_ino < entry->vfs_inode.i_ino) 3812 if (ino < btrfs_ino(&entry->vfs_inode))
3940 p = &parent->rb_left; 3813 p = &parent->rb_left;
3941 else if (inode->i_ino > entry->vfs_inode.i_ino) 3814 else if (ino > btrfs_ino(&entry->vfs_inode))
3942 p = &parent->rb_right; 3815 p = &parent->rb_right;
3943 else { 3816 else {
3944 WARN_ON(!(entry->vfs_inode.i_state & 3817 WARN_ON(!(entry->vfs_inode.i_state &
@@ -4002,9 +3875,9 @@ again:
4002 prev = node; 3875 prev = node;
4003 entry = rb_entry(node, struct btrfs_inode, rb_node); 3876 entry = rb_entry(node, struct btrfs_inode, rb_node);
4004 3877
4005 if (objectid < entry->vfs_inode.i_ino) 3878 if (objectid < btrfs_ino(&entry->vfs_inode))
4006 node = node->rb_left; 3879 node = node->rb_left;
4007 else if (objectid > entry->vfs_inode.i_ino) 3880 else if (objectid > btrfs_ino(&entry->vfs_inode))
4008 node = node->rb_right; 3881 node = node->rb_right;
4009 else 3882 else
4010 break; 3883 break;
@@ -4012,7 +3885,7 @@ again:
4012 if (!node) { 3885 if (!node) {
4013 while (prev) { 3886 while (prev) {
4014 entry = rb_entry(prev, struct btrfs_inode, rb_node); 3887 entry = rb_entry(prev, struct btrfs_inode, rb_node);
4015 if (objectid <= entry->vfs_inode.i_ino) { 3888 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
4016 node = prev; 3889 node = prev;
4017 break; 3890 break;
4018 } 3891 }
@@ -4021,7 +3894,7 @@ again:
4021 } 3894 }
4022 while (node) { 3895 while (node) {
4023 entry = rb_entry(node, struct btrfs_inode, rb_node); 3896 entry = rb_entry(node, struct btrfs_inode, rb_node);
4024 objectid = entry->vfs_inode.i_ino + 1; 3897 objectid = btrfs_ino(&entry->vfs_inode) + 1;
4025 inode = igrab(&entry->vfs_inode); 3898 inode = igrab(&entry->vfs_inode);
4026 if (inode) { 3899 if (inode) {
4027 spin_unlock(&root->inode_lock); 3900 spin_unlock(&root->inode_lock);
@@ -4059,7 +3932,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
4059static int btrfs_find_actor(struct inode *inode, void *opaque) 3932static int btrfs_find_actor(struct inode *inode, void *opaque)
4060{ 3933{
4061 struct btrfs_iget_args *args = opaque; 3934 struct btrfs_iget_args *args = opaque;
4062 return args->ino == inode->i_ino && 3935 return args->ino == btrfs_ino(inode) &&
4063 args->root == BTRFS_I(inode)->root; 3936 args->root == BTRFS_I(inode)->root;
4064} 3937}
4065 3938
@@ -4204,7 +4077,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4204 return d_splice_alias(inode, dentry); 4077 return d_splice_alias(inode, dentry);
4205} 4078}
4206 4079
4207static unsigned char btrfs_filetype_table[] = { 4080unsigned char btrfs_filetype_table[] = {
4208 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 4081 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
4209}; 4082};
4210 4083
@@ -4218,6 +4091,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4218 struct btrfs_key key; 4091 struct btrfs_key key;
4219 struct btrfs_key found_key; 4092 struct btrfs_key found_key;
4220 struct btrfs_path *path; 4093 struct btrfs_path *path;
4094 struct list_head ins_list;
4095 struct list_head del_list;
4221 int ret; 4096 int ret;
4222 struct extent_buffer *leaf; 4097 struct extent_buffer *leaf;
4223 int slot; 4098 int slot;
@@ -4230,6 +4105,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4230 char tmp_name[32]; 4105 char tmp_name[32];
4231 char *name_ptr; 4106 char *name_ptr;
4232 int name_len; 4107 int name_len;
4108 int is_curr = 0; /* filp->f_pos points to the current index? */
4233 4109
4234 /* FIXME, use a real flag for deciding about the key type */ 4110 /* FIXME, use a real flag for deciding about the key type */
4235 if (root->fs_info->tree_root == root) 4111 if (root->fs_info->tree_root == root)
@@ -4237,9 +4113,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4237 4113
4238 /* special case for "." */ 4114 /* special case for "." */
4239 if (filp->f_pos == 0) { 4115 if (filp->f_pos == 0) {
4240 over = filldir(dirent, ".", 1, 4116 over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
4241 1, inode->i_ino,
4242 DT_DIR);
4243 if (over) 4117 if (over)
4244 return 0; 4118 return 0;
4245 filp->f_pos = 1; 4119 filp->f_pos = 1;
@@ -4256,11 +4130,18 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4256 path = btrfs_alloc_path(); 4130 path = btrfs_alloc_path();
4257 if (!path) 4131 if (!path)
4258 return -ENOMEM; 4132 return -ENOMEM;
4133
4259 path->reada = 1; 4134 path->reada = 1;
4260 4135
4136 if (key_type == BTRFS_DIR_INDEX_KEY) {
4137 INIT_LIST_HEAD(&ins_list);
4138 INIT_LIST_HEAD(&del_list);
4139 btrfs_get_delayed_items(inode, &ins_list, &del_list);
4140 }
4141
4261 btrfs_set_key_type(&key, key_type); 4142 btrfs_set_key_type(&key, key_type);
4262 key.offset = filp->f_pos; 4143 key.offset = filp->f_pos;
4263 key.objectid = inode->i_ino; 4144 key.objectid = btrfs_ino(inode);
4264 4145
4265 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4146 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4266 if (ret < 0) 4147 if (ret < 0)
@@ -4287,8 +4168,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4287 break; 4168 break;
4288 if (found_key.offset < filp->f_pos) 4169 if (found_key.offset < filp->f_pos)
4289 goto next; 4170 goto next;
4171 if (key_type == BTRFS_DIR_INDEX_KEY &&
4172 btrfs_should_delete_dir_index(&del_list,
4173 found_key.offset))
4174 goto next;
4290 4175
4291 filp->f_pos = found_key.offset; 4176 filp->f_pos = found_key.offset;
4177 is_curr = 1;
4292 4178
4293 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 4179 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
4294 di_cur = 0; 4180 di_cur = 0;
@@ -4343,6 +4229,15 @@ next:
4343 path->slots[0]++; 4229 path->slots[0]++;
4344 } 4230 }
4345 4231
4232 if (key_type == BTRFS_DIR_INDEX_KEY) {
4233 if (is_curr)
4234 filp->f_pos++;
4235 ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
4236 &ins_list);
4237 if (ret)
4238 goto nopos;
4239 }
4240
4346 /* Reached end of directory/root. Bump pos past the last item. */ 4241 /* Reached end of directory/root. Bump pos past the last item. */
4347 if (key_type == BTRFS_DIR_INDEX_KEY) 4242 if (key_type == BTRFS_DIR_INDEX_KEY)
4348 /* 4243 /*
@@ -4355,6 +4250,8 @@ next:
4355nopos: 4250nopos:
4356 ret = 0; 4251 ret = 0;
4357err: 4252err:
4253 if (key_type == BTRFS_DIR_INDEX_KEY)
4254 btrfs_put_delayed_items(&ins_list, &del_list);
4358 btrfs_free_path(path); 4255 btrfs_free_path(path);
4359 return ret; 4256 return ret;
4360} 4257}
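
readdir must now present the union of the on-disk dir index items and
the pending delayed insertions, minus the pending delayed deletions.
Condensed from the hunks above, the control flow amounts to:

	if (key_type == BTRFS_DIR_INDEX_KEY) {
		INIT_LIST_HEAD(&ins_list);
		INIT_LIST_HEAD(&del_list);
		/* Snapshot this directory's pending delayed items. */
		btrfs_get_delayed_items(inode, &ins_list, &del_list);
	}

	/* ... walk the on-disk items, skipping keys found in del_list ... */

	if (key_type == BTRFS_DIR_INDEX_KEY) {
		if (is_curr)
			filp->f_pos++;
		/* Emit the pending insertions after the on-disk entries. */
		ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
						      &ins_list);
		if (ret)
			goto nopos;
	}

	/* Both lists are released via btrfs_put_delayed_items() on exit. */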
@@ -4370,7 +4267,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4370 return 0; 4267 return 0;
4371 4268
4372 smp_mb(); 4269 smp_mb();
4373 nolock = (root->fs_info->closing && root == root->fs_info->tree_root); 4270 if (root->fs_info->closing && is_free_space_inode(root, inode))
4271 nolock = true;
4374 4272
4375 if (wbc->sync_mode == WB_SYNC_ALL) { 4273 if (wbc->sync_mode == WB_SYNC_ALL) {
4376 if (nolock) 4274 if (nolock)
@@ -4411,24 +4309,24 @@ void btrfs_dirty_inode(struct inode *inode)
4411 btrfs_end_transaction(trans, root); 4309 btrfs_end_transaction(trans, root);
4412 trans = btrfs_start_transaction(root, 1); 4310 trans = btrfs_start_transaction(root, 1);
4413 if (IS_ERR(trans)) { 4311 if (IS_ERR(trans)) {
4414 if (printk_ratelimit()) { 4312 printk_ratelimited(KERN_ERR "btrfs: fail to "
4415 printk(KERN_ERR "btrfs: fail to " 4313 "dirty inode %llu error %ld\n",
4416 "dirty inode %lu error %ld\n", 4314 (unsigned long long)btrfs_ino(inode),
4417 inode->i_ino, PTR_ERR(trans)); 4315 PTR_ERR(trans));
4418 }
4419 return; 4316 return;
4420 } 4317 }
4421 4318
4422 ret = btrfs_update_inode(trans, root, inode); 4319 ret = btrfs_update_inode(trans, root, inode);
4423 if (ret) { 4320 if (ret) {
4424 if (printk_ratelimit()) { 4321 printk_ratelimited(KERN_ERR "btrfs: fail to "
4425 printk(KERN_ERR "btrfs: fail to " 4322 "dirty inode %llu error %d\n",
4426 "dirty inode %lu error %d\n", 4323 (unsigned long long)btrfs_ino(inode),
4427 inode->i_ino, ret); 4324 ret);
4428 }
4429 } 4325 }
4430 } 4326 }
4431 btrfs_end_transaction(trans, root); 4327 btrfs_end_transaction(trans, root);
4328 if (BTRFS_I(inode)->delayed_node)
4329 btrfs_balance_delayed_items(root);
4432} 4330}
4433 4331
4434/* 4332/*
@@ -4444,7 +4342,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4444 struct extent_buffer *leaf; 4342 struct extent_buffer *leaf;
4445 int ret; 4343 int ret;
4446 4344
4447 key.objectid = inode->i_ino; 4345 key.objectid = btrfs_ino(inode);
4448 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); 4346 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
4449 key.offset = (u64)-1; 4347 key.offset = (u64)-1;
4450 4348
@@ -4476,7 +4374,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4476 leaf = path->nodes[0]; 4374 leaf = path->nodes[0];
4477 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4375 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4478 4376
4479 if (found_key.objectid != inode->i_ino || 4377 if (found_key.objectid != btrfs_ino(inode) ||
4480 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { 4378 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
4481 BTRFS_I(inode)->index_cnt = 2; 4379 BTRFS_I(inode)->index_cnt = 2;
4482 goto out; 4380 goto out;
@@ -4497,9 +4395,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
4497 int ret = 0; 4395 int ret = 0;
4498 4396
4499 if (BTRFS_I(dir)->index_cnt == (u64)-1) { 4397 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
4500 ret = btrfs_set_inode_index_count(dir); 4398 ret = btrfs_inode_delayed_dir_index_count(dir);
4501 if (ret) 4399 if (ret) {
4502 return ret; 4400 ret = btrfs_set_inode_index_count(dir);
4401 if (ret)
4402 return ret;
4403 }
4503 } 4404 }
4504 4405
4505 *index = BTRFS_I(dir)->index_cnt; 4406 *index = BTRFS_I(dir)->index_cnt;
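
btrfs_set_inode_index() now asks the delayed node for the cached index
count first and only falls back to the b-tree walk when no delayed node
has it. The fallback chain, with the error convention assumed from the
hunk (non-zero meaning "no cached count"):

	if (BTRFS_I(dir)->index_cnt == (u64)-1) {
		/* Cheap path: the delayed node may already know the count. */
		ret = btrfs_inode_delayed_dir_index_count(dir);
		if (ret) {
			/* No cached count: derive it from the dir index tree. */
			ret = btrfs_set_inode_index_count(dir);
			if (ret)
				return ret;
		}
	}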
@@ -4535,6 +4436,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4535 return ERR_PTR(-ENOMEM); 4436 return ERR_PTR(-ENOMEM);
4536 } 4437 }
4537 4438
4439 /*
4440 * we have to initialize this early, so we can reclaim the inode
4441 * number if we fail afterwards in this function.
4442 */
4443 inode->i_ino = objectid;
4444
4538 if (dir) { 4445 if (dir) {
4539 trace_btrfs_inode_request(dir); 4446 trace_btrfs_inode_request(dir);
4540 4447
@@ -4578,7 +4485,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4578 goto fail; 4485 goto fail;
4579 4486
4580 inode_init_owner(inode, dir, mode); 4487 inode_init_owner(inode, dir, mode);
4581 inode->i_ino = objectid;
4582 inode_set_bytes(inode, 0); 4488 inode_set_bytes(inode, 0);
4583 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4489 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
4584 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4490 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -4642,29 +4548,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4642 int ret = 0; 4548 int ret = 0;
4643 struct btrfs_key key; 4549 struct btrfs_key key;
4644 struct btrfs_root *root = BTRFS_I(parent_inode)->root; 4550 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
4551 u64 ino = btrfs_ino(inode);
4552 u64 parent_ino = btrfs_ino(parent_inode);
4645 4553
4646 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4554 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4647 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); 4555 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
4648 } else { 4556 } else {
4649 key.objectid = inode->i_ino; 4557 key.objectid = ino;
4650 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 4558 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
4651 key.offset = 0; 4559 key.offset = 0;
4652 } 4560 }
4653 4561
4654 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4562 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4655 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 4563 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
4656 key.objectid, root->root_key.objectid, 4564 key.objectid, root->root_key.objectid,
4657 parent_inode->i_ino, 4565 parent_ino, index, name, name_len);
4658 index, name, name_len);
4659 } else if (add_backref) { 4566 } else if (add_backref) {
4660 ret = btrfs_insert_inode_ref(trans, root, 4567 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
4661 name, name_len, inode->i_ino, 4568 parent_ino, index);
4662 parent_inode->i_ino, index);
4663 } 4569 }
4664 4570
4665 if (ret == 0) { 4571 if (ret == 0) {
4666 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4572 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4667 parent_inode->i_ino, &key, 4573 parent_inode, &key,
4668 btrfs_inode_type(inode), index); 4574 btrfs_inode_type(inode), index);
4669 BUG_ON(ret); 4575 BUG_ON(ret);
4670 4576
@@ -4707,10 +4613,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4707 if (!new_valid_dev(rdev)) 4613 if (!new_valid_dev(rdev))
4708 return -EINVAL; 4614 return -EINVAL;
4709 4615
4710 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4711 if (err)
4712 return err;
4713
4714 /* 4616 /*
4715 * 2 for inode item and ref 4617 * 2 for inode item and ref
4716 * 2 for dir items 4618 * 2 for dir items
@@ -4720,8 +4622,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4720 if (IS_ERR(trans)) 4622 if (IS_ERR(trans))
4721 return PTR_ERR(trans); 4623 return PTR_ERR(trans);
4722 4624
4625 err = btrfs_find_free_ino(root, &objectid);
4626 if (err)
4627 goto out_unlock;
4628
4723 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4629 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4724 dentry->d_name.len, dir->i_ino, objectid, 4630 dentry->d_name.len, btrfs_ino(dir), objectid,
4725 mode, &index); 4631 mode, &index);
4726 if (IS_ERR(inode)) { 4632 if (IS_ERR(inode)) {
4727 err = PTR_ERR(inode); 4633 err = PTR_ERR(inode);
@@ -4765,9 +4671,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4765 u64 objectid; 4671 u64 objectid;
4766 u64 index = 0; 4672 u64 index = 0;
4767 4673
4768 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4769 if (err)
4770 return err;
4771 /* 4674 /*
4772 * 2 for inode item and ref 4675 * 2 for inode item and ref
4773 * 2 for dir items 4676 * 2 for dir items
@@ -4777,8 +4680,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4777 if (IS_ERR(trans)) 4680 if (IS_ERR(trans))
4778 return PTR_ERR(trans); 4681 return PTR_ERR(trans);
4779 4682
4683 err = btrfs_find_free_ino(root, &objectid);
4684 if (err)
4685 goto out_unlock;
4686
4780 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4687 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4781 dentry->d_name.len, dir->i_ino, objectid, 4688 dentry->d_name.len, btrfs_ino(dir), objectid,
4782 mode, &index); 4689 mode, &index);
4783 if (IS_ERR(inode)) { 4690 if (IS_ERR(inode)) {
4784 err = PTR_ERR(inode); 4691 err = PTR_ERR(inode);
@@ -4883,10 +4790,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4883 u64 index = 0; 4790 u64 index = 0;
4884 unsigned long nr = 1; 4791 unsigned long nr = 1;
4885 4792
4886 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4887 if (err)
4888 return err;
4889
4890 /* 4793 /*
4891 * 2 items for inode and ref 4794 * 2 items for inode and ref
4892 * 2 items for dir items 4795 * 2 items for dir items
@@ -4896,8 +4799,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4896 if (IS_ERR(trans)) 4799 if (IS_ERR(trans))
4897 return PTR_ERR(trans); 4800 return PTR_ERR(trans);
4898 4801
4802 err = btrfs_find_free_ino(root, &objectid);
4803 if (err)
4804 goto out_fail;
4805
4899 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4806 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4900 dentry->d_name.len, dir->i_ino, objectid, 4807 dentry->d_name.len, btrfs_ino(dir), objectid,
4901 S_IFDIR | mode, &index); 4808 S_IFDIR | mode, &index);
4902 if (IS_ERR(inode)) { 4809 if (IS_ERR(inode)) {
4903 err = PTR_ERR(inode); 4810 err = PTR_ERR(inode);
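
mknod, create and mkdir all move the objectid allocation to after
btrfs_start_transaction(), because btrfs_find_free_ino() may dirty the
free-ino cache inode and therefore has to run inside a transaction; on
failure the existing error label now also ends the transaction. The
shared pattern (the reservation count of 5 is an assumption based on
the item counts listed above):

	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Must run inside the transaction: may touch the cache inode. */
	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;	/* ends the transaction */

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(dir),
				objectid, mode, &index);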
@@ -5016,7 +4923,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
5016 u64 bytenr; 4923 u64 bytenr;
5017 u64 extent_start = 0; 4924 u64 extent_start = 0;
5018 u64 extent_end = 0; 4925 u64 extent_end = 0;
5019 u64 objectid = inode->i_ino; 4926 u64 objectid = btrfs_ino(inode);
5020 u32 found_type; 4927 u32 found_type;
5021 struct btrfs_path *path = NULL; 4928 struct btrfs_path *path = NULL;
5022 struct btrfs_root *root = BTRFS_I(inode)->root; 4929 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5044,7 +4951,7 @@ again:
5044 else 4951 else
5045 goto out; 4952 goto out;
5046 } 4953 }
5047 em = alloc_extent_map(GFP_NOFS); 4954 em = alloc_extent_map();
5048 if (!em) { 4955 if (!em) {
5049 err = -ENOMEM; 4956 err = -ENOMEM;
5050 goto out; 4957 goto out;
@@ -5206,8 +5113,10 @@ again:
5206 kunmap(page); 5113 kunmap(page);
5207 free_extent_map(em); 5114 free_extent_map(em);
5208 em = NULL; 5115 em = NULL;
5209 btrfs_release_path(root, path); 5116
5117 btrfs_release_path(path);
5210 trans = btrfs_join_transaction(root); 5118 trans = btrfs_join_transaction(root);
5119
5211 if (IS_ERR(trans)) 5120 if (IS_ERR(trans))
5212 return ERR_CAST(trans); 5121 return ERR_CAST(trans);
5213 goto again; 5122 goto again;
@@ -5232,7 +5141,7 @@ not_found_em:
5232 em->block_start = EXTENT_MAP_HOLE; 5141 em->block_start = EXTENT_MAP_HOLE;
5233 set_bit(EXTENT_FLAG_VACANCY, &em->flags); 5142 set_bit(EXTENT_FLAG_VACANCY, &em->flags);
5234insert: 5143insert:
5235 btrfs_release_path(root, path); 5144 btrfs_release_path(path);
5236 if (em->start > start || extent_map_end(em) <= start) { 5145 if (em->start > start || extent_map_end(em) <= start) {
5237 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " 5146 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
5238 "[%llu %llu]\n", (unsigned long long)em->start, 5147 "[%llu %llu]\n", (unsigned long long)em->start,
@@ -5365,7 +5274,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5365 u64 hole_start = start; 5274 u64 hole_start = start;
5366 u64 hole_len = len; 5275 u64 hole_len = len;
5367 5276
5368 em = alloc_extent_map(GFP_NOFS); 5277 em = alloc_extent_map();
5369 if (!em) { 5278 if (!em) {
5370 err = -ENOMEM; 5279 err = -ENOMEM;
5371 goto out; 5280 goto out;
@@ -5455,6 +5364,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5455 if (IS_ERR(trans)) 5364 if (IS_ERR(trans))
5456 return ERR_CAST(trans); 5365 return ERR_CAST(trans);
5457 5366
5367 if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
5368 btrfs_add_inode_defrag(trans, inode);
5369
5458 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 5370 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5459 5371
5460 alloc_hint = get_extent_allocation_hint(inode, start, len); 5372 alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5466,7 +5378,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5466 } 5378 }
5467 5379
5468 if (!em) { 5380 if (!em) {
5469 em = alloc_extent_map(GFP_NOFS); 5381 em = alloc_extent_map();
5470 if (!em) { 5382 if (!em) {
5471 em = ERR_PTR(-ENOMEM); 5383 em = ERR_PTR(-ENOMEM);
5472 goto out; 5384 goto out;
@@ -5532,7 +5444,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5532 if (!path) 5444 if (!path)
5533 return -ENOMEM; 5445 return -ENOMEM;
5534 5446
5535 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 5447 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
5536 offset, 0); 5448 offset, 0);
5537 if (ret < 0) 5449 if (ret < 0)
5538 goto out; 5450 goto out;
@@ -5549,7 +5461,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5549 ret = 0; 5461 ret = 0;
5550 leaf = path->nodes[0]; 5462 leaf = path->nodes[0];
5551 btrfs_item_key_to_cpu(leaf, &key, slot); 5463 btrfs_item_key_to_cpu(leaf, &key, slot);
5552 if (key.objectid != inode->i_ino || 5464 if (key.objectid != btrfs_ino(inode) ||
5553 key.type != BTRFS_EXTENT_DATA_KEY) { 5465 key.type != BTRFS_EXTENT_DATA_KEY) {
5554 /* not our file or wrong item type, must cow */ 5466 /* not our file or wrong item type, must cow */
5555 goto out; 5467 goto out;
@@ -5583,7 +5495,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5583 * look for other files referencing this extent, if we 5495 * look for other files referencing this extent, if we
5584 * find any we must cow 5496 * find any we must cow
5585 */ 5497 */
5586 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 5498 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
5587 key.offset - backref_offset, disk_bytenr)) 5499 key.offset - backref_offset, disk_bytenr))
5588 goto out; 5500 goto out;
5589 5501
@@ -5773,9 +5685,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5773 5685
5774 flush_dcache_page(bvec->bv_page); 5686 flush_dcache_page(bvec->bv_page);
5775 if (csum != *private) { 5687 if (csum != *private) {
5776 printk(KERN_ERR "btrfs csum failed ino %lu off" 5688 printk(KERN_ERR "btrfs csum failed ino %llu off"
5777 " %llu csum %u private %u\n", 5689 " %llu csum %u private %u\n",
5778 inode->i_ino, (unsigned long long)start, 5690 (unsigned long long)btrfs_ino(inode),
5691 (unsigned long long)start,
5779 csum, *private); 5692 csum, *private);
5780 err = -EIO; 5693 err = -EIO;
5781 } 5694 }
@@ -5922,9 +5835,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5922 struct btrfs_dio_private *dip = bio->bi_private; 5835 struct btrfs_dio_private *dip = bio->bi_private;
5923 5836
5924 if (err) { 5837 if (err) {
5925 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5838 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
5926 "sector %#Lx len %u err no %d\n", 5839 "sector %#Lx len %u err no %d\n",
5927 dip->inode->i_ino, bio->bi_rw, 5840 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
5928 (unsigned long long)bio->bi_sector, bio->bi_size, err); 5841 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5929 dip->errors = 1; 5842 dip->errors = 1;
5930 5843
@@ -6817,12 +6730,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6817 ei->ordered_data_close = 0; 6730 ei->ordered_data_close = 0;
6818 ei->orphan_meta_reserved = 0; 6731 ei->orphan_meta_reserved = 0;
6819 ei->dummy_inode = 0; 6732 ei->dummy_inode = 0;
6733 ei->in_defrag = 0;
6820 ei->force_compress = BTRFS_COMPRESS_NONE; 6734 ei->force_compress = BTRFS_COMPRESS_NONE;
6821 6735
6736 ei->delayed_node = NULL;
6737
6822 inode = &ei->vfs_inode; 6738 inode = &ei->vfs_inode;
6823 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); 6739 extent_map_tree_init(&ei->extent_tree);
6824 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); 6740 extent_io_tree_init(&ei->io_tree, &inode->i_data);
6825 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); 6741 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
6826 mutex_init(&ei->log_mutex); 6742 mutex_init(&ei->log_mutex);
6827 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6743 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6828 INIT_LIST_HEAD(&ei->i_orphan); 6744 INIT_LIST_HEAD(&ei->i_orphan);
@@ -6871,8 +6787,8 @@ void btrfs_destroy_inode(struct inode *inode)
6871 6787
6872 spin_lock(&root->orphan_lock); 6788 spin_lock(&root->orphan_lock);
6873 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6789 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6874 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6790 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6875 inode->i_ino); 6791 (unsigned long long)btrfs_ino(inode));
6876 list_del_init(&BTRFS_I(inode)->i_orphan); 6792 list_del_init(&BTRFS_I(inode)->i_orphan);
6877 } 6793 }
6878 spin_unlock(&root->orphan_lock); 6794 spin_unlock(&root->orphan_lock);
@@ -6894,6 +6810,7 @@ void btrfs_destroy_inode(struct inode *inode)
6894 inode_tree_del(inode); 6810 inode_tree_del(inode);
6895 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6811 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6896free: 6812free:
6813 btrfs_remove_delayed_node(inode);
6897 call_rcu(&inode->i_rcu, btrfs_i_callback); 6814 call_rcu(&inode->i_rcu, btrfs_i_callback);
6898} 6815}
6899 6816
@@ -6902,7 +6819,7 @@ int btrfs_drop_inode(struct inode *inode)
6902 struct btrfs_root *root = BTRFS_I(inode)->root; 6819 struct btrfs_root *root = BTRFS_I(inode)->root;
6903 6820
6904 if (btrfs_root_refs(&root->root_item) == 0 && 6821 if (btrfs_root_refs(&root->root_item) == 0 &&
6905 root != root->fs_info->tree_root) 6822 !is_free_space_inode(root, inode))
6906 return 1; 6823 return 1;
6907 else 6824 else
6908 return generic_drop_inode(inode); 6825 return generic_drop_inode(inode);
@@ -7011,16 +6928,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7011 u64 index = 0; 6928 u64 index = 0;
7012 u64 root_objectid; 6929 u64 root_objectid;
7013 int ret; 6930 int ret;
6931 u64 old_ino = btrfs_ino(old_inode);
7014 6932
7015 if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 6933 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
7016 return -EPERM; 6934 return -EPERM;
7017 6935
7018 /* we only allow rename subvolume link between subvolumes */ 6936 /* we only allow rename subvolume link between subvolumes */
7019 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) 6937 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
7020 return -EXDEV; 6938 return -EXDEV;
7021 6939
7022 if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || 6940 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
7023 (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) 6941 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
7024 return -ENOTEMPTY; 6942 return -ENOTEMPTY;
7025 6943
7026 if (S_ISDIR(old_inode->i_mode) && new_inode && 6944 if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -7036,7 +6954,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7036 filemap_flush(old_inode->i_mapping); 6954 filemap_flush(old_inode->i_mapping);
7037 6955
7038 /* close the racy window with snapshot create/destroy ioctl */ 6956 /* close the racy window with snapshot create/destroy ioctl */
7039 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6957 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7040 down_read(&root->fs_info->subvol_sem); 6958 down_read(&root->fs_info->subvol_sem);
7041 /* 6959 /*
7042 * We want to reserve the absolute worst case amount of items. So if 6960 * We want to reserve the absolute worst case amount of items. So if
@@ -7059,15 +6977,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7059 if (ret) 6977 if (ret)
7060 goto out_fail; 6978 goto out_fail;
7061 6979
7062 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6980 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7063 /* force full log commit if subvolume involved. */ 6981 /* force full log commit if subvolume involved. */
7064 root->fs_info->last_trans_log_full_commit = trans->transid; 6982 root->fs_info->last_trans_log_full_commit = trans->transid;
7065 } else { 6983 } else {
7066 ret = btrfs_insert_inode_ref(trans, dest, 6984 ret = btrfs_insert_inode_ref(trans, dest,
7067 new_dentry->d_name.name, 6985 new_dentry->d_name.name,
7068 new_dentry->d_name.len, 6986 new_dentry->d_name.len,
7069 old_inode->i_ino, 6987 old_ino,
7070 new_dir->i_ino, index); 6988 btrfs_ino(new_dir), index);
7071 if (ret) 6989 if (ret)
7072 goto out_fail; 6990 goto out_fail;
7073 /* 6991 /*
@@ -7083,10 +7001,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7083 * make sure the inode gets flushed if it is replacing 7001 * make sure the inode gets flushed if it is replacing
7084 * something. 7002 * something.
7085 */ 7003 */
7086 if (new_inode && new_inode->i_size && 7004 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
7087 old_inode && S_ISREG(old_inode->i_mode)) {
7088 btrfs_add_ordered_operation(trans, root, old_inode); 7005 btrfs_add_ordered_operation(trans, root, old_inode);
7089 }
7090 7006
7091 old_dir->i_ctime = old_dir->i_mtime = ctime; 7007 old_dir->i_ctime = old_dir->i_mtime = ctime;
7092 new_dir->i_ctime = new_dir->i_mtime = ctime; 7008 new_dir->i_ctime = new_dir->i_mtime = ctime;
@@ -7095,7 +7011,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7095 if (old_dentry->d_parent != new_dentry->d_parent) 7011 if (old_dentry->d_parent != new_dentry->d_parent)
7096 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 7012 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
7097 7013
7098 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 7014 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7099 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 7015 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
7100 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 7016 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
7101 old_dentry->d_name.name, 7017 old_dentry->d_name.name,
@@ -7112,7 +7028,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7112 7028
7113 if (new_inode) { 7029 if (new_inode) {
7114 new_inode->i_ctime = CURRENT_TIME; 7030 new_inode->i_ctime = CURRENT_TIME;
7115 if (unlikely(new_inode->i_ino == 7031 if (unlikely(btrfs_ino(new_inode) ==
7116 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7032 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
7117 root_objectid = BTRFS_I(new_inode)->location.objectid; 7033 root_objectid = BTRFS_I(new_inode)->location.objectid;
7118 ret = btrfs_unlink_subvol(trans, dest, new_dir, 7034 ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -7140,7 +7056,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7140 new_dentry->d_name.len, 0, index); 7056 new_dentry->d_name.len, 0, index);
7141 BUG_ON(ret); 7057 BUG_ON(ret);
7142 7058
7143 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 7059 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7144 struct dentry *parent = dget_parent(new_dentry); 7060 struct dentry *parent = dget_parent(new_dentry);
7145 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7061 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7146 dput(parent); 7062 dput(parent);
@@ -7149,7 +7065,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7149out_fail: 7065out_fail:
7150 btrfs_end_transaction_throttle(trans, root); 7066 btrfs_end_transaction_throttle(trans, root);
7151out_notrans: 7067out_notrans:
7152 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7068 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7153 up_read(&root->fs_info->subvol_sem); 7069 up_read(&root->fs_info->subvol_sem);
7154 7070
7155 return ret; 7071 return ret;
@@ -7203,58 +7119,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7203 return 0; 7119 return 0;
7204} 7120}
7205 7121
7206int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
7207 int sync)
7208{
7209 struct btrfs_inode *binode;
7210 struct inode *inode = NULL;
7211
7212 spin_lock(&root->fs_info->delalloc_lock);
7213 while (!list_empty(&root->fs_info->delalloc_inodes)) {
7214 binode = list_entry(root->fs_info->delalloc_inodes.next,
7215 struct btrfs_inode, delalloc_inodes);
7216 inode = igrab(&binode->vfs_inode);
7217 if (inode) {
7218 list_move_tail(&binode->delalloc_inodes,
7219 &root->fs_info->delalloc_inodes);
7220 break;
7221 }
7222
7223 list_del_init(&binode->delalloc_inodes);
7224 cond_resched_lock(&root->fs_info->delalloc_lock);
7225 }
7226 spin_unlock(&root->fs_info->delalloc_lock);
7227
7228 if (inode) {
7229 if (sync) {
7230 filemap_write_and_wait(inode->i_mapping);
7231 /*
7232 * We have to do this because compression doesn't
7233 * actually set PG_writeback until it submits the pages
7234 * for IO, which happens in an async thread, so we could
7235 * race and not actually wait for any writeback pages
7236 * because they've not been submitted yet. Technically
7237 * this could still be the case for the ordered stuff
7238 * since the async thread may not have started to do its
7239 * work yet. If this becomes the case then we need to
7240 * figure out a way to make sure that in writepage we
7241 * wait for any async pages to be submitted before
7242 * returning so that fdatawait does what it's supposed to
7243 * do.
7244 */
7245 btrfs_wait_ordered_range(inode, 0, (u64)-1);
7246 } else {
7247 filemap_flush(inode->i_mapping);
7248 }
7249 if (delay_iput)
7250 btrfs_add_delayed_iput(inode);
7251 else
7252 iput(inode);
7253 return 1;
7254 }
7255 return 0;
7256}
7257
7258static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 7122static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7259 const char *symname) 7123 const char *symname)
7260{ 7124{
@@ -7278,9 +7142,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7278 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 7142 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
7279 return -ENAMETOOLONG; 7143 return -ENAMETOOLONG;
7280 7144
7281 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
7282 if (err)
7283 return err;
7284 /* 7145 /*
7285 * 2 items for inode item and ref 7146 * 2 items for inode item and ref
7286 * 2 items for dir items 7147 * 2 items for dir items
@@ -7290,8 +7151,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7290 if (IS_ERR(trans)) 7151 if (IS_ERR(trans))
7291 return PTR_ERR(trans); 7152 return PTR_ERR(trans);
7292 7153
7154 err = btrfs_find_free_ino(root, &objectid);
7155 if (err)
7156 goto out_unlock;
7157
7293 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7158 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7294 dentry->d_name.len, dir->i_ino, objectid, 7159 dentry->d_name.len, btrfs_ino(dir), objectid,
7295 S_IFLNK|S_IRWXUGO, &index); 7160 S_IFLNK|S_IRWXUGO, &index);
7296 if (IS_ERR(inode)) { 7161 if (IS_ERR(inode)) {
7297 err = PTR_ERR(inode); 7162 err = PTR_ERR(inode);
@@ -7319,7 +7184,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7319 7184
7320 path = btrfs_alloc_path(); 7185 path = btrfs_alloc_path();
7321 BUG_ON(!path); 7186 BUG_ON(!path);
7322 key.objectid = inode->i_ino; 7187 key.objectid = btrfs_ino(inode);
7323 key.offset = 0; 7188 key.offset = 0;
7324 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7189 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
7325 datasize = btrfs_file_extent_calc_inline_size(name_len); 7190 datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -7327,6 +7192,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7327 datasize); 7192 datasize);
7328 if (err) { 7193 if (err) {
7329 drop_inode = 1; 7194 drop_inode = 1;
7195 btrfs_free_path(path);
7330 goto out_unlock; 7196 goto out_unlock;
7331 } 7197 }
7332 leaf = path->nodes[0]; 7198 leaf = path->nodes[0];
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8e90ccf4b76a..74c80595d707 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,6 +50,7 @@
50#include "print-tree.h" 50#include "print-tree.h"
51#include "volumes.h" 51#include "volumes.h"
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h"
53 54
54/* Mask out flags that are inappropriate for the given type of inode. */ 55/* Mask out flags that are inappropriate for the given type of inode. */
55static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -281,8 +282,9 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
281 if (!capable(CAP_SYS_ADMIN)) 282 if (!capable(CAP_SYS_ADMIN))
282 return -EPERM; 283 return -EPERM;
283 284
284 mutex_lock(&fs_info->fs_devices->device_list_mutex); 285 rcu_read_lock();
285 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 286 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
287 dev_list) {
286 if (!device->bdev) 288 if (!device->bdev)
287 continue; 289 continue;
288 q = bdev_get_queue(device->bdev); 290 q = bdev_get_queue(device->bdev);
@@ -292,7 +294,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
292 minlen); 294 minlen);
293 } 295 }
294 } 296 }
295 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 297 rcu_read_unlock();
296 if (!num_devices) 298 if (!num_devices)
297 return -EOPNOTSUPP; 299 return -EOPNOTSUPP;
298 300
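
Only the kernel-side locking changes in the hunk above; userspace still drives this path through the stock FITRIM ioctl. A runnable sketch, assuming argv[1] names a mounted btrfs directory:

    #include <fcntl.h>
    #include <linux/fs.h>           /* FITRIM, struct fstrim_range */
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            struct fstrim_range range;
            int fd = open(argv[1], O_RDONLY);

            if (fd < 0)
                    return 1;
            memset(&range, 0, sizeof(range));
            range.len = (__u64)-1;  /* consider every free extent */
            if (ioctl(fd, FITRIM, &range) < 0)
                    perror("FITRIM");
            else                    /* the kernel writes back the total */
                    printf("trimmed %llu bytes\n",
                           (unsigned long long)range.len);
            close(fd);
            return 0;
    }
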
@@ -329,8 +331,7 @@ static noinline int create_subvol(struct btrfs_root *root,
329 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 331 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
330 u64 index = 0; 332 u64 index = 0;
331 333
332 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
333 0, &objectid);
334 if (ret) { 335 if (ret) {
335 dput(parent); 336 dput(parent);
336 return ret; 337 return ret;
@@ -421,7 +422,7 @@ static noinline int create_subvol(struct btrfs_root *root,
421 BUG_ON(ret); 422 BUG_ON(ret);
422 423
423 ret = btrfs_insert_dir_item(trans, root, 424 ret = btrfs_insert_dir_item(trans, root,
424 name, namelen, dir->i_ino, &key, 425 name, namelen, dir, &key,
425 BTRFS_FT_DIR, index); 426 BTRFS_FT_DIR, index);
426 if (ret) 427 if (ret)
427 goto fail; 428 goto fail;
@@ -432,7 +433,7 @@ static noinline int create_subvol(struct btrfs_root *root,
432 433
433 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 434 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
434 objectid, root->root_key.objectid, 435 objectid, root->root_key.objectid,
435 dir->i_ino, index, name, namelen); 436 btrfs_ino(dir), index, name, namelen);
436 437
437 BUG_ON(ret); 438 BUG_ON(ret);
438 439
@@ -654,6 +655,106 @@ out_unlock:
654 return error; 655 return error;
655} 656}
656 657
658/*
659 * When we're defragging a range, we don't want to kick it off again
660 * if it is really just waiting for delalloc to send it down.
661 * If we find a nice big extent or delalloc range for the bytes in the
662 * file you want to defrag, we return 0 to let you know to skip this
663 * part of the file
664 */
665static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
666{
667 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
668 struct extent_map *em = NULL;
669 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
670 u64 end;
671
672 read_lock(&em_tree->lock);
673 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
674 read_unlock(&em_tree->lock);
675
676 if (em) {
677 end = extent_map_end(em);
678 free_extent_map(em);
679 if (end - offset > thresh)
680 return 0;
681 }
682 /* if we already have a nice delalloc here, just stop */
683 thresh /= 2;
684 end = count_range_bits(io_tree, &offset, offset + thresh,
685 thresh, EXTENT_DELALLOC, 1);
686 if (end >= thresh)
687 return 0;
688 return 1;
689}
690
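
To make the two thresholds above concrete (a worked reading, assuming the 256 KiB default that btrfs_defrag_file fills in later in this patch):

    /* on entry, thresh == 256 * 1024:
     *   - an extent map reaching more than 256 KiB past 'offset'
     *     means the range is already well laid out -> return 0
     * then thresh /= 2 (128 KiB):
     *   - at least 128 KiB of EXTENT_DELALLOC already queued in
     *     [offset, offset + 128 KiB) -> return 0 as well
     * otherwise return 1 and let the caller rewrite the range */
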
691/*
692 * helper function to walk through a file and find extents
693 * newer than a specific transid, and smaller than thresh.
694 *
695 * This is used by the defragging code to find new and small
696 * extents
697 */
698static int find_new_extents(struct btrfs_root *root,
699 struct inode *inode, u64 newer_than,
700 u64 *off, int thresh)
701{
702 struct btrfs_path *path;
703 struct btrfs_key min_key;
704 struct btrfs_key max_key;
705 struct extent_buffer *leaf;
706 struct btrfs_file_extent_item *extent;
707 int type;
708 int ret;
709
710 path = btrfs_alloc_path();
711 if (!path)
712 return -ENOMEM;
713
714 min_key.objectid = inode->i_ino;
715 min_key.type = BTRFS_EXTENT_DATA_KEY;
716 min_key.offset = *off;
717
718 max_key.objectid = inode->i_ino;
719 max_key.type = (u8)-1;
720 max_key.offset = (u64)-1;
721
722 path->keep_locks = 1;
723
724 while (1) {
725 ret = btrfs_search_forward(root, &min_key, &max_key,
726 path, 0, newer_than);
727 if (ret != 0)
728 goto none;
729 if (min_key.objectid != inode->i_ino)
730 goto none;
731 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
732 goto none;
733
734 leaf = path->nodes[0];
735 extent = btrfs_item_ptr(leaf, path->slots[0],
736 struct btrfs_file_extent_item);
737
738 type = btrfs_file_extent_type(leaf, extent);
739 if (type == BTRFS_FILE_EXTENT_REG &&
740 btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
741 check_defrag_in_cache(inode, min_key.offset, thresh)) {
742 *off = min_key.offset;
743 btrfs_free_path(path);
744 return 0;
745 }
746
747 if (min_key.offset == (u64)-1)
748 goto none;
749
750 min_key.offset++;
751 btrfs_release_path(path);
752 }
753none:
754 btrfs_free_path(path);
755 return -ENOENT;
756}
757
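
A hedged sketch of how the rewritten defrag loop below drives this helper (64 KiB is the threshold the callers in this patch pass):

    u64 off = range->start;

    /* on success, 'off' is updated to point at a regular extent that
     * btrfs_search_forward found in a block newer than 'newer_than',
     * that is smaller than 64 KiB, and that check_defrag_in_cache did
     * not rule out; -ENOENT means nothing newer/smaller remains */
    ret = find_new_extents(root, inode, newer_than, &off, 64 * 1024);
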
657static int should_defrag_range(struct inode *inode, u64 start, u64 len, 758static int should_defrag_range(struct inode *inode, u64 start, u64 len,
658 int thresh, u64 *last_len, u64 *skip, 759 int thresh, u64 *last_len, u64 *skip,
659 u64 *defrag_end) 760 u64 *defrag_end)
@@ -663,10 +764,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
663 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
664 int ret = 1; 765 int ret = 1;
665 766
666
667 if (thresh == 0)
668 thresh = 256 * 1024;
669
670 /* 767 /*
671 * make sure that once we start defragging an extent, we keep on 768 * make sure that once we start defragging an extent, we keep on
672 * defragging it 769 * defragging it
@@ -725,27 +822,176 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
725 return ret; 822 return ret;
726} 823}
727 824
728static int btrfs_defrag_file(struct file *file, 825/*
729 struct btrfs_ioctl_defrag_range_args *range) 826 * it doesn't do much good to defrag one or two pages
827 * at a time. This pulls in a nice chunk of pages
828 * to COW and defrag.
829 *
830 * It also makes sure the delalloc code has enough
831 * dirty data to avoid making new small extents as part
832 * of the defrag
833 *
834 * It's a good idea to start RA on this range
835 * before calling this.
836 */
837static int cluster_pages_for_defrag(struct inode *inode,
838 struct page **pages,
839 unsigned long start_index,
840 int num_pages)
730{ 841{
731 struct inode *inode = fdentry(file)->d_inode; 842 unsigned long file_end;
732 struct btrfs_root *root = BTRFS_I(inode)->root; 843 u64 isize = i_size_read(inode);
733 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 844 u64 page_start;
845 u64 page_end;
846 int ret;
847 int i;
848 int i_done;
734 struct btrfs_ordered_extent *ordered; 849 struct btrfs_ordered_extent *ordered;
735 struct page *page; 850 struct extent_state *cached_state = NULL;
851
852 if (isize == 0)
853 return 0;
854 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
855
856 ret = btrfs_delalloc_reserve_space(inode,
857 num_pages << PAGE_CACHE_SHIFT);
858 if (ret)
859 return ret;
860again:
861 ret = 0;
862 i_done = 0;
863
864 /* step one, lock all the pages */
865 for (i = 0; i < num_pages; i++) {
866 struct page *page;
867 page = grab_cache_page(inode->i_mapping,
868 start_index + i);
869 if (!page)
870 break;
871
872 if (!PageUptodate(page)) {
873 btrfs_readpage(NULL, page);
874 lock_page(page);
875 if (!PageUptodate(page)) {
876 unlock_page(page);
877 page_cache_release(page);
878 ret = -EIO;
879 break;
880 }
881 }
882 isize = i_size_read(inode);
883 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
884 if (!isize || page->index > file_end ||
885 page->mapping != inode->i_mapping) {
886 /* whoops, we blew past eof, skip this page */
887 unlock_page(page);
888 page_cache_release(page);
889 break;
890 }
891 pages[i] = page;
892 i_done++;
893 }
894 if (!i_done || ret)
895 goto out;
896
897 if (!(inode->i_sb->s_flags & MS_ACTIVE))
898 goto out;
899
900 /*
901 * so now we have a nice long stream of locked
902 * and up-to-date pages, let's wait on them
903 */
904 for (i = 0; i < i_done; i++)
905 wait_on_page_writeback(pages[i]);
906
907 page_start = page_offset(pages[0]);
908 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
909
910 lock_extent_bits(&BTRFS_I(inode)->io_tree,
911 page_start, page_end - 1, 0, &cached_state,
912 GFP_NOFS);
913 ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
914 if (ordered &&
915 ordered->file_offset + ordered->len > page_start &&
916 ordered->file_offset < page_end) {
917 btrfs_put_ordered_extent(ordered);
918 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
919 page_start, page_end - 1,
920 &cached_state, GFP_NOFS);
921 for (i = 0; i < i_done; i++) {
922 unlock_page(pages[i]);
923 page_cache_release(pages[i]);
924 }
925 btrfs_wait_ordered_range(inode, page_start,
926 page_end - page_start);
927 goto again;
928 }
929 if (ordered)
930 btrfs_put_ordered_extent(ordered);
931
932 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
933 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
934 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
935 GFP_NOFS);
936
937 if (i_done != num_pages) {
938 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
939 btrfs_delalloc_release_space(inode,
940 (num_pages - i_done) << PAGE_CACHE_SHIFT);
941 }
942
943
944 btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
945 &cached_state);
946
947 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
948 page_start, page_end - 1, &cached_state,
949 GFP_NOFS);
950
951 for (i = 0; i < i_done; i++) {
952 clear_page_dirty_for_io(pages[i]);
953 ClearPageChecked(pages[i]);
954 set_page_extent_mapped(pages[i]);
955 set_page_dirty(pages[i]);
956 unlock_page(pages[i]);
957 page_cache_release(pages[i]);
958 }
959 return i_done;
960out:
961 for (i = 0; i < i_done; i++) {
962 unlock_page(pages[i]);
963 page_cache_release(pages[i]);
964 }
965 btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT);
966 return ret;
967
968}
969
970int btrfs_defrag_file(struct inode *inode, struct file *file,
971 struct btrfs_ioctl_defrag_range_args *range,
972 u64 newer_than, unsigned long max_to_defrag)
973{
974 struct btrfs_root *root = BTRFS_I(inode)->root;
736 struct btrfs_super_block *disk_super; 975 struct btrfs_super_block *disk_super;
976 struct file_ra_state *ra = NULL;
737 unsigned long last_index; 977 unsigned long last_index;
738 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
739 unsigned long total_read = 0;
740 u64 features; 978 u64 features;
741 u64 page_start;
742 u64 page_end;
743 u64 last_len = 0; 979 u64 last_len = 0;
744 u64 skip = 0; 980 u64 skip = 0;
745 u64 defrag_end = 0; 981 u64 defrag_end = 0;
982 u64 newer_off = range->start;
983 int newer_left = 0;
746 unsigned long i; 984 unsigned long i;
747 int ret; 985 int ret;
986 int defrag_count = 0;
748 int compress_type = BTRFS_COMPRESS_ZLIB; 987 int compress_type = BTRFS_COMPRESS_ZLIB;
988 int extent_thresh = range->extent_thresh;
989 int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
990 u64 new_align = ~((u64)128 * 1024 - 1);
991 struct page **pages = NULL;
992
993 if (extent_thresh == 0)
994 extent_thresh = 256 * 1024;
749 995
750 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 996 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
751 if (range->compress_type > BTRFS_COMPRESS_TYPES) 997 if (range->compress_type > BTRFS_COMPRESS_TYPES)
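
For concreteness, the constants this hunk introduces work out as follows on a machine with 4 KiB pages (an illustrative assumption; PAGE_CACHE_SHIFT is per-architecture):

    newer_cluster = (256 * 1024) >> 12;     /* 64 pages per defrag cluster */
    new_align = ~((u64)128 * 1024 - 1);     /* ...fffffffffffe0000: rounds */
                                            /* an offset down to 128 KiB   */
    i = (newer_off & new_align) >> 12;      /* first page of that cluster  */
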
@@ -757,6 +1003,27 @@ static int btrfs_defrag_file(struct file *file,
757 if (inode->i_size == 0) 1003 if (inode->i_size == 0)
758 return 0; 1004 return 0;
759 1005
1006 /*
1007 * if we were not given a file, allocate a readahead
1008 * context
1009 */
1010 if (!file) {
1011 ra = kzalloc(sizeof(*ra), GFP_NOFS);
1012 if (!ra)
1013 return -ENOMEM;
1014 file_ra_state_init(ra, inode->i_mapping);
1015 } else {
1016 ra = &file->f_ra;
1017 }
1018
1019 pages = kmalloc(sizeof(struct page *) * newer_cluster,
1020 GFP_NOFS);
1021 if (!pages) {
1022 ret = -ENOMEM;
1023 goto out_ra;
1024 }
1025
1026 /* find the last page to defrag */
760 if (range->start + range->len > range->start) { 1027 if (range->start + range->len > range->start) {
761 last_index = min_t(u64, inode->i_size - 1, 1028 last_index = min_t(u64, inode->i_size - 1,
762 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1029 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
@@ -764,11 +1031,37 @@ static int btrfs_defrag_file(struct file *file,
764 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 1031 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
765 } 1032 }
766 1033
767 i = range->start >> PAGE_CACHE_SHIFT; 1034 if (newer_than) {
768 while (i <= last_index) { 1035 ret = find_new_extents(root, inode, newer_than,
769 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1036 &newer_off, 64 * 1024);
1037 if (!ret) {
1038 range->start = newer_off;
1039 /*
1040 * we always align our defrag to help keep
1041 * the extents in the file evenly spaced
1042 */
1043 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1044 newer_left = newer_cluster;
1045 } else
1046 goto out_ra;
1047 } else {
1048 i = range->start >> PAGE_CACHE_SHIFT;
1049 }
1050 if (!max_to_defrag)
1051 max_to_defrag = last_index - 1;
1052
1053 while (i <= last_index && defrag_count < max_to_defrag) {
1054 /*
1055 * make sure we stop running if someone unmounts
1056 * the FS
1057 */
1058 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1059 break;
1060
1061 if (!newer_than &&
1062 !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
770 PAGE_CACHE_SIZE, 1063 PAGE_CACHE_SIZE,
771 range->extent_thresh, 1064 extent_thresh,
772 &last_len, &skip, 1065 &last_len, &skip,
773 &defrag_end)) { 1066 &defrag_end)) {
774 unsigned long next; 1067 unsigned long next;
@@ -780,92 +1073,39 @@ static int btrfs_defrag_file(struct file *file,
780 i = max(i + 1, next); 1073 i = max(i + 1, next);
781 continue; 1074 continue;
782 } 1075 }
783
784 if (total_read % ra_pages == 0) {
785 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
786 min(last_index, i + ra_pages - 1));
787 }
788 total_read++;
789 mutex_lock(&inode->i_mutex);
790 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1076 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
791 BTRFS_I(inode)->force_compress = compress_type; 1077 BTRFS_I(inode)->force_compress = compress_type;
792 1078
793 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 1079 btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster);
794 if (ret)
795 goto err_unlock;
796again:
797 if (inode->i_size == 0 ||
798 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
799 ret = 0;
800 goto err_reservations;
801 }
802 1080
803 page = grab_cache_page(inode->i_mapping, i); 1081 ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster);
804 if (!page) { 1082 if (ret < 0)
805 ret = -ENOMEM; 1083 goto out_ra;
806 goto err_reservations;
807 }
808
809 if (!PageUptodate(page)) {
810 btrfs_readpage(NULL, page);
811 lock_page(page);
812 if (!PageUptodate(page)) {
813 unlock_page(page);
814 page_cache_release(page);
815 ret = -EIO;
816 goto err_reservations;
817 }
818 }
819
820 if (page->mapping != inode->i_mapping) {
821 unlock_page(page);
822 page_cache_release(page);
823 goto again;
824 }
825 1084
826 wait_on_page_writeback(page); 1085 defrag_count += ret;
1086 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1087 i += ret;
827 1088
828 if (PageDirty(page)) { 1089 if (newer_than) {
829 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1090 if (newer_off == (u64)-1)
830 goto loop_unlock; 1091 break;
831 }
832
833 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
834 page_end = page_start + PAGE_CACHE_SIZE - 1;
835 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
836 1092
837 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1093 newer_off = max(newer_off + 1,
838 if (ordered) { 1094 (u64)i << PAGE_CACHE_SHIFT);
839 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1095
840 unlock_page(page); 1096 ret = find_new_extents(root, inode,
841 page_cache_release(page); 1097 newer_than, &newer_off,
842 btrfs_start_ordered_extent(inode, ordered, 1); 1098 64 * 1024);
843 btrfs_put_ordered_extent(ordered); 1099 if (!ret) {
844 goto again; 1100 range->start = newer_off;
1101 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1102 newer_left = newer_cluster;
1103 } else {
1104 break;
1105 }
1106 } else {
1107 i++;
845 } 1108 }
846 set_page_extent_mapped(page);
847
848 /*
849 * this makes sure page_mkwrite is called on the
850 * page if it is dirtied again later
851 */
852 clear_page_dirty_for_io(page);
853 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
854 page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
855 EXTENT_DO_ACCOUNTING, GFP_NOFS);
856
857 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
858 ClearPageChecked(page);
859 set_page_dirty(page);
860 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
861
862loop_unlock:
863 unlock_page(page);
864 page_cache_release(page);
865 mutex_unlock(&inode->i_mutex);
866
867 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
868 i++;
869 } 1109 }
870 1110
871 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 1111 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
@@ -897,12 +1137,14 @@ loop_unlock:
897 btrfs_set_super_incompat_flags(disk_super, features); 1137 btrfs_set_super_incompat_flags(disk_super, features);
898 } 1138 }
899 1139
900 return 0; 1140 if (!file)
1141 kfree(ra);
1142 return defrag_count;
901 1143
902err_reservations: 1144out_ra:
903 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1145 if (!file)
904err_unlock: 1146 kfree(ra);
905 mutex_unlock(&inode->i_mutex); 1147 kfree(pages);
906 return ret; 1148 return ret;
907} 1149}
908 1150
@@ -1128,7 +1370,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1128 int ret = 0; 1370 int ret = 0;
1129 u64 flags = 0; 1371 u64 flags = 0;
1130 1372
1131 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1373 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1132 return -EINVAL; 1374 return -EINVAL;
1133 1375
1134 down_read(&root->fs_info->subvol_sem); 1376 down_read(&root->fs_info->subvol_sem);
@@ -1155,7 +1397,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1155 if (root->fs_info->sb->s_flags & MS_RDONLY) 1397 if (root->fs_info->sb->s_flags & MS_RDONLY)
1156 return -EROFS; 1398 return -EROFS;
1157 1399
1158 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1400 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1159 return -EINVAL; 1401 return -EINVAL;
1160 1402
1161 if (copy_from_user(&flags, arg, sizeof(flags))) 1403 if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1278,7 +1520,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1278 int nritems; 1520 int nritems;
1279 int i; 1521 int i;
1280 int slot; 1522 int slot;
1281 int found = 0;
1282 int ret = 0; 1523 int ret = 0;
1283 1524
1284 leaf = path->nodes[0]; 1525 leaf = path->nodes[0];
@@ -1325,7 +1566,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1325 item_off, item_len); 1566 item_off, item_len);
1326 *sk_offset += item_len; 1567 *sk_offset += item_len;
1327 } 1568 }
1328 found++; 1569 (*num_found)++;
1329 1570
1330 if (*num_found >= sk->nr_items) 1571 if (*num_found >= sk->nr_items)
1331 break; 1572 break;
@@ -1344,7 +1585,6 @@ advance_key:
1344 } else 1585 } else
1345 ret = 1; 1586 ret = 1;
1346overflow: 1587overflow:
1347 *num_found += found;
1348 return ret; 1588 return ret;
1349} 1589}
1350 1590
@@ -1401,7 +1641,7 @@ static noinline int search_ioctl(struct inode *inode,
1401 } 1641 }
1402 ret = copy_to_sk(root, path, &key, sk, args->buf, 1642 ret = copy_to_sk(root, path, &key, sk, args->buf,
1403 &sk_offset, &num_found); 1643 &sk_offset, &num_found);
1404 btrfs_release_path(root, path); 1644 btrfs_release_path(path);
1405 if (ret || num_found >= sk->nr_items) 1645 if (ret || num_found >= sk->nr_items)
1406 break; 1646 break;
1407 1647
@@ -1508,7 +1748,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1508 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1748 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1509 break; 1749 break;
1510 1750
1511 btrfs_release_path(root, path); 1751 btrfs_release_path(path);
1512 key.objectid = key.offset; 1752 key.objectid = key.offset;
1513 key.offset = (u64)-1; 1753 key.offset = (u64)-1;
1514 dirid = key.objectid; 1754 dirid = key.objectid;
@@ -1638,7 +1878,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1638 goto out_dput; 1878 goto out_dput;
1639 } 1879 }
1640 1880
1641 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1881 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1642 err = -EINVAL; 1882 err = -EINVAL;
1643 goto out_dput; 1883 goto out_dput;
1644 } 1884 }
@@ -1756,7 +1996,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1756 /* the rest are all set to zero by kzalloc */ 1996 /* the rest are all set to zero by kzalloc */
1757 range->len = (u64)-1; 1997 range->len = (u64)-1;
1758 } 1998 }
1759 ret = btrfs_defrag_file(file, range); 1999 ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2000 range, 0, 0);
2001 if (ret > 0)
2002 ret = 0;
1760 kfree(range); 2003 kfree(range);
1761 break; 2004 break;
1762 default: 2005 default:
@@ -1808,6 +2051,75 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
1808 return ret; 2051 return ret;
1809} 2052}
1810 2053
2054static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2055{
2056 struct btrfs_ioctl_fs_info_args fi_args;
2057 struct btrfs_device *device;
2058 struct btrfs_device *next;
2059 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2060
2061 if (!capable(CAP_SYS_ADMIN))
2062 return -EPERM;
2063
2064 fi_args.num_devices = fs_devices->num_devices;
2065 fi_args.max_id = 0;
2066 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
2067
2068 mutex_lock(&fs_devices->device_list_mutex);
2069 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2070 if (device->devid > fi_args.max_id)
2071 fi_args.max_id = device->devid;
2072 }
2073 mutex_unlock(&fs_devices->device_list_mutex);
2074
2075 if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
2076 return -EFAULT;
2077
2078 return 0;
2079}
2080
2081static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2082{
2083 struct btrfs_ioctl_dev_info_args *di_args;
2084 struct btrfs_device *dev;
2085 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2086 int ret = 0;
2087 char *s_uuid = NULL;
2088 char empty_uuid[BTRFS_UUID_SIZE] = {0};
2089
2090 if (!capable(CAP_SYS_ADMIN))
2091 return -EPERM;
2092
2093 di_args = memdup_user(arg, sizeof(*di_args));
2094 if (IS_ERR(di_args))
2095 return PTR_ERR(di_args);
2096
2097 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2098 s_uuid = di_args->uuid;
2099
2100 mutex_lock(&fs_devices->device_list_mutex);
2101 dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2102 mutex_unlock(&fs_devices->device_list_mutex);
2103
2104 if (!dev) {
2105 ret = -ENODEV;
2106 goto out;
2107 }
2108
2109 di_args->devid = dev->devid;
2110 di_args->bytes_used = dev->bytes_used;
2111 di_args->total_bytes = dev->total_bytes;
2112 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2113 strncpy(di_args->path, dev->name, sizeof(di_args->path));
2114
2115out:
2116 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2117 ret = -EFAULT;
2118
2119 kfree(di_args);
2120 return ret;
2121}
2122
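
The two handlers above pair naturally from userspace: BTRFS_IOC_FS_INFO reports max_id, and each devid up to that bound can be probed with BTRFS_IOC_DEV_INFO (a zeroed uuid means "match by devid alone", per the empty_uuid check). A runnable sketch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "ioctl.h"

    int main(int argc, char **argv)
    {
            struct btrfs_ioctl_fs_info_args fi;
            struct btrfs_ioctl_dev_info_args di;
            __u64 id;
            int fd = open(argv[1], O_RDONLY);

            if (fd < 0 || ioctl(fd, BTRFS_IOC_FS_INFO, &fi) < 0)
                    return 1;
            for (id = 1; id <= fi.max_id; id++) {
                    memset(&di, 0, sizeof(di));
                    di.devid = id;
                    if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0)
                            continue;       /* hole in the devid space */
                    printf("devid %llu: %s (%llu of %llu bytes used)\n",
                           (unsigned long long)di.devid, di.path,
                           (unsigned long long)di.bytes_used,
                           (unsigned long long)di.total_bytes);
            }
            close(fd);
            return 0;
    }
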
1811static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2123static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1812 u64 off, u64 olen, u64 destoff) 2124 u64 off, u64 olen, u64 destoff)
1813{ 2125{
@@ -1924,7 +2236,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1924 } 2236 }
1925 2237
1926 /* clone data */ 2238 /* clone data */
1927 key.objectid = src->i_ino; 2239 key.objectid = btrfs_ino(src);
1928 key.type = BTRFS_EXTENT_DATA_KEY; 2240 key.type = BTRFS_EXTENT_DATA_KEY;
1929 key.offset = 0; 2241 key.offset = 0;
1930 2242
@@ -1951,7 +2263,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1951 2263
1952 btrfs_item_key_to_cpu(leaf, &key, slot); 2264 btrfs_item_key_to_cpu(leaf, &key, slot);
1953 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2265 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1954 key.objectid != src->i_ino) 2266 key.objectid != btrfs_ino(src))
1955 break; 2267 break;
1956 2268
1957 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2269 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
@@ -1987,14 +2299,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1987 datal = btrfs_file_extent_ram_bytes(leaf, 2299 datal = btrfs_file_extent_ram_bytes(leaf,
1988 extent); 2300 extent);
1989 } 2301 }
1990 btrfs_release_path(root, path); 2302 btrfs_release_path(path);
1991 2303
1992 if (key.offset + datal <= off || 2304 if (key.offset + datal <= off ||
1993 key.offset >= off+len) 2305 key.offset >= off+len)
1994 goto next; 2306 goto next;
1995 2307
1996 memcpy(&new_key, &key, sizeof(new_key)); 2308 memcpy(&new_key, &key, sizeof(new_key));
1997 new_key.objectid = inode->i_ino; 2309 new_key.objectid = btrfs_ino(inode);
1998 if (off <= key.offset) 2310 if (off <= key.offset)
1999 new_key.offset = key.offset + destoff - off; 2311 new_key.offset = key.offset + destoff - off;
2000 else 2312 else
@@ -2048,7 +2360,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2048 ret = btrfs_inc_extent_ref(trans, root, 2360 ret = btrfs_inc_extent_ref(trans, root,
2049 disko, diskl, 0, 2361 disko, diskl, 0,
2050 root->root_key.objectid, 2362 root->root_key.objectid,
2051 inode->i_ino, 2363 btrfs_ino(inode),
2052 new_key.offset - datao); 2364 new_key.offset - datao);
2053 BUG_ON(ret); 2365 BUG_ON(ret);
2054 } 2366 }
@@ -2097,7 +2409,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2097 } 2409 }
2098 2410
2099 btrfs_mark_buffer_dirty(leaf); 2411 btrfs_mark_buffer_dirty(leaf);
2100 btrfs_release_path(root, path); 2412 btrfs_release_path(path);
2101 2413
2102 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2414 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2103 2415
@@ -2118,12 +2430,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2118 btrfs_end_transaction(trans, root); 2430 btrfs_end_transaction(trans, root);
2119 } 2431 }
2120next: 2432next:
2121 btrfs_release_path(root, path); 2433 btrfs_release_path(path);
2122 key.offset++; 2434 key.offset++;
2123 } 2435 }
2124 ret = 0; 2436 ret = 0;
2125out: 2437out:
2126 btrfs_release_path(root, path); 2438 btrfs_release_path(path);
2127 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2439 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
2128out_unlock: 2440out_unlock:
2129 mutex_unlock(&src->i_mutex); 2441 mutex_unlock(&src->i_mutex);
@@ -2464,6 +2776,58 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2464 return btrfs_wait_for_commit(root, transid); 2776 return btrfs_wait_for_commit(root, transid);
2465} 2777}
2466 2778
2779static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
2780{
2781 int ret;
2782 struct btrfs_ioctl_scrub_args *sa;
2783
2784 if (!capable(CAP_SYS_ADMIN))
2785 return -EPERM;
2786
2787 sa = memdup_user(arg, sizeof(*sa));
2788 if (IS_ERR(sa))
2789 return PTR_ERR(sa);
2790
2791 ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
2792 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
2793
2794 if (copy_to_user(arg, sa, sizeof(*sa)))
2795 ret = -EFAULT;
2796
2797 kfree(sa);
2798 return ret;
2799}
2800
2801static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
2802{
2803 if (!capable(CAP_SYS_ADMIN))
2804 return -EPERM;
2805
2806 return btrfs_scrub_cancel(root);
2807}
2808
2809static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
2810 void __user *arg)
2811{
2812 struct btrfs_ioctl_scrub_args *sa;
2813 int ret;
2814
2815 if (!capable(CAP_SYS_ADMIN))
2816 return -EPERM;
2817
2818 sa = memdup_user(arg, sizeof(*sa));
2819 if (IS_ERR(sa))
2820 return PTR_ERR(sa);
2821
2822 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
2823
2824 if (copy_to_user(arg, sa, sizeof(*sa)))
2825 ret = -EFAULT;
2826
2827 kfree(sa);
2828 return ret;
2829}
2830
2467long btrfs_ioctl(struct file *file, unsigned int 2831long btrfs_ioctl(struct file *file, unsigned int
2468 cmd, unsigned long arg) 2832 cmd, unsigned long arg)
2469{ 2833{
@@ -2503,6 +2867,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2503 return btrfs_ioctl_add_dev(root, argp); 2867 return btrfs_ioctl_add_dev(root, argp);
2504 case BTRFS_IOC_RM_DEV: 2868 case BTRFS_IOC_RM_DEV:
2505 return btrfs_ioctl_rm_dev(root, argp); 2869 return btrfs_ioctl_rm_dev(root, argp);
2870 case BTRFS_IOC_FS_INFO:
2871 return btrfs_ioctl_fs_info(root, argp);
2872 case BTRFS_IOC_DEV_INFO:
2873 return btrfs_ioctl_dev_info(root, argp);
2506 case BTRFS_IOC_BALANCE: 2874 case BTRFS_IOC_BALANCE:
2507 return btrfs_balance(root->fs_info->dev_root); 2875 return btrfs_balance(root->fs_info->dev_root);
2508 case BTRFS_IOC_CLONE: 2876 case BTRFS_IOC_CLONE:
@@ -2526,6 +2894,12 @@ long btrfs_ioctl(struct file *file, unsigned int
2526 return btrfs_ioctl_start_sync(file, argp); 2894 return btrfs_ioctl_start_sync(file, argp);
2527 case BTRFS_IOC_WAIT_SYNC: 2895 case BTRFS_IOC_WAIT_SYNC:
2528 return btrfs_ioctl_wait_sync(file, argp); 2896 return btrfs_ioctl_wait_sync(file, argp);
2897 case BTRFS_IOC_SCRUB:
2898 return btrfs_ioctl_scrub(root, argp);
2899 case BTRFS_IOC_SCRUB_CANCEL:
2900 return btrfs_ioctl_scrub_cancel(root, argp);
2901 case BTRFS_IOC_SCRUB_PROGRESS:
2902 return btrfs_ioctl_scrub_progress(root, argp);
2529 } 2903 }
2530 2904
2531 return -ENOTTY; 2905 return -ENOTTY;
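
A userspace sketch of the synchronous scrub path added above (btrfs_scrub_dev returns only once the scrub finishes, fails or is canceled); devid 1 is just an example value, and the struct layout is the one excerpted from ioctl.h below:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "ioctl.h"

    int main(int argc, char **argv)
    {
            struct btrfs_ioctl_scrub_args sa;
            int fd = open(argv[1], O_RDONLY);   /* any path on the fs */

            if (fd < 0)
                    return 1;
            memset(&sa, 0, sizeof(sa));
            sa.devid = 1;                       /* example: first device */
            sa.end = (__u64)-1;                 /* scrub to the end of it */
            if (ioctl(fd, BTRFS_IOC_SCRUB, &sa) < 0)
                    perror("BTRFS_IOC_SCRUB");
            else
                    printf("corrected %llu, uncorrectable %llu\n",
                           (unsigned long long)sa.progress.corrected_errors,
                           (unsigned long long)sa.progress.uncorrectable_errors);
            close(fd);
            return 0;
    }
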
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8fb382167b13..ad1ea789fcb4 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -32,6 +32,8 @@ struct btrfs_ioctl_vol_args {
32 32
33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34#define BTRFS_SUBVOL_RDONLY (1ULL << 1) 34#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
35#define BTRFS_FSID_SIZE 16
36#define BTRFS_UUID_SIZE 16
35 37
36#define BTRFS_SUBVOL_NAME_MAX 4039 38#define BTRFS_SUBVOL_NAME_MAX 4039
37struct btrfs_ioctl_vol_args_v2 { 39struct btrfs_ioctl_vol_args_v2 {
@@ -42,6 +44,71 @@ struct btrfs_ioctl_vol_args_v2 {
42 char name[BTRFS_SUBVOL_NAME_MAX + 1]; 44 char name[BTRFS_SUBVOL_NAME_MAX + 1];
43}; 45};
44 46
47/*
48 * structure to report errors and progress to userspace, either as a
49 * result of a finished scrub, a canceled scrub or a progress inquiry
50 */
51struct btrfs_scrub_progress {
52 __u64 data_extents_scrubbed; /* # of data extents scrubbed */
53 __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
54 __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
55 __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
56 __u64 read_errors; /* # of read errors encountered (EIO) */
57 __u64 csum_errors; /* # of failed csum checks */
58 __u64 verify_errors; /* # of occurrences where the metadata
59 * of a tree block did not match the
60 * expected values, like generation or
61 * logical */
62 __u64 no_csum; /* # of 4k data blocks for which no csum
63 * is present, probably the result of
64 * data written with nodatasum */
65 __u64 csum_discards; /* # of csums for which no data was found
66 * in the extent tree. */
67 __u64 super_errors; /* # of bad super blocks encountered */
68 __u64 malloc_errors; /* # of internal kmalloc errors. These
69 * will likely cause an incomplete
70 * scrub */
71 __u64 uncorrectable_errors; /* # of errors where either no intact
72 * copy was found or the writeback
73 * failed */
74 __u64 corrected_errors; /* # of errors corrected */
75 __u64 last_physical; /* last physical address scrubbed. In
76 * case a scrub was aborted, this can
77 * be used to restart the scrub */
78 __u64 unverified_errors; /* # of occurrences where a read for a
79 * full (64k) bio failed, but the re-
80 * check succeeded for each 4k piece.
81 * Intermittent error. */
82};
83
84#define BTRFS_SCRUB_READONLY 1
85struct btrfs_ioctl_scrub_args {
86 __u64 devid; /* in */
87 __u64 start; /* in */
88 __u64 end; /* in */
89 __u64 flags; /* in */
90 struct btrfs_scrub_progress progress; /* out */
91 /* pad to 1k */
92 __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
93};
94
95#define BTRFS_DEVICE_PATH_NAME_MAX 1024
96struct btrfs_ioctl_dev_info_args {
97 __u64 devid; /* in/out */
98 __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
99 __u64 bytes_used; /* out */
100 __u64 total_bytes; /* out */
101 __u64 unused[379]; /* pad to 4k */
102 __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
103};
104
105struct btrfs_ioctl_fs_info_args {
106 __u64 max_id; /* out */
107 __u64 num_devices; /* out */
108 __u8 fsid[BTRFS_FSID_SIZE]; /* out */
109 __u64 reserved[124]; /* pad to 1k */
110};
111
45#define BTRFS_INO_LOOKUP_PATH_MAX 4080 112#define BTRFS_INO_LOOKUP_PATH_MAX 4080
46struct btrfs_ioctl_ino_lookup_args { 113struct btrfs_ioctl_ino_lookup_args {
47 __u64 treeid; 114 __u64 treeid;
@@ -114,37 +181,6 @@ struct btrfs_ioctl_clone_range_args {
114#define BTRFS_DEFRAG_RANGE_COMPRESS 1 181#define BTRFS_DEFRAG_RANGE_COMPRESS 1
115#define BTRFS_DEFRAG_RANGE_START_IO 2 182#define BTRFS_DEFRAG_RANGE_START_IO 2
116 183
117struct btrfs_ioctl_defrag_range_args {
118 /* start of the defrag operation */
119 __u64 start;
120
121 /* number of bytes to defrag, use (u64)-1 to say all */
122 __u64 len;
123
124 /*
125 * flags for the operation, which can include turning
126 * on compression for this one defrag
127 */
128 __u64 flags;
129
130 /*
131 * any extent bigger than this will be considered
132 * already defragged. Use 0 to take the kernel default
133 * Use 1 to say every single extent must be rewritten
134 */
135 __u32 extent_thresh;
136
137 /*
138 * which compression method to use if turning on compression
139 * for this defrag operation. If unspecified, zlib will
140 * be used
141 */
142 __u32 compress_type;
143
144 /* spare for later */
145 __u32 unused[4];
146};
147
148struct btrfs_ioctl_space_info { 184struct btrfs_ioctl_space_info {
149 __u64 flags; 185 __u64 flags;
150 __u64 total_bytes; 186 __u64 total_bytes;
@@ -203,4 +239,13 @@ struct btrfs_ioctl_space_args {
203 struct btrfs_ioctl_vol_args_v2) 239 struct btrfs_ioctl_vol_args_v2)
204#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 240#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
205#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 241#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
242#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
243 struct btrfs_ioctl_scrub_args)
244#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
245#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
246 struct btrfs_ioctl_scrub_args)
247#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
248 struct btrfs_ioctl_dev_info_args)
249#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
250 struct btrfs_ioctl_fs_info_args)
206#endif 251#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 6151f2ea38bb..66fa43dc3f0f 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -185,31 +185,6 @@ sleep:
185 return 0; 185 return 0;
186} 186}
187 187
188/*
189 * Very quick trylock, this does not spin or schedule. It returns
190 * 1 with the spinlock held if it was able to take the lock, or it
191 * returns zero if it was unable to take the lock.
192 *
193 * After this call, scheduling is not safe without first calling
194 * btrfs_set_lock_blocking()
195 */
196int btrfs_try_tree_lock(struct extent_buffer *eb)
197{
198 if (spin_trylock(&eb->lock)) {
199 if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
200 /*
201 * we've got the spinlock, but the real owner is
202 * blocking. Drop the spinlock and return failure
203 */
204 spin_unlock(&eb->lock);
205 return 0;
206 }
207 return 1;
208 }
210 /* someone else has the spinlock, give up */
210 return 0;
211}
212
213int btrfs_tree_unlock(struct extent_buffer *eb) 188int btrfs_tree_unlock(struct extent_buffer *eb)
214{ 189{
215 /* 190 /*
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6c4ce457168c..5c33a560a2f1 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,8 +21,6 @@
21 21
22int btrfs_tree_lock(struct extent_buffer *eb); 22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb); 23int btrfs_tree_unlock(struct extent_buffer *eb);
24
25int btrfs_try_tree_lock(struct extent_buffer *eb);
26int btrfs_try_spin_lock(struct extent_buffer *eb); 24int btrfs_try_spin_lock(struct extent_buffer *eb);
27 25
28void btrfs_set_lock_blocking(struct extent_buffer *eb); 26void btrfs_set_lock_blocking(struct extent_buffer *eb);
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index a97314cf6bd6..82d569cb6267 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -23,56 +23,6 @@
23#include "ref-cache.h" 23#include "ref-cache.h"
24#include "transaction.h" 24#include "transaction.h"
25 25
26/*
27 * leaf refs are used to cache the information about which extents
28 * a given leaf has references on. This allows us to process that leaf
29 * in btrfs_drop_snapshot without needing to read it back from disk.
30 */
31
32/*
33 * kmalloc a leaf reference struct and update the counters for the
34 * total ref cache size
35 */
36struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
37 int nr_extents)
38{
39 struct btrfs_leaf_ref *ref;
40 size_t size = btrfs_leaf_ref_size(nr_extents);
41
42 ref = kmalloc(size, GFP_NOFS);
43 if (ref) {
44 spin_lock(&root->fs_info->ref_cache_lock);
45 root->fs_info->total_ref_cache_size += size;
46 spin_unlock(&root->fs_info->ref_cache_lock);
47
48 memset(ref, 0, sizeof(*ref));
49 atomic_set(&ref->usage, 1);
50 INIT_LIST_HEAD(&ref->list);
51 }
52 return ref;
53}
54
55/*
56 * free a leaf reference struct and update the counters for the
57 * total ref cache size
58 */
59void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
60{
61 if (!ref)
62 return;
63 WARN_ON(atomic_read(&ref->usage) == 0);
64 if (atomic_dec_and_test(&ref->usage)) {
65 size_t size = btrfs_leaf_ref_size(ref->nritems);
66
67 BUG_ON(ref->in_tree);
68 kfree(ref);
69
70 spin_lock(&root->fs_info->ref_cache_lock);
71 root->fs_info->total_ref_cache_size -= size;
72 spin_unlock(&root->fs_info->ref_cache_lock);
73 }
74}
75
76static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 26static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
77 struct rb_node *node) 27 struct rb_node *node)
78{ 28{
@@ -116,117 +66,3 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
116 } 66 }
117 return NULL; 67 return NULL;
118} 68}
119
120int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
121 int shared)
122{
123 struct btrfs_leaf_ref *ref = NULL;
124 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
125
126 if (shared)
127 tree = &root->fs_info->shared_ref_tree;
128 if (!tree)
129 return 0;
130
131 spin_lock(&tree->lock);
132 while (!list_empty(&tree->list)) {
133 ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
134 BUG_ON(ref->tree != tree);
135 if (ref->root_gen > max_root_gen)
136 break;
137 if (!xchg(&ref->in_tree, 0)) {
138 cond_resched_lock(&tree->lock);
139 continue;
140 }
141
142 rb_erase(&ref->rb_node, &tree->root);
143 list_del_init(&ref->list);
144
145 spin_unlock(&tree->lock);
146 btrfs_free_leaf_ref(root, ref);
147 cond_resched();
148 spin_lock(&tree->lock);
149 }
150 spin_unlock(&tree->lock);
151 return 0;
152}
153
154/*
155 * find the leaf ref for a given extent. This returns the ref struct with
156 * a usage reference incremented
157 */
158struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
159 u64 bytenr)
160{
161 struct rb_node *rb;
162 struct btrfs_leaf_ref *ref = NULL;
163 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
164again:
165 if (tree) {
166 spin_lock(&tree->lock);
167 rb = tree_search(&tree->root, bytenr);
168 if (rb)
169 ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
170 if (ref)
171 atomic_inc(&ref->usage);
172 spin_unlock(&tree->lock);
173 if (ref)
174 return ref;
175 }
176 if (tree != &root->fs_info->shared_ref_tree) {
177 tree = &root->fs_info->shared_ref_tree;
178 goto again;
179 }
180 return NULL;
181}
182
183/*
184 * add a fully filled in leaf ref struct
185 * remove all the refs older than a given root generation
186 */
187int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
188 int shared)
189{
190 int ret = 0;
191 struct rb_node *rb;
192 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
193
194 if (shared)
195 tree = &root->fs_info->shared_ref_tree;
196
197 spin_lock(&tree->lock);
198 rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
199 if (rb) {
200 ret = -EEXIST;
201 } else {
202 atomic_inc(&ref->usage);
203 ref->tree = tree;
204 ref->in_tree = 1;
205 list_add_tail(&ref->list, &tree->list);
206 }
207 spin_unlock(&tree->lock);
208 return ret;
209}
210
211/*
212 * remove a single leaf ref from the tree. This drops the ref held by the tree
213 * only
214 */
215int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
216{
217 struct btrfs_leaf_ref_tree *tree;
218
219 if (!xchg(&ref->in_tree, 0))
220 return 0;
221
222 tree = ref->tree;
223 spin_lock(&tree->lock);
224
225 rb_erase(&ref->rb_node, &tree->root);
226 list_del_init(&ref->list);
227
228 spin_unlock(&tree->lock);
229
230 btrfs_free_leaf_ref(root, ref);
231 return 0;
232}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
index e2a55cb2072b..24f7001f6387 100644
--- a/fs/btrfs/ref-cache.h
+++ b/fs/btrfs/ref-cache.h
@@ -49,28 +49,4 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents)
49 return sizeof(struct btrfs_leaf_ref) + 49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents; 50 sizeof(struct btrfs_extent_info) * nr_extents;
51} 51}
52
53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
54{
55 tree->root = RB_ROOT;
56 INIT_LIST_HEAD(&tree->list);
57 spin_lock_init(&tree->lock);
58}
59
60static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
61{
62 return RB_EMPTY_ROOT(&tree->root);
63}
64
65void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
66struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
67 int nr_extents);
68void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
69struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
70 u64 bytenr);
71int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
72 int shared);
73int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
74 int shared);
75int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
76#endif 52#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5872b41581f4..f25b10a22a0a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -30,6 +30,7 @@
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h"
33 34
34/* 35/*
35 * backref_node, mapping_node and tree_block start with this 36 * backref_node, mapping_node and tree_block start with this
@@ -507,6 +508,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
507 return 1; 508 return 1;
508} 509}
509 510
511
510static int should_ignore_root(struct btrfs_root *root) 512static int should_ignore_root(struct btrfs_root *root)
511{ 513{
512 struct btrfs_root *reloc_root; 514 struct btrfs_root *reloc_root;
@@ -529,7 +531,6 @@ static int should_ignore_root(struct btrfs_root *root)
529 */ 531 */
530 return 1; 532 return 1;
531} 533}
532
533/* 534/*
534 * find reloc tree by address of tree root 535 * find reloc tree by address of tree root
535 */ 536 */
@@ -963,7 +964,7 @@ again:
963 lower = upper; 964 lower = upper;
964 upper = NULL; 965 upper = NULL;
965 } 966 }
966 btrfs_release_path(root, path2); 967 btrfs_release_path(path2);
967next: 968next:
968 if (ptr < end) { 969 if (ptr < end) {
969 ptr += btrfs_extent_inline_ref_size(key.type); 970 ptr += btrfs_extent_inline_ref_size(key.type);
@@ -976,7 +977,7 @@ next:
976 if (ptr >= end) 977 if (ptr >= end)
977 path1->slots[0]++; 978 path1->slots[0]++;
978 } 979 }
979 btrfs_release_path(rc->extent_root, path1); 980 btrfs_release_path(path1);
980 981
981 cur->checked = 1; 982 cur->checked = 1;
982 WARN_ON(exist); 983 WARN_ON(exist);
@@ -1411,9 +1412,9 @@ again:
1411 prev = node; 1412 prev = node;
1412 entry = rb_entry(node, struct btrfs_inode, rb_node); 1413 entry = rb_entry(node, struct btrfs_inode, rb_node);
1413 1414
1414 if (objectid < entry->vfs_inode.i_ino) 1415 if (objectid < btrfs_ino(&entry->vfs_inode))
1415 node = node->rb_left; 1416 node = node->rb_left;
1416 else if (objectid > entry->vfs_inode.i_ino) 1417 else if (objectid > btrfs_ino(&entry->vfs_inode))
1417 node = node->rb_right; 1418 node = node->rb_right;
1418 else 1419 else
1419 break; 1420 break;
@@ -1421,7 +1422,7 @@ again:
1421 if (!node) { 1422 if (!node) {
1422 while (prev) { 1423 while (prev) {
1423 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1424 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1424 if (objectid <= entry->vfs_inode.i_ino) { 1425 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
1425 node = prev; 1426 node = prev;
1426 break; 1427 break;
1427 } 1428 }
@@ -1436,7 +1437,7 @@ again:
1436 return inode; 1437 return inode;
1437 } 1438 }
1438 1439
1439 objectid = entry->vfs_inode.i_ino + 1; 1440 objectid = btrfs_ino(&entry->vfs_inode) + 1;
1440 if (cond_resched_lock(&root->inode_lock)) 1441 if (cond_resched_lock(&root->inode_lock))
1441 goto again; 1442 goto again;
1442 1443
@@ -1472,7 +1473,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1472 return -ENOMEM; 1473 return -ENOMEM;
1473 1474
1474 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1475 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1475 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1476 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
1476 bytenr, 0); 1477 bytenr, 0);
1477 if (ret < 0) 1478 if (ret < 0)
1478 goto out; 1479 goto out;
@@ -1560,11 +1561,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1560 if (first) { 1561 if (first) {
1561 inode = find_next_inode(root, key.objectid); 1562 inode = find_next_inode(root, key.objectid);
1562 first = 0; 1563 first = 0;
1563 } else if (inode && inode->i_ino < key.objectid) { 1564 } else if (inode && btrfs_ino(inode) < key.objectid) {
1564 btrfs_add_delayed_iput(inode); 1565 btrfs_add_delayed_iput(inode);
1565 inode = find_next_inode(root, key.objectid); 1566 inode = find_next_inode(root, key.objectid);
1566 } 1567 }
1567 if (inode && inode->i_ino == key.objectid) { 1568 if (inode && btrfs_ino(inode) == key.objectid) {
1568 end = key.offset + 1569 end = key.offset +
1569 btrfs_file_extent_num_bytes(leaf, fi); 1570 btrfs_file_extent_num_bytes(leaf, fi);
1570 WARN_ON(!IS_ALIGNED(key.offset, 1571 WARN_ON(!IS_ALIGNED(key.offset,
@@ -1751,7 +1752,7 @@ again:
1751 1752
1752 btrfs_node_key_to_cpu(path->nodes[level], &key, 1753 btrfs_node_key_to_cpu(path->nodes[level], &key,
1753 path->slots[level]); 1754 path->slots[level]);
1754 btrfs_release_path(src, path); 1755 btrfs_release_path(path);
1755 1756
1756 path->lowest_level = level; 1757 path->lowest_level = level;
1757 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1758 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
@@ -1895,6 +1896,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1895 struct inode *inode = NULL; 1896 struct inode *inode = NULL;
1896 u64 objectid; 1897 u64 objectid;
1897 u64 start, end; 1898 u64 start, end;
1899 u64 ino;
1898 1900
1899 objectid = min_key->objectid; 1901 objectid = min_key->objectid;
1900 while (1) { 1902 while (1) {
@@ -1907,17 +1909,18 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1907 inode = find_next_inode(root, objectid); 1909 inode = find_next_inode(root, objectid);
1908 if (!inode) 1910 if (!inode)
1909 break; 1911 break;
1912 ino = btrfs_ino(inode);
1910 1913
1911 if (inode->i_ino > max_key->objectid) { 1914 if (ino > max_key->objectid) {
1912 iput(inode); 1915 iput(inode);
1913 break; 1916 break;
1914 } 1917 }
1915 1918
1916 objectid = inode->i_ino + 1; 1919 objectid = ino + 1;
1917 if (!S_ISREG(inode->i_mode)) 1920 if (!S_ISREG(inode->i_mode))
1918 continue; 1921 continue;
1919 1922
1920 if (unlikely(min_key->objectid == inode->i_ino)) { 1923 if (unlikely(min_key->objectid == ino)) {
1921 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1924 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1922 continue; 1925 continue;
1923 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1926 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
@@ -1930,7 +1933,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1930 start = 0; 1933 start = 0;
1931 } 1934 }
1932 1935
1933 if (unlikely(max_key->objectid == inode->i_ino)) { 1936 if (unlikely(max_key->objectid == ino)) {
1934 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1937 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1935 continue; 1938 continue;
1936 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1939 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
@@ -2499,7 +2502,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2499 path->locks[upper->level] = 0; 2502 path->locks[upper->level] = 0;
2500 2503
2501 slot = path->slots[upper->level]; 2504 slot = path->slots[upper->level];
2502 btrfs_release_path(NULL, path); 2505 btrfs_release_path(path);
2503 } else { 2506 } else {
2504 ret = btrfs_bin_search(upper->eb, key, upper->level, 2507 ret = btrfs_bin_search(upper->eb, key, upper->level,
2505 &slot); 2508 &slot);
@@ -2740,7 +2743,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2740 } else { 2743 } else {
2741 path->lowest_level = node->level; 2744 path->lowest_level = node->level;
2742 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2745 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2743 btrfs_release_path(root, path); 2746 btrfs_release_path(path);
2744 if (ret > 0) 2747 if (ret > 0)
2745 ret = 0; 2748 ret = 0;
2746 } 2749 }
@@ -2873,7 +2876,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2873 struct extent_map *em; 2876 struct extent_map *em;
2874 int ret = 0; 2877 int ret = 0;
2875 2878
2876 em = alloc_extent_map(GFP_NOFS); 2879 em = alloc_extent_map();
2877 if (!em) 2880 if (!em)
2878 return -ENOMEM; 2881 return -ENOMEM;
2879 2882
@@ -3122,7 +3125,7 @@ static int add_tree_block(struct reloc_control *rc,
3122#endif 3125#endif
3123 } 3126 }
3124 3127
3125 btrfs_release_path(rc->extent_root, path); 3128 btrfs_release_path(path);
3126 3129
3127 BUG_ON(level == -1); 3130 BUG_ON(level == -1);
3128 3131
@@ -3223,7 +3226,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3223 key.offset = 0; 3226 key.offset = 0;
3224 3227
3225 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3228 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3226 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { 3229 if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
3227 if (inode && !IS_ERR(inode)) 3230 if (inode && !IS_ERR(inode))
3228 iput(inode); 3231 iput(inode);
3229 return -ENOENT; 3232 return -ENOENT;
@@ -3509,7 +3512,7 @@ int add_data_references(struct reloc_control *rc,
3509 } 3512 }
3510 path->slots[0]++; 3513 path->slots[0]++;
3511 } 3514 }
3512 btrfs_release_path(rc->extent_root, path); 3515 btrfs_release_path(path);
3513 if (err) 3516 if (err)
3514 free_block_list(blocks); 3517 free_block_list(blocks);
3515 return err; 3518 return err;
@@ -3572,7 +3575,7 @@ next:
3572 EXTENT_DIRTY); 3575 EXTENT_DIRTY);
3573 3576
3574 if (ret == 0 && start <= key.objectid) { 3577 if (ret == 0 && start <= key.objectid) {
3575 btrfs_release_path(rc->extent_root, path); 3578 btrfs_release_path(path);
3576 rc->search_start = end + 1; 3579 rc->search_start = end + 1;
3577 } else { 3580 } else {
3578 rc->search_start = key.objectid + key.offset; 3581 rc->search_start = key.objectid + key.offset;
@@ -3580,7 +3583,7 @@ next:
3580 return 0; 3583 return 0;
3581 } 3584 }
3582 } 3585 }
3583 btrfs_release_path(rc->extent_root, path); 3586 btrfs_release_path(path);
3584 return ret; 3587 return ret;
3585} 3588}
3586 3589
@@ -3718,7 +3721,7 @@ restart:
3718 flags = BTRFS_EXTENT_FLAG_DATA; 3721 flags = BTRFS_EXTENT_FLAG_DATA;
3719 3722
3720 if (path_change) { 3723 if (path_change) {
3721 btrfs_release_path(rc->extent_root, path); 3724 btrfs_release_path(path);
3722 3725
3723 path->search_commit_root = 1; 3726 path->search_commit_root = 1;
3724 path->skip_locking = 1; 3727 path->skip_locking = 1;
@@ -3741,7 +3744,7 @@ restart:
3741 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3744 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3742 ret = add_data_references(rc, &key, path, &blocks); 3745 ret = add_data_references(rc, &key, path, &blocks);
3743 } else { 3746 } else {
3744 btrfs_release_path(rc->extent_root, path); 3747 btrfs_release_path(path);
3745 ret = 0; 3748 ret = 0;
3746 } 3749 }
3747 if (ret < 0) { 3750 if (ret < 0) {
@@ -3804,7 +3807,7 @@ restart:
3804 } 3807 }
3805 } 3808 }
3806 3809
3807 btrfs_release_path(rc->extent_root, path); 3810 btrfs_release_path(path);
3808 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3811 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3809 GFP_NOFS); 3812 GFP_NOFS);
3810 3813
@@ -3872,7 +3875,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3872 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3875 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3873 BTRFS_INODE_PREALLOC); 3876 BTRFS_INODE_PREALLOC);
3874 btrfs_mark_buffer_dirty(leaf); 3877 btrfs_mark_buffer_dirty(leaf);
3875 btrfs_release_path(root, path); 3878 btrfs_release_path(path);
3876out: 3879out:
3877 btrfs_free_path(path); 3880 btrfs_free_path(path);
3878 return ret; 3881 return ret;
@@ -3902,7 +3905,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3902 if (IS_ERR(trans)) 3905 if (IS_ERR(trans))
3903 return ERR_CAST(trans); 3906 return ERR_CAST(trans);
3904 3907
3905 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3908 err = btrfs_find_free_objectid(root, &objectid);
3906 if (err) 3909 if (err)
3907 goto out; 3910 goto out;
3908 3911
@@ -3940,7 +3943,7 @@ static struct reloc_control *alloc_reloc_control(void)
3940 INIT_LIST_HEAD(&rc->reloc_roots); 3943 INIT_LIST_HEAD(&rc->reloc_roots);
3941 backref_cache_init(&rc->backref_cache); 3944 backref_cache_init(&rc->backref_cache);
3942 mapping_tree_init(&rc->reloc_root_tree); 3945 mapping_tree_init(&rc->reloc_root_tree);
3943 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3946 extent_io_tree_init(&rc->processed_blocks, NULL);
3944 return rc; 3947 return rc;
3945} 3948}
3946 3949
@@ -4115,7 +4118,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4115 } 4118 }
4116 leaf = path->nodes[0]; 4119 leaf = path->nodes[0];
4117 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4120 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4118 btrfs_release_path(root->fs_info->tree_root, path); 4121 btrfs_release_path(path);
4119 4122
4120 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4123 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4121 key.type != BTRFS_ROOT_ITEM_KEY) 4124 key.type != BTRFS_ROOT_ITEM_KEY)
@@ -4147,7 +4150,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4147 4150
4148 key.offset--; 4151 key.offset--;
4149 } 4152 }
4150 btrfs_release_path(root->fs_info->tree_root, path); 4153 btrfs_release_path(path);
4151 4154
4152 if (list_empty(&reloc_roots)) 4155 if (list_empty(&reloc_roots))
4153 goto out; 4156 goto out;
@@ -4248,7 +4251,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4248 4251
4249 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4252 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4250 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4253 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4251 disk_bytenr + len - 1, &list); 4254 disk_bytenr + len - 1, &list, 0);
4252 4255
4253 while (!list_empty(&list)) { 4256 while (!list_empty(&list)) {
4254 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4257 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
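
The repeated i_ino conversions in this relocation.c diff all serve one change: the inode's on-disk identity now comes from a btrfs_ino() helper rather than from the VFS i_ino field, since the two can diverge for special inodes once delayed inode items are in play. A compilable userspace sketch of the idea; the struct names and the zero sentinel are illustrative assumptions, not the kernel's exact definition in btrfs_inode.h:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins; the real types live in btrfs_inode.h. */
struct demo_key {
	uint64_t objectid;		/* on-disk identity of the inode */
};

struct demo_btrfs_inode {
	struct demo_key location;	/* key of the inode item */
	uint64_t vfs_ino;		/* number the VFS handed out */
};

/*
 * Sketch of a btrfs_ino()-style accessor: prefer the objectid from the
 * inode's key, falling back to the VFS number for special inodes whose
 * location key is not a plain objectid (0 is a made-up sentinel here).
 */
static uint64_t demo_btrfs_ino(const struct demo_btrfs_inode *bi)
{
	if (bi->location.objectid == 0)
		return bi->vfs_ino;
	return bi->location.objectid;
}

int main(void)
{
	struct demo_btrfs_inode bi = { { 257 }, 257 };

	printf("ino = %llu\n", (unsigned long long)demo_btrfs_ino(&bi));
	return 0;
}
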
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6928bff62daa..ebe45443de06 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -22,53 +22,6 @@
22#include "print-tree.h" 22#include "print-tree.h"
23 23
24/* 24/*
25 * search forward for a root, starting with objectid 'search_start'
26 * if a root key is found, the objectid we find is filled into 'found_objectid'
27 * and 0 is returned. < 0 is returned on error, 1 if there is nothing
28 * left in the tree.
29 */
30int btrfs_search_root(struct btrfs_root *root, u64 search_start,
31 u64 *found_objectid)
32{
33 struct btrfs_path *path;
34 struct btrfs_key search_key;
35 int ret;
36
37 root = root->fs_info->tree_root;
38 search_key.objectid = search_start;
39 search_key.type = (u8)-1;
40 search_key.offset = (u64)-1;
41
42 path = btrfs_alloc_path();
43 BUG_ON(!path);
44again:
45 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
46 if (ret < 0)
47 goto out;
48 if (ret == 0) {
49 ret = 1;
50 goto out;
51 }
52 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
53 ret = btrfs_next_leaf(root, path);
54 if (ret)
55 goto out;
56 }
57 btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
58 if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
59 search_key.offset++;
60 btrfs_release_path(root, path);
61 goto again;
62 }
63 ret = 0;
64 *found_objectid = search_key.objectid;
65
66out:
67 btrfs_free_path(path);
68 return ret;
69}
70
71/*
72 * lookup the root with the highest offset for a given objectid. The key we do 25 * lookup the root with the highest offset for a given objectid. The key we do
73 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 26 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0
74 * on error. 27 * on error.
@@ -230,7 +183,7 @@ again:
230 183
231 memcpy(&found_key, &key, sizeof(key)); 184 memcpy(&found_key, &key, sizeof(key));
232 key.offset++; 185 key.offset++;
233 btrfs_release_path(root, path); 186 btrfs_release_path(path);
234 dead_root = 187 dead_root =
235 btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 188 btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
236 &found_key); 189 &found_key);
@@ -292,7 +245,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
292 } 245 }
293 246
294 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 247 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
295 btrfs_release_path(tree_root, path); 248 btrfs_release_path(path);
296 249
297 if (key.objectid != BTRFS_ORPHAN_OBJECTID || 250 if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
298 key.type != BTRFS_ORPHAN_ITEM_KEY) 251 key.type != BTRFS_ORPHAN_ITEM_KEY)
@@ -385,18 +338,22 @@ again:
385 *sequence = btrfs_root_ref_sequence(leaf, ref); 338 *sequence = btrfs_root_ref_sequence(leaf, ref);
386 339
387 ret = btrfs_del_item(trans, tree_root, path); 340 ret = btrfs_del_item(trans, tree_root, path);
388 BUG_ON(ret); 341 if (ret) {
342 err = ret;
343 goto out;
344 }
389 } else 345 } else
390 err = -ENOENT; 346 err = -ENOENT;
391 347
392 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 348 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
393 btrfs_release_path(tree_root, path); 349 btrfs_release_path(path);
394 key.objectid = ref_id; 350 key.objectid = ref_id;
395 key.type = BTRFS_ROOT_REF_KEY; 351 key.type = BTRFS_ROOT_REF_KEY;
396 key.offset = root_id; 352 key.offset = root_id;
397 goto again; 353 goto again;
398 } 354 }
399 355
356out:
400 btrfs_free_path(path); 357 btrfs_free_path(path);
401 return err; 358 return err;
402} 359}
@@ -463,7 +420,7 @@ again:
463 btrfs_mark_buffer_dirty(leaf); 420 btrfs_mark_buffer_dirty(leaf);
464 421
465 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 422 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
466 btrfs_release_path(tree_root, path); 423 btrfs_release_path(path);
467 key.objectid = ref_id; 424 key.objectid = ref_id;
468 key.type = BTRFS_ROOT_REF_KEY; 425 key.type = BTRFS_ROOT_REF_KEY;
469 key.offset = root_id; 426 key.offset = root_id;
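
The root-tree hunk above replaces a BUG_ON(ret) with error propagation to a shared cleanup label, the standard kernel idiom. A toy version of the same control flow; del_item() and free_path() are hypothetical stand-ins for btrfs_del_item() and btrfs_free_path():

#include <stdio.h>

/* Hypothetical stand-ins for btrfs_del_item() and btrfs_free_path(). */
static int del_item(void)
{
	return 0;	/* or a negative errno on failure */
}

static void free_path(void)
{
}

/*
 * Control flow from the hunk above: instead of crashing on failure,
 * record the error and fall through to the single cleanup label.
 */
static int delete_root_ref(void)
{
	int err = 0;
	int ret;

	ret = del_item();
	if (ret) {
		err = ret;
		goto out;
	}
	/* ... more work that may also jump to out ... */
out:
	free_path();
	return err;
}

int main(void)
{
	printf("err = %d\n", delete_root_ref());
	return 0;
}
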
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
new file mode 100644
index 000000000000..6dfed0c27ac3
--- /dev/null
+++ b/fs/btrfs/scrub.c
@@ -0,0 +1,1369 @@
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h"
27#include "volumes.h"
28#include "disk-io.h"
29#include "ordered-data.h"
30
31/*
 32 * This is only the first step towards a full-featured scrub. It reads all
 33 * extents and super blocks and verifies the checksums. In case a bad checksum
34 * is found or the extent cannot be read, good data will be written back if
35 * any can be found.
36 *
37 * Future enhancements:
38 * - To enhance the performance, better read-ahead strategies for the
39 * extent-tree can be employed.
40 * - In case an unrepairable extent is encountered, track which files are
41 * affected and report them
42 * - In case of a read error on files with nodatasum, map the file and read
43 * the extent to trigger a writeback of the good copy
44 * - track and record media errors, throw out bad devices
45 * - add a mode to also read unallocated space
46 * - make the prefetch cancellable
47 */
48
49struct scrub_bio;
50struct scrub_page;
51struct scrub_dev;
52static void scrub_bio_end_io(struct bio *bio, int err);
53static void scrub_checksum(struct btrfs_work *work);
54static int scrub_checksum_data(struct scrub_dev *sdev,
55 struct scrub_page *spag, void *buffer);
56static int scrub_checksum_tree_block(struct scrub_dev *sdev,
57 struct scrub_page *spag, u64 logical,
58 void *buffer);
59static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
60static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
61static void scrub_fixup_end_io(struct bio *bio, int err);
62static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
63 struct page *page);
64static void scrub_fixup(struct scrub_bio *sbio, int ix);
65
66#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
67#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
68
69struct scrub_page {
70 u64 flags; /* extent flags */
71 u64 generation;
72 u64 mirror_num;
73 int have_csum;
74 u8 csum[BTRFS_CSUM_SIZE];
75};
76
77struct scrub_bio {
78 int index;
79 struct scrub_dev *sdev;
80 struct bio *bio;
81 int err;
82 u64 logical;
83 u64 physical;
84 struct scrub_page spag[SCRUB_PAGES_PER_BIO];
85 u64 count;
86 int next_free;
87 struct btrfs_work work;
88};
89
90struct scrub_dev {
91 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
92 struct btrfs_device *dev;
93 int first_free;
94 int curr;
95 atomic_t in_flight;
96 spinlock_t list_lock;
97 wait_queue_head_t list_wait;
98 u16 csum_size;
99 struct list_head csum_list;
100 atomic_t cancel_req;
101 int readonly;
102 /*
103 * statistics
104 */
105 struct btrfs_scrub_progress stat;
106 spinlock_t stat_lock;
107};
108
109static void scrub_free_csums(struct scrub_dev *sdev)
110{
111 while (!list_empty(&sdev->csum_list)) {
112 struct btrfs_ordered_sum *sum;
113 sum = list_first_entry(&sdev->csum_list,
114 struct btrfs_ordered_sum, list);
115 list_del(&sum->list);
116 kfree(sum);
117 }
118}
119
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{
122 int i;
123 int j;
124 struct page *last_page;
125
126 if (!sdev)
127 return;
128
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132
133 if (!sbio)
134 break;
135
136 bio = sbio->bio;
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio);
148 }
149
150 scrub_free_csums(sdev);
151 kfree(sdev);
152}
153
154static noinline_for_stack
155struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{
157 struct scrub_dev *sdev;
158 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
164 if (!sdev)
165 goto nomem;
166 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio;
170
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
172 if (!sbio)
173 goto nomem;
174 sdev->bios[i] = sbio;
175
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i;
181 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0;
184 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202
203 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1;
205 else
206 sdev->bios[i]->next_free = -1;
207 }
208 sdev->first_free = 0;
209 sdev->curr = -1;
210 atomic_set(&sdev->in_flight, 0);
211 atomic_set(&sdev->cancel_req, 0);
212 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
213 INIT_LIST_HEAD(&sdev->csum_list);
214
215 spin_lock_init(&sdev->list_lock);
216 spin_lock_init(&sdev->stat_lock);
217 init_waitqueue_head(&sdev->list_wait);
218 return sdev;
219
220nomem:
221 scrub_free_dev(sdev);
222 return ERR_PTR(-ENOMEM);
223}
224
225/*
226 * scrub_recheck_error gets called when either verification of the page
227 * failed or the bio failed to read, e.g. with EIO. In the latter case,
228 * recheck_error gets called for every page in the bio, even though only
 229 * one may be bad.
230 */
231static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
232{
233 if (sbio->err) {
234 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
235 (sbio->physical + ix * PAGE_SIZE) >> 9,
236 sbio->bio->bi_io_vec[ix].bv_page) == 0) {
237 if (scrub_fixup_check(sbio, ix) == 0)
238 return;
239 }
240 }
241
242 scrub_fixup(sbio, ix);
243}
244
245static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
246{
247 int ret = 1;
248 struct page *page;
249 void *buffer;
250 u64 flags = sbio->spag[ix].flags;
251
252 page = sbio->bio->bi_io_vec[ix].bv_page;
253 buffer = kmap_atomic(page, KM_USER0);
254 if (flags & BTRFS_EXTENT_FLAG_DATA) {
255 ret = scrub_checksum_data(sbio->sdev,
256 sbio->spag + ix, buffer);
257 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
258 ret = scrub_checksum_tree_block(sbio->sdev,
259 sbio->spag + ix,
260 sbio->logical + ix * PAGE_SIZE,
261 buffer);
262 } else {
263 WARN_ON(1);
264 }
265 kunmap_atomic(buffer, KM_USER0);
266
267 return ret;
268}
269
270static void scrub_fixup_end_io(struct bio *bio, int err)
271{
272 complete((struct completion *)bio->bi_private);
273}
274
275static void scrub_fixup(struct scrub_bio *sbio, int ix)
276{
277 struct scrub_dev *sdev = sbio->sdev;
278 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
279 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
280 struct btrfs_multi_bio *multi = NULL;
281 u64 logical = sbio->logical + ix * PAGE_SIZE;
282 u64 length;
283 int i;
284 int ret;
285 DECLARE_COMPLETION_ONSTACK(complete);
286
287 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
288 (sbio->spag[ix].have_csum == 0)) {
289 /*
290 * nodatasum, don't try to fix anything
291 * FIXME: we can do better, open the inode and trigger a
292 * writeback
293 */
294 goto uncorrectable;
295 }
296
297 length = PAGE_SIZE;
298 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
299 &multi, 0);
300 if (ret || !multi || length < PAGE_SIZE) {
301 printk(KERN_ERR
302 "scrub_fixup: btrfs_map_block failed us for %llu\n",
303 (unsigned long long)logical);
304 WARN_ON(1);
305 return;
306 }
307
308 if (multi->num_stripes == 1)
309 /* there aren't any replicas */
310 goto uncorrectable;
311
312 /*
313 * first find a good copy
314 */
315 for (i = 0; i < multi->num_stripes; ++i) {
316 if (i == sbio->spag[ix].mirror_num)
317 continue;
318
319 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
320 multi->stripes[i].physical >> 9,
321 sbio->bio->bi_io_vec[ix].bv_page)) {
322 /* I/O-error, this is not a good copy */
323 continue;
324 }
325
326 if (scrub_fixup_check(sbio, ix) == 0)
327 break;
328 }
329 if (i == multi->num_stripes)
330 goto uncorrectable;
331
332 if (!sdev->readonly) {
333 /*
334 * bi_io_vec[ix].bv_page now contains good data, write it back
335 */
336 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
337 (sbio->physical + ix * PAGE_SIZE) >> 9,
338 sbio->bio->bi_io_vec[ix].bv_page)) {
339 /* I/O-error, writeback failed, give up */
340 goto uncorrectable;
341 }
342 }
343
344 kfree(multi);
345 spin_lock(&sdev->stat_lock);
346 ++sdev->stat.corrected_errors;
347 spin_unlock(&sdev->stat_lock);
348
349 if (printk_ratelimit())
350 printk(KERN_ERR "btrfs: fixed up at %llu\n",
351 (unsigned long long)logical);
352 return;
353
354uncorrectable:
355 kfree(multi);
356 spin_lock(&sdev->stat_lock);
357 ++sdev->stat.uncorrectable_errors;
358 spin_unlock(&sdev->stat_lock);
359
360 if (printk_ratelimit())
361 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
362 (unsigned long long)logical);
363}
364
365static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
366 struct page *page)
367{
368 struct bio *bio = NULL;
369 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete);
371
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev;
377 bio->bi_sector = sector;
378 bio_add_page(bio, page, PAGE_SIZE, 0);
379 bio->bi_end_io = scrub_fixup_end_io;
380 bio->bi_private = &complete;
381 submit_bio(rw, bio);
382
383 wait_for_completion(&complete);
384
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
386 bio_put(bio);
387 return ret;
388}
389
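
scrub_fixup_io() is deliberately synchronous: it tags the request REQ_SYNC, submits it, and sleeps on an on-stack completion that the end_io callback fires. A userspace analogue of that submit-then-wait pattern, with pthread primitives standing in for DECLARE_COMPLETION_ONSTACK()/complete()/wait_for_completion():

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

/* stands in for scrub_fixup_end_io() firing when the bio finishes */
static void *fake_end_io(void *arg)
{
	complete(arg);
	return NULL;
}

int main(void)
{
	struct completion c = { PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER, 0 };
	pthread_t t;

	pthread_create(&t, NULL, fake_end_io, &c);	/* "submit_bio" */
	wait_for_completion(&c);			/* block until done */
	pthread_join(&t, NULL);
	printf("io complete\n");
	return 0;
}
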
390static void scrub_bio_end_io(struct bio *bio, int err)
391{
392 struct scrub_bio *sbio = bio->bi_private;
393 struct scrub_dev *sdev = sbio->sdev;
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395
396 sbio->err = err;
397
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399}
400
401static void scrub_checksum(struct btrfs_work *work)
402{
403 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
404 struct scrub_dev *sdev = sbio->sdev;
405 struct page *page;
406 void *buffer;
407 int i;
408 u64 flags;
409 u64 logical;
410 int ret;
411
412 if (sbio->err) {
413 for (i = 0; i < sbio->count; ++i)
414 scrub_recheck_error(sbio, i);
415
416 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
417 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
418 sbio->bio->bi_phys_segments = 0;
419 sbio->bio->bi_idx = 0;
420
421 for (i = 0; i < sbio->count; i++) {
422 struct bio_vec *bi;
423 bi = &sbio->bio->bi_io_vec[i];
424 bi->bv_offset = 0;
425 bi->bv_len = PAGE_SIZE;
426 }
427
428 spin_lock(&sdev->stat_lock);
429 ++sdev->stat.read_errors;
430 spin_unlock(&sdev->stat_lock);
431 goto out;
432 }
433 for (i = 0; i < sbio->count; ++i) {
434 page = sbio->bio->bi_io_vec[i].bv_page;
435 buffer = kmap_atomic(page, KM_USER0);
436 flags = sbio->spag[i].flags;
437 logical = sbio->logical + i * PAGE_SIZE;
438 ret = 0;
439 if (flags & BTRFS_EXTENT_FLAG_DATA) {
440 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
441 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
442 ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
443 logical, buffer);
444 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
445 BUG_ON(i);
446 (void)scrub_checksum_super(sbio, buffer);
447 } else {
448 WARN_ON(1);
449 }
450 kunmap_atomic(buffer, KM_USER0);
451 if (ret)
452 scrub_recheck_error(sbio, i);
453 }
454
455out:
456 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index;
459 spin_unlock(&sdev->list_lock);
460 atomic_dec(&sdev->in_flight);
461 wake_up(&sdev->list_wait);
462}
463
464static int scrub_checksum_data(struct scrub_dev *sdev,
465 struct scrub_page *spag, void *buffer)
466{
467 u8 csum[BTRFS_CSUM_SIZE];
468 u32 crc = ~(u32)0;
469 int fail = 0;
470 struct btrfs_root *root = sdev->dev->dev_root;
471
472 if (!spag->have_csum)
473 return 0;
474
475 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
476 btrfs_csum_final(crc, csum);
477 if (memcmp(csum, spag->csum, sdev->csum_size))
478 fail = 1;
479
480 spin_lock(&sdev->stat_lock);
481 ++sdev->stat.data_extents_scrubbed;
482 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
483 if (fail)
484 ++sdev->stat.csum_errors;
485 spin_unlock(&sdev->stat_lock);
486
487 return fail;
488}
489
490static int scrub_checksum_tree_block(struct scrub_dev *sdev,
491 struct scrub_page *spag, u64 logical,
492 void *buffer)
493{
494 struct btrfs_header *h;
495 struct btrfs_root *root = sdev->dev->dev_root;
496 struct btrfs_fs_info *fs_info = root->fs_info;
497 u8 csum[BTRFS_CSUM_SIZE];
498 u32 crc = ~(u32)0;
499 int fail = 0;
500 int crc_fail = 0;
501
502 /*
503 * we don't use the getter functions here, as we
504 * a) don't have an extent buffer and
505 * b) the page is already kmapped
506 */
507 h = (struct btrfs_header *)buffer;
508
509 if (logical != le64_to_cpu(h->bytenr))
510 ++fail;
511
512 if (spag->generation != le64_to_cpu(h->generation))
513 ++fail;
514
515 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
516 ++fail;
517
518 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
519 BTRFS_UUID_SIZE))
520 ++fail;
521
522 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
523 PAGE_SIZE - BTRFS_CSUM_SIZE);
524 btrfs_csum_final(crc, csum);
525 if (memcmp(csum, h->csum, sdev->csum_size))
526 ++crc_fail;
527
528 spin_lock(&sdev->stat_lock);
529 ++sdev->stat.tree_extents_scrubbed;
530 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
531 if (crc_fail)
532 ++sdev->stat.csum_errors;
533 if (fail)
534 ++sdev->stat.verify_errors;
535 spin_unlock(&sdev->stat_lock);
536
537 return fail || crc_fail;
538}
539
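
scrub_checksum_tree_block() keeps two failure counters: "fail" for mismatched location and UUID fields, "crc_fail" for the checksum computed over everything behind the csum field; either marks the block bad. A self-contained toy with the same shape; toy_csum() and the 4-byte csum size are made-up stand-ins for btrfs_csum_data()/btrfs_csum_final() and the real checksum size:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define CSUM_SIZE 4

struct toy_header {
	uint8_t csum[CSUM_SIZE];	/* covers everything after itself */
	uint64_t bytenr;		/* where the block believes it lives */
};

/* made-up checksum, standing in for crc32c */
static uint32_t toy_csum(const uint8_t *buf, size_t len)
{
	uint32_t c = 0;

	while (len--)
		c = c * 31 + *buf++;
	return c;
}

/*
 * Same shape as scrub_checksum_tree_block(): verify the location field
 * first ("fail"), then the checksum over the block minus its csum field
 * ("crc_fail"); either one marks the block bad.
 */
static int verify_block(const uint8_t *block, size_t blocksize,
			uint64_t expected_bytenr)
{
	const struct toy_header *h = (const struct toy_header *)block;
	int fail = 0, crc_fail = 0;
	uint32_t crc;

	if (h->bytenr != expected_bytenr)
		++fail;

	crc = toy_csum(block + CSUM_SIZE, blocksize - CSUM_SIZE);
	if (memcmp(&crc, h->csum, CSUM_SIZE))
		++crc_fail;

	return fail || crc_fail;
}

int main(void)
{
	union {
		struct toy_header h;
		uint8_t bytes[64];
	} block = { .bytes = { 0 } };
	uint32_t crc;

	block.h.bytenr = 4096;
	crc = toy_csum(block.bytes + CSUM_SIZE,
		       sizeof(block.bytes) - CSUM_SIZE);
	memcpy(block.h.csum, &crc, CSUM_SIZE);
	printf("bad = %d\n",
	       verify_block(block.bytes, sizeof(block.bytes), 4096));
	return 0;
}
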
540static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
541{
542 struct btrfs_super_block *s;
543 u64 logical;
544 struct scrub_dev *sdev = sbio->sdev;
545 struct btrfs_root *root = sdev->dev->dev_root;
546 struct btrfs_fs_info *fs_info = root->fs_info;
547 u8 csum[BTRFS_CSUM_SIZE];
548 u32 crc = ~(u32)0;
549 int fail = 0;
550
551 s = (struct btrfs_super_block *)buffer;
552 logical = sbio->logical;
553
554 if (logical != le64_to_cpu(s->bytenr))
555 ++fail;
556
557 if (sbio->spag[0].generation != le64_to_cpu(s->generation))
558 ++fail;
559
560 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
561 ++fail;
562
563 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
564 PAGE_SIZE - BTRFS_CSUM_SIZE);
565 btrfs_csum_final(crc, csum);
566 if (memcmp(csum, s->csum, sbio->sdev->csum_size))
567 ++fail;
568
569 if (fail) {
570 /*
571 * if we find an error in a super block, we just report it.
 572 * It will be rewritten with the next transaction commit
 573 * anyway.
574 */
575 spin_lock(&sdev->stat_lock);
576 ++sdev->stat.super_errors;
577 spin_unlock(&sdev->stat_lock);
578 }
579
580 return fail;
581}
582
583static int scrub_submit(struct scrub_dev *sdev)
584{
585 struct scrub_bio *sbio;
586
587 if (sdev->curr == -1)
588 return 0;
589
590 sbio = sdev->bios[sdev->curr];
591
592 sbio->bio->bi_sector = sbio->physical >> 9;
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE;
594 sbio->bio->bi_next = NULL;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
596 sbio->bio->bi_comp_cpu = -1;
597 sbio->bio->bi_bdev = sdev->dev->bdev;
598 sbio->err = 0;
599 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight);
601
602 submit_bio(0, sbio->bio);
603
604 return 0;
605}
606
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
608 u64 physical, u64 flags, u64 gen, u64 mirror_num,
609 u8 *csum, int force)
610{
611 struct scrub_bio *sbio;
612
613again:
614 /*
615 * grab a fresh bio or wait for one to become available
616 */
617 while (sdev->curr == -1) {
618 spin_lock(&sdev->list_lock);
619 sdev->curr = sdev->first_free;
620 if (sdev->curr != -1) {
621 sdev->first_free = sdev->bios[sdev->curr]->next_free;
622 sdev->bios[sdev->curr]->next_free = -1;
623 sdev->bios[sdev->curr]->count = 0;
624 spin_unlock(&sdev->list_lock);
625 } else {
626 spin_unlock(&sdev->list_lock);
627 wait_event(sdev->list_wait, sdev->first_free != -1);
628 }
629 }
630 sbio = sdev->bios[sdev->curr];
631 if (sbio->count == 0) {
632 sbio->physical = physical;
633 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
635 sbio->logical + sbio->count * PAGE_SIZE != logical) {
636 scrub_submit(sdev);
637 goto again;
638 }
639 sbio->spag[sbio->count].flags = flags;
640 sbio->spag[sbio->count].generation = gen;
641 sbio->spag[sbio->count].have_csum = 0;
642 sbio->spag[sbio->count].mirror_num = mirror_num;
643 if (csum) {
644 sbio->spag[sbio->count].have_csum = 1;
645 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
646 }
647 ++sbio->count;
648 if (sbio->count == SCRUB_PAGES_PER_BIO || force)
649 scrub_submit(sdev);
650
651 return 0;
652}
653
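
scrub_page() above pops a scrub_bio off a free list that scrub_setup_dev() chained together by array index (first_free heads the list, next_free links it, -1 terminates), sleeping on list_wait when every bio is in flight. A single-threaded sketch of just that index-linked free list, locking and waiting omitted:

#include <stdio.h>

#define NBIOS 16

struct demo_bio {
	int next_free;	/* index of the next free bio, -1 terminates */
};

static struct demo_bio bios[NBIOS];
static int first_free;

static void freelist_init(void)
{
	for (int i = 0; i < NBIOS; i++)
		bios[i].next_free = (i != NBIOS - 1) ? i + 1 : -1;
	first_free = 0;
}

static int freelist_pop(void)
{
	int i = first_free;

	if (i != -1) {
		first_free = bios[i].next_free;
		bios[i].next_free = -1;
	}
	return i;	/* -1 means the caller must wait (list_wait) */
}

static void freelist_push(int i)
{
	bios[i].next_free = first_free;
	first_free = i;
}

int main(void)
{
	freelist_init();
	int a = freelist_pop(), b = freelist_pop();
	printf("popped %d and %d\n", a, b);
	freelist_push(a);
	printf("next pop: %d\n", freelist_pop());
	return 0;
}
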
654static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
655 u8 *csum)
656{
657 struct btrfs_ordered_sum *sum = NULL;
658 int ret = 0;
659 unsigned long i;
660 unsigned long num_sectors;
661 u32 sectorsize = sdev->dev->dev_root->sectorsize;
662
663 while (!list_empty(&sdev->csum_list)) {
664 sum = list_first_entry(&sdev->csum_list,
665 struct btrfs_ordered_sum, list);
666 if (sum->bytenr > logical)
667 return 0;
668 if (sum->bytenr + sum->len > logical)
669 break;
670
671 ++sdev->stat.csum_discards;
672 list_del(&sum->list);
673 kfree(sum);
674 sum = NULL;
675 }
676 if (!sum)
677 return 0;
678
679 num_sectors = sum->len / sectorsize;
680 for (i = 0; i < num_sectors; ++i) {
681 if (sum->sums[i].bytenr == logical) {
682 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
683 ret = 1;
684 break;
685 }
686 }
687 if (ret && i == num_sectors - 1) {
688 list_del(&sum->list);
689 kfree(sum);
690 }
691 return ret;
692}
693
 694/* scrub_extent tries to collect up to 64 kB for each bio */
695static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
696 u64 physical, u64 flags, u64 gen, u64 mirror_num)
697{
698 int ret;
699 u8 csum[BTRFS_CSUM_SIZE];
700
701 while (len) {
702 u64 l = min_t(u64, len, PAGE_SIZE);
703 int have_csum = 0;
704
705 if (flags & BTRFS_EXTENT_FLAG_DATA) {
706 /* push csums to sbio */
707 have_csum = scrub_find_csum(sdev, logical, l, csum);
708 if (have_csum == 0)
709 ++sdev->stat.no_csum;
710 }
711 ret = scrub_page(sdev, logical, l, physical, flags, gen,
712 mirror_num, have_csum ? csum : NULL, 0);
713 if (ret)
714 return ret;
715 len -= l;
716 logical += l;
717 physical += l;
718 }
719 return 0;
720}
721
722static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
723 struct map_lookup *map, int num, u64 base, u64 length)
724{
725 struct btrfs_path *path;
726 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
727 struct btrfs_root *root = fs_info->extent_root;
728 struct btrfs_root *csum_root = fs_info->csum_root;
729 struct btrfs_extent_item *extent;
730 u64 flags;
731 int ret;
732 int slot;
733 int i;
734 u64 nstripes;
735 int start_stripe;
736 struct extent_buffer *l;
737 struct btrfs_key key;
738 u64 physical;
739 u64 logical;
740 u64 generation;
741 u64 mirror_num;
742
743 u64 increment = map->stripe_len;
744 u64 offset;
745
746 nstripes = length;
747 offset = 0;
748 do_div(nstripes, map->stripe_len);
749 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
750 offset = map->stripe_len * num;
751 increment = map->stripe_len * map->num_stripes;
752 mirror_num = 0;
753 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
754 int factor = map->num_stripes / map->sub_stripes;
755 offset = map->stripe_len * (num / map->sub_stripes);
756 increment = map->stripe_len * factor;
757 mirror_num = num % map->sub_stripes;
758 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
759 increment = map->stripe_len;
760 mirror_num = num % map->num_stripes;
761 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
762 increment = map->stripe_len;
763 mirror_num = num % map->num_stripes;
764 } else {
765 increment = map->stripe_len;
766 mirror_num = 0;
767 }
768
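
The if/else chain above collapses every RAID profile to two numbers: the device's starting offset into the chunk and the logical distance (increment) from one of its stripes to the next. A quick demo of the RAID0 branch's arithmetic, with arbitrary values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t base = 0;		/* chunk start (logical) */
	uint64_t stripe_len = 64 * 1024;
	int num_stripes = 4;		/* devices in the RAID0 chunk */
	int num = 1;			/* we are scrubbing device #1 */

	/* RAID0 branch above: this device owns every 4th stripe_len */
	uint64_t offset = stripe_len * num;
	uint64_t increment = stripe_len * (uint64_t)num_stripes;

	for (int i = 0; i < 3; i++)
		printf("stripe %d -> logical %llu\n", i,
		       (unsigned long long)(base + offset + i * increment));
	return 0;
}
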
769 path = btrfs_alloc_path();
770 if (!path)
771 return -ENOMEM;
772
773 path->reada = 2;
774 path->search_commit_root = 1;
775 path->skip_locking = 1;
776
777 /*
778 * find all extents for each stripe and just read them to get
779 * them into the page cache
 780 * FIXME: we can do better; build more intelligent prefetching
781 */
782 logical = base + offset;
783 physical = map->stripes[num].physical;
784 ret = 0;
785 for (i = 0; i < nstripes; ++i) {
786 key.objectid = logical;
787 key.type = BTRFS_EXTENT_ITEM_KEY;
788 key.offset = (u64)0;
789
790 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
791 if (ret < 0)
792 goto out;
793
794 l = path->nodes[0];
795 slot = path->slots[0];
796 btrfs_item_key_to_cpu(l, &key, slot);
797 if (key.objectid != logical) {
798 ret = btrfs_previous_item(root, path, 0,
799 BTRFS_EXTENT_ITEM_KEY);
800 if (ret < 0)
801 goto out;
802 }
803
804 while (1) {
805 l = path->nodes[0];
806 slot = path->slots[0];
807 if (slot >= btrfs_header_nritems(l)) {
808 ret = btrfs_next_leaf(root, path);
809 if (ret == 0)
810 continue;
811 if (ret < 0)
812 goto out;
813
814 break;
815 }
816 btrfs_item_key_to_cpu(l, &key, slot);
817
818 if (key.objectid >= logical + map->stripe_len)
819 break;
820
821 path->slots[0]++;
822 }
823 btrfs_release_path(path);
824 logical += increment;
825 physical += map->stripe_len;
826 cond_resched();
827 }
828
829 /*
830 * collect all data csums for the stripe to avoid seeking during
 831 * the scrub. With crc32 this might currently end up being about 1MB
832 */
833 start_stripe = 0;
834again:
835 logical = base + offset + start_stripe * increment;
836 for (i = start_stripe; i < nstripes; ++i) {
837 ret = btrfs_lookup_csums_range(csum_root, logical,
838 logical + map->stripe_len - 1,
839 &sdev->csum_list, 1);
840 if (ret)
841 goto out;
842
843 logical += increment;
844 cond_resched();
845 }
846 /*
847 * now find all extents for each stripe and scrub them
848 */
849 logical = base + offset + start_stripe * increment;
850 physical = map->stripes[num].physical + start_stripe * map->stripe_len;
851 ret = 0;
852 for (i = start_stripe; i < nstripes; ++i) {
853 /*
854 * canceled?
855 */
856 if (atomic_read(&fs_info->scrub_cancel_req) ||
857 atomic_read(&sdev->cancel_req)) {
858 ret = -ECANCELED;
859 goto out;
860 }
861 /*
862 * check to see if we have to pause
863 */
864 if (atomic_read(&fs_info->scrub_pause_req)) {
865 /* push queued extents */
866 scrub_submit(sdev);
867 wait_event(sdev->list_wait,
868 atomic_read(&sdev->in_flight) == 0);
869 atomic_inc(&fs_info->scrubs_paused);
870 wake_up(&fs_info->scrub_pause_wait);
871 mutex_lock(&fs_info->scrub_lock);
872 while (atomic_read(&fs_info->scrub_pause_req)) {
873 mutex_unlock(&fs_info->scrub_lock);
874 wait_event(fs_info->scrub_pause_wait,
875 atomic_read(&fs_info->scrub_pause_req) == 0);
876 mutex_lock(&fs_info->scrub_lock);
877 }
878 atomic_dec(&fs_info->scrubs_paused);
879 mutex_unlock(&fs_info->scrub_lock);
880 wake_up(&fs_info->scrub_pause_wait);
881 scrub_free_csums(sdev);
882 start_stripe = i;
883 goto again;
884 }
885
886 key.objectid = logical;
887 key.type = BTRFS_EXTENT_ITEM_KEY;
888 key.offset = (u64)0;
889
890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
891 if (ret < 0)
892 goto out;
893
894 l = path->nodes[0];
895 slot = path->slots[0];
896 btrfs_item_key_to_cpu(l, &key, slot);
897 if (key.objectid != logical) {
898 ret = btrfs_previous_item(root, path, 0,
899 BTRFS_EXTENT_ITEM_KEY);
900 if (ret < 0)
901 goto out;
902 }
903
904 while (1) {
905 l = path->nodes[0];
906 slot = path->slots[0];
907 if (slot >= btrfs_header_nritems(l)) {
908 ret = btrfs_next_leaf(root, path);
909 if (ret == 0)
910 continue;
911 if (ret < 0)
912 goto out;
913
914 break;
915 }
916 btrfs_item_key_to_cpu(l, &key, slot);
917
918 if (key.objectid + key.offset <= logical)
919 goto next;
920
921 if (key.objectid >= logical + map->stripe_len)
922 break;
923
924 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
925 goto next;
926
927 extent = btrfs_item_ptr(l, slot,
928 struct btrfs_extent_item);
929 flags = btrfs_extent_flags(l, extent);
930 generation = btrfs_extent_generation(l, extent);
931
932 if (key.objectid < logical &&
933 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
934 printk(KERN_ERR
935 "btrfs scrub: tree block %llu spanning "
936 "stripes, ignored. logical=%llu\n",
937 (unsigned long long)key.objectid,
938 (unsigned long long)logical);
939 goto next;
940 }
941
942 /*
943 * trim extent to this stripe
944 */
945 if (key.objectid < logical) {
946 key.offset -= logical - key.objectid;
947 key.objectid = logical;
948 }
949 if (key.objectid + key.offset >
950 logical + map->stripe_len) {
951 key.offset = logical + map->stripe_len -
952 key.objectid;
953 }
954
955 ret = scrub_extent(sdev, key.objectid, key.offset,
956 key.objectid - logical + physical,
957 flags, generation, mirror_num);
958 if (ret)
959 goto out;
960
961next:
962 path->slots[0]++;
963 }
964 btrfs_release_path(path);
965 logical += increment;
966 physical += map->stripe_len;
967 spin_lock(&sdev->stat_lock);
968 sdev->stat.last_physical = physical;
969 spin_unlock(&sdev->stat_lock);
970 }
971 /* push queued extents */
972 scrub_submit(sdev);
973
974out:
975 btrfs_free_path(path);
976 return ret < 0 ? ret : 0;
977}
978
979static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
980 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
981{
982 struct btrfs_mapping_tree *map_tree =
983 &sdev->dev->dev_root->fs_info->mapping_tree;
984 struct map_lookup *map;
985 struct extent_map *em;
986 int i;
987 int ret = -EINVAL;
988
989 read_lock(&map_tree->map_tree.lock);
990 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
991 read_unlock(&map_tree->map_tree.lock);
992
993 if (!em)
994 return -EINVAL;
995
996 map = (struct map_lookup *)em->bdev;
997 if (em->start != chunk_offset)
998 goto out;
999
1000 if (em->len < length)
1001 goto out;
1002
1003 for (i = 0; i < map->num_stripes; ++i) {
1004 if (map->stripes[i].dev == sdev->dev) {
1005 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1006 if (ret)
1007 goto out;
1008 }
1009 }
1010out:
1011 free_extent_map(em);
1012
1013 return ret;
1014}
1015
1016static noinline_for_stack
1017int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1018{
1019 struct btrfs_dev_extent *dev_extent = NULL;
1020 struct btrfs_path *path;
1021 struct btrfs_root *root = sdev->dev->dev_root;
1022 struct btrfs_fs_info *fs_info = root->fs_info;
1023 u64 length;
1024 u64 chunk_tree;
1025 u64 chunk_objectid;
1026 u64 chunk_offset;
1027 int ret;
1028 int slot;
1029 struct extent_buffer *l;
1030 struct btrfs_key key;
1031 struct btrfs_key found_key;
1032 struct btrfs_block_group_cache *cache;
1033
1034 path = btrfs_alloc_path();
1035 if (!path)
1036 return -ENOMEM;
1037
1038 path->reada = 2;
1039 path->search_commit_root = 1;
1040 path->skip_locking = 1;
1041
1042 key.objectid = sdev->dev->devid;
1043 key.offset = 0ull;
1044 key.type = BTRFS_DEV_EXTENT_KEY;
1045
1046
1047 while (1) {
1048 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1049 if (ret < 0)
1050 goto out;
1051 ret = 0;
1052
1053 l = path->nodes[0];
1054 slot = path->slots[0];
1055
1056 btrfs_item_key_to_cpu(l, &found_key, slot);
1057
1058 if (found_key.objectid != sdev->dev->devid)
1059 break;
1060
1061 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
1062 break;
1063
1064 if (found_key.offset >= end)
1065 break;
1066
1067 if (found_key.offset < key.offset)
1068 break;
1069
1070 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1071 length = btrfs_dev_extent_length(l, dev_extent);
1072
1073 if (found_key.offset + length <= start) {
1074 key.offset = found_key.offset + length;
1075 btrfs_release_path(path);
1076 continue;
1077 }
1078
1079 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1080 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1081 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1082
1083 /*
1084 * get a reference on the corresponding block group to prevent
1085 * the chunk from going away while we scrub it
1086 */
1087 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1088 if (!cache) {
1089 ret = -ENOENT;
1090 goto out;
1091 }
1092 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1093 chunk_offset, length);
1094 btrfs_put_block_group(cache);
1095 if (ret)
1096 break;
1097
1098 key.offset = found_key.offset + length;
1099 btrfs_release_path(path);
1100 }
1101
1102out:
1103 btrfs_free_path(path);
1104 return ret;
1105}
1106
1107static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1108{
1109 int i;
1110 u64 bytenr;
1111 u64 gen;
1112 int ret;
1113 struct btrfs_device *device = sdev->dev;
1114 struct btrfs_root *root = device->dev_root;
1115
1116 gen = root->fs_info->last_trans_committed;
1117
1118 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1119 bytenr = btrfs_sb_offset(i);
1120 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1121 break;
1122
1123 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1124 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1125 if (ret)
1126 return ret;
1127 }
1128 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1129
1130 return 0;
1131}
1132
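
scrub_supers() walks the superblock mirrors via btrfs_sb_offset(), skipping any copy that would extend past the device. A sketch of how those offsets are derived; the 16KiB base, the shift of 12, and the 64KiB primary offset are recalled from the btrfs headers and should be treated as assumptions:

#include <stdio.h>
#include <stdint.h>

#define SUPER_INFO_OFFSET	(64 * 1024)	/* primary copy */
#define SUPER_MIRROR_SHIFT	12
#define SUPER_MIRROR_MAX	3

static uint64_t demo_sb_offset(int mirror)
{
	uint64_t start = 16 * 1024;

	if (mirror)
		return start << (SUPER_MIRROR_SHIFT * mirror);
	return SUPER_INFO_OFFSET;
}

int main(void)
{
	/* 64KiB, 64MiB, 256GiB; mirrors past total_bytes get skipped */
	for (int i = 0; i < SUPER_MIRROR_MAX; i++)
		printf("mirror %d at %llu\n", i,
		       (unsigned long long)demo_sb_offset(i));
	return 0;
}
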
1133/*
 1134 * take a reference on fs_info->scrub_workers; start the workers if necessary
1135 */
1136static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1137{
1138 struct btrfs_fs_info *fs_info = root->fs_info;
1139
1140 mutex_lock(&fs_info->scrub_lock);
1141 if (fs_info->scrub_workers_refcnt == 0)
1142 btrfs_start_workers(&fs_info->scrub_workers, 1);
1143 ++fs_info->scrub_workers_refcnt;
1144 mutex_unlock(&fs_info->scrub_lock);
1145
1146 return 0;
1147}
1148
1149static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1150{
1151 struct btrfs_fs_info *fs_info = root->fs_info;
1152
1153 mutex_lock(&fs_info->scrub_lock);
1154 if (--fs_info->scrub_workers_refcnt == 0)
1155 btrfs_stop_workers(&fs_info->scrub_workers);
1156 WARN_ON(fs_info->scrub_workers_refcnt < 0);
1157 mutex_unlock(&fs_info->scrub_lock);
1158}
1159
1160
1161int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1162 struct btrfs_scrub_progress *progress, int readonly)
1163{
1164 struct scrub_dev *sdev;
1165 struct btrfs_fs_info *fs_info = root->fs_info;
1166 int ret;
1167 struct btrfs_device *dev;
1168
1169 if (root->fs_info->closing)
1170 return -EINVAL;
1171
1172 /*
1173 * check some assumptions
1174 */
1175 if (root->sectorsize != PAGE_SIZE ||
1176 root->sectorsize != root->leafsize ||
1177 root->sectorsize != root->nodesize) {
1178 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1179 return -EINVAL;
1180 }
1181
1182 ret = scrub_workers_get(root);
1183 if (ret)
1184 return ret;
1185
1186 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1187 dev = btrfs_find_device(root, devid, NULL, NULL);
1188 if (!dev || dev->missing) {
1189 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1190 scrub_workers_put(root);
1191 return -ENODEV;
1192 }
1193 mutex_lock(&fs_info->scrub_lock);
1194
1195 if (!dev->in_fs_metadata) {
1196 mutex_unlock(&fs_info->scrub_lock);
1197 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1198 scrub_workers_put(root);
1199 return -ENODEV;
1200 }
1201
1202 if (dev->scrub_device) {
1203 mutex_unlock(&fs_info->scrub_lock);
1204 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1205 scrub_workers_put(root);
1206 return -EINPROGRESS;
1207 }
1208 sdev = scrub_setup_dev(dev);
1209 if (IS_ERR(sdev)) {
1210 mutex_unlock(&fs_info->scrub_lock);
1211 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1212 scrub_workers_put(root);
1213 return PTR_ERR(sdev);
1214 }
1215 sdev->readonly = readonly;
1216 dev->scrub_device = sdev;
1217
1218 atomic_inc(&fs_info->scrubs_running);
1219 mutex_unlock(&fs_info->scrub_lock);
1220 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1221
1222 down_read(&fs_info->scrub_super_lock);
1223 ret = scrub_supers(sdev);
1224 up_read(&fs_info->scrub_super_lock);
1225
1226 if (!ret)
1227 ret = scrub_enumerate_chunks(sdev, start, end);
1228
1229 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1230
1231 atomic_dec(&fs_info->scrubs_running);
1232 wake_up(&fs_info->scrub_pause_wait);
1233
1234 if (progress)
1235 memcpy(progress, &sdev->stat, sizeof(*progress));
1236
1237 mutex_lock(&fs_info->scrub_lock);
1238 dev->scrub_device = NULL;
1239 mutex_unlock(&fs_info->scrub_lock);
1240
1241 scrub_free_dev(sdev);
1242 scrub_workers_put(root);
1243
1244 return ret;
1245}
1246
1247int btrfs_scrub_pause(struct btrfs_root *root)
1248{
1249 struct btrfs_fs_info *fs_info = root->fs_info;
1250
1251 mutex_lock(&fs_info->scrub_lock);
1252 atomic_inc(&fs_info->scrub_pause_req);
1253 while (atomic_read(&fs_info->scrubs_paused) !=
1254 atomic_read(&fs_info->scrubs_running)) {
1255 mutex_unlock(&fs_info->scrub_lock);
1256 wait_event(fs_info->scrub_pause_wait,
1257 atomic_read(&fs_info->scrubs_paused) ==
1258 atomic_read(&fs_info->scrubs_running));
1259 mutex_lock(&fs_info->scrub_lock);
1260 }
1261 mutex_unlock(&fs_info->scrub_lock);
1262
1263 return 0;
1264}
1265
1266int btrfs_scrub_continue(struct btrfs_root *root)
1267{
1268 struct btrfs_fs_info *fs_info = root->fs_info;
1269
1270 atomic_dec(&fs_info->scrub_pause_req);
1271 wake_up(&fs_info->scrub_pause_wait);
1272 return 0;
1273}
1274
1275int btrfs_scrub_pause_super(struct btrfs_root *root)
1276{
1277 down_write(&root->fs_info->scrub_super_lock);
1278 return 0;
1279}
1280
1281int btrfs_scrub_continue_super(struct btrfs_root *root)
1282{
1283 up_write(&root->fs_info->scrub_super_lock);
1284 return 0;
1285}
1286
1287int btrfs_scrub_cancel(struct btrfs_root *root)
1288{
1289 struct btrfs_fs_info *fs_info = root->fs_info;
1290
1291 mutex_lock(&fs_info->scrub_lock);
1292 if (!atomic_read(&fs_info->scrubs_running)) {
1293 mutex_unlock(&fs_info->scrub_lock);
1294 return -ENOTCONN;
1295 }
1296
1297 atomic_inc(&fs_info->scrub_cancel_req);
1298 while (atomic_read(&fs_info->scrubs_running)) {
1299 mutex_unlock(&fs_info->scrub_lock);
1300 wait_event(fs_info->scrub_pause_wait,
1301 atomic_read(&fs_info->scrubs_running) == 0);
1302 mutex_lock(&fs_info->scrub_lock);
1303 }
1304 atomic_dec(&fs_info->scrub_cancel_req);
1305 mutex_unlock(&fs_info->scrub_lock);
1306
1307 return 0;
1308}
1309
1310int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1311{
1312 struct btrfs_fs_info *fs_info = root->fs_info;
1313 struct scrub_dev *sdev;
1314
1315 mutex_lock(&fs_info->scrub_lock);
1316 sdev = dev->scrub_device;
1317 if (!sdev) {
1318 mutex_unlock(&fs_info->scrub_lock);
1319 return -ENOTCONN;
1320 }
1321 atomic_inc(&sdev->cancel_req);
1322 while (dev->scrub_device) {
1323 mutex_unlock(&fs_info->scrub_lock);
1324 wait_event(fs_info->scrub_pause_wait,
1325 dev->scrub_device == NULL);
1326 mutex_lock(&fs_info->scrub_lock);
1327 }
1328 mutex_unlock(&fs_info->scrub_lock);
1329
1330 return 0;
1331}
1332int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1333{
1334 struct btrfs_fs_info *fs_info = root->fs_info;
1335 struct btrfs_device *dev;
1336 int ret;
1337
1338 /*
1339 * we have to hold the device_list_mutex here so the device
1340 * does not go away in cancel_dev. FIXME: find a better solution
1341 */
1342 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1343 dev = btrfs_find_device(root, devid, NULL, NULL);
1344 if (!dev) {
1345 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1346 return -ENODEV;
1347 }
1348 ret = btrfs_scrub_cancel_dev(root, dev);
1349 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1350
1351 return ret;
1352}
1353
1354int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1355 struct btrfs_scrub_progress *progress)
1356{
1357 struct btrfs_device *dev;
1358 struct scrub_dev *sdev = NULL;
1359
1360 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1361 dev = btrfs_find_device(root, devid, NULL, NULL);
1362 if (dev)
1363 sdev = dev->scrub_device;
1364 if (sdev)
1365 memcpy(progress, &sdev->stat, sizeof(*progress));
1366 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1367
1368 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1369}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0ac712efcdf2..28e3cb2607ff 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,6 +40,7 @@
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include "compat.h" 42#include "compat.h"
43#include "delayed-inode.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
45#include "transaction.h" 46#include "transaction.h"
@@ -159,7 +160,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 160 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 161 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 162 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_subvolrootid, Opt_err, 163 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err,
163}; 164};
164 165
165static match_table_t tokens = { 166static match_table_t tokens = {
@@ -190,6 +191,7 @@ static match_table_t tokens = {
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 191 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 192 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"}, 193 {Opt_subvolrootid, "subvolrootid=%d"},
194 {Opt_defrag, "autodefrag"},
193 {Opt_err, NULL}, 195 {Opt_err, NULL},
194}; 196};
195 197
@@ -368,6 +370,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
368 case Opt_enospc_debug: 370 case Opt_enospc_debug:
369 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 371 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
370 break; 372 break;
373 case Opt_defrag:
 374 printk(KERN_INFO "btrfs: enabling auto defrag\n");
375 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
376 break;
371 case Opt_err: 377 case Opt_err:
372 printk(KERN_INFO "btrfs: unrecognized mount option " 378 printk(KERN_INFO "btrfs: unrecognized mount option "
373 "'%s'\n", p); 379 "'%s'\n", p);
@@ -506,8 +512,10 @@ static struct dentry *get_default_root(struct super_block *sb,
506 */ 512 */
507 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 513 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
508 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); 514 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
509 if (IS_ERR(di)) 515 if (IS_ERR(di)) {
516 btrfs_free_path(path);
510 return ERR_CAST(di); 517 return ERR_CAST(di);
518 }
511 if (!di) { 519 if (!di) {
512 /* 520 /*
513 * Ok the default dir item isn't there. This is weird since 521 * Ok the default dir item isn't there. This is weird since
@@ -739,7 +747,7 @@ static int btrfs_set_super(struct super_block *s, void *data)
739 * for multiple device setup. Make sure to keep it in sync. 747 * for multiple device setup. Make sure to keep it in sync.
740 */ 748 */
741static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, 749static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
742 const char *dev_name, void *data) 750 const char *device_name, void *data)
743{ 751{
744 struct block_device *bdev = NULL; 752 struct block_device *bdev = NULL;
745 struct super_block *s; 753 struct super_block *s;
@@ -762,7 +770,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
762 if (error) 770 if (error)
763 return ERR_PTR(error); 771 return ERR_PTR(error);
764 772
765 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 773 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
766 if (error) 774 if (error)
767 goto error_free_subvol_name; 775 goto error_free_subvol_name;
768 776
@@ -913,6 +921,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
913 return 0; 921 return 0;
914} 922}
915 923
924/* Used to sort the devices by max_avail (descending sort) */
925static int btrfs_cmp_device_free_bytes(const void *dev_info1,
926 const void *dev_info2)
927{
928 if (((struct btrfs_device_info *)dev_info1)->max_avail >
929 ((struct btrfs_device_info *)dev_info2)->max_avail)
930 return -1;
931 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
932 ((struct btrfs_device_info *)dev_info2)->max_avail)
933 return 1;
934 else
935 return 0;
936}
937
938/*
939 * sort the devices by max_avail, which holds the max free extent size of
940 * each device (descending sort)
941 */
942static inline void btrfs_descending_sort_devices(
943 struct btrfs_device_info *devices,
944 size_t nr_devices)
945{
946 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
947 btrfs_cmp_device_free_bytes, NULL);
948}
949
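
The kernel's sort() takes the same three-way comparator shape as userspace qsort(), so the descending order falls out of returning -1 when the first element is larger. The equivalent in plain C:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct dev_info {
	uint64_t max_avail;	/* max free extent size on the device */
};

/* same idea as btrfs_cmp_device_free_bytes() above, for qsort() */
static int cmp_desc(const void *a, const void *b)
{
	const struct dev_info *da = a, *db = b;

	if (da->max_avail > db->max_avail)
		return -1;
	if (da->max_avail < db->max_avail)
		return 1;
	return 0;
}

int main(void)
{
	struct dev_info d[] = { { 10 }, { 30 }, { 20 } };

	qsort(d, 3, sizeof(d[0]), cmp_desc);
	for (int i = 0; i < 3; i++)
		printf("%llu\n", (unsigned long long)d[i].max_avail);
	return 0;
}
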
916/* 950/*
917 * The helper to calc the free space on the devices that can be used to store 951 * The helper to calc the free space on the devices that can be used to store
918 * file data. 952 * file data.
@@ -1206,10 +1240,14 @@ static int __init init_btrfs_fs(void)
1206 if (err) 1240 if (err)
1207 goto free_extent_io; 1241 goto free_extent_io;
1208 1242
1209 err = btrfs_interface_init(); 1243 err = btrfs_delayed_inode_init();
1210 if (err) 1244 if (err)
1211 goto free_extent_map; 1245 goto free_extent_map;
1212 1246
1247 err = btrfs_interface_init();
1248 if (err)
1249 goto free_delayed_inode;
1250
1213 err = register_filesystem(&btrfs_fs_type); 1251 err = register_filesystem(&btrfs_fs_type);
1214 if (err) 1252 if (err)
1215 goto unregister_ioctl; 1253 goto unregister_ioctl;
@@ -1219,6 +1257,8 @@ static int __init init_btrfs_fs(void)
1219 1257
1220unregister_ioctl: 1258unregister_ioctl:
1221 btrfs_interface_exit(); 1259 btrfs_interface_exit();
1260free_delayed_inode:
1261 btrfs_delayed_inode_exit();
1222free_extent_map: 1262free_extent_map:
1223 extent_map_exit(); 1263 extent_map_exit();
1224free_extent_io: 1264free_extent_io:
@@ -1235,6 +1275,7 @@ free_sysfs:
1235static void __exit exit_btrfs_fs(void) 1275static void __exit exit_btrfs_fs(void)
1236{ 1276{
1237 btrfs_destroy_cachep(); 1277 btrfs_destroy_cachep();
1278 btrfs_delayed_inode_exit();
1238 extent_map_exit(); 1279 extent_map_exit();
1239 extent_io_exit(); 1280 extent_io_exit();
1240 btrfs_interface_exit(); 1281 btrfs_interface_exit();
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4ce16ef702a3..c3c223ae6691 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -174,86 +174,9 @@ static const struct sysfs_ops btrfs_root_attr_ops = {
174 .store = btrfs_root_attr_store, 174 .store = btrfs_root_attr_store,
175}; 175};
176 176
177static struct kobj_type btrfs_root_ktype = {
178 .default_attrs = btrfs_root_attrs,
179 .sysfs_ops = &btrfs_root_attr_ops,
180 .release = btrfs_root_release,
181};
182
183static struct kobj_type btrfs_super_ktype = {
184 .default_attrs = btrfs_super_attrs,
185 .sysfs_ops = &btrfs_super_attr_ops,
186 .release = btrfs_super_release,
187};
188
189/* /sys/fs/btrfs/ entry */ 177/* /sys/fs/btrfs/ entry */
190static struct kset *btrfs_kset; 178static struct kset *btrfs_kset;
191 179
192int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
193{
194 int error;
195 char *name;
196 char c;
197 int len = strlen(fs->sb->s_id) + 1;
198 int i;
199
200 name = kmalloc(len, GFP_NOFS);
201 if (!name) {
202 error = -ENOMEM;
203 goto fail;
204 }
205
206 for (i = 0; i < len; i++) {
207 c = fs->sb->s_id[i];
208 if (c == '/' || c == '\\')
209 c = '!';
210 name[i] = c;
211 }
212 name[len] = '\0';
213
214 fs->super_kobj.kset = btrfs_kset;
215 error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
216 NULL, "%s", name);
217 kfree(name);
218 if (error)
219 goto fail;
220
221 return 0;
222
223fail:
224 printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
225 return error;
226}
227
228int btrfs_sysfs_add_root(struct btrfs_root *root)
229{
230 int error;
231
232 error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
233 &root->fs_info->super_kobj,
234 "%s", root->name);
235 if (error)
236 goto fail;
237
238 return 0;
239
240fail:
241 printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
242 return error;
243}
244
245void btrfs_sysfs_del_root(struct btrfs_root *root)
246{
247 kobject_put(&root->root_kobj);
248 wait_for_completion(&root->kobj_unregister);
249}
250
251void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
252{
253 kobject_put(&fs->super_kobj);
254 wait_for_completion(&fs->kobj_unregister);
255}
256
257int btrfs_init_sysfs(void) 180int btrfs_init_sysfs(void)
258{ 181{
259 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); 182 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f4ea695325b2..2d5c6d2aa4e4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -27,6 +27,7 @@
27#include "transaction.h" 27#include "transaction.h"
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h"
30 31
31#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
32 33
@@ -110,8 +111,7 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
110 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
111 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
112 extent_io_tree_init(&cur_trans->dirty_pages, 113 extent_io_tree_init(&cur_trans->dirty_pages,
113 root->fs_info->btree_inode->i_mapping, 114 root->fs_info->btree_inode->i_mapping);
114 GFP_NOFS);
115 root->fs_info->generation++; 115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation; 116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans; 117 root->fs_info->running_transaction = cur_trans;
@@ -368,49 +368,6 @@ out:
368 return ret; 368 return ret;
369} 369}
370 370
371#if 0
372/*
373 * rate limit against the drop_snapshot code. This helps to slow down new
374 * operations if the drop_snapshot code isn't able to keep up.
375 */
376static void throttle_on_drops(struct btrfs_root *root)
377{
378 struct btrfs_fs_info *info = root->fs_info;
379 int harder_count = 0;
380
381harder:
382 if (atomic_read(&info->throttles)) {
383 DEFINE_WAIT(wait);
384 int thr;
385 thr = atomic_read(&info->throttle_gen);
386
387 do {
388 prepare_to_wait(&info->transaction_throttle,
389 &wait, TASK_UNINTERRUPTIBLE);
390 if (!atomic_read(&info->throttles)) {
391 finish_wait(&info->transaction_throttle, &wait);
392 break;
393 }
394 schedule();
395 finish_wait(&info->transaction_throttle, &wait);
396 } while (thr == atomic_read(&info->throttle_gen));
397 harder_count++;
398
399 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
400 harder_count < 2)
401 goto harder;
402
403 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
404 harder_count < 10)
405 goto harder;
406
407 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
408 harder_count < 20)
409 goto harder;
410 }
411}
412#endif
413
414void btrfs_throttle(struct btrfs_root *root) 371void btrfs_throttle(struct btrfs_root *root)
415{ 372{
416 if (!atomic_read(&root->fs_info->open_ioctl_trans)) 373 if (!atomic_read(&root->fs_info->open_ioctl_trans))
@@ -514,19 +471,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
514int btrfs_end_transaction(struct btrfs_trans_handle *trans, 471int btrfs_end_transaction(struct btrfs_trans_handle *trans,
515 struct btrfs_root *root) 472 struct btrfs_root *root)
516{ 473{
517 return __btrfs_end_transaction(trans, root, 0, 1); 474 int ret;
475
476 ret = __btrfs_end_transaction(trans, root, 0, 1);
477 if (ret)
478 return ret;
479 return 0;
518} 480}
519 481
520int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 482int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
521 struct btrfs_root *root) 483 struct btrfs_root *root)
522{ 484{
523 return __btrfs_end_transaction(trans, root, 1, 1); 485 int ret;
486
487 ret = __btrfs_end_transaction(trans, root, 1, 1);
488 if (ret)
489 return ret;
490 return 0;
524} 491}
525 492
526int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 493int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
527 struct btrfs_root *root) 494 struct btrfs_root *root)
528{ 495{
529 return __btrfs_end_transaction(trans, root, 0, 0); 496 int ret;
497
498 ret = __btrfs_end_transaction(trans, root, 0, 0);
499 if (ret)
500 return ret;
501 return 0;
502}
503
504int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
505 struct btrfs_root *root)
506{
507 return __btrfs_end_transaction(trans, root, 1, 1);
530} 508}
531 509
532/* 510/*
@@ -789,8 +767,14 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
789 btrfs_update_reloc_root(trans, root); 767 btrfs_update_reloc_root(trans, root);
790 btrfs_orphan_commit_root(trans, root); 768 btrfs_orphan_commit_root(trans, root);
791 769
770 btrfs_save_ino_cache(root, trans);
771
792 if (root->commit_root != root->node) { 772 if (root->commit_root != root->node) {
773 mutex_lock(&root->fs_commit_mutex);
793 switch_commit_root(root); 774 switch_commit_root(root);
775 btrfs_unpin_free_ino(root);
776 mutex_unlock(&root->fs_commit_mutex);
777
794 btrfs_set_root_node(&root->root_item, 778 btrfs_set_root_node(&root->root_item,
795 root->node); 779 root->node);
796 } 780 }
@@ -840,97 +824,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
840 return ret; 824 return ret;
841} 825}
842 826
843#if 0
844/*
845 * when dropping snapshots, we generate a ton of delayed refs, and it makes
846 * sense not to join the transaction while it is trying to flush the current
847 * queue of delayed refs out.
848 *
849 * This is used by the drop snapshot code only
850 */
851static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
852{
853 DEFINE_WAIT(wait);
854
855 mutex_lock(&info->trans_mutex);
856 while (info->running_transaction &&
857 info->running_transaction->delayed_refs.flushing) {
858 prepare_to_wait(&info->transaction_wait, &wait,
859 TASK_UNINTERRUPTIBLE);
860 mutex_unlock(&info->trans_mutex);
861
862 schedule();
863
864 mutex_lock(&info->trans_mutex);
865 finish_wait(&info->transaction_wait, &wait);
866 }
867 mutex_unlock(&info->trans_mutex);
868 return 0;
869}
870
871/*
872 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
873 * all of them
874 */
875int btrfs_drop_dead_root(struct btrfs_root *root)
876{
877 struct btrfs_trans_handle *trans;
878 struct btrfs_root *tree_root = root->fs_info->tree_root;
879 unsigned long nr;
880 int ret;
881
882 while (1) {
883 /*
884 * we don't want to jump in and create a bunch of
885 * delayed refs if the transaction is starting to close
886 */
887 wait_transaction_pre_flush(tree_root->fs_info);
888 trans = btrfs_start_transaction(tree_root, 1);
889
890 /*
891 * we've joined a transaction, make sure it isn't
892 * closing right now
893 */
894 if (trans->transaction->delayed_refs.flushing) {
895 btrfs_end_transaction(trans, tree_root);
896 continue;
897 }
898
899 ret = btrfs_drop_snapshot(trans, root);
900 if (ret != -EAGAIN)
901 break;
902
903 ret = btrfs_update_root(trans, tree_root,
904 &root->root_key,
905 &root->root_item);
906 if (ret)
907 break;
908
909 nr = trans->blocks_used;
910 ret = btrfs_end_transaction(trans, tree_root);
911 BUG_ON(ret);
912
913 btrfs_btree_balance_dirty(tree_root, nr);
914 cond_resched();
915 }
916 BUG_ON(ret);
917
918 ret = btrfs_del_root(trans, tree_root, &root->root_key);
919 BUG_ON(ret);
920
921 nr = trans->blocks_used;
922 ret = btrfs_end_transaction(trans, tree_root);
923 BUG_ON(ret);
924
925 free_extent_buffer(root->node);
926 free_extent_buffer(root->commit_root);
927 kfree(root);
928
929 btrfs_btree_balance_dirty(tree_root, nr);
930 return ret;
931}
932#endif
933
934/* 827/*
935 * new snapshots need to be created at a very specific time in the 828 * new snapshots need to be created at a very specific time in the
936 * transaction commit. This does the actual creation 829 * transaction commit. This does the actual creation
@@ -961,7 +854,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
961 goto fail; 854 goto fail;
962 } 855 }
963 856
964 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 857 ret = btrfs_find_free_objectid(tree_root, &objectid);
965 if (ret) { 858 if (ret) {
966 pending->error = ret; 859 pending->error = ret;
967 goto fail; 860 goto fail;
@@ -998,7 +891,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
998 BUG_ON(ret); 891 BUG_ON(ret);
999 ret = btrfs_insert_dir_item(trans, parent_root, 892 ret = btrfs_insert_dir_item(trans, parent_root,
1000 dentry->d_name.name, dentry->d_name.len, 893 dentry->d_name.name, dentry->d_name.len,
1001 parent_inode->i_ino, &key, 894 parent_inode, &key,
1002 BTRFS_FT_DIR, index); 895 BTRFS_FT_DIR, index);
1003 BUG_ON(ret); 896 BUG_ON(ret);
1004 897
@@ -1040,7 +933,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1040 */ 933 */
1041 ret = btrfs_add_root_ref(trans, tree_root, objectid, 934 ret = btrfs_add_root_ref(trans, tree_root, objectid,
1042 parent_root->root_key.objectid, 935 parent_root->root_key.objectid,
1043 parent_inode->i_ino, index, 936 btrfs_ino(parent_inode), index,
1044 dentry->d_name.name, dentry->d_name.len); 937 dentry->d_name.name, dentry->d_name.len);
1045 BUG_ON(ret); 938 BUG_ON(ret);
1046 dput(parent); 939 dput(parent);
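
Several hunks in this patch swap inode->i_ino for btrfs_ino(inode) when
building tree keys. A plausible minimal definition, assuming the helper
simply exposes the btrfs objectid stored in the in-memory inode (the real
helper may special-case root inodes):

	static inline u64 btrfs_ino(struct inode *inode)
	{
		/* tree keys use the btrfs objectid, which can diverge
		 * from the VFS i_ino once inode numbers come from a cache */
		return BTRFS_I(inode)->location.objectid;
	}
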
@@ -1068,6 +961,14 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1068 int ret; 961 int ret;
1069 962
1070 list_for_each_entry(pending, head, list) { 963 list_for_each_entry(pending, head, list) {
964 /*
965 * We must deal with the delayed items before creating
 966 * snapshots, or we will create a snapshot with inconsistent
967 * information.
968 */
969 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
970 BUG_ON(ret);
971
1071 ret = create_pending_snapshot(trans, fs_info, pending); 972 ret = create_pending_snapshot(trans, fs_info, pending);
1072 BUG_ON(ret); 973 BUG_ON(ret);
1073 } 974 }
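
The comment in this hunk carries the ordering constraint: delayed inode and
directory items live only in memory until btrfs_run_delayed_items() writes
them into the source tree, and a snapshot copies that tree as it stands.
The required sequence, condensed (error handling elided as in the hunk):

	/* 1. flush queued metadata into the source tree */
	ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
	BUG_ON(ret);

	/* 2. only then copy the tree root for the snapshot */
	ret = create_pending_snapshot(trans, fs_info, pending);
	BUG_ON(ret);
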
@@ -1316,6 +1217,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1316 BUG_ON(ret); 1217 BUG_ON(ret);
1317 } 1218 }
1318 1219
1220 ret = btrfs_run_delayed_items(trans, root);
1221 BUG_ON(ret);
1222
1319 /* 1223 /*
 1320 * rename doesn't use btrfs_join_transaction, so, once we 1224 * rename doesn't use btrfs_join_transaction, so, once we
1321 * set the transaction to blocked above, we aren't going 1225 * set the transaction to blocked above, we aren't going
@@ -1343,11 +1247,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1343 ret = create_pending_snapshots(trans, root->fs_info); 1247 ret = create_pending_snapshots(trans, root->fs_info);
1344 BUG_ON(ret); 1248 BUG_ON(ret);
1345 1249
1250 ret = btrfs_run_delayed_items(trans, root);
1251 BUG_ON(ret);
1252
1346 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1253 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1347 BUG_ON(ret); 1254 BUG_ON(ret);
1348 1255
1349 WARN_ON(cur_trans != trans->transaction); 1256 WARN_ON(cur_trans != trans->transaction);
1350 1257
1258 btrfs_scrub_pause(root);
1351 /* btrfs_commit_tree_roots is responsible for getting the 1259 /* btrfs_commit_tree_roots is responsible for getting the
1352 * various roots consistent with each other. Every pointer 1260 * various roots consistent with each other. Every pointer
1353 * in the tree of tree roots has to point to the most up to date 1261 * in the tree of tree roots has to point to the most up to date
@@ -1431,6 +1339,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1431 1339
1432 trace_btrfs_transaction_commit(root); 1340 trace_btrfs_transaction_commit(root);
1433 1341
1342 btrfs_scrub_continue(root);
1343
1434 if (current->journal_info == trans) 1344 if (current->journal_info == trans)
1435 current->journal_info = NULL; 1345 current->journal_info = NULL;
1436 1346
@@ -1458,6 +1368,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1458 root = list_entry(list.next, struct btrfs_root, root_list); 1368 root = list_entry(list.next, struct btrfs_root, root_list);
1459 list_del(&root->root_list); 1369 list_del(&root->root_list);
1460 1370
1371 btrfs_kill_all_delayed_nodes(root);
1372
1461 if (btrfs_header_backref_rev(root->node) < 1373 if (btrfs_header_backref_rev(root->node) <
1462 BTRFS_MIXED_BACKREF_REV) 1374 BTRFS_MIXED_BACKREF_REV)
1463 btrfs_drop_snapshot(root, NULL, 0); 1375 btrfs_drop_snapshot(root, NULL, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index da7289e06a82..02564e6230ac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -88,11 +88,8 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
90 struct btrfs_root *root); 90 struct btrfs_root *root);
91int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root);
93 91
94int btrfs_add_dead_root(struct btrfs_root *root); 92int btrfs_add_dead_root(struct btrfs_root *root);
95int btrfs_drop_dead_root(struct btrfs_root *root);
96int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 93int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
97int btrfs_clean_old_snapshots(struct btrfs_root *root); 94int btrfs_clean_old_snapshots(struct btrfs_root *root);
98int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 95int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -102,6 +99,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
102 int wait_for_unblock); 99 int wait_for_unblock);
103int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 100int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
104 struct btrfs_root *root); 101 struct btrfs_root *root);
102int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root);
105int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 104int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
106 struct btrfs_root *root); 105 struct btrfs_root *root);
107void btrfs_throttle(struct btrfs_root *root); 106void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 992ab425599d..3b580ee8ab1d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -97,7 +97,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
97 ret = 0; 97 ret = 0;
98 goto out; 98 goto out;
99 } 99 }
100 btrfs_release_path(root, path); 100 btrfs_release_path(path);
101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1); 101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
102 102
103 if (wret < 0) { 103 if (wret < 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f997ec0c1ba4..592396c6dc47 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -333,13 +333,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
333 goto insert; 333 goto insert;
334 334
335 if (item_size == 0) { 335 if (item_size == 0) {
336 btrfs_release_path(root, path); 336 btrfs_release_path(path);
337 return 0; 337 return 0;
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) { 341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path); 342 btrfs_release_path(path);
343 kfree(dst_copy); 343 kfree(dst_copy);
344 kfree(src_copy); 344 kfree(src_copy);
345 return -ENOMEM; 345 return -ENOMEM;
@@ -361,13 +361,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
361 * sync 361 * sync
362 */ 362 */
363 if (ret == 0) { 363 if (ret == 0) {
364 btrfs_release_path(root, path); 364 btrfs_release_path(path);
365 return 0; 365 return 0;
366 } 366 }
367 367
368 } 368 }
369insert: 369insert:
370 btrfs_release_path(root, path); 370 btrfs_release_path(path);
371 /* try to insert the key into the destination tree */ 371 /* try to insert the key into the destination tree */
372 ret = btrfs_insert_empty_item(trans, root, path, 372 ret = btrfs_insert_empty_item(trans, root, path,
373 key, item_size); 373 key, item_size);
@@ -382,7 +382,6 @@ insert:
382 } else if (found_size < item_size) { 382 } else if (found_size < item_size) {
383 ret = btrfs_extend_item(trans, root, path, 383 ret = btrfs_extend_item(trans, root, path,
384 item_size - found_size); 384 item_size - found_size);
385 BUG_ON(ret);
386 } 385 }
387 } else if (ret) { 386 } else if (ret) {
388 return ret; 387 return ret;
@@ -438,7 +437,7 @@ insert:
438 } 437 }
439no_copy: 438no_copy:
440 btrfs_mark_buffer_dirty(path->nodes[0]); 439 btrfs_mark_buffer_dirty(path->nodes[0]);
441 btrfs_release_path(root, path); 440 btrfs_release_path(path);
442 return 0; 441 return 0;
443} 442}
444 443
@@ -519,7 +518,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
519 * file. This must be done before the btrfs_drop_extents run 518 * file. This must be done before the btrfs_drop_extents run
520 * so we don't try to drop this extent. 519 * so we don't try to drop this extent.
521 */ 520 */
522 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 521 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
523 start, 0); 522 start, 0);
524 523
525 if (ret == 0 && 524 if (ret == 0 &&
@@ -544,11 +543,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
544 * we don't have to do anything 543 * we don't have to do anything
545 */ 544 */
546 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 545 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
547 btrfs_release_path(root, path); 546 btrfs_release_path(path);
548 goto out; 547 goto out;
549 } 548 }
550 } 549 }
551 btrfs_release_path(root, path); 550 btrfs_release_path(path);
552 551
553 saved_nbytes = inode_get_bytes(inode); 552 saved_nbytes = inode_get_bytes(inode);
554 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
@@ -590,6 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
590 ins.objectid, ins.offset, 589 ins.objectid, ins.offset,
591 0, root->root_key.objectid, 590 0, root->root_key.objectid,
592 key->objectid, offset); 591 key->objectid, offset);
592 BUG_ON(ret);
593 } else { 593 } else {
594 /* 594 /*
595 * insert the extent pointer in the extent 595 * insert the extent pointer in the extent
@@ -600,7 +600,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
600 key->objectid, offset, &ins); 600 key->objectid, offset, &ins);
601 BUG_ON(ret); 601 BUG_ON(ret);
602 } 602 }
603 btrfs_release_path(root, path); 603 btrfs_release_path(path);
604 604
605 if (btrfs_file_extent_compression(eb, item)) { 605 if (btrfs_file_extent_compression(eb, item)) {
606 csum_start = ins.objectid; 606 csum_start = ins.objectid;
@@ -614,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
614 614
615 ret = btrfs_lookup_csums_range(root->log_root, 615 ret = btrfs_lookup_csums_range(root->log_root,
616 csum_start, csum_end - 1, 616 csum_start, csum_end - 1,
617 &ordered_sums); 617 &ordered_sums, 0);
618 BUG_ON(ret); 618 BUG_ON(ret);
619 while (!list_empty(&ordered_sums)) { 619 while (!list_empty(&ordered_sums)) {
620 struct btrfs_ordered_sum *sums; 620 struct btrfs_ordered_sum *sums;
@@ -629,7 +629,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
629 kfree(sums); 629 kfree(sums);
630 } 630 }
631 } else { 631 } else {
632 btrfs_release_path(root, path); 632 btrfs_release_path(path);
633 } 633 }
634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
635 /* inline extents are easy, we just overwrite them */ 635 /* inline extents are easy, we just overwrite them */
@@ -675,10 +675,13 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
675 return -ENOMEM; 675 return -ENOMEM;
676 676
677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
678 btrfs_release_path(root, path); 678 btrfs_release_path(path);
679 679
680 inode = read_one_inode(root, location.objectid); 680 inode = read_one_inode(root, location.objectid);
681 BUG_ON(!inode); 681 if (!inode) {
682 kfree(name);
683 return -EIO;
684 }
682 685
683 ret = link_to_fixup_dir(trans, root, path, location.objectid); 686 ret = link_to_fixup_dir(trans, root, path, location.objectid);
684 BUG_ON(ret); 687 BUG_ON(ret);
@@ -713,7 +716,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
713 goto out; 716 goto out;
714 } else 717 } else
715 goto out; 718 goto out;
716 btrfs_release_path(root, path); 719 btrfs_release_path(path);
717 720
718 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 721 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
719 if (di && !IS_ERR(di)) { 722 if (di && !IS_ERR(di)) {
@@ -724,7 +727,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
724 goto out; 727 goto out;
725 match = 1; 728 match = 1;
726out: 729out:
727 btrfs_release_path(root, path); 730 btrfs_release_path(path);
728 return match; 731 return match;
729} 732}
730 733
@@ -817,7 +820,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
817 return -ENOENT; 820 return -ENOENT;
818 821
819 inode = read_one_inode(root, key->objectid); 822 inode = read_one_inode(root, key->objectid);
820 BUG_ON(!inode); 823 if (!inode) {
824 iput(dir);
825 return -EIO;
826 }
821 827
822 ref_ptr = btrfs_item_ptr_offset(eb, slot); 828 ref_ptr = btrfs_item_ptr_offset(eb, slot);
823 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 829 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
@@ -832,7 +838,7 @@ again:
832 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 838 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
833 839
834 /* if we already have a perfect match, we're done */ 840 /* if we already have a perfect match, we're done */
835 if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, 841 if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
836 btrfs_inode_ref_index(eb, ref), 842 btrfs_inode_ref_index(eb, ref),
837 name, namelen)) { 843 name, namelen)) {
838 goto out; 844 goto out;
@@ -884,7 +890,7 @@ again:
884 if (!backref_in_log(log, key, victim_name, 890 if (!backref_in_log(log, key, victim_name,
885 victim_name_len)) { 891 victim_name_len)) {
886 btrfs_inc_nlink(inode); 892 btrfs_inc_nlink(inode);
887 btrfs_release_path(root, path); 893 btrfs_release_path(path);
888 894
889 ret = btrfs_unlink_inode(trans, root, dir, 895 ret = btrfs_unlink_inode(trans, root, dir,
890 inode, victim_name, 896 inode, victim_name,
@@ -901,7 +907,7 @@ again:
901 */ 907 */
902 search_done = 1; 908 search_done = 1;
903 } 909 }
904 btrfs_release_path(root, path); 910 btrfs_release_path(path);
905 911
906insert: 912insert:
907 /* insert our name */ 913 /* insert our name */
@@ -922,7 +928,7 @@ out:
922 BUG_ON(ret); 928 BUG_ON(ret);
923 929
924out_nowrite: 930out_nowrite:
925 btrfs_release_path(root, path); 931 btrfs_release_path(path);
926 iput(dir); 932 iput(dir);
927 iput(inode); 933 iput(inode);
928 return 0; 934 return 0;
@@ -960,8 +966,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
960 unsigned long ptr; 966 unsigned long ptr;
961 unsigned long ptr_end; 967 unsigned long ptr_end;
962 int name_len; 968 int name_len;
969 u64 ino = btrfs_ino(inode);
963 970
964 key.objectid = inode->i_ino; 971 key.objectid = ino;
965 key.type = BTRFS_INODE_REF_KEY; 972 key.type = BTRFS_INODE_REF_KEY;
966 key.offset = (u64)-1; 973 key.offset = (u64)-1;
967 974
@@ -980,7 +987,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
980 } 987 }
981 btrfs_item_key_to_cpu(path->nodes[0], &key, 988 btrfs_item_key_to_cpu(path->nodes[0], &key,
982 path->slots[0]); 989 path->slots[0]);
983 if (key.objectid != inode->i_ino || 990 if (key.objectid != ino ||
984 key.type != BTRFS_INODE_REF_KEY) 991 key.type != BTRFS_INODE_REF_KEY)
985 break; 992 break;
986 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 993 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
@@ -999,9 +1006,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
999 if (key.offset == 0) 1006 if (key.offset == 0)
1000 break; 1007 break;
1001 key.offset--; 1008 key.offset--;
1002 btrfs_release_path(root, path); 1009 btrfs_release_path(path);
1003 } 1010 }
1004 btrfs_release_path(root, path); 1011 btrfs_release_path(path);
1005 if (nlink != inode->i_nlink) { 1012 if (nlink != inode->i_nlink) {
1006 inode->i_nlink = nlink; 1013 inode->i_nlink = nlink;
1007 btrfs_update_inode(trans, root, inode); 1014 btrfs_update_inode(trans, root, inode);
@@ -1011,10 +1018,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1011 if (inode->i_nlink == 0) { 1018 if (inode->i_nlink == 0) {
1012 if (S_ISDIR(inode->i_mode)) { 1019 if (S_ISDIR(inode->i_mode)) {
1013 ret = replay_dir_deletes(trans, root, NULL, path, 1020 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1); 1021 ino, 1);
1015 BUG_ON(ret); 1022 BUG_ON(ret);
1016 } 1023 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino); 1024 ret = insert_orphan_item(trans, root, ino);
1018 BUG_ON(ret); 1025 BUG_ON(ret);
1019 } 1026 }
1020 btrfs_free_path(path); 1027 btrfs_free_path(path);
@@ -1050,11 +1057,13 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1050 break; 1057 break;
1051 1058
1052 ret = btrfs_del_item(trans, root, path); 1059 ret = btrfs_del_item(trans, root, path);
1053 BUG_ON(ret); 1060 if (ret)
1061 goto out;
1054 1062
1055 btrfs_release_path(root, path); 1063 btrfs_release_path(path);
1056 inode = read_one_inode(root, key.offset); 1064 inode = read_one_inode(root, key.offset);
1057 BUG_ON(!inode); 1065 if (!inode)
1066 return -EIO;
1058 1067
1059 ret = fixup_inode_link_count(trans, root, inode); 1068 ret = fixup_inode_link_count(trans, root, inode);
1060 BUG_ON(ret); 1069 BUG_ON(ret);
@@ -1068,8 +1077,10 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1068 */ 1077 */
1069 key.offset = (u64)-1; 1078 key.offset = (u64)-1;
1070 } 1079 }
1071 btrfs_release_path(root, path); 1080 ret = 0;
1072 return 0; 1081out:
1082 btrfs_release_path(path);
1083 return ret;
1073} 1084}
1074 1085
1075 1086
@@ -1088,7 +1099,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1088 struct inode *inode; 1099 struct inode *inode;
1089 1100
1090 inode = read_one_inode(root, objectid); 1101 inode = read_one_inode(root, objectid);
1091 BUG_ON(!inode); 1102 if (!inode)
1103 return -EIO;
1092 1104
1093 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1105 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1094 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 1106 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
@@ -1096,7 +1108,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1096 1108
1097 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1109 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1098 1110
1099 btrfs_release_path(root, path); 1111 btrfs_release_path(path);
1100 if (ret == 0) { 1112 if (ret == 0) {
1101 btrfs_inc_nlink(inode); 1113 btrfs_inc_nlink(inode);
1102 btrfs_update_inode(trans, root, inode); 1114 btrfs_update_inode(trans, root, inode);
@@ -1175,7 +1187,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1175 int ret; 1187 int ret;
1176 1188
1177 dir = read_one_inode(root, key->objectid); 1189 dir = read_one_inode(root, key->objectid);
1178 BUG_ON(!dir); 1190 if (!dir)
1191 return -EIO;
1179 1192
1180 name_len = btrfs_dir_name_len(eb, di); 1193 name_len = btrfs_dir_name_len(eb, di);
1181 name = kmalloc(name_len, GFP_NOFS); 1194 name = kmalloc(name_len, GFP_NOFS);
@@ -1192,7 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1192 exists = 1; 1205 exists = 1;
1193 else 1206 else
1194 exists = 0; 1207 exists = 0;
1195 btrfs_release_path(root, path); 1208 btrfs_release_path(path);
1196 1209
1197 if (key->type == BTRFS_DIR_ITEM_KEY) { 1210 if (key->type == BTRFS_DIR_ITEM_KEY) {
1198 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1211 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1205,7 +1218,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1205 } else { 1218 } else {
1206 BUG(); 1219 BUG();
1207 } 1220 }
1208 if (!dst_di || IS_ERR(dst_di)) { 1221 if (IS_ERR_OR_NULL(dst_di)) {
1209 /* we need a sequence number to insert, so we only 1222 /* we need a sequence number to insert, so we only
1210 * do inserts for the BTRFS_DIR_INDEX_KEY types 1223 * do inserts for the BTRFS_DIR_INDEX_KEY types
1211 */ 1224 */
@@ -1236,13 +1249,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1236 if (key->type == BTRFS_DIR_INDEX_KEY) 1249 if (key->type == BTRFS_DIR_INDEX_KEY)
1237 goto insert; 1250 goto insert;
1238out: 1251out:
1239 btrfs_release_path(root, path); 1252 btrfs_release_path(path);
1240 kfree(name); 1253 kfree(name);
1241 iput(dir); 1254 iput(dir);
1242 return 0; 1255 return 0;
1243 1256
1244insert: 1257insert:
1245 btrfs_release_path(root, path); 1258 btrfs_release_path(path);
1246 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1259 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1247 name, name_len, log_type, &log_key); 1260 name, name_len, log_type, &log_key);
1248 1261
@@ -1363,7 +1376,7 @@ next:
1363 *end_ret = found_end; 1376 *end_ret = found_end;
1364 ret = 0; 1377 ret = 0;
1365out: 1378out:
1366 btrfs_release_path(root, path); 1379 btrfs_release_path(path);
1367 return ret; 1380 return ret;
1368} 1381}
1369 1382
@@ -1426,12 +1439,15 @@ again:
1426 dir_key->offset, 1439 dir_key->offset,
1427 name, name_len, 0); 1440 name, name_len, 0);
1428 } 1441 }
1429 if (!log_di || IS_ERR(log_di)) { 1442 if (IS_ERR_OR_NULL(log_di)) {
1430 btrfs_dir_item_key_to_cpu(eb, di, &location); 1443 btrfs_dir_item_key_to_cpu(eb, di, &location);
1431 btrfs_release_path(root, path); 1444 btrfs_release_path(path);
1432 btrfs_release_path(log, log_path); 1445 btrfs_release_path(log_path);
1433 inode = read_one_inode(root, location.objectid); 1446 inode = read_one_inode(root, location.objectid);
1434 BUG_ON(!inode); 1447 if (!inode) {
1448 kfree(name);
1449 return -EIO;
1450 }
1435 1451
1436 ret = link_to_fixup_dir(trans, root, 1452 ret = link_to_fixup_dir(trans, root,
1437 path, location.objectid); 1453 path, location.objectid);
@@ -1453,7 +1469,7 @@ again:
1453 ret = 0; 1469 ret = 0;
1454 goto out; 1470 goto out;
1455 } 1471 }
1456 btrfs_release_path(log, log_path); 1472 btrfs_release_path(log_path);
1457 kfree(name); 1473 kfree(name);
1458 1474
1459 ptr = (unsigned long)(di + 1); 1475 ptr = (unsigned long)(di + 1);
@@ -1461,8 +1477,8 @@ again:
1461 } 1477 }
1462 ret = 0; 1478 ret = 0;
1463out: 1479out:
1464 btrfs_release_path(root, path); 1480 btrfs_release_path(path);
1465 btrfs_release_path(log, log_path); 1481 btrfs_release_path(log_path);
1466 return ret; 1482 return ret;
1467} 1483}
1468 1484
@@ -1550,7 +1566,7 @@ again:
1550 break; 1566 break;
1551 dir_key.offset = found_key.offset + 1; 1567 dir_key.offset = found_key.offset + 1;
1552 } 1568 }
1553 btrfs_release_path(root, path); 1569 btrfs_release_path(path);
1554 if (range_end == (u64)-1) 1570 if (range_end == (u64)-1)
1555 break; 1571 break;
1556 range_start = range_end + 1; 1572 range_start = range_end + 1;
@@ -1561,11 +1577,11 @@ next_type:
1561 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 1577 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1562 key_type = BTRFS_DIR_LOG_INDEX_KEY; 1578 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1563 dir_key.type = BTRFS_DIR_INDEX_KEY; 1579 dir_key.type = BTRFS_DIR_INDEX_KEY;
1564 btrfs_release_path(root, path); 1580 btrfs_release_path(path);
1565 goto again; 1581 goto again;
1566 } 1582 }
1567out: 1583out:
1568 btrfs_release_path(root, path); 1584 btrfs_release_path(path);
1569 btrfs_free_path(log_path); 1585 btrfs_free_path(log_path);
1570 iput(dir); 1586 iput(dir);
1571 return ret; 1587 return ret;
@@ -2093,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2093 * the running transaction open, so a full commit can't hop 2109 * the running transaction open, so a full commit can't hop
2094 * in and cause problems either. 2110 * in and cause problems either.
2095 */ 2111 */
2112 btrfs_scrub_pause_super(root);
2096 write_ctree_super(trans, root->fs_info->tree_root, 1); 2113 write_ctree_super(trans, root->fs_info->tree_root, 1);
2114 btrfs_scrub_continue_super(root);
2097 ret = 0; 2115 ret = 0;
2098 2116
2099 mutex_lock(&root->log_mutex); 2117 mutex_lock(&root->log_mutex);
@@ -2197,6 +2215,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2197 int ret; 2215 int ret;
2198 int err = 0; 2216 int err = 0;
2199 int bytes_del = 0; 2217 int bytes_del = 0;
2218 u64 dir_ino = btrfs_ino(dir);
2200 2219
2201 if (BTRFS_I(dir)->logged_trans < trans->transid) 2220 if (BTRFS_I(dir)->logged_trans < trans->transid)
2202 return 0; 2221 return 0;
@@ -2214,7 +2233,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2214 goto out_unlock; 2233 goto out_unlock;
2215 } 2234 }
2216 2235
2217 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2236 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
2218 name, name_len, -1); 2237 name, name_len, -1);
2219 if (IS_ERR(di)) { 2238 if (IS_ERR(di)) {
2220 err = PTR_ERR(di); 2239 err = PTR_ERR(di);
@@ -2225,8 +2244,8 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2225 bytes_del += name_len; 2244 bytes_del += name_len;
2226 BUG_ON(ret); 2245 BUG_ON(ret);
2227 } 2246 }
2228 btrfs_release_path(log, path); 2247 btrfs_release_path(path);
2229 di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, 2248 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
2230 index, name, name_len, -1); 2249 index, name, name_len, -1);
2231 if (IS_ERR(di)) { 2250 if (IS_ERR(di)) {
2232 err = PTR_ERR(di); 2251 err = PTR_ERR(di);
@@ -2244,10 +2263,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2244 if (bytes_del) { 2263 if (bytes_del) {
2245 struct btrfs_key key; 2264 struct btrfs_key key;
2246 2265
2247 key.objectid = dir->i_ino; 2266 key.objectid = dir_ino;
2248 key.offset = 0; 2267 key.offset = 0;
2249 key.type = BTRFS_INODE_ITEM_KEY; 2268 key.type = BTRFS_INODE_ITEM_KEY;
2250 btrfs_release_path(log, path); 2269 btrfs_release_path(path);
2251 2270
2252 ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 2271 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
2253 if (ret < 0) { 2272 if (ret < 0) {
@@ -2269,7 +2288,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2269 btrfs_mark_buffer_dirty(path->nodes[0]); 2288 btrfs_mark_buffer_dirty(path->nodes[0]);
2270 } else 2289 } else
2271 ret = 0; 2290 ret = 0;
2272 btrfs_release_path(log, path); 2291 btrfs_release_path(path);
2273 } 2292 }
2274fail: 2293fail:
2275 btrfs_free_path(path); 2294 btrfs_free_path(path);
@@ -2303,7 +2322,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2303 log = root->log_root; 2322 log = root->log_root;
2304 mutex_lock(&BTRFS_I(inode)->log_mutex); 2323 mutex_lock(&BTRFS_I(inode)->log_mutex);
2305 2324
2306 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2325 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
2307 dirid, &index); 2326 dirid, &index);
2308 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2327 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2309 if (ret == -ENOSPC) { 2328 if (ret == -ENOSPC) {
@@ -2344,7 +2363,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2344 struct btrfs_dir_log_item); 2363 struct btrfs_dir_log_item);
2345 btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2364 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2346 btrfs_mark_buffer_dirty(path->nodes[0]); 2365 btrfs_mark_buffer_dirty(path->nodes[0]);
2347 btrfs_release_path(log, path); 2366 btrfs_release_path(path);
2348 return 0; 2367 return 0;
2349} 2368}
2350 2369
@@ -2369,13 +2388,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2369 int nritems; 2388 int nritems;
2370 u64 first_offset = min_offset; 2389 u64 first_offset = min_offset;
2371 u64 last_offset = (u64)-1; 2390 u64 last_offset = (u64)-1;
2391 u64 ino = btrfs_ino(inode);
2372 2392
2373 log = root->log_root; 2393 log = root->log_root;
2374 max_key.objectid = inode->i_ino; 2394 max_key.objectid = ino;
2375 max_key.offset = (u64)-1; 2395 max_key.offset = (u64)-1;
2376 max_key.type = key_type; 2396 max_key.type = key_type;
2377 2397
2378 min_key.objectid = inode->i_ino; 2398 min_key.objectid = ino;
2379 min_key.type = key_type; 2399 min_key.type = key_type;
2380 min_key.offset = min_offset; 2400 min_key.offset = min_offset;
2381 2401
@@ -2388,18 +2408,17 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2388 * we didn't find anything from this transaction, see if there 2408 * we didn't find anything from this transaction, see if there
2389 * is anything at all 2409 * is anything at all
2390 */ 2410 */
2391 if (ret != 0 || min_key.objectid != inode->i_ino || 2411 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2392 min_key.type != key_type) { 2412 min_key.objectid = ino;
2393 min_key.objectid = inode->i_ino;
2394 min_key.type = key_type; 2413 min_key.type = key_type;
2395 min_key.offset = (u64)-1; 2414 min_key.offset = (u64)-1;
2396 btrfs_release_path(root, path); 2415 btrfs_release_path(path);
2397 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2416 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2398 if (ret < 0) { 2417 if (ret < 0) {
2399 btrfs_release_path(root, path); 2418 btrfs_release_path(path);
2400 return ret; 2419 return ret;
2401 } 2420 }
2402 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2421 ret = btrfs_previous_item(root, path, ino, key_type);
2403 2422
2404 /* if ret == 0 there are items for this type, 2423 /* if ret == 0 there are items for this type,
2405 * create a range to tell us the last key of this type. 2424 * create a range to tell us the last key of this type.
@@ -2417,7 +2436,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2417 } 2436 }
2418 2437
2419 /* go backward to find any previous key */ 2438 /* go backward to find any previous key */
2420 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2439 ret = btrfs_previous_item(root, path, ino, key_type);
2421 if (ret == 0) { 2440 if (ret == 0) {
2422 struct btrfs_key tmp; 2441 struct btrfs_key tmp;
2423 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2442 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -2432,7 +2451,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2432 } 2451 }
2433 } 2452 }
2434 } 2453 }
2435 btrfs_release_path(root, path); 2454 btrfs_release_path(path);
2436 2455
2437 /* find the first key from this transaction again */ 2456 /* find the first key from this transaction again */
2438 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2457 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
@@ -2452,8 +2471,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2452 for (i = path->slots[0]; i < nritems; i++) { 2471 for (i = path->slots[0]; i < nritems; i++) {
2453 btrfs_item_key_to_cpu(src, &min_key, i); 2472 btrfs_item_key_to_cpu(src, &min_key, i);
2454 2473
2455 if (min_key.objectid != inode->i_ino || 2474 if (min_key.objectid != ino || min_key.type != key_type)
2456 min_key.type != key_type)
2457 goto done; 2475 goto done;
2458 ret = overwrite_item(trans, log, dst_path, src, i, 2476 ret = overwrite_item(trans, log, dst_path, src, i,
2459 &min_key); 2477 &min_key);
@@ -2474,7 +2492,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2474 goto done; 2492 goto done;
2475 } 2493 }
2476 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2494 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2477 if (tmp.objectid != inode->i_ino || tmp.type != key_type) { 2495 if (tmp.objectid != ino || tmp.type != key_type) {
2478 last_offset = (u64)-1; 2496 last_offset = (u64)-1;
2479 goto done; 2497 goto done;
2480 } 2498 }
@@ -2490,8 +2508,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2490 } 2508 }
2491 } 2509 }
2492done: 2510done:
2493 btrfs_release_path(root, path); 2511 btrfs_release_path(path);
2494 btrfs_release_path(log, dst_path); 2512 btrfs_release_path(dst_path);
2495 2513
2496 if (err == 0) { 2514 if (err == 0) {
2497 *last_offset_ret = last_offset; 2515 *last_offset_ret = last_offset;
@@ -2500,8 +2518,7 @@ done:
2500 * is valid 2518 * is valid
2501 */ 2519 */
2502 ret = insert_dir_log_key(trans, log, path, key_type, 2520 ret = insert_dir_log_key(trans, log, path, key_type,
2503 inode->i_ino, first_offset, 2521 ino, first_offset, last_offset);
2504 last_offset);
2505 if (ret) 2522 if (ret)
2506 err = ret; 2523 err = ret;
2507 } 2524 }
@@ -2587,10 +2604,11 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2587 break; 2604 break;
2588 2605
2589 ret = btrfs_del_item(trans, log, path); 2606 ret = btrfs_del_item(trans, log, path);
2590 BUG_ON(ret); 2607 if (ret)
2591 btrfs_release_path(log, path); 2608 break;
2609 btrfs_release_path(path);
2592 } 2610 }
2593 btrfs_release_path(log, path); 2611 btrfs_release_path(path);
2594 return ret; 2612 return ret;
2595} 2613}
2596 2614
@@ -2665,6 +2683,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2665 extent = btrfs_item_ptr(src, start_slot + i, 2683 extent = btrfs_item_ptr(src, start_slot + i,
2666 struct btrfs_file_extent_item); 2684 struct btrfs_file_extent_item);
2667 2685
2686 if (btrfs_file_extent_generation(src, extent) < trans->transid)
2687 continue;
2688
2668 found_type = btrfs_file_extent_type(src, extent); 2689 found_type = btrfs_file_extent_type(src, extent);
2669 if (found_type == BTRFS_FILE_EXTENT_REG || 2690 if (found_type == BTRFS_FILE_EXTENT_REG ||
2670 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2691 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -2689,14 +2710,14 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2689 ret = btrfs_lookup_csums_range( 2710 ret = btrfs_lookup_csums_range(
2690 log->fs_info->csum_root, 2711 log->fs_info->csum_root,
2691 ds + cs, ds + cs + cl - 1, 2712 ds + cs, ds + cs + cl - 1,
2692 &ordered_sums); 2713 &ordered_sums, 0);
2693 BUG_ON(ret); 2714 BUG_ON(ret);
2694 } 2715 }
2695 } 2716 }
2696 } 2717 }
2697 2718
2698 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2719 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
2699 btrfs_release_path(log, dst_path); 2720 btrfs_release_path(dst_path);
2700 kfree(ins_data); 2721 kfree(ins_data);
2701 2722
2702 /* 2723 /*
@@ -2745,6 +2766,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2745 int nritems; 2766 int nritems;
2746 int ins_start_slot = 0; 2767 int ins_start_slot = 0;
2747 int ins_nr; 2768 int ins_nr;
2769 u64 ino = btrfs_ino(inode);
2748 2770
2749 log = root->log_root; 2771 log = root->log_root;
2750 2772
@@ -2757,11 +2779,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2757 return -ENOMEM; 2779 return -ENOMEM;
2758 } 2780 }
2759 2781
2760 min_key.objectid = inode->i_ino; 2782 min_key.objectid = ino;
2761 min_key.type = BTRFS_INODE_ITEM_KEY; 2783 min_key.type = BTRFS_INODE_ITEM_KEY;
2762 min_key.offset = 0; 2784 min_key.offset = 0;
2763 2785
2764 max_key.objectid = inode->i_ino; 2786 max_key.objectid = ino;
2765 2787
2766 /* today the code can only do partial logging of directories */ 2788 /* today the code can only do partial logging of directories */
2767 if (!S_ISDIR(inode->i_mode)) 2789 if (!S_ISDIR(inode->i_mode))
@@ -2773,6 +2795,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2773 max_key.type = (u8)-1; 2795 max_key.type = (u8)-1;
2774 max_key.offset = (u64)-1; 2796 max_key.offset = (u64)-1;
2775 2797
2798 ret = btrfs_commit_inode_delayed_items(trans, inode);
2799 if (ret) {
2800 btrfs_free_path(path);
2801 btrfs_free_path(dst_path);
2802 return ret;
2803 }
2804
2776 mutex_lock(&BTRFS_I(inode)->log_mutex); 2805 mutex_lock(&BTRFS_I(inode)->log_mutex);
2777 2806
2778 /* 2807 /*
@@ -2784,8 +2813,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2784 2813
2785 if (inode_only == LOG_INODE_EXISTS) 2814 if (inode_only == LOG_INODE_EXISTS)
2786 max_key_type = BTRFS_XATTR_ITEM_KEY; 2815 max_key_type = BTRFS_XATTR_ITEM_KEY;
2787 ret = drop_objectid_items(trans, log, path, 2816 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2788 inode->i_ino, max_key_type);
2789 } else { 2817 } else {
2790 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 2818 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
2791 } 2819 }
@@ -2803,7 +2831,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2803 break; 2831 break;
2804again: 2832again:
2805 /* note, ins_nr might be > 0 here, cleanup outside the loop */ 2833 /* note, ins_nr might be > 0 here, cleanup outside the loop */
2806 if (min_key.objectid != inode->i_ino) 2834 if (min_key.objectid != ino)
2807 break; 2835 break;
2808 if (min_key.type > max_key.type) 2836 if (min_key.type > max_key.type)
2809 break; 2837 break;
@@ -2845,7 +2873,7 @@ next_slot:
2845 } 2873 }
2846 ins_nr = 0; 2874 ins_nr = 0;
2847 } 2875 }
2848 btrfs_release_path(root, path); 2876 btrfs_release_path(path);
2849 2877
2850 if (min_key.offset < (u64)-1) 2878 if (min_key.offset < (u64)-1)
2851 min_key.offset++; 2879 min_key.offset++;
@@ -2868,8 +2896,8 @@ next_slot:
2868 } 2896 }
2869 WARN_ON(ins_nr); 2897 WARN_ON(ins_nr);
2870 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 2898 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2871 btrfs_release_path(root, path); 2899 btrfs_release_path(path);
2872 btrfs_release_path(log, dst_path); 2900 btrfs_release_path(dst_path);
2873 ret = log_directory_changes(trans, root, inode, path, dst_path); 2901 ret = log_directory_changes(trans, root, inode, path, dst_path);
2874 if (ret) { 2902 if (ret) {
2875 err = ret; 2903 err = ret;
@@ -3136,7 +3164,7 @@ again:
3136 } 3164 }
3137 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3165 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3138 path->slots[0]); 3166 path->slots[0]);
3139 btrfs_release_path(log_root_tree, path); 3167 btrfs_release_path(path);
3140 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 3168 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
3141 break; 3169 break;
3142 3170
@@ -3171,7 +3199,7 @@ again:
3171 if (found_key.offset == 0) 3199 if (found_key.offset == 0)
3172 break; 3200 break;
3173 } 3201 }
3174 btrfs_release_path(log_root_tree, path); 3202 btrfs_release_path(path);
3175 3203
3176 /* step one is to pin it all, step two is to replay just inodes */ 3204 /* step one is to pin it all, step two is to replay just inodes */
3177 if (wc.pin) { 3205 if (wc.pin) {
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 3dfae84c8cc8..2270ac58d746 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -38,7 +38,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 38 struct btrfs_root *root,
39 const char *name, int name_len, 39 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41int btrfs_join_running_log_trans(struct btrfs_root *root);
42int btrfs_end_log_trans(struct btrfs_root *root); 41int btrfs_end_log_trans(struct btrfs_root *root);
43int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
44int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh
deleted file mode 100644
index 1ca1952fd917..000000000000
--- a/fs/btrfs/version.sh
+++ /dev/null
@@ -1,43 +0,0 @@
1#!/bin/bash
2#
3# determine-version -- report a useful version for releases
4#
5# Copyright 2008, Aron Griffis <agriffis@n01se.net>
6# Copyright 2008, Oracle
7# Released under the GNU GPLv2
8
9v="v0.16"
10
11which git &> /dev/null
12if [ $? == 0 ]; then
13 git branch >& /dev/null
14 if [ $? == 0 ]; then
15 if head=`git rev-parse --verify HEAD 2>/dev/null`; then
16 if tag=`git describe --tags 2>/dev/null`; then
17 v="$tag"
18 fi
19
20 # Are there uncommitted changes?
21 git update-index --refresh --unmerged > /dev/null
22 if git diff-index --name-only HEAD | \
23 grep -v "^scripts/package" \
24 | read dummy; then
25 v="$v"-dirty
26 fi
27 fi
28 fi
29fi
30
31echo "#ifndef __BUILD_VERSION" > .build-version.h
32echo "#define __BUILD_VERSION" >> .build-version.h
33echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h
34echo "#endif" >> .build-version.h
35
36diff -q version.h .build-version.h >& /dev/null
37
38if [ $? == 0 ]; then
39 rm .build-version.h
40 exit 0
41fi
42
43mv .build-version.h version.h
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c7367ae5a3e6..c48214ef5c09 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,22 +38,9 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device); 38 struct btrfs_device *device);
39static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 39static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
40 40
41#define map_lookup_size(n) (sizeof(struct map_lookup) + \
42 (sizeof(struct btrfs_bio_stripe) * (n)))
43
44static DEFINE_MUTEX(uuid_mutex); 41static DEFINE_MUTEX(uuid_mutex);
45static LIST_HEAD(fs_uuids); 42static LIST_HEAD(fs_uuids);
46 43
47void btrfs_lock_volumes(void)
48{
49 mutex_lock(&uuid_mutex);
50}
51
52void btrfs_unlock_volumes(void)
53{
54 mutex_unlock(&uuid_mutex);
55}
56
57static void lock_chunks(struct btrfs_root *root) 44static void lock_chunks(struct btrfs_root *root)
58{ 45{
59 mutex_lock(&root->fs_info->chunk_mutex); 46 mutex_lock(&root->fs_info->chunk_mutex);
@@ -363,7 +350,7 @@ static noinline int device_list_add(const char *path,
363 INIT_LIST_HEAD(&device->dev_alloc_list); 350 INIT_LIST_HEAD(&device->dev_alloc_list);
364 351
365 mutex_lock(&fs_devices->device_list_mutex); 352 mutex_lock(&fs_devices->device_list_mutex);
366 list_add(&device->dev_list, &fs_devices->devices); 353 list_add_rcu(&device->dev_list, &fs_devices->devices);
367 mutex_unlock(&fs_devices->device_list_mutex); 354 mutex_unlock(&fs_devices->device_list_mutex);
368 355
369 device->fs_devices = fs_devices; 356 device->fs_devices = fs_devices;
@@ -406,7 +393,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
406 fs_devices->latest_trans = orig->latest_trans; 393 fs_devices->latest_trans = orig->latest_trans;
407 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); 394 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
408 395
 409 mutex_lock(&orig->device_list_mutex); 396 /* We hold the volume lock, so it is safe to get the devices. */
410 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 397 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
411 device = kzalloc(sizeof(*device), GFP_NOFS); 398 device = kzalloc(sizeof(*device), GFP_NOFS);
412 if (!device) 399 if (!device)
@@ -429,10 +416,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
429 device->fs_devices = fs_devices; 416 device->fs_devices = fs_devices;
430 fs_devices->num_devices++; 417 fs_devices->num_devices++;
431 } 418 }
432 mutex_unlock(&orig->device_list_mutex);
433 return fs_devices; 419 return fs_devices;
434error: 420error:
435 mutex_unlock(&orig->device_list_mutex);
436 free_fs_devices(fs_devices); 421 free_fs_devices(fs_devices);
437 return ERR_PTR(-ENOMEM); 422 return ERR_PTR(-ENOMEM);
438} 423}
@@ -443,7 +428,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
443 428
444 mutex_lock(&uuid_mutex); 429 mutex_lock(&uuid_mutex);
445again: 430again:
 446 mutex_lock(&fs_devices->device_list_mutex); 431 /* This is the initialization path; it is safe to release the devices. */
447 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 432 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
448 if (device->in_fs_metadata) 433 if (device->in_fs_metadata)
449 continue; 434 continue;
@@ -463,7 +448,6 @@ again:
463 kfree(device->name); 448 kfree(device->name);
464 kfree(device); 449 kfree(device);
465 } 450 }
466 mutex_unlock(&fs_devices->device_list_mutex);
467 451
468 if (fs_devices->seed) { 452 if (fs_devices->seed) {
469 fs_devices = fs_devices->seed; 453 fs_devices = fs_devices->seed;
@@ -474,6 +458,29 @@ again:
474 return 0; 458 return 0;
475} 459}
476 460
461static void __free_device(struct work_struct *work)
462{
463 struct btrfs_device *device;
464
465 device = container_of(work, struct btrfs_device, rcu_work);
466
467 if (device->bdev)
468 blkdev_put(device->bdev, device->mode);
469
470 kfree(device->name);
471 kfree(device);
472}
473
474static void free_device(struct rcu_head *head)
475{
476 struct btrfs_device *device;
477
478 device = container_of(head, struct btrfs_device, rcu);
479
480 INIT_WORK(&device->rcu_work, __free_device);
481 schedule_work(&device->rcu_work);
482}
483
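
free_device() above shows a common kernel pattern: call_rcu() callbacks run
in softirq context and must not sleep, but blkdev_put() can, so the callback
only bounces the real teardown to process context via a workqueue. A
self-contained sketch with hypothetical names (my_dev, my_dev_release):

	struct my_dev {
		struct rcu_head rcu;
		struct work_struct rcu_work;
	};

	static void my_dev_release(struct work_struct *work)
	{
		struct my_dev *dev = container_of(work, struct my_dev,
						  rcu_work);

		/* process context: sleeping calls such as blkdev_put()
		 * are fine here */
		kfree(dev);
	}

	static void my_dev_free_rcu(struct rcu_head *head)
	{
		struct my_dev *dev = container_of(head, struct my_dev, rcu);

		/* softirq context: defer anything that may sleep */
		INIT_WORK(&dev->rcu_work, my_dev_release);
		schedule_work(&dev->rcu_work);
	}

	/* after unlinking dev from all RCU-protected lists: */
	/* call_rcu(&dev->rcu, my_dev_free_rcu); */
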
477static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 484static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
478{ 485{
479 struct btrfs_device *device; 486 struct btrfs_device *device;
@@ -481,20 +488,32 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
481 if (--fs_devices->opened > 0) 488 if (--fs_devices->opened > 0)
482 return 0; 489 return 0;
483 490
491 mutex_lock(&fs_devices->device_list_mutex);
484 list_for_each_entry(device, &fs_devices->devices, dev_list) { 492 list_for_each_entry(device, &fs_devices->devices, dev_list) {
485 if (device->bdev) { 493 struct btrfs_device *new_device;
486 blkdev_put(device->bdev, device->mode); 494
495 if (device->bdev)
487 fs_devices->open_devices--; 496 fs_devices->open_devices--;
488 } 497
489 if (device->writeable) { 498 if (device->writeable) {
490 list_del_init(&device->dev_alloc_list); 499 list_del_init(&device->dev_alloc_list);
491 fs_devices->rw_devices--; 500 fs_devices->rw_devices--;
492 } 501 }
493 502
494 device->bdev = NULL; 503 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
495 device->writeable = 0; 504 BUG_ON(!new_device);
496 device->in_fs_metadata = 0; 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name);
508 new_device->bdev = NULL;
509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0;
511 list_replace_rcu(&device->dev_list, &new_device->dev_list);
512
513 call_rcu(&device->rcu, free_device);
497 } 514 }
515 mutex_unlock(&fs_devices->device_list_mutex);
516
498 WARN_ON(fs_devices->open_devices); 517 WARN_ON(fs_devices->open_devices);
499 WARN_ON(fs_devices->rw_devices); 518 WARN_ON(fs_devices->rw_devices);
500 fs_devices->opened = 0; 519 fs_devices->opened = 0;
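On close, instead of clearing bdev/writeable/in_fs_metadata in place (which lockless readers could observe mid-update), the hunk above clones each device, neuters the clone, publishes it with list_replace_rcu(), and frees the original through the call_rcu() path. Continuing the struct obj sketch above, with kmemdup() standing in for the patch's kmalloc()+memcpy()+kstrdup() sequence:

#include <linux/rculist.h>
#include <linux/string.h>

/* continuing the struct obj sketch: swap in a neutered clone */
static int obj_close(struct obj *old)
{
	struct obj *clone = kmemdup(old, sizeof(*old), GFP_NOFS);

	if (!clone)
		return -ENOMEM;	/* the hunk above BUG()s on this instead */

	/* here the patch clears bdev, writeable and in_fs_metadata in the clone */
	list_replace_rcu(&old->list, &clone->list);
	/* the original is freed once all current readers are done */
	call_rcu(&old->rcu, obj_release_rcu);
	return 0;
}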
@@ -597,6 +616,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
597 list_add(&device->dev_alloc_list, 616 list_add(&device->dev_alloc_list,
598 &fs_devices->alloc_list); 617 &fs_devices->alloc_list);
599 } 618 }
619 brelse(bh);
600 continue; 620 continue;
601 621
602error_brelse: 622error_brelse:
@@ -815,10 +835,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
815 /* we don't want to overwrite the superblock on the drive, 835 /* we don't want to overwrite the superblock on the drive,
816 * so we make sure to start at an offset of at least 1MB 836 * so we make sure to start at an offset of at least 1MB
817 */ 837 */
818 search_start = 1024 * 1024; 838 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
819
820 if (root->fs_info->alloc_start + num_bytes <= search_end)
821 search_start = max(root->fs_info->alloc_start, search_start);
822 839
823 max_hole_start = search_start; 840 max_hole_start = search_start;
824 max_hole_size = 0; 841 max_hole_size = 0;
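The simplification above folds the old two-step clamp into a single expression: the dev extent search always starts at the larger of the alloc_start mount option and the fixed 1MB superblock reservation, and the 1024ull literal keeps max() type-consistent with the u64 alloc_start. A standalone check of the same arithmetic, with assumed input values:

#include <stdio.h>

int main(void)
{
	unsigned long long alloc_start = 0;	/* default: no alloc_start mount option */
	unsigned long long reserved = 1024ull * 1024;	/* keep clear of the superblock */
	unsigned long long search_start;

	search_start = alloc_start > reserved ? alloc_start : reserved;
	printf("dev extent search starts at %llu\n", search_start);	/* 1048576 */
	return 0;
}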
@@ -949,14 +966,14 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
949 if (ret > 0) { 966 if (ret > 0) {
950 ret = btrfs_previous_item(root, path, key.objectid, 967 ret = btrfs_previous_item(root, path, key.objectid,
951 BTRFS_DEV_EXTENT_KEY); 968 BTRFS_DEV_EXTENT_KEY);
952 BUG_ON(ret); 969 if (ret)
970 goto out;
953 leaf = path->nodes[0]; 971 leaf = path->nodes[0];
954 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 972 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
955 extent = btrfs_item_ptr(leaf, path->slots[0], 973 extent = btrfs_item_ptr(leaf, path->slots[0],
956 struct btrfs_dev_extent); 974 struct btrfs_dev_extent);
957 BUG_ON(found_key.offset > start || found_key.offset + 975 BUG_ON(found_key.offset > start || found_key.offset +
958 btrfs_dev_extent_length(leaf, extent) < start); 976 btrfs_dev_extent_length(leaf, extent) < start);
959 ret = 0;
960 } else if (ret == 0) { 977 } else if (ret == 0) {
961 leaf = path->nodes[0]; 978 leaf = path->nodes[0];
962 extent = btrfs_item_ptr(leaf, path->slots[0], 979 extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -967,8 +984,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
967 if (device->bytes_used > 0) 984 if (device->bytes_used > 0)
968 device->bytes_used -= btrfs_dev_extent_length(leaf, extent); 985 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
969 ret = btrfs_del_item(trans, root, path); 986 ret = btrfs_del_item(trans, root, path);
970 BUG_ON(ret);
971 987
988out:
972 btrfs_free_path(path); 989 btrfs_free_path(path);
973 return ret; 990 return ret;
974} 991}
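The BUG_ON(ret) calls in btrfs_free_dev_extent() are gone: lookup failures now unwind through a shared out: label and the return value carries the error. The shape of that change, as a small self-contained sketch with stand-in helpers:

#include <stdio.h>

/* stand-ins for the btree lookup/delete calls in the hunk */
static int lookup_item(void)   { return 0; }
static int delete_item(void)   { return 0; }
static void release_path(void) { }

static int free_dev_extent(void)
{
	int ret;

	ret = lookup_item();	/* may fail; previously a BUG_ON(ret) */
	if (ret)
		goto out;	/* propagate the error instead of panicking */

	ret = delete_item();	/* final status flows out through ret */
out:
	release_path();		/* runs on every path, like btrfs_free_path() */
	return ret;
}

int main(void)
{
	return free_dev_extent();
}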
@@ -1203,11 +1220,13 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1203 struct block_device *bdev; 1220 struct block_device *bdev;
1204 struct buffer_head *bh = NULL; 1221 struct buffer_head *bh = NULL;
1205 struct btrfs_super_block *disk_super; 1222 struct btrfs_super_block *disk_super;
1223 struct btrfs_fs_devices *cur_devices;
1206 u64 all_avail; 1224 u64 all_avail;
1207 u64 devid; 1225 u64 devid;
1208 u64 num_devices; 1226 u64 num_devices;
1209 u8 *dev_uuid; 1227 u8 *dev_uuid;
1210 int ret = 0; 1228 int ret = 0;
1229 bool clear_super = false;
1211 1230
1212 mutex_lock(&uuid_mutex); 1231 mutex_lock(&uuid_mutex);
1213 mutex_lock(&root->fs_info->volume_mutex); 1232 mutex_lock(&root->fs_info->volume_mutex);
@@ -1238,14 +1257,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1238 1257
1239 device = NULL; 1258 device = NULL;
1240 devices = &root->fs_info->fs_devices->devices; 1259 devices = &root->fs_info->fs_devices->devices;
1241 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1260 /*
1261 * It is safe to read the devices since the volume_mutex
1262 * is held.
1263 */
1242 list_for_each_entry(tmp, devices, dev_list) { 1264 list_for_each_entry(tmp, devices, dev_list) {
1243 if (tmp->in_fs_metadata && !tmp->bdev) { 1265 if (tmp->in_fs_metadata && !tmp->bdev) {
1244 device = tmp; 1266 device = tmp;
1245 break; 1267 break;
1246 } 1268 }
1247 } 1269 }
1248 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1249 bdev = NULL; 1270 bdev = NULL;
1250 bh = NULL; 1271 bh = NULL;
1251 disk_super = NULL; 1272 disk_super = NULL;
@@ -1287,8 +1308,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1287 } 1308 }
1288 1309
1289 if (device->writeable) { 1310 if (device->writeable) {
1311 lock_chunks(root);
1290 list_del_init(&device->dev_alloc_list); 1312 list_del_init(&device->dev_alloc_list);
1313 unlock_chunks(root);
1291 root->fs_info->fs_devices->rw_devices--; 1314 root->fs_info->fs_devices->rw_devices--;
1315 clear_super = true;
1292 } 1316 }
1293 1317
1294 ret = btrfs_shrink_device(device, 0); 1318 ret = btrfs_shrink_device(device, 0);
@@ -1300,15 +1324,17 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1300 goto error_undo; 1324 goto error_undo;
1301 1325
1302 device->in_fs_metadata = 0; 1326 device->in_fs_metadata = 0;
1327 btrfs_scrub_cancel_dev(root, device);
1303 1328
1304 /* 1329 /*
1305 * the device list mutex makes sure that we don't change 1330 * the device list mutex makes sure that we don't change
1306 * the device list while someone else is writing out all 1331 * the device list while someone else is writing out all
1307 * the device supers. 1332 * the device supers.
1308 */ 1333 */
1334
1335 cur_devices = device->fs_devices;
1309 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1336 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1310 list_del_init(&device->dev_list); 1337 list_del_rcu(&device->dev_list);
1311 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1312 1338
1313 device->fs_devices->num_devices--; 1339 device->fs_devices->num_devices--;
1314 1340
@@ -1322,34 +1348,36 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1322 if (device->bdev == root->fs_info->fs_devices->latest_bdev) 1348 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1323 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1349 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1324 1350
1325 if (device->bdev) { 1351 if (device->bdev)
1326 blkdev_put(device->bdev, device->mode);
1327 device->bdev = NULL;
1328 device->fs_devices->open_devices--; 1352 device->fs_devices->open_devices--;
1329 } 1353
1354 call_rcu(&device->rcu, free_device);
1355 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1330 1356
1331 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 1357 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1332 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); 1358 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1333 1359
1334 if (device->fs_devices->open_devices == 0) { 1360 if (cur_devices->open_devices == 0) {
1335 struct btrfs_fs_devices *fs_devices; 1361 struct btrfs_fs_devices *fs_devices;
1336 fs_devices = root->fs_info->fs_devices; 1362 fs_devices = root->fs_info->fs_devices;
1337 while (fs_devices) { 1363 while (fs_devices) {
1338 if (fs_devices->seed == device->fs_devices) 1364 if (fs_devices->seed == cur_devices)
1339 break; 1365 break;
1340 fs_devices = fs_devices->seed; 1366 fs_devices = fs_devices->seed;
1341 } 1367 }
1342 fs_devices->seed = device->fs_devices->seed; 1368 fs_devices->seed = cur_devices->seed;
1343 device->fs_devices->seed = NULL; 1369 cur_devices->seed = NULL;
1344 __btrfs_close_devices(device->fs_devices); 1370 lock_chunks(root);
1345 free_fs_devices(device->fs_devices); 1371 __btrfs_close_devices(cur_devices);
1372 unlock_chunks(root);
1373 free_fs_devices(cur_devices);
1346 } 1374 }
1347 1375
1348 /* 1376 /*
1349 * at this point, the device is zero sized. We want to 1377 * at this point, the device is zero sized. We want to
1350 * remove it from the devices list and zero out the old super 1378 * remove it from the devices list and zero out the old super
1351 */ 1379 */
1352 if (device->writeable) { 1380 if (clear_super) {
1353 /* make sure this device isn't detected as part of 1381 /* make sure this device isn't detected as part of
1354 * the FS anymore 1382 * the FS anymore
1355 */ 1383 */
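With the device unlinked via list_del_rcu() and freed via call_rcu(), readers may now traverse fs_devices->devices without device_list_mutex as long as they stay inside an RCU read-side section. A hedged sketch of such a reader (this function is not in the patch; it assumes the btrfs headers):

#include <linux/rculist.h>
#include "volumes.h"

/* assumed reader: check for a devid without taking device_list_mutex */
static bool device_present_rcu(struct btrfs_fs_devices *fs_devices, u64 devid)
{
	struct btrfs_device *device;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
		if (device->devid == devid) {
			/* the entry is only stable inside this section */
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}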
@@ -1358,8 +1386,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1358 sync_dirty_buffer(bh); 1386 sync_dirty_buffer(bh);
1359 } 1387 }
1360 1388
1361 kfree(device->name);
1362 kfree(device);
1363 ret = 0; 1389 ret = 0;
1364 1390
1365error_brelse: 1391error_brelse:
@@ -1373,8 +1399,10 @@ out:
1373 return ret; 1399 return ret;
1374error_undo: 1400error_undo:
1375 if (device->writeable) { 1401 if (device->writeable) {
1402 lock_chunks(root);
1376 list_add(&device->dev_alloc_list, 1403 list_add(&device->dev_alloc_list,
1377 &root->fs_info->fs_devices->alloc_list); 1404 &root->fs_info->fs_devices->alloc_list);
1405 unlock_chunks(root);
1378 root->fs_info->fs_devices->rw_devices++; 1406 root->fs_info->fs_devices->rw_devices++;
1379 } 1407 }
1380 goto error_brelse; 1408 goto error_brelse;
@@ -1414,7 +1442,12 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1414 INIT_LIST_HEAD(&seed_devices->devices); 1442 INIT_LIST_HEAD(&seed_devices->devices);
1415 INIT_LIST_HEAD(&seed_devices->alloc_list); 1443 INIT_LIST_HEAD(&seed_devices->alloc_list);
1416 mutex_init(&seed_devices->device_list_mutex); 1444 mutex_init(&seed_devices->device_list_mutex);
1417 list_splice_init(&fs_devices->devices, &seed_devices->devices); 1445
1446 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1447 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
1448 synchronize_rcu);
1449 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1450
1418 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1451 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1419 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1452 list_for_each_entry(device, &seed_devices->devices, dev_list) {
1420 device->fs_devices = seed_devices; 1453 device->fs_devices = seed_devices;
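Sprouting a seed filesystem moves the entire device list in one shot; list_splice_init_rcu() detaches the source list, runs the supplied synchronization callback (synchronize_rcu here) so readers still walking the old list can drain, and only then publishes the entries on the destination. A minimal usage sketch:

#include <linux/rculist.h>
#include <linux/rcupdate.h>

static LIST_HEAD(src);
static LIST_HEAD(dst);

static void splice_all(void)
{
	/*
	 * Empties src onto dst and re-inits src; the sync callback runs
	 * after src is detached so that readers still walking the old
	 * list finish before the entries show up under the new head.
	 */
	list_splice_init_rcu(&src, &dst, synchronize_rcu);
}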
@@ -1475,7 +1508,7 @@ next_slot:
1475 goto error; 1508 goto error;
1476 leaf = path->nodes[0]; 1509 leaf = path->nodes[0];
1477 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1510 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1478 btrfs_release_path(root, path); 1511 btrfs_release_path(path);
1479 continue; 1512 continue;
1480 } 1513 }
1481 1514
@@ -1611,7 +1644,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1611 * half setup 1644 * half setup
1612 */ 1645 */
1613 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1646 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1614 list_add(&device->dev_list, &root->fs_info->fs_devices->devices); 1647 list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
1615 list_add(&device->dev_alloc_list, 1648 list_add(&device->dev_alloc_list,
1616 &root->fs_info->fs_devices->alloc_list); 1649 &root->fs_info->fs_devices->alloc_list);
1617 root->fs_info->fs_devices->num_devices++; 1650 root->fs_info->fs_devices->num_devices++;
@@ -1769,10 +1802,9 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1769 BUG_ON(ret); 1802 BUG_ON(ret);
1770 1803
1771 ret = btrfs_del_item(trans, root, path); 1804 ret = btrfs_del_item(trans, root, path);
1772 BUG_ON(ret);
1773 1805
1774 btrfs_free_path(path); 1806 btrfs_free_path(path);
1775 return 0; 1807 return ret;
1776} 1808}
1777 1809
1778static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 1810static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
@@ -1947,7 +1979,7 @@ again:
1947 chunk = btrfs_item_ptr(leaf, path->slots[0], 1979 chunk = btrfs_item_ptr(leaf, path->slots[0],
1948 struct btrfs_chunk); 1980 struct btrfs_chunk);
1949 chunk_type = btrfs_chunk_type(leaf, chunk); 1981 chunk_type = btrfs_chunk_type(leaf, chunk);
1950 btrfs_release_path(chunk_root, path); 1982 btrfs_release_path(path);
1951 1983
1952 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { 1984 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1953 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 1985 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
@@ -2065,7 +2097,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2065 if (found_key.offset == 0) 2097 if (found_key.offset == 0)
2066 break; 2098 break;
2067 2099
2068 btrfs_release_path(chunk_root, path); 2100 btrfs_release_path(path);
2069 ret = btrfs_relocate_chunk(chunk_root, 2101 ret = btrfs_relocate_chunk(chunk_root,
2070 chunk_root->root_key.objectid, 2102 chunk_root->root_key.objectid,
2071 found_key.objectid, 2103 found_key.objectid,
@@ -2137,7 +2169,7 @@ again:
2137 goto done; 2169 goto done;
2138 if (ret) { 2170 if (ret) {
2139 ret = 0; 2171 ret = 0;
2140 btrfs_release_path(root, path); 2172 btrfs_release_path(path);
2141 break; 2173 break;
2142 } 2174 }
2143 2175
@@ -2146,7 +2178,7 @@ again:
2146 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2178 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2147 2179
2148 if (key.objectid != device->devid) { 2180 if (key.objectid != device->devid) {
2149 btrfs_release_path(root, path); 2181 btrfs_release_path(path);
2150 break; 2182 break;
2151 } 2183 }
2152 2184
@@ -2154,14 +2186,14 @@ again:
2154 length = btrfs_dev_extent_length(l, dev_extent); 2186 length = btrfs_dev_extent_length(l, dev_extent);
2155 2187
2156 if (key.offset + length <= new_size) { 2188 if (key.offset + length <= new_size) {
2157 btrfs_release_path(root, path); 2189 btrfs_release_path(path);
2158 break; 2190 break;
2159 } 2191 }
2160 2192
2161 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2193 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2162 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2194 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2163 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 2195 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2164 btrfs_release_path(root, path); 2196 btrfs_release_path(path);
2165 2197
2166 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 2198 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2167 chunk_offset); 2199 chunk_offset);
@@ -2237,275 +2269,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
2237 return 0; 2269 return 0;
2238} 2270}
2239 2271
2240static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, 2272/*
2241 int num_stripes, int sub_stripes) 2273 * sort the devices in descending order by max_avail, total_avail
2274 */
2275static int btrfs_cmp_device_info(const void *a, const void *b)
2242{ 2276{
2243 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) 2277 const struct btrfs_device_info *di_a = a;
2244 return calc_size; 2278 const struct btrfs_device_info *di_b = b;
2245 else if (type & BTRFS_BLOCK_GROUP_RAID10)
2246 return calc_size * (num_stripes / sub_stripes);
2247 else
2248 return calc_size * num_stripes;
2249}
2250 2279
2251/* Used to sort the devices by max_avail(descending sort) */ 2280 if (di_a->max_avail > di_b->max_avail)
2252int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
2253{
2254 if (((struct btrfs_device_info *)dev_info1)->max_avail >
2255 ((struct btrfs_device_info *)dev_info2)->max_avail)
2256 return -1; 2281 return -1;
2257 else if (((struct btrfs_device_info *)dev_info1)->max_avail < 2282 if (di_a->max_avail < di_b->max_avail)
2258 ((struct btrfs_device_info *)dev_info2)->max_avail)
2259 return 1; 2283 return 1;
2260 else 2284 if (di_a->total_avail > di_b->total_avail)
2261 return 0; 2285 return -1;
2286 if (di_a->total_avail < di_b->total_avail)
2287 return 1;
2288 return 0;
2262} 2289}
2263 2290
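btrfs_cmp_device_info() is a plain two-key comparator, descending by max_avail with ties broken by total_avail, handed to the kernel's sort() later in this hunk. The same shape works with userspace qsort(), as this standalone check shows:

#include <stdio.h>
#include <stdlib.h>

struct dev_info {
	unsigned long long max_avail;
	unsigned long long total_avail;
};

/* descending by max_avail, then descending by total_avail */
static int cmp_dev_info(const void *a, const void *b)
{
	const struct dev_info *da = a, *db = b;

	if (da->max_avail != db->max_avail)
		return da->max_avail > db->max_avail ? -1 : 1;
	if (da->total_avail != db->total_avail)
		return da->total_avail > db->total_avail ? -1 : 1;
	return 0;
}

int main(void)
{
	struct dev_info d[] = { {10, 50}, {40, 20}, {10, 90} };

	qsort(d, 3, sizeof(d[0]), cmp_dev_info);
	for (int i = 0; i < 3; i++)
		printf("%llu/%llu\n", d[i].max_avail, d[i].total_avail);
	/* prints 40/20, 10/90, 10/50 */
	return 0;
}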
2264static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, 2291static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2265 int *num_stripes, int *min_stripes, 2292 struct btrfs_root *extent_root,
2266 int *sub_stripes) 2293 struct map_lookup **map_ret,
2294 u64 *num_bytes_out, u64 *stripe_size_out,
2295 u64 start, u64 type)
2267{ 2296{
2268 *num_stripes = 1; 2297 struct btrfs_fs_info *info = extent_root->fs_info;
2269 *min_stripes = 1; 2298 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2270 *sub_stripes = 0; 2299 struct list_head *cur;
2300 struct map_lookup *map = NULL;
2301 struct extent_map_tree *em_tree;
2302 struct extent_map *em;
2303 struct btrfs_device_info *devices_info = NULL;
2304 u64 total_avail;
2305 int num_stripes; /* total number of stripes to allocate */
2306 int sub_stripes; /* sub_stripes info for map */
2307 int dev_stripes; /* stripes per dev */
2308 int devs_max; /* max devs to use */
2309 int devs_min; /* min devs needed */
2310 int devs_increment; /* ndevs has to be a multiple of this */
2311 	int ncopies;		/* how many copies the data has */
2312 int ret;
2313 u64 max_stripe_size;
2314 u64 max_chunk_size;
2315 u64 stripe_size;
2316 u64 num_bytes;
2317 int ndevs;
2318 int i;
2319 int j;
2271 2320
2272 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2321 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
2273 *num_stripes = fs_devices->rw_devices; 2322 (type & BTRFS_BLOCK_GROUP_DUP)) {
2274 *min_stripes = 2; 2323 WARN_ON(1);
2275 } 2324 type &= ~BTRFS_BLOCK_GROUP_DUP;
2276 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2277 *num_stripes = 2;
2278 *min_stripes = 2;
2279 }
2280 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2281 if (fs_devices->rw_devices < 2)
2282 return -ENOSPC;
2283 *num_stripes = 2;
2284 *min_stripes = 2;
2285 }
2286 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2287 *num_stripes = fs_devices->rw_devices;
2288 if (*num_stripes < 4)
2289 return -ENOSPC;
2290 *num_stripes &= ~(u32)1;
2291 *sub_stripes = 2;
2292 *min_stripes = 4;
2293 } 2325 }
2294 2326
2295 return 0; 2327 if (list_empty(&fs_devices->alloc_list))
2296} 2328 return -ENOSPC;
2297 2329
2298static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, 2330 sub_stripes = 1;
2299 u64 proposed_size, u64 type, 2331 dev_stripes = 1;
2300 int num_stripes, int small_stripe) 2332 devs_increment = 1;
2301{ 2333 ncopies = 1;
2302 int min_stripe_size = 1 * 1024 * 1024; 2334 devs_max = 0; /* 0 == as many as possible */
2303 u64 calc_size = proposed_size; 2335 devs_min = 1;
2304 u64 max_chunk_size = calc_size;
2305 int ncopies = 1;
2306 2336
2307 if (type & (BTRFS_BLOCK_GROUP_RAID1 | 2337 /*
2308 BTRFS_BLOCK_GROUP_DUP | 2338 * define the properties of each RAID type.
2309 BTRFS_BLOCK_GROUP_RAID10)) 2339 * FIXME: move this to a global table and use it in all RAID
2340 * calculation code
2341 */
2342 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2343 dev_stripes = 2;
2344 ncopies = 2;
2345 devs_max = 1;
2346 } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2347 devs_min = 2;
2348 } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2349 devs_increment = 2;
2310 ncopies = 2; 2350 ncopies = 2;
2351 devs_max = 2;
2352 devs_min = 2;
2353 } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2354 sub_stripes = 2;
2355 devs_increment = 2;
2356 ncopies = 2;
2357 devs_min = 4;
2358 } else {
2359 devs_max = 1;
2360 }
2311 2361
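The if/else ladder above pins down five per-profile knobs; the FIXME asks for a global table, and a table-driven equivalent might look like the following hedged sketch (the enum names are illustrative, the values are copied from the ladder):

/* illustrative profile table; field values taken from the ladder above */
enum raid_profile { RAID_SINGLE, RAID_DUP, RAID_RAID0, RAID_RAID1, RAID_RAID10 };

struct raid_attr {
	int sub_stripes;
	int dev_stripes;	/* stripes placed on each device */
	int devs_max;		/* 0 == as many as possible */
	int devs_min;
	int devs_increment;	/* ndevs must be a multiple of this */
	int ncopies;		/* how many copies the data has */
};

static const struct raid_attr raid_attr[] = {
	/*              sub  dev  max  min  inc  cp */
	[RAID_SINGLE] = { 1,   1,   1,   1,   1,  1 },
	[RAID_DUP]    = { 1,   2,   1,   1,   1,  2 },
	[RAID_RAID0]  = { 1,   1,   0,   2,   1,  1 },
	[RAID_RAID1]  = { 1,   1,   2,   2,   2,  2 },
	[RAID_RAID10] = { 2,   1,   0,   4,   2,  2 },
};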
2312 if (type & BTRFS_BLOCK_GROUP_DATA) { 2362 if (type & BTRFS_BLOCK_GROUP_DATA) {
2313 max_chunk_size = 10 * calc_size; 2363 max_stripe_size = 1024 * 1024 * 1024;
2314 min_stripe_size = 64 * 1024 * 1024; 2364 max_chunk_size = 10 * max_stripe_size;
2315 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2365 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2316 max_chunk_size = 256 * 1024 * 1024; 2366 max_stripe_size = 256 * 1024 * 1024;
2317 min_stripe_size = 32 * 1024 * 1024; 2367 max_chunk_size = max_stripe_size;
2318 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2368 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2319 calc_size = 8 * 1024 * 1024; 2369 max_stripe_size = 8 * 1024 * 1024;
2320 max_chunk_size = calc_size * 2; 2370 max_chunk_size = 2 * max_stripe_size;
2321 min_stripe_size = 1 * 1024 * 1024; 2371 } else {
2372 printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
2373 type);
2374 BUG_ON(1);
2322 } 2375 }
2323 2376
2324 /* we don't want a chunk larger than 10% of writeable space */ 2377 /* we don't want a chunk larger than 10% of writeable space */
2325 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2378 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
2326 max_chunk_size); 2379 max_chunk_size);
2327 2380
2328 if (calc_size * num_stripes > max_chunk_size * ncopies) { 2381 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
2329 calc_size = max_chunk_size * ncopies; 2382 GFP_NOFS);
2330 do_div(calc_size, num_stripes); 2383 if (!devices_info)
2331 do_div(calc_size, BTRFS_STRIPE_LEN); 2384 return -ENOMEM;
2332 calc_size *= BTRFS_STRIPE_LEN;
2333 }
2334 2385
2335 /* we don't want tiny stripes */ 2386 cur = fs_devices->alloc_list.next;
2336 if (!small_stripe)
2337 calc_size = max_t(u64, min_stripe_size, calc_size);
2338 2387
2339 /* 2388 /*
2340 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure 2389 * in the first pass through the devices list, we gather information
2341 * we end up with something bigger than a stripe 2390 * about the available holes on each device.
2342 */ 2391 */
2343 calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); 2392 ndevs = 0;
2344 2393 while (cur != &fs_devices->alloc_list) {
2345 do_div(calc_size, BTRFS_STRIPE_LEN); 2394 struct btrfs_device *device;
2346 calc_size *= BTRFS_STRIPE_LEN; 2395 u64 max_avail;
2347 2396 u64 dev_offset;
2348 return calc_size;
2349}
2350
2351static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
2352 int num_stripes)
2353{
2354 struct map_lookup *new;
2355 size_t len = map_lookup_size(num_stripes);
2356
2357 BUG_ON(map->num_stripes < num_stripes);
2358
2359 if (map->num_stripes == num_stripes)
2360 return map;
2361
2362 new = kmalloc(len, GFP_NOFS);
2363 if (!new) {
2364 /* just change map->num_stripes */
2365 map->num_stripes = num_stripes;
2366 return map;
2367 }
2368
2369 memcpy(new, map, len);
2370 new->num_stripes = num_stripes;
2371 kfree(map);
2372 return new;
2373}
2374 2397
2375/* 2398 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2376 * helper to allocate device space from btrfs_device_info, in which we stored
2377 * max free space information of every device. It is used when we can not
2378 * allocate chunks by default size.
2379 *
2380 * By this helper, we can allocate a new chunk as larger as possible.
2381 */
2382static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
2383 struct btrfs_fs_devices *fs_devices,
2384 struct btrfs_device_info *devices,
2385 int nr_device, u64 type,
2386 struct map_lookup **map_lookup,
2387 int min_stripes, u64 *stripe_size)
2388{
2389 int i, index, sort_again = 0;
2390 int min_devices = min_stripes;
2391 u64 max_avail, min_free;
2392 struct map_lookup *map = *map_lookup;
2393 int ret;
2394 2399
2395 if (nr_device < min_stripes) 2400 cur = cur->next;
2396 return -ENOSPC;
2397 2401
2398 btrfs_descending_sort_devices(devices, nr_device); 2402 if (!device->writeable) {
2403 printk(KERN_ERR
2404 "btrfs: read-only device in alloc_list\n");
2405 WARN_ON(1);
2406 continue;
2407 }
2399 2408
2400 max_avail = devices[0].max_avail; 2409 if (!device->in_fs_metadata)
2401 if (!max_avail) 2410 continue;
2402 return -ENOSPC;
2403 2411
2404 for (i = 0; i < nr_device; i++) { 2412 if (device->total_bytes > device->bytes_used)
2405 /* 2413 total_avail = device->total_bytes - device->bytes_used;
2406 * if dev_offset = 0, it means the free space of this device 2414 else
2407 * is less than what we need, and we didn't search max avail 2415 total_avail = 0;
2408 * extent on this device, so do it now. 2416 /* avail is off by max(alloc_start, 1MB), but that is the same
2417 * for all devices, so it doesn't hurt the sorting later on
2409 */ 2418 */
2410 if (!devices[i].dev_offset) {
2411 ret = find_free_dev_extent(trans, devices[i].dev,
2412 max_avail,
2413 &devices[i].dev_offset,
2414 &devices[i].max_avail);
2415 if (ret != 0 && ret != -ENOSPC)
2416 return ret;
2417 sort_again = 1;
2418 }
2419 }
2420
2421 /* we update the max avail free extent of each devices, sort again */
2422 if (sort_again)
2423 btrfs_descending_sort_devices(devices, nr_device);
2424 2419
2425 if (type & BTRFS_BLOCK_GROUP_DUP) 2420 ret = find_free_dev_extent(trans, device,
2426 min_devices = 1; 2421 max_stripe_size * dev_stripes,
2422 &dev_offset, &max_avail);
2423 if (ret && ret != -ENOSPC)
2424 goto error;
2427 2425
2428 if (!devices[min_devices - 1].max_avail) 2426 if (ret == 0)
2429 return -ENOSPC; 2427 max_avail = max_stripe_size * dev_stripes;
2430 2428
2431 max_avail = devices[min_devices - 1].max_avail; 2429 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
2432 if (type & BTRFS_BLOCK_GROUP_DUP) 2430 continue;
2433 do_div(max_avail, 2);
2434 2431
2435 max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, 2432 devices_info[ndevs].dev_offset = dev_offset;
2436 min_stripes, 1); 2433 devices_info[ndevs].max_avail = max_avail;
2437 if (type & BTRFS_BLOCK_GROUP_DUP) 2434 devices_info[ndevs].total_avail = total_avail;
2438 min_free = max_avail * 2; 2435 devices_info[ndevs].dev = device;
2439 else 2436 ++ndevs;
2440 min_free = max_avail; 2437 }
2441 2438
2442 if (min_free > devices[min_devices - 1].max_avail) 2439 /*
2443 return -ENOSPC; 2440 * now sort the devices by hole size / available space
2441 */
2442 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
2443 btrfs_cmp_device_info, NULL);
2444 2444
2445 map = __shrink_map_lookup_stripes(map, min_stripes); 2445 /* round down to number of usable stripes */
2446 *stripe_size = max_avail; 2446 ndevs -= ndevs % devs_increment;
2447 2447
2448 index = 0; 2448 if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
2449 for (i = 0; i < min_stripes; i++) { 2449 ret = -ENOSPC;
2450 map->stripes[i].dev = devices[index].dev; 2450 goto error;
2451 map->stripes[i].physical = devices[index].dev_offset;
2452 if (type & BTRFS_BLOCK_GROUP_DUP) {
2453 i++;
2454 map->stripes[i].dev = devices[index].dev;
2455 map->stripes[i].physical = devices[index].dev_offset +
2456 max_avail;
2457 }
2458 index++;
2459 } 2451 }
2460 *map_lookup = map;
2461 2452
2462 return 0; 2453 if (devs_max && ndevs > devs_max)
2463} 2454 ndevs = devs_max;
2464 2455 /*
2465static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2456 * the primary goal is to maximize the number of stripes, so use as many
2466 struct btrfs_root *extent_root, 2457 * devices as possible, even if the stripes are not maximum sized.
2467 struct map_lookup **map_ret, 2458 */
2468 u64 *num_bytes, u64 *stripe_size, 2459 stripe_size = devices_info[ndevs-1].max_avail;
2469 u64 start, u64 type) 2460 num_stripes = ndevs * dev_stripes;
2470{
2471 struct btrfs_fs_info *info = extent_root->fs_info;
2472 struct btrfs_device *device = NULL;
2473 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2474 struct list_head *cur;
2475 struct map_lookup *map;
2476 struct extent_map_tree *em_tree;
2477 struct extent_map *em;
2478 struct btrfs_device_info *devices_info;
2479 struct list_head private_devs;
2480 u64 calc_size = 1024 * 1024 * 1024;
2481 u64 min_free;
2482 u64 avail;
2483 u64 dev_offset;
2484 int num_stripes;
2485 int min_stripes;
2486 int sub_stripes;
2487 int min_devices; /* the min number of devices we need */
2488 int i;
2489 int ret;
2490 int index;
2491 2461
2492 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2462 if (stripe_size * num_stripes > max_chunk_size * ncopies) {
2493 (type & BTRFS_BLOCK_GROUP_DUP)) { 2463 stripe_size = max_chunk_size * ncopies;
2494 WARN_ON(1); 2464 do_div(stripe_size, num_stripes);
2495 type &= ~BTRFS_BLOCK_GROUP_DUP;
2496 } 2465 }
2497 if (list_empty(&fs_devices->alloc_list))
2498 return -ENOSPC;
2499
2500 ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
2501 &min_stripes, &sub_stripes);
2502 if (ret)
2503 return ret;
2504 2466
2505 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, 2467 do_div(stripe_size, dev_stripes);
2506 GFP_NOFS); 2468 do_div(stripe_size, BTRFS_STRIPE_LEN);
2507 if (!devices_info) 2469 stripe_size *= BTRFS_STRIPE_LEN;
2508 return -ENOMEM;
2509 2470
2510 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2471 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2511 if (!map) { 2472 if (!map) {
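The sizing logic in this hunk starts from the smallest selected hole (devices_info[ndevs-1].max_avail after the descending sort), caps the chunk at max_chunk_size per copy, then rounds the stripe down to a BTRFS_STRIPE_LEN multiple (64KB in this series). A worked userspace example of the same arithmetic, with assumed inputs:

#include <stdio.h>

#define STRIPE_LEN (64 * 1024ull)	/* BTRFS_STRIPE_LEN in this series */

int main(void)
{
	unsigned long long stripe_size = 900ull * 1024 * 1024; /* smallest hole */
	unsigned long long max_chunk_size = 1024ull * 1024 * 1024;
	int num_stripes = 4, dev_stripes = 1, ncopies = 1;

	/* cap the chunk at max_chunk_size per copy of the data */
	if (stripe_size * num_stripes > max_chunk_size * ncopies)
		stripe_size = max_chunk_size * ncopies / num_stripes;

	stripe_size /= dev_stripes;
	stripe_size = stripe_size / STRIPE_LEN * STRIPE_LEN; /* round down */

	printf("stripe_size = %llu\n", stripe_size); /* 268435456 = 256MB */
	return 0;
}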
@@ -2514,85 +2475,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2514 } 2475 }
2515 map->num_stripes = num_stripes; 2476 map->num_stripes = num_stripes;
2516 2477
2517 cur = fs_devices->alloc_list.next; 2478 for (i = 0; i < ndevs; ++i) {
2518 index = 0; 2479 for (j = 0; j < dev_stripes; ++j) {
2519 i = 0; 2480 int s = i * dev_stripes + j;
2520 2481 map->stripes[s].dev = devices_info[i].dev;
2521 calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, 2482 map->stripes[s].physical = devices_info[i].dev_offset +
2522 num_stripes, 0); 2483 j * stripe_size;
2523
2524 if (type & BTRFS_BLOCK_GROUP_DUP) {
2525 min_free = calc_size * 2;
2526 min_devices = 1;
2527 } else {
2528 min_free = calc_size;
2529 min_devices = min_stripes;
2530 }
2531
2532 INIT_LIST_HEAD(&private_devs);
2533 while (index < num_stripes) {
2534 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2535 BUG_ON(!device->writeable);
2536 if (device->total_bytes > device->bytes_used)
2537 avail = device->total_bytes - device->bytes_used;
2538 else
2539 avail = 0;
2540 cur = cur->next;
2541
2542 if (device->in_fs_metadata && avail >= min_free) {
2543 ret = find_free_dev_extent(trans, device, min_free,
2544 &devices_info[i].dev_offset,
2545 &devices_info[i].max_avail);
2546 if (ret == 0) {
2547 list_move_tail(&device->dev_alloc_list,
2548 &private_devs);
2549 map->stripes[index].dev = device;
2550 map->stripes[index].physical =
2551 devices_info[i].dev_offset;
2552 index++;
2553 if (type & BTRFS_BLOCK_GROUP_DUP) {
2554 map->stripes[index].dev = device;
2555 map->stripes[index].physical =
2556 devices_info[i].dev_offset +
2557 calc_size;
2558 index++;
2559 }
2560 } else if (ret != -ENOSPC)
2561 goto error;
2562
2563 devices_info[i].dev = device;
2564 i++;
2565 } else if (device->in_fs_metadata &&
2566 avail >= BTRFS_STRIPE_LEN) {
2567 devices_info[i].dev = device;
2568 devices_info[i].max_avail = avail;
2569 i++;
2570 }
2571
2572 if (cur == &fs_devices->alloc_list)
2573 break;
2574 }
2575
2576 list_splice(&private_devs, &fs_devices->alloc_list);
2577 if (index < num_stripes) {
2578 if (index >= min_stripes) {
2579 num_stripes = index;
2580 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2581 num_stripes /= sub_stripes;
2582 num_stripes *= sub_stripes;
2583 }
2584
2585 map = __shrink_map_lookup_stripes(map, num_stripes);
2586 } else if (i >= min_devices) {
2587 ret = __btrfs_alloc_tiny_space(trans, fs_devices,
2588 devices_info, i, type,
2589 &map, min_stripes,
2590 &calc_size);
2591 if (ret)
2592 goto error;
2593 } else {
2594 ret = -ENOSPC;
2595 goto error;
2596 } 2484 }
2597 } 2485 }
2598 map->sector_size = extent_root->sectorsize; 2486 map->sector_size = extent_root->sectorsize;
@@ -2603,20 +2491,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2603 map->sub_stripes = sub_stripes; 2491 map->sub_stripes = sub_stripes;
2604 2492
2605 *map_ret = map; 2493 *map_ret = map;
2606 *stripe_size = calc_size; 2494 num_bytes = stripe_size * (num_stripes / ncopies);
2607 *num_bytes = chunk_bytes_by_type(type, calc_size,
2608 map->num_stripes, sub_stripes);
2609 2495
2610 trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); 2496 *stripe_size_out = stripe_size;
2497 *num_bytes_out = num_bytes;
2611 2498
2612 em = alloc_extent_map(GFP_NOFS); 2499 trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
2500
2501 em = alloc_extent_map();
2613 if (!em) { 2502 if (!em) {
2614 ret = -ENOMEM; 2503 ret = -ENOMEM;
2615 goto error; 2504 goto error;
2616 } 2505 }
2617 em->bdev = (struct block_device *)map; 2506 em->bdev = (struct block_device *)map;
2618 em->start = start; 2507 em->start = start;
2619 em->len = *num_bytes; 2508 em->len = num_bytes;
2620 em->block_start = 0; 2509 em->block_start = 0;
2621 em->block_len = em->len; 2510 em->block_len = em->len;
2622 2511
@@ -2629,20 +2518,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2629 2518
2630 ret = btrfs_make_block_group(trans, extent_root, 0, type, 2519 ret = btrfs_make_block_group(trans, extent_root, 0, type,
2631 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 2520 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2632 start, *num_bytes); 2521 start, num_bytes);
2633 BUG_ON(ret); 2522 BUG_ON(ret);
2634 2523
2635 index = 0; 2524 for (i = 0; i < map->num_stripes; ++i) {
2636 while (index < map->num_stripes) { 2525 struct btrfs_device *device;
2637 device = map->stripes[index].dev; 2526 u64 dev_offset;
2638 dev_offset = map->stripes[index].physical; 2527
2528 device = map->stripes[i].dev;
2529 dev_offset = map->stripes[i].physical;
2639 2530
2640 ret = btrfs_alloc_dev_extent(trans, device, 2531 ret = btrfs_alloc_dev_extent(trans, device,
2641 info->chunk_root->root_key.objectid, 2532 info->chunk_root->root_key.objectid,
2642 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 2533 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2643 start, dev_offset, calc_size); 2534 start, dev_offset, stripe_size);
2644 BUG_ON(ret); 2535 BUG_ON(ret);
2645 index++;
2646 } 2536 }
2647 2537
2648 kfree(devices_info); 2538 kfree(devices_info);
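Stripe placement in the rewritten allocator is a simple two-level index: stripe s = i * dev_stripes + j lands on device i at dev_offset + j * stripe_size, and only DUP uses dev_stripes > 1, which is how both copies end up on the one device. A quick illustration of the indexing (ndevs = 2 here purely for demonstration):

#include <stdio.h>

int main(void)
{
	int ndevs = 2, dev_stripes = 2;	/* DUP-like layout, two devs for show */
	unsigned long long stripe_size = 8ull * 1024 * 1024;

	for (int i = 0; i < ndevs; i++)
		for (int j = 0; j < dev_stripes; j++) {
			int s = i * dev_stripes + j;
			printf("stripe %d -> dev %d @ +%llu\n",
			       s, i, j * stripe_size);
		}
	return 0;
}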
@@ -2849,7 +2739,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2849 2739
2850void btrfs_mapping_init(struct btrfs_mapping_tree *tree) 2740void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
2851{ 2741{
2852 extent_map_tree_init(&tree->map_tree, GFP_NOFS); 2742 extent_map_tree_init(&tree->map_tree);
2853} 2743}
2854 2744
2855void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) 2745void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
@@ -3499,7 +3389,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3499 free_extent_map(em); 3389 free_extent_map(em);
3500 } 3390 }
3501 3391
3502 em = alloc_extent_map(GFP_NOFS); 3392 em = alloc_extent_map();
3503 if (!em) 3393 if (!em)
3504 return -ENOMEM; 3394 return -ENOMEM;
3505 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 3395 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3688,15 +3578,6 @@ static int read_one_dev(struct btrfs_root *root,
3688 return ret; 3578 return ret;
3689} 3579}
3690 3580
3691int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
3692{
3693 struct btrfs_dev_item *dev_item;
3694
3695 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
3696 dev_item);
3697 return read_one_dev(root, buf, dev_item);
3698}
3699
3700int btrfs_read_sys_array(struct btrfs_root *root) 3581int btrfs_read_sys_array(struct btrfs_root *root)
3701{ 3582{
3702 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 3583 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
@@ -3813,7 +3694,7 @@ again:
3813 } 3694 }
3814 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 3695 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3815 key.objectid = 0; 3696 key.objectid = 0;
3816 btrfs_release_path(root, path); 3697 btrfs_release_path(path);
3817 goto again; 3698 goto again;
3818 } 3699 }
3819 ret = 0; 3700 ret = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cc2eadaf7a27..7c12d61ae7ae 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -85,7 +85,12 @@ struct btrfs_device {
85 /* physical drive uuid (or lvm uuid) */ 85 /* physical drive uuid (or lvm uuid) */
86 u8 uuid[BTRFS_UUID_SIZE]; 86 u8 uuid[BTRFS_UUID_SIZE];
87 87
88 /* per-device scrub information */
89 struct scrub_dev *scrub_device;
90
88 struct btrfs_work work; 91 struct btrfs_work work;
92 struct rcu_head rcu;
93 struct work_struct rcu_work;
89}; 94};
90 95
91struct btrfs_fs_devices { 96struct btrfs_fs_devices {
@@ -144,6 +149,7 @@ struct btrfs_device_info {
144 struct btrfs_device *dev; 149 struct btrfs_device *dev;
145 u64 dev_offset; 150 u64 dev_offset;
146 u64 max_avail; 151 u64 max_avail;
152 u64 total_avail;
147}; 153};
148 154
149struct map_lookup { 155struct map_lookup {
@@ -157,20 +163,8 @@ struct map_lookup {
157 struct btrfs_bio_stripe stripes[]; 163 struct btrfs_bio_stripe stripes[];
158}; 164};
159 165
160/* Used to sort the devices by max_avail(descending sort) */ 166#define map_lookup_size(n) (sizeof(struct map_lookup) + \
161int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); 167 (sizeof(struct btrfs_bio_stripe) * (n)))
162
163/*
164 * sort the devices by max_avail, in which max free extent size of each device
165 * is stored.(Descending Sort)
166 */
167static inline void btrfs_descending_sort_devices(
168 struct btrfs_device_info *devices,
169 size_t nr_devices)
170{
171 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
172 btrfs_cmp_device_free_bytes, NULL);
173}
174 168
175int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 169int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
176 u64 end, u64 *length); 170 u64 end, u64 *length);
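map_lookup_size(), now a macro here after the sort helpers moved into volumes.c, sizes the allocation to cover the flexible array member at the end of struct map_lookup. A minimal standalone sketch of that sizing idiom:

#include <stdio.h>
#include <stdlib.h>

struct stripe { unsigned long long physical; };

struct map {
	int num_stripes;
	struct stripe stripes[];	/* flexible array member */
};

#define map_size(n) (sizeof(struct map) + sizeof(struct stripe) * (n))

int main(void)
{
	struct map *m = malloc(map_size(4));

	if (!m)
		return 1;
	m->num_stripes = 4;
	printf("allocated %zu bytes\n", map_size(4));
	free(m);
	return 0;
}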
@@ -196,7 +190,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
196void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); 190void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
197int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 191int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
198 int mirror_num, int async_submit); 192 int mirror_num, int async_submit);
199int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
200int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 193int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
201 fmode_t flags, void *holder); 194 fmode_t flags, void *holder);
202int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 195int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
@@ -209,8 +202,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
209int btrfs_rm_device(struct btrfs_root *root, char *device_path); 202int btrfs_rm_device(struct btrfs_root *root, char *device_path);
210int btrfs_cleanup_fs_uuids(void); 203int btrfs_cleanup_fs_uuids(void);
211int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); 204int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
212int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
213 u64 logical, struct page *page);
214int btrfs_grow_device(struct btrfs_trans_handle *trans, 205int btrfs_grow_device(struct btrfs_trans_handle *trans,
215 struct btrfs_device *device, u64 new_size); 206 struct btrfs_device *device, u64 new_size);
216struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, 207struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
@@ -218,8 +209,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
218int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 209int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
219int btrfs_init_new_device(struct btrfs_root *root, char *path); 210int btrfs_init_new_device(struct btrfs_root *root, char *path);
220int btrfs_balance(struct btrfs_root *dev_root); 211int btrfs_balance(struct btrfs_root *dev_root);
221void btrfs_unlock_volumes(void);
222void btrfs_lock_volumes(void);
223int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 212int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
224int find_free_dev_extent(struct btrfs_trans_handle *trans, 213int find_free_dev_extent(struct btrfs_trans_handle *trans,
225 struct btrfs_device *device, u64 num_bytes, 214 struct btrfs_device *device, u64 num_bytes,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 72ab0295ca74..5366fe452ab0 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -44,7 +44,7 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
44 return -ENOMEM; 44 return -ENOMEM;
45 45
46 /* lookup the xattr by name */ 46 /* lookup the xattr by name */
47 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, 47 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
48 strlen(name), 0); 48 strlen(name), 0);
49 if (!di) { 49 if (!di) {
50 ret = -ENODATA; 50 ret = -ENODATA;
@@ -103,7 +103,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
103 return -ENOMEM; 103 return -ENOMEM;
104 104
105 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
107 strlen(name), -1); 107 strlen(name), -1);
108 if (IS_ERR(di)) { 108 if (IS_ERR(di)) {
109 ret = PTR_ERR(di); 109 ret = PTR_ERR(di);
@@ -120,13 +120,13 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
120 120
121 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
122 BUG_ON(ret); 122 BUG_ON(ret);
123 btrfs_release_path(root, path); 123 btrfs_release_path(path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) 126 if (!value)
127 goto out; 127 goto out;
128 } else { 128 } else {
129 btrfs_release_path(root, path); 129 btrfs_release_path(path);
130 130
131 if (flags & XATTR_REPLACE) { 131 if (flags & XATTR_REPLACE) {
132 /* we couldn't find the attr to replace */ 132 /* we couldn't find the attr to replace */
@@ -136,7 +136,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
136 } 136 }
137 137
138 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, 139 ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
140 name, name_len, value, size); 140 name, name_len, value, size);
141 BUG_ON(ret); 141 BUG_ON(ret);
142out: 142out:
@@ -188,7 +188,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
188 * NOTE: we set key.offset = 0; because we want to start with the 188 * NOTE: we set key.offset = 0; because we want to start with the
189 * first xattr that we find and walk forward 189 * first xattr that we find and walk forward
190 */ 190 */
191 key.objectid = inode->i_ino; 191 key.objectid = btrfs_ino(inode);
192 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 192 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
193 key.offset = 0; 193 key.offset = 0;
194 194
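Throughout the xattr paths, inode->i_ino is replaced by btrfs_ino(), the helper this series adds in btrfs_inode.h that prefers the objectid from the in-memory inode's location key over the VFS inode number. Roughly the following shape; the exact fallback condition is an assumption here, so see the real helper in btrfs_inode.h:

/* hedged sketch; assumes the btrfs headers (BTRFS_I, BTRFS_FIRST_FREE_OBJECTID) */
static inline u64 btrfs_ino_sketch(struct inode *inode)
{
	u64 ino = BTRFS_I(inode)->location.objectid;

	/* assumed fallback for special/low objectids (e.g. tree roots) */
	if (!ino || ino < BTRFS_FIRST_FREE_OBJECTID)
		ino = inode->i_ino;
	return ino;
}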