Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/Makefile            |    2
-rw-r--r--  fs/btrfs/acl.c               |    2
-rw-r--r--  fs/btrfs/btrfs_inode.h       |   14
-rw-r--r--  fs/btrfs/compression.c       |   47
-rw-r--r--  fs/btrfs/compression.h       |    2
-rw-r--r--  fs/btrfs/ctree.c             |   43
-rw-r--r--  fs/btrfs/ctree.h             |  196
-rw-r--r--  fs/btrfs/delayed-inode.c     | 1695
-rw-r--r--  fs/btrfs/delayed-inode.h     |  141
-rw-r--r--  fs/btrfs/delayed-ref.c       |  114
-rw-r--r--  fs/btrfs/delayed-ref.h       |    6
-rw-r--r--  fs/btrfs/dir-item.c          |   36
-rw-r--r--  fs/btrfs/disk-io.c           |  189
-rw-r--r--  fs/btrfs/disk-io.h           |   19
-rw-r--r--  fs/btrfs/export.c            |   25
-rw-r--r--  fs/btrfs/extent-tree.c       | 1781
-rw-r--r--  fs/btrfs/extent_io.c         |  271
-rw-r--r--  fs/btrfs/extent_io.h         |   40
-rw-r--r--  fs/btrfs/extent_map.c        |    8
-rw-r--r--  fs/btrfs/extent_map.h        |    4
-rw-r--r--  fs/btrfs/file-item.c         |   25
-rw-r--r--  fs/btrfs/file.c              |   45
-rw-r--r--  fs/btrfs/free-space-cache.c  |  993
-rw-r--r--  fs/btrfs/free-space-cache.h  |   48
-rw-r--r--  fs/btrfs/inode-map.c         |  428
-rw-r--r--  fs/btrfs/inode-map.h         |   13
-rw-r--r--  fs/btrfs/inode.c             |  682
-rw-r--r--  fs/btrfs/ioctl.c             |  165
-rw-r--r--  fs/btrfs/ioctl.h             |   76
-rw-r--r--  fs/btrfs/locking.c           |   25
-rw-r--r--  fs/btrfs/locking.h           |    2
-rw-r--r--  fs/btrfs/ref-cache.c         |  164
-rw-r--r--  fs/btrfs/ref-cache.h         |   24
-rw-r--r--  fs/btrfs/relocation.c        |   67
-rw-r--r--  fs/btrfs/root-tree.c         |   55
-rw-r--r--  fs/btrfs/scrub.c             | 1368
-rw-r--r--  fs/btrfs/super.c             |   40
-rw-r--r--  fs/btrfs/sysfs.c             |   77
-rw-r--r--  fs/btrfs/transaction.c       |  196
-rw-r--r--  fs/btrfs/transaction.h       |    5
-rw-r--r--  fs/btrfs/tree-defrag.c       |    2
-rw-r--r--  fs/btrfs/tree-log.c          |  172
-rw-r--r--  fs/btrfs/tree-log.h          |    1
-rw-r--r--  fs/btrfs/volumes.c           |  540
-rw-r--r--  fs/btrfs/volumes.h           |   25
-rw-r--r--  fs/btrfs/xattr.c             |   12
46 files changed, 5604 insertions, 4281 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 31610ea73aec..9b72dcf1cd25 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
-	compression.o delayed-ref.o relocation.o
+	compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 44ea5b92e1ba..f66fc9959733 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -288,7 +288,7 @@ int btrfs_acl_chmod(struct inode *inode)
 		return 0;
 
 	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
+	if (IS_ERR_OR_NULL(acl))
 		return PTR_ERR(acl);
 
 	clone = posix_acl_clone(acl, GFP_KERNEL);
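
The IS_ERR_OR_NULL() conversion above preserves the old behaviour because PTR_ERR(NULL) evaluates to 0: a missing ACL still returns success, while a real ERR_PTR still propagates its errno. A minimal sketch of the pattern (hypothetical caller, not part of the patch):

	struct posix_acl *acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
	if (IS_ERR_OR_NULL(acl))
		return PTR_ERR(acl);	/* 0 when acl == NULL, -errno for ERR_PTR */
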
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 57c3bb2884ce..d0b0e43a6a8b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -22,6 +22,7 @@
 #include "extent_map.h"
 #include "extent_io.h"
 #include "ordered-data.h"
+#include "delayed-inode.h"
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -158,14 +159,27 @@ struct btrfs_inode {
 	 */
 	unsigned force_compress:4;
 
+	struct btrfs_delayed_node *delayed_node;
+
 	struct inode vfs_inode;
 };
 
+extern unsigned char btrfs_filetype_table[];
+
 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
 {
 	return container_of(inode, struct btrfs_inode, vfs_inode);
 }
 
+static inline u64 btrfs_ino(struct inode *inode)
+{
+	u64 ino = BTRFS_I(inode)->location.objectid;
+
+	if (ino <= BTRFS_FIRST_FREE_OBJECTID)
+		ino = inode->i_ino;
+	return ino;
+}
+
 static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 {
 	i_size_write(inode, size);
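
The new btrfs_ino() helper reads the inode number from the inode's location key instead of i_ino, falling back to the VFS inode number for low-numbered special objectids (at or below BTRFS_FIRST_FREE_OBJECTID). A hedged usage sketch, mirroring the check_compressed_csum() change below (the printk itself is illustrative, not from this file):

	/* Prefer btrfs_ino() over inode->i_ino in messages and key lookups;
	 * the two can differ once the location key is authoritative. */
	printk(KERN_INFO "btrfs inode %llu\n",
	       (unsigned long long)btrfs_ino(inode));
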
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 41d1d7c70e29..bfe42b03eaf9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -125,9 +125,10 @@ static int check_compressed_csum(struct inode *inode,
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (csum != *cb_sum) {
-		printk(KERN_INFO "btrfs csum failed ino %lu "
+		printk(KERN_INFO "btrfs csum failed ino %llu "
 		       "extent %llu csum %u "
-		       "wanted %u mirror %d\n", inode->i_ino,
+		       "wanted %u mirror %d\n",
+		       (unsigned long long)btrfs_ino(inode),
 		       (unsigned long long)disk_start,
 		       csum, *cb_sum, cb->mirror_num);
 		ret = -EIO;
@@ -332,7 +333,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct compressed_bio *cb;
 	unsigned long bytes_left;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	int page_index = 0;
+	int pg_index = 0;
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
@@ -366,8 +367,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
-	for (page_index = 0; page_index < cb->nr_pages; page_index++) {
-		page = compressed_pages[page_index];
+	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_size)
 			ret = io_tree->ops->merge_bio_hook(page, 0,
@@ -432,7 +433,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 				     struct compressed_bio *cb)
 {
 	unsigned long end_index;
-	unsigned long page_index;
+	unsigned long pg_index;
 	u64 last_offset;
 	u64 isize = i_size_read(inode);
 	int ret;
@@ -456,13 +457,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
 	while (last_offset < compressed_end) {
-		page_index = last_offset >> PAGE_CACHE_SHIFT;
+		pg_index = last_offset >> PAGE_CACHE_SHIFT;
 
-		if (page_index > end_index)
+		if (pg_index > end_index)
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, page_index);
+		page = radix_tree_lookup(&mapping->page_tree, pg_index);
 		rcu_read_unlock();
 		if (page) {
 			misses++;
@@ -476,7 +477,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		if (!page)
 			break;
 
-		if (add_to_page_cache_lru(page, mapping, page_index,
+		if (add_to_page_cache_lru(page, mapping, pg_index,
 					  GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
@@ -560,7 +561,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
-	unsigned long page_index;
+	unsigned long pg_index;
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
@@ -613,10 +614,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
-		if (!cb->compressed_pages[page_index])
+		if (!cb->compressed_pages[pg_index])
 			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
@@ -634,8 +635,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
 
-	for (page_index = 0; page_index < nr_pages; page_index++) {
-		page = cb->compressed_pages[page_index];
+	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
@@ -702,8 +703,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	return 0;
 
 fail2:
-	for (page_index = 0; page_index < nr_pages; page_index++)
-		free_page((unsigned long)cb->compressed_pages[page_index]);
+	for (pg_index = 0; pg_index < nr_pages; pg_index++)
+		free_page((unsigned long)cb->compressed_pages[pg_index]);
 
 	kfree(cb->compressed_pages);
 fail1:
@@ -945,7 +946,7 @@ void btrfs_exit_compress(void)
 int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio_vec *bvec, int vcnt,
-			      unsigned long *page_index,
+			      unsigned long *pg_index,
 			      unsigned long *pg_offset)
 {
 	unsigned long buf_offset;
@@ -954,7 +955,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 	unsigned long working_bytes = total_out - buf_start;
 	unsigned long bytes;
 	char *kaddr;
-	struct page *page_out = bvec[*page_index].bv_page;
+	struct page *page_out = bvec[*pg_index].bv_page;
 
 	/*
 	 * start byte is the first byte of the page we're currently
@@ -995,11 +996,11 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 
 	/* check if we need to pick another page */
 	if (*pg_offset == PAGE_CACHE_SIZE) {
-		(*page_index)++;
-		if (*page_index >= vcnt)
+		(*pg_index)++;
+		if (*pg_index >= vcnt)
 			return 0;
 
-		page_out = bvec[*page_index].bv_page;
+		page_out = bvec[*pg_index].bv_page;
 		*pg_offset = 0;
 		start_byte = page_offset(page_out) - disk_start;
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 51000174b9d7..a12059f4f0fd 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -37,7 +37,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio_vec *bvec, int vcnt,
-			      unsigned long *page_index,
+			      unsigned long *pg_index,
 			      unsigned long *pg_offset);
 
 int btrfs_submit_compressed_write(struct inode *inode, u64 start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6f1a59cc41ff..b0e18d986e0a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,11 +38,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
-static int setup_items_for_insert(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *cpu_key, u32 *data_size,
-			u32 total_data, u32 total_size, int nr);
-
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -107,7 +102,7 @@ void btrfs_free_path(struct btrfs_path *p)
 {
 	if (!p)
 		return;
-	btrfs_release_path(NULL, p);
+	btrfs_release_path(p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
 
@@ -117,7 +112,7 @@ void btrfs_free_path(struct btrfs_path *p)
  *
  * It is safe to call this on paths that no locks or extent buffers held.
  */
-noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+noinline void btrfs_release_path(struct btrfs_path *p)
 {
 	int i;
 
@@ -1328,7 +1323,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 	ret = -EAGAIN;
 
 	/* release the whole path */
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	/* read the blocks */
 	if (block1)
@@ -1475,7 +1470,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 			return 0;
 		}
 		free_extent_buffer(tmp);
-		btrfs_release_path(NULL, p);
+		btrfs_release_path(p);
 		return -EIO;
 	}
 }
@@ -1494,7 +1489,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 	if (p->reada)
 		reada_for_search(root, p, level, slot, key->objectid);
 
-	btrfs_release_path(NULL, p);
+	btrfs_release_path(p);
 
 	ret = -EAGAIN;
 	tmp = read_tree_block(root, blocknr, blocksize, 0);
@@ -1563,7 +1558,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 	}
 	b = p->nodes[level];
 	if (!b) {
-		btrfs_release_path(NULL, p);
+		btrfs_release_path(p);
 		goto again;
 	}
 	BUG_ON(btrfs_header_nritems(b) == 1);
@@ -1753,7 +1748,7 @@ done:
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
 	if (ret < 0)
-		btrfs_release_path(root, p);
+		btrfs_release_path(p);
 	return ret;
 }
 
@@ -3026,7 +3021,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 				    struct btrfs_file_extent_item);
 		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
 	}
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	path->keep_locks = 1;
 	path->search_for_split = 1;
@@ -3555,11 +3550,10 @@ out:
  * to save stack depth by doing the bulk of the work in a function
 * that doesn't call btrfs_search_slot
 */
-static noinline_for_stack int
-setup_items_for_insert(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *cpu_key, u32 *data_size,
-			u32 total_data, u32 total_size, int nr)
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr)
 {
 	struct btrfs_item *item;
 	int i;
@@ -3643,7 +3637,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 	if (slot == 0) {
-		struct btrfs_disk_key disk_key;
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
@@ -3945,7 +3938,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	else
 		return 1;
 
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
@@ -4069,7 +4062,7 @@ find_next_key:
 		sret = btrfs_find_next_key(root, path, min_key, level,
 					   cache_only, min_trans);
 		if (sret == 0) {
-			btrfs_release_path(root, path);
+			btrfs_release_path(path);
 			goto again;
 		} else {
 			goto out;
@@ -4148,7 +4141,7 @@ next:
 		btrfs_node_key_to_cpu(c, &cur_key, slot);
 
 		orig_lowest = path->lowest_level;
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		path->lowest_level = level;
 		ret = btrfs_search_slot(NULL, root, &cur_key, path,
 					0, 0);
@@ -4225,7 +4218,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 again:
 	level = 1;
 	next = NULL;
-	btrfs_release_path(root, path);
+	btrfs_release_path(path);
 
 	path->keep_locks = 1;
 
@@ -4281,7 +4274,7 @@ again:
 		goto again;
 
 	if (ret < 0) {
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		goto done;
 	}
 
@@ -4320,7 +4313,7 @@ again:
 		goto again;
 
 	if (ret < 0) {
-		btrfs_release_path(root, path);
+		btrfs_release_path(path);
 		goto done;
 	}
 
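
Every btrfs_release_path() hunk above is the same mechanical change: the function loses its btrfs_root argument, which was never used (several call sites already passed NULL). A sketch of the resulting call pattern (hypothetical caller, assuming the usual search/release flow):

	struct btrfs_path *path = btrfs_alloc_path();
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	/* ... use the leaf ... */
	btrfs_release_path(path);	/* drop locks and extent buffer refs */
	btrfs_free_path(path);		/* calls btrfs_release_path() itself */
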
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f290b98e2fe6..026fc47b42cf 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
+#include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
@@ -33,6 +34,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
+#include "ioctl.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
@@ -105,6 +107,12 @@ struct btrfs_ordered_sum;
 /* For storing free space cache */
 #define BTRFS_FREE_SPACE_OBJECTID -11ULL
 
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -187,7 +195,6 @@ struct btrfs_mapping_tree {
 	struct extent_map_tree map_tree;
 };
 
-#define BTRFS_UUID_SIZE 16
 struct btrfs_dev_item {
 	/* the internal btrfs device id */
 	__le64 devid;
@@ -294,7 +301,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 		sizeof(struct btrfs_stripe) * (num_stripes - 1);
 }
 
-#define BTRFS_FSID_SIZE 16
 #define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
 #define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
 
@@ -510,6 +516,12 @@ struct btrfs_extent_item_v0 {
 /* use full backrefs for extent pointers in the block */
 #define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
 
+/*
+ * this flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions
+ */
+#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
+
 struct btrfs_tree_block_info {
 	struct btrfs_disk_key key;
 	u8 level;
@@ -740,12 +752,12 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;
 
-	int full:1;		/* indicates that we cannot allocate any more
-				   chunks for this space */
-	int chunk_alloc:1;	/* set if we are allocating a chunk */
+	unsigned int full:1;	/* indicates that we cannot allocate any more
+				   chunks for this space */
+	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
 
-	int force_alloc;	/* set if we need to force a chunk alloc for
-				   this space */
+	unsigned int force_alloc;	/* set if we need to force a chunk
+					   alloc for this space */
 
 	struct list_head list;
 
@@ -830,9 +842,6 @@ struct btrfs_block_group_cache {
 	u64 bytes_super;
 	u64 flags;
 	u64 sectorsize;
-	int extents_thresh;
-	int free_extents;
-	int total_bitmaps;
 	unsigned int ro:1;
 	unsigned int dirty:1;
 	unsigned int iref:1;
@@ -847,9 +856,7 @@ struct btrfs_block_group_cache {
 	struct btrfs_space_info *space_info;
 
 	/* free space cache stuff */
-	spinlock_t tree_lock;
-	struct rb_root free_space_offset;
-	u64 free_space;
+	struct btrfs_free_space_ctl *free_space_ctl;
 
 	/* block group cache stuff */
 	struct rb_node cache_node;
@@ -869,6 +876,7 @@ struct btrfs_block_group_cache {
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
+struct btrfs_delayed_root;
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -895,7 +903,10 @@ struct btrfs_fs_info {
 	/* logical->physical extent mapping */
 	struct btrfs_mapping_tree mapping_tree;
 
-	/* block reservation for extent, checksum and root tree */
+	/*
+	 * block reservation for extent, checksum, root tree and
+	 * delayed dir index item
+	 */
 	struct btrfs_block_rsv global_block_rsv;
 	/* block reservation for delay allocation */
 	struct btrfs_block_rsv delalloc_block_rsv;
@@ -1022,6 +1033,7 @@ struct btrfs_fs_info {
 	 * for the sys_munmap function call path
 	 */
 	struct btrfs_workers fixup_workers;
+	struct btrfs_workers delayed_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -1077,8 +1089,21 @@ struct btrfs_fs_info {
 
 	void *bdev_holder;
 
+	/* private scrub information */
+	struct mutex scrub_lock;
+	atomic_t scrubs_running;
+	atomic_t scrub_pause_req;
+	atomic_t scrubs_paused;
+	atomic_t scrub_cancel_req;
+	wait_queue_head_t scrub_pause_wait;
+	struct rw_semaphore scrub_super_lock;
+	int scrub_workers_refcnt;
+	struct btrfs_workers scrub_workers;
+
 	/* filesystem state */
 	u64 fs_state;
+
+	struct btrfs_delayed_root *delayed_root;
 };
 
 /*
@@ -1104,6 +1129,16 @@ struct btrfs_root {
 	spinlock_t accounting_lock;
 	struct btrfs_block_rsv *block_rsv;
 
+	/* free ino cache stuff */
+	struct mutex fs_commit_mutex;
+	struct btrfs_free_space_ctl *free_ino_ctl;
+	enum btrfs_caching_type cached;
+	spinlock_t cache_lock;
+	wait_queue_head_t cache_wait;
+	struct btrfs_free_space_ctl *free_ino_pinned;
+	u64 cache_progress;
+	struct inode *cache_inode;
+
 	struct mutex log_mutex;
 	wait_queue_head_t log_writer_wait;
 	wait_queue_head_t log_commit_wait[2];
@@ -1159,6 +1194,11 @@ struct btrfs_root {
 	struct rb_root inode_tree;
 
 	/*
+	 * radix tree that keeps track of delayed nodes of every inode,
+	 * protected by inode_lock
+	 */
+	struct radix_tree_root delayed_nodes_tree;
+	/*
 	 * right now this just gets used so that a root has its own devid
 	 * for stat. It may be used for more later
 	 */
@@ -1437,26 +1477,12 @@ static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
 	return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
 }
 
-static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
-					      struct btrfs_chunk *c, int nr,
-					      u64 val)
-{
-	btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
-}
-
 static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
 					struct btrfs_chunk *c, int nr)
 {
 	return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
 }
 
-static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
-					     struct btrfs_chunk *c, int nr,
-					     u64 val)
-{
-	btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
-}
-
 /* struct btrfs_block_group_item */
 BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
 			 used, 64);
@@ -1514,14 +1540,6 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
 	return (struct btrfs_timespec *)ptr;
 }
 
-static inline struct btrfs_timespec *
-btrfs_inode_otime(struct btrfs_inode_item *inode_item)
-{
-	unsigned long ptr = (unsigned long)inode_item;
-	ptr += offsetof(struct btrfs_inode_item, otime);
-	return (struct btrfs_timespec *)ptr;
-}
-
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
 
@@ -1872,33 +1890,6 @@ static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
 	return (u8 *)ptr;
 }
 
-static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
-{
-	unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
-	return (u8 *)ptr;
-}
-
-static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
-{
-	unsigned long ptr = offsetof(struct btrfs_header, csum);
-	return (u8 *)ptr;
-}
-
-static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
-static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
-static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
-{
-	return NULL;
-}
-
 static inline int btrfs_is_leaf(struct extent_buffer *eb)
 {
 	return btrfs_header_level(eb) == 0;
@@ -2052,22 +2043,6 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
-static inline int btrfs_set_root_name(struct btrfs_root *root,
-				      const char *name, int len)
-{
-	/* if we already have a name just free it */
-	kfree(root->name);
-
-	root->name = kmalloc(len+1, GFP_KERNEL);
-	if (!root->name)
-		return -ENOMEM;
-
-	memcpy(root->name, name, len);
-	root->name[len] = '\0';
-
-	return 0;
-}
-
 static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
 {
 	if (level == 0)
@@ -2096,6 +2071,13 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 }
 
 /* extent-tree.c */
+static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
+						 int num_items)
+{
+	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
+		3 * num_items;
+}
+
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
@@ -2105,12 +2087,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset, u64 bytenr);
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
@@ -2287,10 +2266,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, struct extent_buffer *parent,
 		      int start_slot, int cache_only, u64 *last_ret,
 		      struct btrfs_key *progress);
-void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+void btrfs_release_path(struct btrfs_path *p);
 struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
+void btrfs_clear_path_blocking(struct btrfs_path *p,
+			       struct extent_buffer *held);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2302,13 +2283,12 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 	return btrfs_del_items(trans, root, path, path->slots[0], 1);
 }
 
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr);
 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_key *key, void *data, u32 data_size);
-int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    int nr);
 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path,
@@ -2354,8 +2334,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *item);
 int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 			 btrfs_root_item *item, struct btrfs_key *key);
-int btrfs_search_root(struct btrfs_root *root, u64 search_start,
-		      u64 *found_objectid);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
@@ -2365,7 +2343,7 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
-			  int name_len, u64 dir,
+			  int name_len, struct inode *dir,
 			  struct btrfs_key *location, u8 type, u64 index);
 struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
 					     struct btrfs_root *root,
@@ -2410,12 +2388,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 offset);
 int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
 
-/* inode-map.c */
-int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *fs_root,
-			     u64 dirid, u64 *objectid);
-int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
-
 /* inode-item.c */
 int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
@@ -2460,8 +2432,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 		       struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
-			  u64 start, unsigned long len);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
@@ -2469,8 +2439,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct btrfs_path *path,
 			u64 isize);
-int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
-			     u64 end, struct list_head *list);
+int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+			     struct list_head *list, int search_commit);
 /* inode.c */
 
 /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
@@ -2499,8 +2469,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
-				   int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2517,7 +2485,6 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
-void btrfs_put_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2528,10 +2495,8 @@ void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			 struct btrfs_root *root, int *was_new);
-int btrfs_commit_write(struct file *file, struct page *page,
-		       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
-				    size_t page_offset, u64 start, u64 end,
+				    size_t pg_offset, u64 start, u64 end,
 				    int create);
 int btrfs_update_inode(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root,
@@ -2568,7 +2533,6 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
 int btrfs_sync_file(struct file *file, int datasync);
 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			    int skip_pinned);
-int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
 extern const struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 		       u64 start, u64 end, u64 *hint_byte, int drop_cache);
@@ -2588,10 +2552,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 /* sysfs.c */
 int btrfs_init_sysfs(void);
 void btrfs_exit_sysfs(void);
-int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
-int btrfs_sysfs_add_root(struct btrfs_root *root);
-void btrfs_sysfs_del_root(struct btrfs_root *root);
-void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
 
 /* xattr.c */
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -2634,4 +2594,18 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
 			      u64 *bytes_to_reserve);
 void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
 			       struct btrfs_pending_snapshot *pending);
+
+/* scrub.c */
+int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
+		    struct btrfs_scrub_progress *progress, int readonly);
+int btrfs_scrub_pause(struct btrfs_root *root);
+int btrfs_scrub_pause_super(struct btrfs_root *root);
+int btrfs_scrub_continue(struct btrfs_root *root);
+int btrfs_scrub_continue_super(struct btrfs_root *root);
+int btrfs_scrub_cancel(struct btrfs_root *root);
+int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
+int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
+int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
+			 struct btrfs_scrub_progress *progress);
+
 #endif
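
The new btrfs_calc_trans_metadata_size() helper sizes a reservation as (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items: one tree block per level of a worst-case path, tripled to leave room for splits along the way. A worked example under assumed common parameters (4 KiB leaves and nodes, BTRFS_MAX_LEVEL = 8):

	u64 bytes = (4096 + 4096 * (8 - 1)) * 3 * 1;	/* one item */
	/* = 32768 * 3 = 98304 bytes (96 KiB) reserved per item touched */
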
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
new file mode 100644
index 000000000000..01e29503a54b
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@ -0,0 +1,1695 @@
+/*
+ * Copyright (C) 2011 Fujitsu.  All rights reserved.
+ * Written by Miao Xie <miaox@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "delayed-inode.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+#define BTRFS_DELAYED_WRITEBACK		400
+#define BTRFS_DELAYED_BACKGROUND	100
+
+static struct kmem_cache *delayed_node_cache;
+
+int __init btrfs_delayed_inode_init(void)
+{
+	delayed_node_cache = kmem_cache_create("delayed_node",
+					sizeof(struct btrfs_delayed_node),
+					0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!delayed_node_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void btrfs_delayed_inode_exit(void)
+{
+	if (delayed_node_cache)
+		kmem_cache_destroy(delayed_node_cache);
+}
+
+static inline void btrfs_init_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				struct btrfs_root *root, u64 inode_id)
+{
+	delayed_node->root = root;
+	delayed_node->inode_id = inode_id;
+	atomic_set(&delayed_node->refs, 0);
+	delayed_node->count = 0;
+	delayed_node->in_list = 0;
+	delayed_node->inode_dirty = 0;
+	delayed_node->ins_root = RB_ROOT;
+	delayed_node->del_root = RB_ROOT;
+	mutex_init(&delayed_node->mutex);
+	delayed_node->index_cnt = 0;
+	INIT_LIST_HEAD(&delayed_node->n_list);
+	INIT_LIST_HEAD(&delayed_node->p_list);
+	delayed_node->bytes_reserved = 0;
+}
+
+static inline int btrfs_is_continuous_delayed_item(
+					struct btrfs_delayed_item *item1,
+					struct btrfs_delayed_item *item2)
+{
+	if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
+	    item1->key.objectid == item2->key.objectid &&
+	    item1->key.type == item2->key.type &&
+	    item1->key.offset + 1 == item2->key.offset)
+		return 1;
+	return 0;
+}
+
+static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
+							struct btrfs_root *root)
+{
+	return root->fs_info->delayed_root;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+							struct inode *inode)
+{
+	struct btrfs_delayed_node *node;
+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+	struct btrfs_root *root = btrfs_inode->root;
+	u64 ino = btrfs_ino(inode);
+	int ret;
+
+again:
+	node = ACCESS_ONCE(btrfs_inode->delayed_node);
+	if (node) {
+		atomic_inc(&node->refs);	/* can be accessed */
+		return node;
+	}
+
+	spin_lock(&root->inode_lock);
+	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+	if (node) {
+		if (btrfs_inode->delayed_node) {
+			spin_unlock(&root->inode_lock);
+			goto again;
+		}
+		btrfs_inode->delayed_node = node;
+		atomic_inc(&node->refs);	/* can be accessed */
+		atomic_inc(&node->refs);	/* cached in the inode */
+		spin_unlock(&root->inode_lock);
+		return node;
+	}
+	spin_unlock(&root->inode_lock);
+
+	node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+	btrfs_init_delayed_node(node, root, ino);
+
+	atomic_inc(&node->refs);	/* cached in the btrfs inode */
+	atomic_inc(&node->refs);	/* can be accessed */
+
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret) {
+		kmem_cache_free(delayed_node_cache, node);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&root->inode_lock);
+	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
+	if (ret == -EEXIST) {
+		kmem_cache_free(delayed_node_cache, node);
+		spin_unlock(&root->inode_lock);
+		radix_tree_preload_end();
+		goto again;
+	}
+	btrfs_inode->delayed_node = node;
+	spin_unlock(&root->inode_lock);
+	radix_tree_preload_end();
+
+	return node;
+}
+
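
btrfs_get_or_create_delayed_node() takes two references on a freshly created node, one for the pointer cached in btrfs_inode->delayed_node and one for the caller, and on a radix-tree insertion race (-EEXIST) frees the losing copy and retries. A hedged sketch of the caller-side pairing (not from the patch):

	struct btrfs_delayed_node *node;

	node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(node))
		return PTR_ERR(node);
	/* ... queue delayed items under node->mutex ... */
	btrfs_release_delayed_node(node);	/* drops the "can be accessed" ref */
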
+/*
+ * Call it when holding delayed_node->mutex
+ *
+ * If mod = 1, add this node into the prepared list.
+ */
+static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
+				     struct btrfs_delayed_node *node,
+				     int mod)
+{
+	spin_lock(&root->lock);
+	if (node->in_list) {
+		if (!list_empty(&node->p_list))
+			list_move_tail(&node->p_list, &root->prepare_list);
+		else if (mod)
+			list_add_tail(&node->p_list, &root->prepare_list);
+	} else {
+		list_add_tail(&node->n_list, &root->node_list);
+		list_add_tail(&node->p_list, &root->prepare_list);
+		atomic_inc(&node->refs);	/* inserted into list */
+		root->nodes++;
+		node->in_list = 1;
+	}
+	spin_unlock(&root->lock);
+}
+
+/* Call it when holding delayed_node->mutex */
+static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
+				       struct btrfs_delayed_node *node)
+{
+	spin_lock(&root->lock);
+	if (node->in_list) {
+		root->nodes--;
+		atomic_dec(&node->refs);	/* not in the list */
+		list_del_init(&node->n_list);
+		if (!list_empty(&node->p_list))
+			list_del_init(&node->p_list);
+		node->in_list = 0;
+	}
+	spin_unlock(&root->lock);
+}
+
+struct btrfs_delayed_node *btrfs_first_delayed_node(
+			struct btrfs_delayed_root *delayed_root)
+{
+	struct list_head *p;
+	struct btrfs_delayed_node *node = NULL;
+
+	spin_lock(&delayed_root->lock);
+	if (list_empty(&delayed_root->node_list))
+		goto out;
+
+	p = delayed_root->node_list.next;
+	node = list_entry(p, struct btrfs_delayed_node, n_list);
+	atomic_inc(&node->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return node;
+}
+
+struct btrfs_delayed_node *btrfs_next_delayed_node(
+						struct btrfs_delayed_node *node)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct list_head *p;
+	struct btrfs_delayed_node *next = NULL;
+
+	delayed_root = node->root->fs_info->delayed_root;
+	spin_lock(&delayed_root->lock);
+	if (!node->in_list) {	/* not in the list */
+		if (list_empty(&delayed_root->node_list))
+			goto out;
+		p = delayed_root->node_list.next;
+	} else if (list_is_last(&node->n_list, &delayed_root->node_list))
+		goto out;
+	else
+		p = node->n_list.next;
+
+	next = list_entry(p, struct btrfs_delayed_node, n_list);
+	atomic_inc(&next->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return next;
+}
+
+static void __btrfs_release_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				int mod)
+{
+	struct btrfs_delayed_root *delayed_root;
+
+	if (!delayed_node)
+		return;
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
+
+	mutex_lock(&delayed_node->mutex);
+	if (delayed_node->count)
+		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
+	else
+		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
+	mutex_unlock(&delayed_node->mutex);
+
+	if (atomic_dec_and_test(&delayed_node->refs)) {
+		struct btrfs_root *root = delayed_node->root;
+		spin_lock(&root->inode_lock);
+		if (atomic_read(&delayed_node->refs) == 0) {
+			radix_tree_delete(&root->delayed_nodes_tree,
+					  delayed_node->inode_id);
+			kmem_cache_free(delayed_node_cache, delayed_node);
+		}
+		spin_unlock(&root->inode_lock);
+	}
+}
+
+static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
+{
+	__btrfs_release_delayed_node(node, 0);
+}
+
+struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
+			struct btrfs_delayed_root *delayed_root)
+{
+	struct list_head *p;
+	struct btrfs_delayed_node *node = NULL;
+
+	spin_lock(&delayed_root->lock);
+	if (list_empty(&delayed_root->prepare_list))
+		goto out;
+
+	p = delayed_root->prepare_list.next;
+	list_del_init(p);
+	node = list_entry(p, struct btrfs_delayed_node, p_list);
+	atomic_inc(&node->refs);
+out:
+	spin_unlock(&delayed_root->lock);
+
+	return node;
+}
+
+static inline void btrfs_release_prepared_delayed_node(
+					struct btrfs_delayed_node *node)
+{
+	__btrfs_release_delayed_node(node, 1);
+}
+
+struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
+{
+	struct btrfs_delayed_item *item;
+	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
+	if (item) {
+		item->data_len = data_len;
+		item->ins_or_del = 0;
+		item->bytes_reserved = 0;
+		item->block_rsv = NULL;
+		item->delayed_node = NULL;
+		atomic_set(&item->refs, 1);
+	}
+	return item;
+}
+
+/*
+ * __btrfs_lookup_delayed_item - look up the delayed item by key
+ * @delayed_node: pointer to the delayed node
+ * @key:	  the key to look up
+ * @prev:	  used to store the prev item if the right item isn't found
+ * @next:	  used to store the next item if the right item isn't found
+ *
+ * Note: if we don't find the right item, we will return the prev item and
+ * the next item.
+ */
+static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
+				struct rb_root *root,
+				struct btrfs_key *key,
+				struct btrfs_delayed_item **prev,
+				struct btrfs_delayed_item **next)
+{
+	struct rb_node *node, *prev_node = NULL;
+	struct btrfs_delayed_item *delayed_item = NULL;
+	int ret = 0;
+
+	node = root->rb_node;
+
+	while (node) {
+		delayed_item = rb_entry(node, struct btrfs_delayed_item,
+					rb_node);
+		prev_node = node;
+		ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
+		if (ret < 0)
+			node = node->rb_right;
+		else if (ret > 0)
+			node = node->rb_left;
+		else
+			return delayed_item;
+	}
+
+	if (prev) {
+		if (!prev_node)
+			*prev = NULL;
+		else if (ret < 0)
+			*prev = delayed_item;
+		else if ((node = rb_prev(prev_node)) != NULL) {
+			*prev = rb_entry(node, struct btrfs_delayed_item,
+					 rb_node);
+		} else
+			*prev = NULL;
+	}
+
+	if (next) {
+		if (!prev_node)
+			*next = NULL;
+		else if (ret > 0)
+			*next = delayed_item;
+		else if ((node = rb_next(prev_node)) != NULL) {
+			*next = rb_entry(node, struct btrfs_delayed_item,
+					 rb_node);
+		} else
+			*next = NULL;
+	}
+	return NULL;
+}
+
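
When the exact key is absent, __btrfs_lookup_delayed_item() returns NULL but reports the neighbours through @prev and @next, derived from the last comparison of the descent. The __btrfs_search_delayed_*_item() wrappers below rely on this to fall forward to the successor; a condensed sketch of that pattern:

	struct btrfs_delayed_item *item, *next;

	item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
					   NULL, &next);
	if (!item)
		item = next;	/* exact key missing: use the next item */
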
368struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
369 struct btrfs_delayed_node *delayed_node,
370 struct btrfs_key *key)
371{
372 struct btrfs_delayed_item *item;
373
374 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
375 NULL, NULL);
376 return item;
377}
378
379struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
380 struct btrfs_delayed_node *delayed_node,
381 struct btrfs_key *key)
382{
383 struct btrfs_delayed_item *item;
384
385 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
386 NULL, NULL);
387 return item;
388}
389
390struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
391 struct btrfs_delayed_node *delayed_node,
392 struct btrfs_key *key)
393{
394 struct btrfs_delayed_item *item, *next;
395
396 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
397 NULL, &next);
398 if (!item)
399 item = next;
400
401 return item;
402}
403
404struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
405 struct btrfs_delayed_node *delayed_node,
406 struct btrfs_key *key)
407{
408 struct btrfs_delayed_item *item, *next;
409
410 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
411 NULL, &next);
412 if (!item)
413 item = next;
414
415 return item;
416}
417
418static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
419 struct btrfs_delayed_item *ins,
420 int action)
421{
422 struct rb_node **p, *node;
423 struct rb_node *parent_node = NULL;
424 struct rb_root *root;
425 struct btrfs_delayed_item *item;
426 int cmp;
427
428 if (action == BTRFS_DELAYED_INSERTION_ITEM)
429 root = &delayed_node->ins_root;
430 else if (action == BTRFS_DELAYED_DELETION_ITEM)
431 root = &delayed_node->del_root;
432 else
433 BUG();
434 p = &root->rb_node;
435 node = &ins->rb_node;
436
437 while (*p) {
438 parent_node = *p;
439 item = rb_entry(parent_node, struct btrfs_delayed_item,
440 rb_node);
441
442 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
443 if (cmp < 0)
444 p = &(*p)->rb_right;
445 else if (cmp > 0)
446 p = &(*p)->rb_left;
447 else
448 return -EEXIST;
449 }
450
451 rb_link_node(node, parent_node, p);
452 rb_insert_color(node, root);
453 ins->delayed_node = delayed_node;
454 ins->ins_or_del = action;
455
456 if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
457 action == BTRFS_DELAYED_INSERTION_ITEM &&
458 ins->key.offset >= delayed_node->index_cnt)
459 delayed_node->index_cnt = ins->key.offset + 1;
460
461 delayed_node->count++;
462 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
463 return 0;
464}
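
The index_cnt bump at the end of __btrfs_add_delayed_item keeps the directory's next-free-index counter strictly ahead of every queued insertion, so a later index allocation can't collide with a delayed item. A toy model of that update (the concrete values are invented for illustration):

    /* Toy model of the index_cnt update above (values invented). */
    u64 index_cnt = 10;         /* next free dir index for this node  */
    u64 ins_offset = 42;        /* key.offset of the queued insertion */

    if (ins_offset >= index_cnt)
            index_cnt = ins_offset + 1;   /* next index handed out: 43 */
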
465
466static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
467 struct btrfs_delayed_item *item)
468{
469 return __btrfs_add_delayed_item(node, item,
470 BTRFS_DELAYED_INSERTION_ITEM);
471}
472
473static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
474 struct btrfs_delayed_item *item)
475{
476 return __btrfs_add_delayed_item(node, item,
477 BTRFS_DELAYED_DELETION_ITEM);
478}
479
480static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
481{
482 struct rb_root *root;
483 struct btrfs_delayed_root *delayed_root;
484
485 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
486
487 BUG_ON(!delayed_root);
488 BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
489 delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
490
491 if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
492 root = &delayed_item->delayed_node->ins_root;
493 else
494 root = &delayed_item->delayed_node->del_root;
495
496 rb_erase(&delayed_item->rb_node, root);
497 delayed_item->delayed_node->count--;
498 atomic_dec(&delayed_root->items);
499 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
500 waitqueue_active(&delayed_root->wait))
501 wake_up(&delayed_root->wait);
502}
503
504static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
505{
506 if (item) {
507 __btrfs_remove_delayed_item(item);
508 if (atomic_dec_and_test(&item->refs))
509 kfree(item);
510 }
511}
512
513struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
514 struct btrfs_delayed_node *delayed_node)
515{
516 struct rb_node *p;
517 struct btrfs_delayed_item *item = NULL;
518
519 p = rb_first(&delayed_node->ins_root);
520 if (p)
521 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
522
523 return item;
524}
525
526struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
527 struct btrfs_delayed_node *delayed_node)
528{
529 struct rb_node *p;
530 struct btrfs_delayed_item *item = NULL;
531
532 p = rb_first(&delayed_node->del_root);
533 if (p)
534 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
535
536 return item;
537}
538
539struct btrfs_delayed_item *__btrfs_next_delayed_item(
540 struct btrfs_delayed_item *item)
541{
542 struct rb_node *p;
543 struct btrfs_delayed_item *next = NULL;
544
545 p = rb_next(&item->rb_node);
546 if (p)
547 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
548
549 return next;
550}
551
552static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
553 struct inode *inode)
554{
555 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
556 struct btrfs_delayed_node *delayed_node;
557
558 delayed_node = btrfs_inode->delayed_node;
559 if (delayed_node)
560 atomic_inc(&delayed_node->refs);
561
562 return delayed_node;
563}
564
565static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
566 u64 root_id)
567{
568 struct btrfs_key root_key;
569
570 if (root->objectid == root_id)
571 return root;
572
573 root_key.objectid = root_id;
574 root_key.type = BTRFS_ROOT_ITEM_KEY;
575 root_key.offset = (u64)-1;
576 return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
577}
578
579static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
580 struct btrfs_root *root,
581 struct btrfs_delayed_item *item)
582{
583 struct btrfs_block_rsv *src_rsv;
584 struct btrfs_block_rsv *dst_rsv;
585 u64 num_bytes;
586 int ret;
587
588 if (!trans->bytes_reserved)
589 return 0;
590
591 src_rsv = trans->block_rsv;
592 dst_rsv = &root->fs_info->global_block_rsv;
593
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) {
597 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600
601 return ret;
602}
603
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item)
606{
607 if (!item->bytes_reserved)
608 return;
609
610 btrfs_block_rsv_release(root, item->block_rsv,
611 item->bytes_reserved);
612}
613
614static int btrfs_delayed_inode_reserve_metadata(
615 struct btrfs_trans_handle *trans,
616 struct btrfs_root *root,
617 struct btrfs_delayed_node *node)
618{
619 struct btrfs_block_rsv *src_rsv;
620 struct btrfs_block_rsv *dst_rsv;
621 u64 num_bytes;
622 int ret;
623
624 if (!trans->bytes_reserved)
625 return 0;
626
627 src_rsv = trans->block_rsv;
628 dst_rsv = &root->fs_info->global_block_rsv;
629
630 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
631 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
632 if (!ret)
633 node->bytes_reserved = num_bytes;
634
635 return ret;
636}
637
638static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
639 struct btrfs_delayed_node *node)
640{
641 struct btrfs_block_rsv *rsv;
642
643 if (!node->bytes_reserved)
644 return;
645
646 rsv = &root->fs_info->global_block_rsv;
647 btrfs_block_rsv_release(root, rsv,
648 node->bytes_reserved);
649 node->bytes_reserved = 0;
650}
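
Each reserve/release pair above migrates exactly one btrfs_calc_trans_metadata_size(root, 1) unit from the transaction's reservation into the global reservation, so a queued item or inode stays backed by space even after the transaction handle goes away. A toy model of that hand-off, with invented names:

    /* Toy model of the reservation migration (all names invented). */
    struct toy_rsv { u64 reserved; };

    static int toy_migrate(struct toy_rsv *src, struct toy_rsv *dst, u64 bytes)
    {
            if (src->reserved < bytes)
                    return -ENOSPC;     /* caller must fall back or fail */
            src->reserved -= bytes;     /* space leaves the trans handle */
            dst->reserved += bytes;     /* ...and now backs the delayed item */
            return 0;
    }
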
651
652/*
653 * This helper will insert some continuous items into the same leaf according
654 * to the free space of the leaf.
655 */
656static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 struct btrfs_delayed_item *item)
660{
661 struct btrfs_delayed_item *curr, *next;
662 int free_space;
663 int total_data_size = 0, total_size = 0;
664 struct extent_buffer *leaf;
665 char *data_ptr;
666 struct btrfs_key *keys;
667 u32 *data_size;
668 struct list_head head;
669 int slot;
670 int nitems;
671 int i;
672 int ret = 0;
673
674 BUG_ON(!path->nodes[0]);
675
676 leaf = path->nodes[0];
677 free_space = btrfs_leaf_free_space(root, leaf);
678 INIT_LIST_HEAD(&head);
679
680 next = item;
681	nitems = 0;
682 /*
683	 * count the number of continuous items that we can insert in one batch
684 */
685 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
686 free_space) {
687 total_data_size += next->data_len;
688 total_size += next->data_len + sizeof(struct btrfs_item);
689 list_add_tail(&next->tree_list, &head);
690 nitems++;
691
692 curr = next;
693 next = __btrfs_next_delayed_item(curr);
694 if (!next)
695 break;
696
697 if (!btrfs_is_continuous_delayed_item(curr, next))
698 break;
699 }
700
701 if (!nitems) {
702 ret = 0;
703 goto out;
704 }
705
706 /*
707	 * we need to allocate some memory space, but that might make the task
708	 * sleep, so we set all the locked nodes in the path to blocking locks
709	 * first.
710 */
711 btrfs_set_path_blocking(path);
712
713 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
714 if (!keys) {
715 ret = -ENOMEM;
716 goto out;
717 }
718
719 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
720 if (!data_size) {
721 ret = -ENOMEM;
722 goto error;
723 }
724
725 /* get keys of all the delayed items */
726 i = 0;
727 list_for_each_entry(next, &head, tree_list) {
728 keys[i] = next->key;
729 data_size[i] = next->data_len;
730 i++;
731 }
732
733	/* reset all the locked nodes in the path to spinning locks. */
734 btrfs_clear_path_blocking(path, NULL);
735
736 /* insert the keys of the items */
737 ret = setup_items_for_insert(trans, root, path, keys, data_size,
738 total_data_size, total_size, nitems);
739 if (ret)
740 goto error;
741
742 /* insert the dir index items */
743 slot = path->slots[0];
744 list_for_each_entry_safe(curr, next, &head, tree_list) {
745 data_ptr = btrfs_item_ptr(leaf, slot, char);
746 write_extent_buffer(leaf, &curr->data,
747 (unsigned long)data_ptr,
748 curr->data_len);
749 slot++;
750
751 btrfs_delayed_item_release_metadata(root, curr);
752
753 list_del(&curr->tree_list);
754 btrfs_release_delayed_item(curr);
755 }
756
757error:
758 kfree(data_size);
759 kfree(keys);
760out:
761 return ret;
762}
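
The size check in the batching loop above admits another item only while total_size + data_len + sizeof(struct btrfs_item) still fits in the leaf's free space. A back-of-the-envelope sketch of that arithmetic (the 4K leaf and 32-byte payload are assumed values, not taken from this patch):

    /* Hypothetical numbers only: how many small dir index items fit
     * into one empty 4K leaf under the check above. */
    int free_space = 3995;      /* assumed: 4096 minus the leaf header */
    int item_head = 25;         /* assumed sizeof(struct btrfs_item)   */
    int data_len = 32;          /* assumed dir index payload size      */
    int max_batch = free_space / (data_len + item_head);   /* ~70 items */
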
763
764/*
765 * This helper only handles simple insertions that don't need to extend
766 * an item for new data, e.g. directory name index and inode insertion.
767 */
768static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
769 struct btrfs_root *root,
770 struct btrfs_path *path,
771 struct btrfs_delayed_item *delayed_item)
772{
773 struct extent_buffer *leaf;
774 struct btrfs_item *item;
775 char *ptr;
776 int ret;
777
778 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
779 delayed_item->data_len);
780 if (ret < 0 && ret != -EEXIST)
781 return ret;
782
783 leaf = path->nodes[0];
784
785 item = btrfs_item_nr(leaf, path->slots[0]);
786 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
787
788 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
789 delayed_item->data_len);
790 btrfs_mark_buffer_dirty(leaf);
791
792 btrfs_delayed_item_release_metadata(root, delayed_item);
793 return 0;
794}
795
796/*
797 * we insert one item first, then if some continuous items follow, we
798 * try to insert them into the same leaf.
799 */
800static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
801 struct btrfs_path *path,
802 struct btrfs_root *root,
803 struct btrfs_delayed_node *node)
804{
805 struct btrfs_delayed_item *curr, *prev;
806 int ret = 0;
807
808do_again:
809 mutex_lock(&node->mutex);
810 curr = __btrfs_first_delayed_insertion_item(node);
811 if (!curr)
812 goto insert_end;
813
814 ret = btrfs_insert_delayed_item(trans, root, path, curr);
815 if (ret < 0) {
816 btrfs_release_path(path);
817 goto insert_end;
818 }
819
820 prev = curr;
821 curr = __btrfs_next_delayed_item(prev);
822 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
823 /* insert the continuous items into the same leaf */
824 path->slots[0]++;
825 btrfs_batch_insert_items(trans, root, path, curr);
826 }
827 btrfs_release_delayed_item(prev);
828 btrfs_mark_buffer_dirty(path->nodes[0]);
829
830 btrfs_release_path(path);
831 mutex_unlock(&node->mutex);
832 goto do_again;
833
834insert_end:
835 mutex_unlock(&node->mutex);
836 return ret;
837}
838
839static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
840 struct btrfs_root *root,
841 struct btrfs_path *path,
842 struct btrfs_delayed_item *item)
843{
844 struct btrfs_delayed_item *curr, *next;
845 struct extent_buffer *leaf;
846 struct btrfs_key key;
847 struct list_head head;
848 int nitems, i, last_item;
849 int ret = 0;
850
851 BUG_ON(!path->nodes[0]);
852
853 leaf = path->nodes[0];
854
855 i = path->slots[0];
856 last_item = btrfs_header_nritems(leaf) - 1;
857 if (i > last_item)
858 return -ENOENT; /* FIXME: Is errno suitable? */
859
860 next = item;
861 INIT_LIST_HEAD(&head);
862 btrfs_item_key_to_cpu(leaf, &key, i);
863 nitems = 0;
864 /*
865	 * count the number of dir index items that we can delete in one batch
866 */
867 while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
868 list_add_tail(&next->tree_list, &head);
869 nitems++;
870
871 curr = next;
872 next = __btrfs_next_delayed_item(curr);
873 if (!next)
874 break;
875
876 if (!btrfs_is_continuous_delayed_item(curr, next))
877 break;
878
879 i++;
880 if (i > last_item)
881 break;
882 btrfs_item_key_to_cpu(leaf, &key, i);
883 }
884
885 if (!nitems)
886 return 0;
887
888 ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
889 if (ret)
890 goto out;
891
892 list_for_each_entry_safe(curr, next, &head, tree_list) {
893 btrfs_delayed_item_release_metadata(root, curr);
894 list_del(&curr->tree_list);
895 btrfs_release_delayed_item(curr);
896 }
897
898out:
899 return ret;
900}
901
902static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
903 struct btrfs_path *path,
904 struct btrfs_root *root,
905 struct btrfs_delayed_node *node)
906{
907 struct btrfs_delayed_item *curr, *prev;
908 int ret = 0;
909
910do_again:
911 mutex_lock(&node->mutex);
912 curr = __btrfs_first_delayed_deletion_item(node);
913 if (!curr)
914 goto delete_fail;
915
916 ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
917 if (ret < 0)
918 goto delete_fail;
919 else if (ret > 0) {
920 /*
921		 * can't find the btree item which this delayed item points to,
922		 * so the delayed item is invalid; just drop it.
923 */
924 prev = curr;
925 curr = __btrfs_next_delayed_item(prev);
926 btrfs_release_delayed_item(prev);
927 ret = 0;
928 btrfs_release_path(path);
929 if (curr)
930 goto do_again;
931 else
932 goto delete_fail;
933 }
934
935 btrfs_batch_delete_items(trans, root, path, curr);
936 btrfs_release_path(path);
937 mutex_unlock(&node->mutex);
938 goto do_again;
939
940delete_fail:
941 btrfs_release_path(path);
942 mutex_unlock(&node->mutex);
943 return ret;
944}
945
946static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
947{
948 struct btrfs_delayed_root *delayed_root;
949
950 if (delayed_node && delayed_node->inode_dirty) {
951 BUG_ON(!delayed_node->root);
952 delayed_node->inode_dirty = 0;
953 delayed_node->count--;
954
955 delayed_root = delayed_node->root->fs_info->delayed_root;
956 atomic_dec(&delayed_root->items);
957 if (atomic_read(&delayed_root->items) <
958 BTRFS_DELAYED_BACKGROUND &&
959 waitqueue_active(&delayed_root->wait))
960 wake_up(&delayed_root->wait);
961 }
962}
963
964static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
965 struct btrfs_root *root,
966 struct btrfs_path *path,
967 struct btrfs_delayed_node *node)
968{
969 struct btrfs_key key;
970 struct btrfs_inode_item *inode_item;
971 struct extent_buffer *leaf;
972 int ret;
973
974 mutex_lock(&node->mutex);
975 if (!node->inode_dirty) {
976 mutex_unlock(&node->mutex);
977 return 0;
978 }
979
980 key.objectid = node->inode_id;
981 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
982 key.offset = 0;
983 ret = btrfs_lookup_inode(trans, root, path, &key, 1);
984 if (ret > 0) {
985 btrfs_release_path(path);
986 mutex_unlock(&node->mutex);
987 return -ENOENT;
988 } else if (ret < 0) {
989 mutex_unlock(&node->mutex);
990 return ret;
991 }
992
993 btrfs_unlock_up_safe(path, 1);
994 leaf = path->nodes[0];
995 inode_item = btrfs_item_ptr(leaf, path->slots[0],
996 struct btrfs_inode_item);
997 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
998 sizeof(struct btrfs_inode_item));
999 btrfs_mark_buffer_dirty(leaf);
1000 btrfs_release_path(path);
1001
1002 btrfs_delayed_inode_release_metadata(root, node);
1003 btrfs_release_delayed_inode(node);
1004 mutex_unlock(&node->mutex);
1005
1006 return 0;
1007}
1008
1009/* Called when committing the transaction. */
1010int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1011 struct btrfs_root *root)
1012{
1013 struct btrfs_delayed_root *delayed_root;
1014 struct btrfs_delayed_node *curr_node, *prev_node;
1015 struct btrfs_path *path;
1016 int ret = 0;
1017
1018 path = btrfs_alloc_path();
1019 if (!path)
1020 return -ENOMEM;
1021 path->leave_spinning = 1;
1022
1023 delayed_root = btrfs_get_delayed_root(root);
1024
1025 curr_node = btrfs_first_delayed_node(delayed_root);
1026 while (curr_node) {
1027 root = curr_node->root;
1028 ret = btrfs_insert_delayed_items(trans, path, root,
1029 curr_node);
1030 if (!ret)
1031 ret = btrfs_delete_delayed_items(trans, path, root,
1032 curr_node);
1033 if (!ret)
1034 ret = btrfs_update_delayed_inode(trans, root, path,
1035 curr_node);
1036 if (ret) {
1037 btrfs_release_delayed_node(curr_node);
1038 break;
1039 }
1040
1041 prev_node = curr_node;
1042 curr_node = btrfs_next_delayed_node(curr_node);
1043 btrfs_release_delayed_node(prev_node);
1044 }
1045
1046 btrfs_free_path(path);
1047 return ret;
1048}
1049
1050static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1051 struct btrfs_delayed_node *node)
1052{
1053 struct btrfs_path *path;
1054 int ret;
1055
1056 path = btrfs_alloc_path();
1057 if (!path)
1058 return -ENOMEM;
1059 path->leave_spinning = 1;
1060
1061 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1062 if (!ret)
1063 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1064 if (!ret)
1065 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1066 btrfs_free_path(path);
1067
1068 return ret;
1069}
1070
1071int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1072 struct inode *inode)
1073{
1074 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1075 int ret;
1076
1077 if (!delayed_node)
1078 return 0;
1079
1080 mutex_lock(&delayed_node->mutex);
1081 if (!delayed_node->count) {
1082 mutex_unlock(&delayed_node->mutex);
1083 btrfs_release_delayed_node(delayed_node);
1084 return 0;
1085 }
1086 mutex_unlock(&delayed_node->mutex);
1087
1088 ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1089 btrfs_release_delayed_node(delayed_node);
1090 return ret;
1091}
1092
1093void btrfs_remove_delayed_node(struct inode *inode)
1094{
1095 struct btrfs_delayed_node *delayed_node;
1096
1097 delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1098 if (!delayed_node)
1099 return;
1100
1101 BTRFS_I(inode)->delayed_node = NULL;
1102 btrfs_release_delayed_node(delayed_node);
1103}
1104
1105struct btrfs_async_delayed_node {
1106 struct btrfs_root *root;
1107 struct btrfs_delayed_node *delayed_node;
1108 struct btrfs_work work;
1109};
1110
1111static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1112{
1113 struct btrfs_async_delayed_node *async_node;
1114 struct btrfs_trans_handle *trans;
1115 struct btrfs_path *path;
1116 struct btrfs_delayed_node *delayed_node = NULL;
1117 struct btrfs_root *root;
1118 unsigned long nr = 0;
1119 int need_requeue = 0;
1120 int ret;
1121
1122 async_node = container_of(work, struct btrfs_async_delayed_node, work);
1123
1124 path = btrfs_alloc_path();
1125 if (!path)
1126 goto out;
1127 path->leave_spinning = 1;
1128
1129 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root;
1131
1132 trans = btrfs_join_transaction(root, 0);
1133 if (IS_ERR(trans))
1134 goto free_path;
1135
1136 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1137 if (!ret)
1138 ret = btrfs_delete_delayed_items(trans, path, root,
1139 delayed_node);
1140
1141 if (!ret)
1142 btrfs_update_delayed_inode(trans, root, path, delayed_node);
1143
1144	/*
1145	 * Maybe new delayed items have been inserted, so we need to requeue
1146	 * the work. Besides that, we must dequeue the empty delayed nodes
1147	 * to avoid the race between the delayed-items balance and the worker.
1148	 * The race looks like this:
1149	 * Task1				Worker thread
1150	 *					count == 0, needn't requeue
1151	 *					also needn't insert the
1152	 *					delayed node into the prepare
1153	 *					list again.
1154	 * add lots of delayed items
1155	 * queue the delayed node
1156	 *					already in the list,
1157	 *					and not in the prepare
1158	 *					list, it means the delayed
1159	 *					node is being dealt with
1160	 *					by the worker.
1161	 * do delayed items balance
1162	 *					the delayed node is being
1163	 *					dealt with by the worker
1164	 *					now, just wait.
1165	 *					the worker goes idle.
1166	 * Task1 will sleep until the transaction is committed.
1167	 */
1168 mutex_lock(&delayed_node->mutex);
1169 if (delayed_node->count)
1170 need_requeue = 1;
1171 else
1172 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1173 delayed_node);
1174 mutex_unlock(&delayed_node->mutex);
1175
1176 nr = trans->blocks_used;
1177
1178 btrfs_end_transaction_dmeta(trans, root);
1179 __btrfs_btree_balance_dirty(root, nr);
1180free_path:
1181 btrfs_free_path(path);
1182out:
1183 if (need_requeue)
1184 btrfs_requeue_work(&async_node->work);
1185 else {
1186 btrfs_release_prepared_delayed_node(delayed_node);
1187 kfree(async_node);
1188 }
1189}
1190
1191static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1192 struct btrfs_root *root, int all)
1193{
1194 struct btrfs_async_delayed_node *async_node;
1195 struct btrfs_delayed_node *curr;
1196 int count = 0;
1197
1198again:
1199 curr = btrfs_first_prepared_delayed_node(delayed_root);
1200 if (!curr)
1201 return 0;
1202
1203 async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1204 if (!async_node) {
1205 btrfs_release_prepared_delayed_node(curr);
1206 return -ENOMEM;
1207 }
1208
1209 async_node->root = root;
1210 async_node->delayed_node = curr;
1211
1212 async_node->work.func = btrfs_async_run_delayed_node_done;
1213 async_node->work.flags = 0;
1214
1215 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1216 count++;
1217
1218 if (all || count < 4)
1219 goto again;
1220
1221 return 0;
1222}
1223
1224void btrfs_balance_delayed_items(struct btrfs_root *root)
1225{
1226 struct btrfs_delayed_root *delayed_root;
1227
1228 delayed_root = btrfs_get_delayed_root(root);
1229
1230 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1231 return;
1232
1233 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1234 int ret;
1235 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1236 if (ret)
1237 return;
1238
1239 wait_event_interruptible_timeout(
1240 delayed_root->wait,
1241 (atomic_read(&delayed_root->items) <
1242 BTRFS_DELAYED_BACKGROUND),
1243 HZ);
1244 return;
1245 }
1246
1247 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1248}
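
btrfs_balance_delayed_items is therefore a two-threshold throttle: below BTRFS_DELAYED_BACKGROUND it does nothing, above BTRFS_DELAYED_WRITEBACK it flushes everything and waits, and in between it kicks a few async workers without waiting. A standalone model of that decision (names and threshold values below are placeholders, not the ones from this patch):

    /* Toy model of the throttling decision (names and values invented). */
    #define TOY_BACKGROUND 100  /* placeholder for BTRFS_DELAYED_BACKGROUND */
    #define TOY_WRITEBACK 400   /* placeholder for BTRFS_DELAYED_WRITEBACK  */

    enum balance_action { DO_NOTHING, ASYNC_FLUSH_SOME, FLUSH_ALL_AND_WAIT };

    static enum balance_action delayed_balance_action(int items)
    {
            if (items < TOY_BACKGROUND)     /* queue still short */
                    return DO_NOTHING;
            if (items >= TOY_WRITEBACK)     /* flush all, wait to drain */
                    return FLUSH_ALL_AND_WAIT;
            return ASYNC_FLUSH_SOME;        /* kick async workers only */
    }
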
1249
1250int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1251 struct btrfs_root *root, const char *name,
1252 int name_len, struct inode *dir,
1253 struct btrfs_disk_key *disk_key, u8 type,
1254 u64 index)
1255{
1256 struct btrfs_delayed_node *delayed_node;
1257 struct btrfs_delayed_item *delayed_item;
1258 struct btrfs_dir_item *dir_item;
1259 int ret;
1260
1261 delayed_node = btrfs_get_or_create_delayed_node(dir);
1262 if (IS_ERR(delayed_node))
1263 return PTR_ERR(delayed_node);
1264
1265 delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1266 if (!delayed_item) {
1267 ret = -ENOMEM;
1268 goto release_node;
1269 }
1270
1271 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1272 /*
1273	 * we reserved enough space when we started this transaction, so a
1274	 * metadata reservation failure here is impossible
1275 */
1276 BUG_ON(ret);
1277
1278 delayed_item->key.objectid = btrfs_ino(dir);
1279 btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1280 delayed_item->key.offset = index;
1281
1282 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1283 dir_item->location = *disk_key;
1284 dir_item->transid = cpu_to_le64(trans->transid);
1285 dir_item->data_len = 0;
1286 dir_item->name_len = cpu_to_le16(name_len);
1287 dir_item->type = type;
1288 memcpy((char *)(dir_item + 1), name, name_len);
1289
1290 mutex_lock(&delayed_node->mutex);
1291 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1292 if (unlikely(ret)) {
1293 printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1294		       "the insertion tree of the delayed node "
1295 "(root id: %llu, inode id: %llu, errno: %d)\n",
1296 name,
1297 (unsigned long long)delayed_node->root->objectid,
1298 (unsigned long long)delayed_node->inode_id,
1299 ret);
1300 BUG();
1301 }
1302 mutex_unlock(&delayed_node->mutex);
1303
1304release_node:
1305 btrfs_release_delayed_node(delayed_node);
1306 return ret;
1307}
1308
1309static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1310 struct btrfs_delayed_node *node,
1311 struct btrfs_key *key)
1312{
1313 struct btrfs_delayed_item *item;
1314
1315 mutex_lock(&node->mutex);
1316 item = __btrfs_lookup_delayed_insertion_item(node, key);
1317 if (!item) {
1318 mutex_unlock(&node->mutex);
1319 return 1;
1320 }
1321
1322 btrfs_delayed_item_release_metadata(root, item);
1323 btrfs_release_delayed_item(item);
1324 mutex_unlock(&node->mutex);
1325 return 0;
1326}
1327
1328int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1329 struct btrfs_root *root, struct inode *dir,
1330 u64 index)
1331{
1332 struct btrfs_delayed_node *node;
1333 struct btrfs_delayed_item *item;
1334 struct btrfs_key item_key;
1335 int ret;
1336
1337 node = btrfs_get_or_create_delayed_node(dir);
1338 if (IS_ERR(node))
1339 return PTR_ERR(node);
1340
1341 item_key.objectid = btrfs_ino(dir);
1342 btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1343 item_key.offset = index;
1344
1345 ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1346 if (!ret)
1347 goto end;
1348
1349 item = btrfs_alloc_delayed_item(0);
1350 if (!item) {
1351 ret = -ENOMEM;
1352 goto end;
1353 }
1354
1355 item->key = item_key;
1356
1357 ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1358 /*
1359	 * we reserved enough space when we started this transaction, so a
1360	 * metadata reservation failure here is impossible.
1361 */
1362 BUG_ON(ret);
1363
1364 mutex_lock(&node->mutex);
1365 ret = __btrfs_add_delayed_deletion_item(node, item);
1366 if (unlikely(ret)) {
1367 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1368		       "into the deletion tree of the delayed node "
1369 "(root id: %llu, inode id: %llu, errno: %d)\n",
1370 (unsigned long long)index,
1371 (unsigned long long)node->root->objectid,
1372 (unsigned long long)node->inode_id,
1373 ret);
1374 BUG();
1375 }
1376 mutex_unlock(&node->mutex);
1377end:
1378 btrfs_release_delayed_node(node);
1379 return ret;
1380}
1381
1382int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1383{
1384 struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
1385 int ret = 0;
1386
1387 if (!delayed_node)
1388 return -ENOENT;
1389
1390 /*
1391	 * Since we hold the i_mutex of this directory, it is impossible that
1392	 * a new directory index is added to the delayed node or that index_cnt
1393	 * is updated right now. So we needn't lock the delayed node.
1394 */
1395 if (!delayed_node->index_cnt)
1396 return -EINVAL;
1397
1398 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1399 return ret;
1400}
1401
1402void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1403 struct list_head *del_list)
1404{
1405 struct btrfs_delayed_node *delayed_node;
1406 struct btrfs_delayed_item *item;
1407
1408 delayed_node = btrfs_get_delayed_node(inode);
1409 if (!delayed_node)
1410 return;
1411
1412 mutex_lock(&delayed_node->mutex);
1413 item = __btrfs_first_delayed_insertion_item(delayed_node);
1414 while (item) {
1415 atomic_inc(&item->refs);
1416 list_add_tail(&item->readdir_list, ins_list);
1417 item = __btrfs_next_delayed_item(item);
1418 }
1419
1420 item = __btrfs_first_delayed_deletion_item(delayed_node);
1421 while (item) {
1422 atomic_inc(&item->refs);
1423 list_add_tail(&item->readdir_list, del_list);
1424 item = __btrfs_next_delayed_item(item);
1425 }
1426 mutex_unlock(&delayed_node->mutex);
1427 /*
1428	 * This delayed node is still cached in the btrfs inode, so refs
1429	 * must be > 1 now, and we needn't check whether it is going to be
1430	 * freed or not.
1431	 *
1432	 * Besides that, this function is used for readdir, and we do not
1433	 * insert/delete delayed items during this period. So we also needn't
1434	 * requeue or dequeue this delayed node.
1435 */
1436 atomic_dec(&delayed_node->refs);
1437}
1438
1439void btrfs_put_delayed_items(struct list_head *ins_list,
1440 struct list_head *del_list)
1441{
1442 struct btrfs_delayed_item *curr, *next;
1443
1444 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1445 list_del(&curr->readdir_list);
1446 if (atomic_dec_and_test(&curr->refs))
1447 kfree(curr);
1448 }
1449
1450 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1451 list_del(&curr->readdir_list);
1452 if (atomic_dec_and_test(&curr->refs))
1453 kfree(curr);
1454 }
1455}
1456
1457int btrfs_should_delete_dir_index(struct list_head *del_list,
1458 u64 index)
1459{
1460 struct btrfs_delayed_item *curr, *next;
1461 int ret;
1462
1463 if (list_empty(del_list))
1464 return 0;
1465
1466 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1467 if (curr->key.offset > index)
1468 break;
1469
1470 list_del(&curr->readdir_list);
1471 ret = (curr->key.offset == index);
1472
1473 if (atomic_dec_and_test(&curr->refs))
1474 kfree(curr);
1475
1476 if (ret)
1477 return 1;
1478 else
1479 continue;
1480 }
1481 return 0;
1482}
1483
1484/*
1485 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1486 * Emits the directory entries that exist only as delayed insertion items.
1487 */
1488int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1489 filldir_t filldir,
1490 struct list_head *ins_list)
1491{
1492 struct btrfs_dir_item *di;
1493 struct btrfs_delayed_item *curr, *next;
1494 struct btrfs_key location;
1495 char *name;
1496 int name_len;
1497 int over = 0;
1498 unsigned char d_type;
1499
1500 if (list_empty(ins_list))
1501 return 0;
1502
1503 /*
1504	 * Changing the data of a delayed item is impossible, so we
1505	 * needn't lock it. And since we hold the i_mutex of the
1506	 * directory, nobody can delete any directory index now.
1507 */
1508 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1509 list_del(&curr->readdir_list);
1510
1511 if (curr->key.offset < filp->f_pos) {
1512 if (atomic_dec_and_test(&curr->refs))
1513 kfree(curr);
1514 continue;
1515 }
1516
1517 filp->f_pos = curr->key.offset;
1518
1519 di = (struct btrfs_dir_item *)curr->data;
1520 name = (char *)(di + 1);
1521 name_len = le16_to_cpu(di->name_len);
1522
1523 d_type = btrfs_filetype_table[di->type];
1524 btrfs_disk_key_to_cpu(&location, &di->location);
1525
1526 over = filldir(dirent, name, name_len, curr->key.offset,
1527 location.objectid, d_type);
1528
1529 if (atomic_dec_and_test(&curr->refs))
1530 kfree(curr);
1531
1532 if (over)
1533 return 1;
1534 }
1535 return 0;
1536}
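
Together with btrfs_get_delayed_items, btrfs_should_delete_dir_index and btrfs_put_delayed_items, this gives readdir a small merge protocol: pin the delayed items, filter the on-disk entries, then append the not-yet-committed ones. A sketch of the expected caller pattern (the loop over on-disk items is paraphrased, not copied from this patch):

    /* Sketch of the expected readdir caller pattern. */
    LIST_HEAD(ins_list);
    LIST_HEAD(del_list);

    btrfs_get_delayed_items(inode, &ins_list, &del_list);

    /* ... walk the on-disk dir index items, skipping every index for
     * which btrfs_should_delete_dir_index(&del_list, index) returns 1 ... */

    /* then emit the entries that exist only as delayed insertions: */
    btrfs_readdir_delayed_dir_index(filp, dirent, filldir, &ins_list);

    btrfs_put_delayed_items(&ins_list, &del_list);
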
1537
1538BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1539 generation, 64);
1540BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1541 sequence, 64);
1542BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1543 transid, 64);
1544BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1545BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1546 nbytes, 64);
1547BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1548 block_group, 64);
1549BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1550BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1551BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1552BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1553BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1554BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1555
1556BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1557BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
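
Each BTRFS_SETGET_STACK_FUNCS line expands to an endian-safe getter/setter pair for the in-memory ("stack") item. Roughly what the expansion looks like for the size field (a sketch; the real macro lives in ctree.h):

    /* Approximate expansion of BTRFS_SETGET_STACK_FUNCS(stack_inode_size,
     * struct btrfs_inode_item, size, 64): */
    static inline u64 btrfs_stack_inode_size(struct btrfs_inode_item *s)
    {
            return le64_to_cpu(s->size);
    }

    static inline void btrfs_set_stack_inode_size(struct btrfs_inode_item *s,
                                                  u64 val)
    {
            s->size = cpu_to_le64(val);
    }
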
1558
1559static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1560 struct btrfs_inode_item *inode_item,
1561 struct inode *inode)
1562{
1563 btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1564 btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1565 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1566 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1567 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1568 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1569 btrfs_set_stack_inode_generation(inode_item,
1570 BTRFS_I(inode)->generation);
1571 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item,
1576 BTRFS_I(inode)->block_group);
1577
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec);
1580 btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1581 inode->i_atime.tv_nsec);
1582
1583 btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1584 inode->i_mtime.tv_sec);
1585 btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1586 inode->i_mtime.tv_nsec);
1587
1588 btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1589 inode->i_ctime.tv_sec);
1590 btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1591 inode->i_ctime.tv_nsec);
1592}
1593
1594int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode)
1596{
1597 struct btrfs_delayed_node *delayed_node;
1598 int ret;
1599
1600 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node))
1602 return PTR_ERR(delayed_node);
1603
1604 mutex_lock(&delayed_node->mutex);
1605 if (delayed_node->inode_dirty) {
1606 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1607 goto release_node;
1608 }
1609
1610 ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1611 /*
1612	 * we must have reserved enough space when we started this transaction,
1613	 * so a metadata reservation failure here is impossible
1614 */
1615 BUG_ON(ret);
1616
1617 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1618 delayed_node->inode_dirty = 1;
1619 delayed_node->count++;
1620 atomic_inc(&root->fs_info->delayed_root->items);
1621release_node:
1622 mutex_unlock(&delayed_node->mutex);
1623 btrfs_release_delayed_node(delayed_node);
1624 return ret;
1625}
1626
1627static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1628{
1629 struct btrfs_root *root = delayed_node->root;
1630 struct btrfs_delayed_item *curr_item, *prev_item;
1631
1632 mutex_lock(&delayed_node->mutex);
1633 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1634 while (curr_item) {
1635 btrfs_delayed_item_release_metadata(root, curr_item);
1636 prev_item = curr_item;
1637 curr_item = __btrfs_next_delayed_item(prev_item);
1638 btrfs_release_delayed_item(prev_item);
1639 }
1640
1641 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1642 while (curr_item) {
1643 btrfs_delayed_item_release_metadata(root, curr_item);
1644 prev_item = curr_item;
1645 curr_item = __btrfs_next_delayed_item(prev_item);
1646 btrfs_release_delayed_item(prev_item);
1647 }
1648
1649 if (delayed_node->inode_dirty) {
1650 btrfs_delayed_inode_release_metadata(root, delayed_node);
1651 btrfs_release_delayed_inode(delayed_node);
1652 }
1653 mutex_unlock(&delayed_node->mutex);
1654}
1655
1656void btrfs_kill_delayed_inode_items(struct inode *inode)
1657{
1658 struct btrfs_delayed_node *delayed_node;
1659
1660 delayed_node = btrfs_get_delayed_node(inode);
1661 if (!delayed_node)
1662 return;
1663
1664 __btrfs_kill_delayed_node(delayed_node);
1665 btrfs_release_delayed_node(delayed_node);
1666}
1667
1668void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1669{
1670 u64 inode_id = 0;
1671 struct btrfs_delayed_node *delayed_nodes[8];
1672 int i, n;
1673
1674 while (1) {
1675 spin_lock(&root->inode_lock);
1676 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1677 (void **)delayed_nodes, inode_id,
1678 ARRAY_SIZE(delayed_nodes));
1679 if (!n) {
1680 spin_unlock(&root->inode_lock);
1681 break;
1682 }
1683
1684 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1685
1686 for (i = 0; i < n; i++)
1687 atomic_inc(&delayed_nodes[i]->refs);
1688 spin_unlock(&root->inode_lock);
1689
1690 for (i = 0; i < n; i++) {
1691 __btrfs_kill_delayed_node(delayed_nodes[i]);
1692 btrfs_release_delayed_node(delayed_nodes[i]);
1693 }
1694 }
1695}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
new file mode 100644
index 000000000000..eb7d240aa648
--- /dev/null
+++ b/fs/btrfs/delayed-inode.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#ifndef __DELAYED_TREE_OPERATION_H
21#define __DELAYED_TREE_OPERATION_H
22
23#include <linux/rbtree.h>
24#include <linux/spinlock.h>
25#include <linux/mutex.h>
26#include <linux/list.h>
27#include <linux/wait.h>
28#include <asm/atomic.h>
29
30#include "ctree.h"
31
32/* types of the delayed item */
33#define BTRFS_DELAYED_INSERTION_ITEM 1
34#define BTRFS_DELAYED_DELETION_ITEM 2
35
36struct btrfs_delayed_root {
37 spinlock_t lock;
38 struct list_head node_list;
39 /*
40	 * Used for delayed nodes which are waiting to be dealt with by the
41 * worker. If the delayed node is inserted into the work queue, we
42 * drop it from this list.
43 */
44 struct list_head prepare_list;
45 atomic_t items; /* for delayed items */
46 int nodes; /* for delayed nodes */
47 wait_queue_head_t wait;
48};
49
50struct btrfs_delayed_node {
51 u64 inode_id;
52 u64 bytes_reserved;
53 struct btrfs_root *root;
54 /* Used to add the node into the delayed root's node list. */
55 struct list_head n_list;
56 /*
57	 * Used to add the node into the prepare list; the nodes in this list
58	 * are waiting to be dealt with by the async worker.
59 */
60 struct list_head p_list;
61 struct rb_root ins_root;
62 struct rb_root del_root;
63 struct mutex mutex;
64 struct btrfs_inode_item inode_item;
65 atomic_t refs;
66 u64 index_cnt;
67 bool in_list;
68 bool inode_dirty;
69 int count;
70};
71
72struct btrfs_delayed_item {
73 struct rb_node rb_node;
74 struct btrfs_key key;
75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs;
81 int ins_or_del;
82 u32 data_len;
83 char data[0];
84};
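
data[0] is the C flexible-array idiom: the payload is co-allocated right behind the header, which is what btrfs_alloc_delayed_item() relies on with its kmalloc(sizeof(*item) + data_len, GFP_NOFS). A minimal standalone model of the idiom, with invented names:

    /* Minimal model of the co-allocation idiom (all names invented). */
    struct blob {
            u32 data_len;
            char data[0];       /* payload starts right after the header */
    };

    static struct blob *blob_alloc(u32 data_len)
    {
            struct blob *b = kmalloc(sizeof(*b) + data_len, GFP_NOFS);

            if (b)
                    b->data_len = data_len;
            return b;           /* b->data has room for data_len bytes */
    }
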
85
86static inline void btrfs_init_delayed_root(
87 struct btrfs_delayed_root *delayed_root)
88{
89 atomic_set(&delayed_root->items, 0);
90 delayed_root->nodes = 0;
91 spin_lock_init(&delayed_root->lock);
92 init_waitqueue_head(&delayed_root->wait);
93 INIT_LIST_HEAD(&delayed_root->node_list);
94 INIT_LIST_HEAD(&delayed_root->prepare_list);
95}
96
97int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
98 struct btrfs_root *root, const char *name,
99 int name_len, struct inode *dir,
100 struct btrfs_disk_key *disk_key, u8 type,
101 u64 index);
102
103int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
104 struct btrfs_root *root, struct inode *dir,
105 u64 index);
106
107int btrfs_inode_delayed_dir_index_count(struct inode *inode);
108
109int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
110 struct btrfs_root *root);
111
112void btrfs_balance_delayed_items(struct btrfs_root *root);
113
114int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
115 struct inode *inode);
116/* Used for evicting the inode. */
117void btrfs_remove_delayed_node(struct inode *inode);
118void btrfs_kill_delayed_inode_items(struct inode *inode);
119
120
121int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
122 struct btrfs_root *root, struct inode *inode);
123
124/* Used for dropping a dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
126
127/* Used for readdir() */
128void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
129 struct list_head *del_list);
130void btrfs_put_delayed_items(struct list_head *ins_list,
131 struct list_head *del_list);
132int btrfs_should_delete_dir_index(struct list_head *del_list,
133 u64 index);
134int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
135 filldir_t filldir,
136 struct list_head *ins_list);
137
138/* for init */
139int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void);
141#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index bce28f653899..125cf76fcd08 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -281,44 +281,6 @@ again:
281} 281}
282 282
283/* 283/*
284 * This checks to see if there are any delayed refs in the
285 * btree for a given bytenr. It returns one if it finds any
286 * and zero otherwise.
287 *
288 * If it only finds a head node, it returns 0.
289 *
290 * The idea is to use this when deciding if you can safely delete an
291 * extent from the extent allocation tree. There may be a pending
292 * ref in the rbtree that adds or removes references, so as long as this
293 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
294 * allocation tree.
295 */
296int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
297{
298 struct btrfs_delayed_ref_node *ref;
299 struct btrfs_delayed_ref_root *delayed_refs;
300 struct rb_node *prev_node;
301 int ret = 0;
302
303 delayed_refs = &trans->transaction->delayed_refs;
304 spin_lock(&delayed_refs->lock);
305
306 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
307 if (ref) {
308 prev_node = rb_prev(&ref->rb_node);
309 if (!prev_node)
310 goto out;
311 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
312 rb_node);
313 if (ref->bytenr == bytenr)
314 ret = 1;
315 }
316out:
317 spin_unlock(&delayed_refs->lock);
318 return ret;
319}
320
321/*
322 * helper function to update an extent delayed ref in the 284 * helper function to update an extent delayed ref in the
323 * rbtree. existing and update must both have the same 285 * rbtree. existing and update must both have the same
324 * bytenr and parent 286 * bytenr and parent
@@ -747,79 +709,3 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
747 return btrfs_delayed_node_to_head(ref); 709 return btrfs_delayed_node_to_head(ref);
748 return NULL; 710 return NULL;
749} 711}
750
751/*
752 * add a delayed ref to the tree. This does all of the accounting required
753 * to make sure the delayed ref is eventually processed before this
754 * transaction commits.
755 *
756 * The main point of this call is to add and remove a backreference in a single
757 * shot, taking the lock only once, and only searching for the head node once.
758 *
759 * It is the same as doing a ref add and delete in two separate calls.
760 */
761#if 0
762int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
763 u64 bytenr, u64 num_bytes, u64 orig_parent,
764 u64 parent, u64 orig_ref_root, u64 ref_root,
765 u64 orig_ref_generation, u64 ref_generation,
766 u64 owner_objectid, int pin)
767{
768 struct btrfs_delayed_ref *ref;
769 struct btrfs_delayed_ref *old_ref;
770 struct btrfs_delayed_ref_head *head_ref;
771 struct btrfs_delayed_ref_root *delayed_refs;
772 int ret;
773
774 ref = kmalloc(sizeof(*ref), GFP_NOFS);
775 if (!ref)
776 return -ENOMEM;
777
778 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
779 if (!old_ref) {
780 kfree(ref);
781 return -ENOMEM;
782 }
783
784 /*
785 * the parent = 0 case comes from cases where we don't actually
786 * know the parent yet. It will get updated later via a add/drop
787 * pair.
788 */
789 if (parent == 0)
790 parent = bytenr;
791 if (orig_parent == 0)
792 orig_parent = bytenr;
793
794 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
795 if (!head_ref) {
796 kfree(ref);
797 kfree(old_ref);
798 return -ENOMEM;
799 }
800 delayed_refs = &trans->transaction->delayed_refs;
801 spin_lock(&delayed_refs->lock);
802
803 /*
804 * insert both the head node and the new ref without dropping
805 * the spin lock
806 */
807 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
808 (u64)-1, 0, 0, 0,
809 BTRFS_UPDATE_DELAYED_HEAD, 0);
810 BUG_ON(ret);
811
812 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
813 parent, ref_root, ref_generation,
814 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
815 BUG_ON(ret);
816
817 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
818 orig_parent, orig_ref_root,
819 orig_ref_generation, owner_objectid,
820 BTRFS_DROP_DELAYED_REF, pin);
821 BUG_ON(ret);
822 spin_unlock(&delayed_refs->lock);
823 return 0;
824}
825#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50e3cf92fbda..e287e3b0eab0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -166,12 +166,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
166 166
167struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
170int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
171 u64 bytenr, u64 num_bytes, u64 orig_parent,
172 u64 parent, u64 orig_ref_root, u64 ref_root,
173 u64 orig_ref_generation, u64 ref_generation,
174 u64 owner_objectid, int pin);
175int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, 169int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
176 struct btrfs_delayed_ref_head *head); 170 struct btrfs_delayed_ref_head *head);
177int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 171int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index dd421c48c353..685f2593c4f0 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -123,8 +123,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
123 * to use for the second index (if one is created). 123 * to use for the second index (if one is created).
124 */ 124 */
125int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 125int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
126 *root, const char *name, int name_len, u64 dir, 126 *root, const char *name, int name_len,
127 struct btrfs_key *location, u8 type, u64 index) 127 struct inode *dir, struct btrfs_key *location,
128 u8 type, u64 index)
128{ 129{
129 int ret = 0; 130 int ret = 0;
130 int ret2 = 0; 131 int ret2 = 0;
@@ -136,13 +137,17 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
136 struct btrfs_disk_key disk_key; 137 struct btrfs_disk_key disk_key;
137 u32 data_size; 138 u32 data_size;
138 139
139 key.objectid = dir; 140 key.objectid = btrfs_ino(dir);
140 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
141 key.offset = btrfs_name_hash(name, name_len); 142 key.offset = btrfs_name_hash(name, name_len);
142 143
143 path = btrfs_alloc_path(); 144 path = btrfs_alloc_path();
145 if (!path)
146 return -ENOMEM;
144 path->leave_spinning = 1; 147 path->leave_spinning = 1;
145 148
149 btrfs_cpu_key_to_disk(&disk_key, location);
150
146 data_size = sizeof(*dir_item) + name_len; 151 data_size = sizeof(*dir_item) + name_len;
147 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 152 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
148 name, name_len); 153 name, name_len);
@@ -154,7 +159,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
154 } 159 }
155 160
156 leaf = path->nodes[0]; 161 leaf = path->nodes[0];
157 btrfs_cpu_key_to_disk(&disk_key, location);
158 btrfs_set_dir_item_key(leaf, dir_item, &disk_key); 162 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
159 btrfs_set_dir_type(leaf, dir_item, type); 163 btrfs_set_dir_type(leaf, dir_item, type);
160 btrfs_set_dir_data_len(leaf, dir_item, 0); 164 btrfs_set_dir_data_len(leaf, dir_item, 0);
@@ -171,29 +175,11 @@ second_insert:
171 ret = 0; 175 ret = 0;
172 goto out_free; 176 goto out_free;
173 } 177 }
174 btrfs_release_path(root, path); 178 btrfs_release_path(path);
175
176 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
177 key.offset = index;
178 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
179 name, name_len);
180 if (IS_ERR(dir_item)) {
181 ret2 = PTR_ERR(dir_item);
182 goto out_free;
183 }
184 leaf = path->nodes[0];
185 btrfs_cpu_key_to_disk(&disk_key, location);
186 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
187 btrfs_set_dir_type(leaf, dir_item, type);
188 btrfs_set_dir_data_len(leaf, dir_item, 0);
189 btrfs_set_dir_name_len(leaf, dir_item, name_len);
190 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
191 name_ptr = (unsigned long)(dir_item + 1);
192 write_extent_buffer(leaf, name, name_ptr, name_len);
193 btrfs_mark_buffer_dirty(leaf);
194 179
180 ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
181 &disk_key, type, index);
195out_free: 182out_free:
196
197 btrfs_free_path(path); 183 btrfs_free_path(path);
198 if (ret) 184 if (ret)
199 return ret; 185 return ret;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index deba3d9c8853..16d335b342a2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h>
32#include <asm/unaligned.h> 33#include <asm/unaligned.h>
33#include "compat.h" 34#include "compat.h"
34#include "ctree.h" 35#include "ctree.h"
@@ -41,6 +42,7 @@
41#include "locking.h" 42#include "locking.h"
42#include "tree-log.h" 43#include "tree-log.h"
43#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h"
44 46
45static struct extent_io_ops btree_extent_io_ops; 47static struct extent_io_ops btree_extent_io_ops;
46static void end_workqueue_fn(struct btrfs_work *work); 48static void end_workqueue_fn(struct btrfs_work *work);
@@ -137,7 +139,7 @@ static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
137 * that covers the entire device 139 * that covers the entire device
138 */ 140 */
139static struct extent_map *btree_get_extent(struct inode *inode, 141static struct extent_map *btree_get_extent(struct inode *inode,
140 struct page *page, size_t page_offset, u64 start, u64 len, 142 struct page *page, size_t pg_offset, u64 start, u64 len,
141 int create) 143 int create)
142{ 144{
143 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 145 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -154,7 +156,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
154 } 156 }
155 read_unlock(&em_tree->lock); 157 read_unlock(&em_tree->lock);
156 158
157 em = alloc_extent_map(GFP_NOFS); 159 em = alloc_extent_map();
158 if (!em) { 160 if (!em) {
159 em = ERR_PTR(-ENOMEM); 161 em = ERR_PTR(-ENOMEM);
160 goto out; 162 goto out;
@@ -254,14 +256,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
254 memcpy(&found, result, csum_size); 256 memcpy(&found, result, csum_size);
255 257
256 read_extent_buffer(buf, &val, 0, csum_size); 258 read_extent_buffer(buf, &val, 0, csum_size);
257 if (printk_ratelimit()) { 259 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
258 printk(KERN_INFO "btrfs: %s checksum verify "
259 "failed on %llu wanted %X found %X " 260 "failed on %llu wanted %X found %X "
260 "level %d\n", 261 "level %d\n",
261 root->fs_info->sb->s_id, 262 root->fs_info->sb->s_id,
262 (unsigned long long)buf->start, val, found, 263 (unsigned long long)buf->start, val, found,
263 btrfs_header_level(buf)); 264 btrfs_header_level(buf));
264 }
265 if (result != (char *)&inline_result) 265 if (result != (char *)&inline_result)
266 kfree(result); 266 kfree(result);
267 return 1; 267 return 1;
@@ -296,13 +296,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
296 ret = 0; 296 ret = 0;
297 goto out; 297 goto out;
298 } 298 }
299 if (printk_ratelimit()) { 299 printk_ratelimited("parent transid verify failed on %llu wanted %llu "
300 printk("parent transid verify failed on %llu wanted %llu "
301 "found %llu\n", 300 "found %llu\n",
302 (unsigned long long)eb->start, 301 (unsigned long long)eb->start,
303 (unsigned long long)parent_transid, 302 (unsigned long long)parent_transid,
304 (unsigned long long)btrfs_header_generation(eb)); 303 (unsigned long long)btrfs_header_generation(eb));
305 }
306 ret = 1; 304 ret = 1;
307 clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 305 clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
308out: 306out:
@@ -380,7 +378,7 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
380 len = page->private >> 2; 378 len = page->private >> 2;
381 WARN_ON(len == 0); 379 WARN_ON(len == 0);
382 380
383 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 381 eb = alloc_extent_buffer(tree, start, len, page);
384 if (eb == NULL) { 382 if (eb == NULL) {
385 WARN_ON(1); 383 WARN_ON(1);
386 goto out; 384 goto out;
@@ -525,7 +523,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
525 len = page->private >> 2; 523 len = page->private >> 2;
526 WARN_ON(len == 0); 524 WARN_ON(len == 0);
527 525
528 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 526 eb = alloc_extent_buffer(tree, start, len, page);
529 if (eb == NULL) { 527 if (eb == NULL) {
530 ret = -EIO; 528 ret = -EIO;
531 goto out; 529 goto out;
@@ -533,12 +531,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
533 531
534 found_start = btrfs_header_bytenr(eb); 532 found_start = btrfs_header_bytenr(eb);
535 if (found_start != start) { 533 if (found_start != start) {
536 if (printk_ratelimit()) { 534 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
537 printk(KERN_INFO "btrfs bad tree block start "
538 "%llu %llu\n", 535 "%llu %llu\n",
539 (unsigned long long)found_start, 536 (unsigned long long)found_start,
540 (unsigned long long)eb->start); 537 (unsigned long long)eb->start);
541 }
542 ret = -EIO; 538 ret = -EIO;
543 goto err; 539 goto err;
544 } 540 }
@@ -550,10 +546,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
550 goto err; 546 goto err;
551 } 547 }
552 if (check_tree_block_fsid(root, eb)) { 548 if (check_tree_block_fsid(root, eb)) {
553 if (printk_ratelimit()) { 549 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
554 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
555 (unsigned long long)eb->start); 550 (unsigned long long)eb->start);
556 }
557 ret = -EIO; 551 ret = -EIO;
558 goto err; 552 goto err;
559 } 553 }
@@ -650,12 +644,6 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
650 return 256 * limit; 644 return 256 * limit;
651} 645}
652 646
653int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
654{
655 return atomic_read(&info->nr_async_bios) >
656 btrfs_async_submit_limit(info);
657}
658
659static void run_one_async_start(struct btrfs_work *work) 647static void run_one_async_start(struct btrfs_work *work)
660{ 648{
661 struct async_submit_bio *async; 649 struct async_submit_bio *async;
@@ -963,7 +951,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
963 struct inode *btree_inode = root->fs_info->btree_inode; 951 struct inode *btree_inode = root->fs_info->btree_inode;
964 struct extent_buffer *eb; 952 struct extent_buffer *eb;
965 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 953 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
966 bytenr, blocksize, GFP_NOFS); 954 bytenr, blocksize);
967 return eb; 955 return eb;
968} 956}
969 957
@@ -974,7 +962,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
974 struct extent_buffer *eb; 962 struct extent_buffer *eb;
975 963
976 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 964 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
977 bytenr, blocksize, NULL, GFP_NOFS); 965 bytenr, blocksize, NULL);
978 return eb; 966 return eb;
979} 967}
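Both helpers above now call into extent_io.c without a gfp_t argument: alloc_extent_buffer() and find_extent_buffer() (and, further down, extent_io_tree_init() and extent_map_tree_init()) pick their allocation mask internally instead of having every caller thread GFP_NOFS through. The prototypes implied by the call sites in this diff (reconstructed, so treat as a sketch):

    /* before */
    struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                              u64 start, unsigned long len,
                                              struct page *page0, gfp_t mask);
    /* after: the mask is chosen inside extent_io.c (GFP_NOFS here) */
    struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                              u64 start, unsigned long len,
                                              struct page *page0);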
980 968
@@ -1058,6 +1046,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1058 root->name = NULL; 1046 root->name = NULL;
1059 root->in_sysfs = 0; 1047 root->in_sysfs = 0;
1060 root->inode_tree = RB_ROOT; 1048 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1061 root->block_rsv = NULL; 1050 root->block_rsv = NULL;
1062 root->orphan_block_rsv = NULL; 1051 root->orphan_block_rsv = NULL;
1063 1052
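__setup_root() now initializes a per-root radix tree for the delayed-inode code this series adds; it maps inode numbers to struct btrfs_delayed_node, and GFP_ATOMIC is used because insertions can happen while spinlocks are held. A hedged sketch of the lookup side (the real locking lives in delayed-inode.c; the lock named here is an assumption, illustrative only):

    /* illustrative: find a cached delayed node for an inode number */
    struct btrfs_delayed_node *node;

    spin_lock(&root->inode_lock);    /* assumption: a per-root lock */
    node = radix_tree_lookup(&root->delayed_nodes_tree, (unsigned long)ino);
    spin_unlock(&root->inode_lock);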
@@ -1079,7 +1068,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1079 root->log_transid = 0; 1068 root->log_transid = 0;
1080 root->last_log_commit = 0; 1069 root->last_log_commit = 0;
1081 extent_io_tree_init(&root->dirty_log_pages, 1070 extent_io_tree_init(&root->dirty_log_pages,
1082 fs_info->btree_inode->i_mapping, GFP_NOFS); 1071 fs_info->btree_inode->i_mapping);
1083 1072
1084 memset(&root->root_key, 0, sizeof(root->root_key)); 1073 memset(&root->root_key, 0, sizeof(root->root_key));
1085 memset(&root->root_item, 0, sizeof(root->root_item)); 1074 memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1282,21 +1271,6 @@ out:
1282 return root; 1271 return root;
1283} 1272}
1284 1273
1285struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1286 u64 root_objectid)
1287{
1288 struct btrfs_root *root;
1289
1290 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
1291 return fs_info->tree_root;
1292 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
1293 return fs_info->extent_root;
1294
1295 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1296 (unsigned long)root_objectid);
1297 return root;
1298}
1299
1300struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1274struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1301 struct btrfs_key *location) 1275 struct btrfs_key *location)
1302{ 1276{
@@ -1325,6 +1299,19 @@ again:
1325 if (IS_ERR(root)) 1299 if (IS_ERR(root))
1326 return root; 1300 return root;
1327 1301
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS);
1307 if (!root->free_ino_pinned)
1308 goto fail;
1309
1310 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait);
1314
1328 set_anon_super(&root->anon_super, NULL); 1315 set_anon_super(&root->anon_super, NULL);
1329 1316
1330 if (btrfs_root_refs(&root->root_item) == 0) { 1317 if (btrfs_root_refs(&root->root_item) == 0) {
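btrfs_read_fs_root_no_name() now sets up the cached-inode-number machinery per subvolume root: free_ino_ctl tracks inode numbers free for reuse, and free_ino_pinned holds numbers freed in the running transaction (they stay pinned until commit). Both allocations jump to the common fail label on failure, which only works because the teardown tolerates half-initialized state; condensed, the pattern is:

    /* sketch of the allocation pattern above; 'fail' ends up in
     * free_fs_root(), where kfree(NULL) makes partial setup safe */
    root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
    root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), GFP_NOFS);
    if (!root->free_ino_ctl || !root->free_ino_pinned)
            goto fail;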
@@ -1368,41 +1355,6 @@ fail:
1368 return ERR_PTR(ret); 1355 return ERR_PTR(ret);
1369} 1356}
1370 1357
1371struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1372 struct btrfs_key *location,
1373 const char *name, int namelen)
1374{
1375 return btrfs_read_fs_root_no_name(fs_info, location);
1376#if 0
1377 struct btrfs_root *root;
1378 int ret;
1379
1380 root = btrfs_read_fs_root_no_name(fs_info, location);
1381 if (!root)
1382 return NULL;
1383
1384 if (root->in_sysfs)
1385 return root;
1386
1387 ret = btrfs_set_root_name(root, name, namelen);
1388 if (ret) {
1389 free_extent_buffer(root->node);
1390 kfree(root);
1391 return ERR_PTR(ret);
1392 }
1393
1394 ret = btrfs_sysfs_add_root(root);
1395 if (ret) {
1396 free_extent_buffer(root->node);
1397 kfree(root->name);
1398 kfree(root);
1399 return ERR_PTR(ret);
1400 }
1401 root->in_sysfs = 1;
1402 return root;
1403#endif
1404}
1405
1406static int btrfs_congested_fn(void *congested_data, int bdi_bits) 1358static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1407{ 1359{
1408 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1360 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
@@ -1612,7 +1564,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1612 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1564 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1613 GFP_NOFS); 1565 GFP_NOFS);
1614 struct btrfs_root *tree_root = btrfs_sb(sb); 1566 struct btrfs_root *tree_root = btrfs_sb(sb);
1615 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1567 struct btrfs_fs_info *fs_info = NULL;
1616 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1568 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1617 GFP_NOFS); 1569 GFP_NOFS);
1618 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1570 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1624,11 +1576,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1624 1576
1625 struct btrfs_super_block *disk_super; 1577 struct btrfs_super_block *disk_super;
1626 1578
1627 if (!extent_root || !tree_root || !fs_info || 1579 if (!extent_root || !tree_root || !tree_root->fs_info ||
1628 !chunk_root || !dev_root || !csum_root) { 1580 !chunk_root || !dev_root || !csum_root) {
1629 err = -ENOMEM; 1581 err = -ENOMEM;
1630 goto fail; 1582 goto fail;
1631 } 1583 }
1584 fs_info = tree_root->fs_info;
1632 1585
1633 ret = init_srcu_struct(&fs_info->subvol_srcu); 1586 ret = init_srcu_struct(&fs_info->subvol_srcu);
1634 if (ret) { 1587 if (ret) {
@@ -1694,6 +1647,24 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1694 1647
1695 INIT_LIST_HEAD(&fs_info->ordered_extents); 1648 INIT_LIST_HEAD(&fs_info->ordered_extents);
1696 spin_lock_init(&fs_info->ordered_extent_lock); 1649 spin_lock_init(&fs_info->ordered_extent_lock);
1650 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
1651 GFP_NOFS);
1652 if (!fs_info->delayed_root) {
1653 err = -ENOMEM;
1654 goto fail_iput;
1655 }
1656 btrfs_init_delayed_root(fs_info->delayed_root);
1657
1658 mutex_init(&fs_info->scrub_lock);
1659 atomic_set(&fs_info->scrubs_running, 0);
1660 atomic_set(&fs_info->scrub_pause_req, 0);
1661 atomic_set(&fs_info->scrubs_paused, 0);
1662 atomic_set(&fs_info->scrub_cancel_req, 0);
1663 init_waitqueue_head(&fs_info->scrub_pause_wait);
1664 init_rwsem(&fs_info->scrub_super_lock);
1665 fs_info->scrub_workers_refcnt = 0;
1666 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1667 fs_info->thread_pool_size, &fs_info->generic_worker);
1697 1668
1698 sb->s_blocksize = 4096; 1669 sb->s_blocksize = 4096;
1699 sb->s_blocksize_bits = blksize_bits(4096); 1670 sb->s_blocksize_bits = blksize_bits(4096);
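open_ctree() grows two blocks of new state here. The delayed-item root is allocated and initialized early (and, as the error-path hunks below show, must be freed on every failure after this point). The scrub fields form a pause/cancel handshake between an admin request and running scrub workers. A hedged illustration of how such a counter-based handshake typically pairs up (illustrative; the real logic is in scrub.c):

    /* requester: ask all scrubs to pause, wait until they have */
    atomic_inc(&fs_info->scrub_pause_req);
    wait_event(fs_info->scrub_pause_wait,
               atomic_read(&fs_info->scrubs_paused) ==
               atomic_read(&fs_info->scrubs_running));

    /* scrub worker: park while a pause is requested */
    if (atomic_read(&fs_info->scrub_pause_req)) {
            atomic_inc(&fs_info->scrubs_paused);
            wake_up(&fs_info->scrub_pause_wait);
            wait_event(fs_info->scrub_pause_wait,
                       atomic_read(&fs_info->scrub_pause_req) == 0);
            atomic_dec(&fs_info->scrubs_paused);
    }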
@@ -1712,10 +1683,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1712 1683
1713 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1684 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1714 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1685 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1715 fs_info->btree_inode->i_mapping, 1686 fs_info->btree_inode->i_mapping);
1716 GFP_NOFS); 1687 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
1717 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
1718 GFP_NOFS);
1719 1688
1720 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1689 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1721 1690
@@ -1729,9 +1698,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1729 fs_info->block_group_cache_tree = RB_ROOT; 1698 fs_info->block_group_cache_tree = RB_ROOT;
1730 1699
1731 extent_io_tree_init(&fs_info->freed_extents[0], 1700 extent_io_tree_init(&fs_info->freed_extents[0],
1732 fs_info->btree_inode->i_mapping, GFP_NOFS); 1701 fs_info->btree_inode->i_mapping);
1733 extent_io_tree_init(&fs_info->freed_extents[1], 1702 extent_io_tree_init(&fs_info->freed_extents[1],
1734 fs_info->btree_inode->i_mapping, GFP_NOFS); 1703 fs_info->btree_inode->i_mapping);
1735 fs_info->pinned_extents = &fs_info->freed_extents[0]; 1704 fs_info->pinned_extents = &fs_info->freed_extents[0];
1736 fs_info->do_barriers = 1; 1705 fs_info->do_barriers = 1;
1737 1706
@@ -1761,7 +1730,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1761 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1730 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1762 if (!bh) { 1731 if (!bh) {
1763 err = -EINVAL; 1732 err = -EINVAL;
1764 goto fail_iput; 1733 goto fail_alloc;
1765 } 1734 }
1766 1735
1767 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1736 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
@@ -1773,7 +1742,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1773 1742
1774 disk_super = &fs_info->super_copy; 1743 disk_super = &fs_info->super_copy;
1775 if (!btrfs_super_root(disk_super)) 1744 if (!btrfs_super_root(disk_super))
1776 goto fail_iput; 1745 goto fail_alloc;
1777 1746
1778 /* check FS state, whether FS is broken. */ 1747 /* check FS state, whether FS is broken. */
1779 fs_info->fs_state |= btrfs_super_flags(disk_super); 1748 fs_info->fs_state |= btrfs_super_flags(disk_super);
@@ -1789,7 +1758,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1789 ret = btrfs_parse_options(tree_root, options); 1758 ret = btrfs_parse_options(tree_root, options);
1790 if (ret) { 1759 if (ret) {
1791 err = ret; 1760 err = ret;
1792 goto fail_iput; 1761 goto fail_alloc;
1793 } 1762 }
1794 1763
1795 features = btrfs_super_incompat_flags(disk_super) & 1764 features = btrfs_super_incompat_flags(disk_super) &
@@ -1799,7 +1768,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1799 "unsupported optional features (%Lx).\n", 1768 "unsupported optional features (%Lx).\n",
1800 (unsigned long long)features); 1769 (unsigned long long)features);
1801 err = -EINVAL; 1770 err = -EINVAL;
1802 goto fail_iput; 1771 goto fail_alloc;
1803 } 1772 }
1804 1773
1805 features = btrfs_super_incompat_flags(disk_super); 1774 features = btrfs_super_incompat_flags(disk_super);
@@ -1815,7 +1784,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1815 "unsupported option features (%Lx).\n", 1784 "unsupported option features (%Lx).\n",
1816 (unsigned long long)features); 1785 (unsigned long long)features);
1817 err = -EINVAL; 1786 err = -EINVAL;
1818 goto fail_iput; 1787 goto fail_alloc;
1819 } 1788 }
1820 1789
1821 btrfs_init_workers(&fs_info->generic_worker, 1790 btrfs_init_workers(&fs_info->generic_worker,
@@ -1862,6 +1831,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1862 &fs_info->generic_worker); 1831 &fs_info->generic_worker);
1863 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 1832 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1864 1, &fs_info->generic_worker); 1833 1, &fs_info->generic_worker);
1834 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1835 fs_info->thread_pool_size,
1836 &fs_info->generic_worker);
1865 1837
1866 /* 1838 /*
1867 * endios are largely parallel and should have a very 1839 * endios are largely parallel and should have a very
@@ -1883,6 +1855,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1883 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1855 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1884 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1856 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1885 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1857 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1858 btrfs_start_workers(&fs_info->delayed_workers, 1);
1886 1859
1887 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1860 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1888 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1861 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
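A dedicated "delayed-meta" worker pool is added alongside the existing endio pools, sized like the other metadata pools and started with one initial thread; it flushes delayed inode and dir-index items in the background. Work is handed to such a pool through the async-thread.c primitives; roughly (my_flush_work is hypothetical):

    struct btrfs_work work;

    work.func = my_flush_work;   /* hypothetical callback */
    work.flags = 0;
    btrfs_queue_worker(&fs_info->delayed_workers, &work);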
@@ -2139,6 +2112,9 @@ fail_sb_buffer:
2139 btrfs_stop_workers(&fs_info->endio_write_workers); 2112 btrfs_stop_workers(&fs_info->endio_write_workers);
2140 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2113 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2141 btrfs_stop_workers(&fs_info->submit_workers); 2114 btrfs_stop_workers(&fs_info->submit_workers);
2115 btrfs_stop_workers(&fs_info->delayed_workers);
2116fail_alloc:
2117 kfree(fs_info->delayed_root);
2142fail_iput: 2118fail_iput:
2143 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2119 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2144 iput(fs_info->btree_inode); 2120 iput(fs_info->btree_inode);
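The error-path reshuffle: every failure after the delayed_root allocation that used to jump to fail_iput now jumps to the new fail_alloc label, which kfree()s delayed_root and falls through into fail_iput, keeping the unwind in reverse acquisition order. A self-contained sketch of the idiom in plain C (stand-in allocations, not btrfs code):

    #include <stdlib.h>

    static int setup(void)
    {
            void *a, *b;

            a = malloc(64);
            if (!a)
                    return -1;
            b = malloc(64);
            if (!b)
                    goto fail_a;
            if (0 /* some later step fails */)
                    goto fail_b;
            return 0;       /* success: a and b stay allocated for later use */
    fail_b:
            free(b);        /* labels release resources in reverse order */
    fail_a:
            free(a);
            return -1;
    }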
@@ -2166,11 +2142,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2166 if (uptodate) { 2142 if (uptodate) {
2167 set_buffer_uptodate(bh); 2143 set_buffer_uptodate(bh);
2168 } else { 2144 } else {
2169 if (printk_ratelimit()) { 2145 printk_ratelimited(KERN_WARNING "lost page write due to "
2170 printk(KERN_WARNING "lost page write due to "
2171 "I/O error on %s\n", 2146 "I/O error on %s\n",
2172 bdevname(bh->b_bdev, b)); 2147 bdevname(bh->b_bdev, b));
2173 }
2174 /* note, we don't set_buffer_write_io_error because we have 2148
2175 * our own ways of dealing with the IO errors 2149 * our own ways of dealing with the IO errors
2176 */ 2150 */
@@ -2405,12 +2379,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2405 if (btrfs_root_refs(&root->root_item) == 0) 2379 if (btrfs_root_refs(&root->root_item) == 0)
2406 synchronize_srcu(&fs_info->subvol_srcu); 2380 synchronize_srcu(&fs_info->subvol_srcu);
2407 2381
2382 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2383 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2408 free_fs_root(root); 2384 free_fs_root(root);
2409 return 0; 2385 return 0;
2410} 2386}
2411 2387
2412static void free_fs_root(struct btrfs_root *root) 2388static void free_fs_root(struct btrfs_root *root)
2413{ 2389{
2390 iput(root->cache_inode);
2414 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2391 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2415 if (root->anon_super.s_dev) { 2392 if (root->anon_super.s_dev) {
2416 down_write(&root->anon_super.s_umount); 2393 down_write(&root->anon_super.s_umount);
@@ -2418,6 +2395,8 @@ static void free_fs_root(struct btrfs_root *root)
2418 } 2395 }
2419 free_extent_buffer(root->node); 2396 free_extent_buffer(root->node);
2420 free_extent_buffer(root->commit_root); 2397 free_extent_buffer(root->commit_root);
2398 kfree(root->free_ino_ctl);
2399 kfree(root->free_ino_pinned);
2421 kfree(root->name); 2400 kfree(root->name);
2422 kfree(root); 2401 kfree(root);
2423} 2402}
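Teardown mirrors the new setup: btrfs_free_fs_root() drains the two free-ino caches before free_fs_root() releases the cache inode and the control structures themselves. The order matters, and the release side leans on NULL-safe primitives so a partially built root unwinds through the same path:

    /* sketch of the release order above (names from this diff) */
    __btrfs_remove_free_space_cache(root->free_ino_pinned); /* drain first */
    __btrfs_remove_free_space_cache(root->free_ino_ctl);
    free_fs_root(root);   /* iput(NULL) and kfree(NULL) are no-ops */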
@@ -2521,6 +2500,7 @@ int close_ctree(struct btrfs_root *root)
2521 fs_info->closing = 1; 2500 fs_info->closing = 1;
2522 smp_mb(); 2501 smp_mb();
2523 2502
2503 btrfs_scrub_cancel(root);
2524 btrfs_put_block_group_cache(fs_info); 2504 btrfs_put_block_group_cache(fs_info);
2525 2505
2526 /* 2506 /*
@@ -2579,6 +2559,7 @@ int close_ctree(struct btrfs_root *root)
2579 del_fs_roots(fs_info); 2559 del_fs_roots(fs_info);
2580 2560
2581 iput(fs_info->btree_inode); 2561 iput(fs_info->btree_inode);
2562 kfree(fs_info->delayed_root);
2582 2563
2583 btrfs_stop_workers(&fs_info->generic_worker); 2564 btrfs_stop_workers(&fs_info->generic_worker);
2584 btrfs_stop_workers(&fs_info->fixup_workers); 2565 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2590,6 +2571,7 @@ int close_ctree(struct btrfs_root *root)
2590 btrfs_stop_workers(&fs_info->endio_write_workers); 2571 btrfs_stop_workers(&fs_info->endio_write_workers);
2591 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2572 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2592 btrfs_stop_workers(&fs_info->submit_workers); 2573 btrfs_stop_workers(&fs_info->submit_workers);
2574 btrfs_stop_workers(&fs_info->delayed_workers);
2593 2575
2594 btrfs_close_devices(fs_info->fs_devices); 2576 btrfs_close_devices(fs_info->fs_devices);
2595 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2577 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2666,6 +2648,29 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2666 if (current->flags & PF_MEMALLOC) 2648 if (current->flags & PF_MEMALLOC)
2667 return; 2649 return;
2668 2650
2651 btrfs_balance_delayed_items(root);
2652
2653 num_dirty = root->fs_info->dirty_metadata_bytes;
2654
2655 if (num_dirty > thresh) {
2656 balance_dirty_pages_ratelimited_nr(
2657 root->fs_info->btree_inode->i_mapping, 1);
2658 }
2659 return;
2660}
2661
2662void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2663{
2664 /*
2665 * looks as though older kernels can get into trouble with
2666 * this code, they end up stuck in balance_dirty_pages forever
2667 */
2668 u64 num_dirty;
2669 unsigned long thresh = 32 * 1024 * 1024;
2670
2671 if (current->flags & PF_MEMALLOC)
2672 return;
2673
2669 num_dirty = root->fs_info->dirty_metadata_bytes; 2674 num_dirty = root->fs_info->dirty_metadata_bytes;
2670 2675
2671 if (num_dirty > thresh) { 2676 if (num_dirty > thresh) {
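btrfs_btree_balance_dirty() is split in two: the normal entry point first pushes a batch of delayed items via btrfs_balance_delayed_items() and then throttles on dirty metadata, while the new __btrfs_btree_balance_dirty() skips the delayed-item step for callers that are already inside delayed-item processing and must not re-enter it. The threshold logic is duplicated verbatim; a shared helper would avoid that (hypothetical refactor, not part of this patch):

    static void throttle_dirty_metadata(struct btrfs_root *root)
    {
            unsigned long thresh = 32 * 1024 * 1024;

            if (root->fs_info->dirty_metadata_bytes > thresh)
                    balance_dirty_pages_ratelimited_nr(
                            root->fs_info->btree_inode->i_mapping, 1);
    }

    void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
    {
            if (current->flags & PF_MEMALLOC)
                    return;
            btrfs_balance_delayed_items(root);
            throttle_dirty_metadata(root);
    }

    void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
    {
            if (current->flags & PF_MEMALLOC)
                    return;
            throttle_dirty_metadata(root);
    }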
@@ -2698,7 +2703,7 @@ int btree_lock_page_hook(struct page *page)
2698 goto out; 2703 goto out;
2699 2704
2700 len = page->private >> 2; 2705 len = page->private >> 2;
2701 eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); 2706 eb = find_extent_buffer(io_tree, bytenr, len);
2702 if (!eb) 2707 if (!eb)
2703 goto out; 2708 goto out;
2704 2709
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 07b20dc2fd95..a0b610a67aae 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -55,35 +55,20 @@ int btrfs_commit_super(struct btrfs_root *root);
55int btrfs_error_commit_super(struct btrfs_root *root); 55int btrfs_error_commit_super(struct btrfs_root *root);
56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
57 u64 bytenr, u32 blocksize); 57 u64 bytenr, u32 blocksize);
58struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
59 u64 root_objectid);
60struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
61 struct btrfs_key *location,
62 const char *name, int namelen);
63struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 58struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
64 struct btrfs_key *location); 59 struct btrfs_key *location);
65struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 60struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
66 struct btrfs_key *location); 61 struct btrfs_key *location);
67int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 62int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
68int btrfs_insert_dev_radix(struct btrfs_root *root,
69 struct block_device *bdev,
70 u64 device_id,
71 u64 block_start,
72 u64 num_blocks);
73void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 63void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
64void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
74int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 65int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
75void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 66void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
76void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
77int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 67int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
78int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 68int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
79int wait_on_tree_block_writeback(struct btrfs_root *root,
80 struct extent_buffer *buf);
81int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 69int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
82u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 70u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
83void btrfs_csum_final(u32 crc, char *result); 71void btrfs_csum_final(u32 crc, char *result);
84int btrfs_open_device(struct btrfs_device *dev);
85int btrfs_verify_block_csum(struct btrfs_root *root,
86 struct extent_buffer *buf);
87int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 72int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
88 int metadata); 73 int metadata);
89int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 74int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
@@ -91,8 +76,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
91 unsigned long bio_flags, u64 bio_offset, 76 unsigned long bio_flags, u64 bio_offset,
92 extent_submit_bio_hook_t *submit_bio_start, 77 extent_submit_bio_hook_t *submit_bio_start,
93 extent_submit_bio_hook_t *submit_bio_done); 78 extent_submit_bio_hook_t *submit_bio_done);
94
95int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
96unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); 79unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
97int btrfs_write_tree_block(struct extent_buffer *buf); 80int btrfs_write_tree_block(struct extent_buffer *buf);
98int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 81int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index b4ffad859adb..1b8dc33778f9 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -32,7 +32,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
32 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
33 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
34 34
35 fid->objectid = inode->i_ino; 35 fid->objectid = btrfs_ino(inode);
36 fid->root_objectid = BTRFS_I(inode)->root->objectid; 36 fid->root_objectid = BTRFS_I(inode)->root->objectid;
37 fid->gen = inode->i_generation; 37 fid->gen = inode->i_generation;
38 38
@@ -178,13 +178,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
178 if (!path) 178 if (!path)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 180
181 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 181 if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
182 key.objectid = root->root_key.objectid; 182 key.objectid = root->root_key.objectid;
183 key.type = BTRFS_ROOT_BACKREF_KEY; 183 key.type = BTRFS_ROOT_BACKREF_KEY;
184 key.offset = (u64)-1; 184 key.offset = (u64)-1;
185 root = root->fs_info->tree_root; 185 root = root->fs_info->tree_root;
186 } else { 186 } else {
187 key.objectid = dir->i_ino; 187 key.objectid = btrfs_ino(dir);
188 key.type = BTRFS_INODE_REF_KEY; 188 key.type = BTRFS_INODE_REF_KEY;
189 key.offset = (u64)-1; 189 key.offset = (u64)-1;
190 } 190 }
@@ -244,6 +244,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
244 struct btrfs_key key; 244 struct btrfs_key key;
245 int name_len; 245 int name_len;
246 int ret; 246 int ret;
247 u64 ino;
247 248
248 if (!dir || !inode) 249 if (!dir || !inode)
249 return -EINVAL; 250 return -EINVAL;
@@ -251,19 +252,21 @@ static int btrfs_get_name(struct dentry *parent, char *name,
251 if (!S_ISDIR(dir->i_mode)) 252 if (!S_ISDIR(dir->i_mode))
252 return -EINVAL; 253 return -EINVAL;
253 254
255 ino = btrfs_ino(inode);
256
254 path = btrfs_alloc_path(); 257 path = btrfs_alloc_path();
255 if (!path) 258 if (!path)
256 return -ENOMEM; 259 return -ENOMEM;
257 path->leave_spinning = 1; 260 path->leave_spinning = 1;
258 261
259 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 262 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
260 key.objectid = BTRFS_I(inode)->root->root_key.objectid; 263 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
261 key.type = BTRFS_ROOT_BACKREF_KEY; 264 key.type = BTRFS_ROOT_BACKREF_KEY;
262 key.offset = (u64)-1; 265 key.offset = (u64)-1;
263 root = root->fs_info->tree_root; 266 root = root->fs_info->tree_root;
264 } else { 267 } else {
265 key.objectid = inode->i_ino; 268 key.objectid = ino;
266 key.offset = dir->i_ino; 269 key.offset = btrfs_ino(dir);
267 key.type = BTRFS_INODE_REF_KEY; 270 key.type = BTRFS_INODE_REF_KEY;
268 } 271 }
269 272
@@ -272,7 +275,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
272 btrfs_free_path(path); 275 btrfs_free_path(path);
273 return ret; 276 return ret;
274 } else if (ret > 0) { 277 } else if (ret > 0) {
275 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 278 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
276 path->slots[0]--; 279 path->slots[0]--;
277 } else { 280 } else {
278 btrfs_free_path(path); 281 btrfs_free_path(path);
@@ -281,11 +284,11 @@ static int btrfs_get_name(struct dentry *parent, char *name,
281 } 284 }
282 leaf = path->nodes[0]; 285 leaf = path->nodes[0];
283 286
284 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 287 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
285 rref = btrfs_item_ptr(leaf, path->slots[0], 288 rref = btrfs_item_ptr(leaf, path->slots[0],
286 struct btrfs_root_ref); 289 struct btrfs_root_ref);
287 name_ptr = (unsigned long)(rref + 1); 290 name_ptr = (unsigned long)(rref + 1);
288 name_len = btrfs_root_ref_name_len(leaf, rref); 291 name_len = btrfs_root_ref_name_len(leaf, rref);
289 } else { 292 } else {
290 iref = btrfs_item_ptr(leaf, path->slots[0], 293 iref = btrfs_item_ptr(leaf, path->slots[0],
291 struct btrfs_inode_ref); 294 struct btrfs_inode_ref);
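Throughout export.c, inode->i_ino gives way to btrfs_ino(). The helper comes from btrfs_inode.h in this series; as introduced there it reads the objectid out of the in-memory location key and only falls back to i_ino for the special tree-root inodes whose location is not a real inode number (reconstructed from the series, so treat as a sketch):

    static inline u64 btrfs_ino(struct inode *inode)
    {
            u64 ino = BTRFS_I(inode)->location.objectid;

            /* tree roots keep magic objectids in their location key;
             * for those, i_ino is still the authoritative number */
            if (ino <= BTRFS_FIRST_FREE_OBJECTID)
                    ino = inode->i_ino;
            return ino;
    }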
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 103e141afeb3..169bd62ce776 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
94 return (cache->flags & bits) == bits; 94 return (cache->flags & bits) == bits;
95} 95}
96 96
97void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 97static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
98{ 98{
99 atomic_inc(&cache->count); 99 atomic_inc(&cache->count);
100} 100}
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
105 WARN_ON(cache->pinned > 0); 105 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 106 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0); 107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl);
108 kfree(cache); 109 kfree(cache);
109 } 110 }
110} 111}
@@ -379,7 +380,7 @@ again:
379 break; 380 break;
380 381
381 caching_ctl->progress = last; 382 caching_ctl->progress = last;
382 btrfs_release_path(extent_root, path); 383 btrfs_release_path(path);
383 up_read(&fs_info->extent_commit_sem); 384 up_read(&fs_info->extent_commit_sem);
384 mutex_unlock(&caching_ctl->mutex); 385 mutex_unlock(&caching_ctl->mutex);
385 if (btrfs_transaction_in_commit(fs_info)) 386 if (btrfs_transaction_in_commit(fs_info))
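The change repeated all through extent-tree.c (and the rest of the patch): btrfs_release_path() drops its never-used root argument. The prototypes implied by the call sites (reconstructed):

    /* before */
    void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
    /* after */
    void btrfs_release_path(struct btrfs_path *p);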
@@ -754,8 +755,12 @@ again:
754 atomic_inc(&head->node.refs); 755 atomic_inc(&head->node.refs);
755 spin_unlock(&delayed_refs->lock); 756 spin_unlock(&delayed_refs->lock);
756 757
757 btrfs_release_path(root->fs_info->extent_root, path); 758 btrfs_release_path(path);
758 759
760 /*
761 * Mutex was contended, block until it's released and try
762 * again
763 */
759 mutex_lock(&head->mutex); 764 mutex_lock(&head->mutex);
760 mutex_unlock(&head->mutex); 765 mutex_unlock(&head->mutex);
761 btrfs_put_delayed_ref(&head->node); 766 btrfs_put_delayed_ref(&head->node);
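The new comment documents a deliberate idiom rather than a locking bug: the path has already been released and a reference on the delayed-ref head is held, so taking and immediately dropping head->mutex serves purely as a barrier that sleeps until the current holder finishes, after which the caller retries from the top. Generically (get_ref/put_ref are stand-ins):

    /* lock-as-barrier: trylock failed, so wait the holder out and retry */
    if (!mutex_trylock(&obj->mutex)) {
            get_ref(obj);                /* keep obj alive across the wait */
            mutex_lock(&obj->mutex);     /* blocks until the holder is done */
            mutex_unlock(&obj->mutex);   /* the lock itself is not needed   */
            put_ref(obj);
            goto again;
    }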
@@ -934,7 +939,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
934 break; 939 break;
935 } 940 }
936 } 941 }
937 btrfs_release_path(root, path); 942 btrfs_release_path(path);
938 943
939 if (owner < BTRFS_FIRST_FREE_OBJECTID) 944 if (owner < BTRFS_FIRST_FREE_OBJECTID)
940 new_size += sizeof(*bi); 945 new_size += sizeof(*bi);
@@ -1041,7 +1046,7 @@ again:
1041 return 0; 1046 return 0;
1042#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1047#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1043 key.type = BTRFS_EXTENT_REF_V0_KEY; 1048 key.type = BTRFS_EXTENT_REF_V0_KEY;
1044 btrfs_release_path(root, path); 1049 btrfs_release_path(path);
1045 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1050 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1046 if (ret < 0) { 1051 if (ret < 0) {
1047 err = ret; 1052 err = ret;
@@ -1079,7 +1084,7 @@ again:
1079 if (match_extent_data_ref(leaf, ref, root_objectid, 1084 if (match_extent_data_ref(leaf, ref, root_objectid,
1080 owner, offset)) { 1085 owner, offset)) {
1081 if (recow) { 1086 if (recow) {
1082 btrfs_release_path(root, path); 1087 btrfs_release_path(path);
1083 goto again; 1088 goto again;
1084 } 1089 }
1085 err = 0; 1090 err = 0;
@@ -1140,7 +1145,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1140 if (match_extent_data_ref(leaf, ref, root_objectid, 1145 if (match_extent_data_ref(leaf, ref, root_objectid,
1141 owner, offset)) 1146 owner, offset))
1142 break; 1147 break;
1143 btrfs_release_path(root, path); 1148 btrfs_release_path(path);
1144 key.offset++; 1149 key.offset++;
1145 ret = btrfs_insert_empty_item(trans, root, path, &key, 1150 ret = btrfs_insert_empty_item(trans, root, path, &key,
1146 size); 1151 size);
@@ -1166,7 +1171,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1166 btrfs_mark_buffer_dirty(leaf); 1171 btrfs_mark_buffer_dirty(leaf);
1167 ret = 0; 1172 ret = 0;
1168fail: 1173fail:
1169 btrfs_release_path(root, path); 1174 btrfs_release_path(path);
1170 return ret; 1175 return ret;
1171} 1176}
1172 1177
@@ -1292,7 +1297,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1292 ret = -ENOENT; 1297 ret = -ENOENT;
1293#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1298#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1294 if (ret == -ENOENT && parent) { 1299 if (ret == -ENOENT && parent) {
1295 btrfs_release_path(root, path); 1300 btrfs_release_path(path);
1296 key.type = BTRFS_EXTENT_REF_V0_KEY; 1301 key.type = BTRFS_EXTENT_REF_V0_KEY;
1297 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1302 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1298 if (ret > 0) 1303 if (ret > 0)
@@ -1321,7 +1326,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1321 } 1326 }
1322 1327
1323 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1328 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1324 btrfs_release_path(root, path); 1329 btrfs_release_path(path);
1325 return ret; 1330 return ret;
1326} 1331}
1327 1332
@@ -1606,7 +1611,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1606 if (ret != -ENOENT) 1611 if (ret != -ENOENT)
1607 return ret; 1612 return ret;
1608 1613
1609 btrfs_release_path(root, path); 1614 btrfs_release_path(path);
1610 *ref_ret = NULL; 1615 *ref_ret = NULL;
1611 1616
1612 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1617 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1859,7 +1864,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1859 __run_delayed_extent_op(extent_op, leaf, item); 1864 __run_delayed_extent_op(extent_op, leaf, item);
1860 1865
1861 btrfs_mark_buffer_dirty(leaf); 1866 btrfs_mark_buffer_dirty(leaf);
1862 btrfs_release_path(root->fs_info->extent_root, path); 1867 btrfs_release_path(path);
1863 1868
1864 path->reada = 1; 1869 path->reada = 1;
1865 path->leave_spinning = 1; 1870 path->leave_spinning = 1;
@@ -2294,6 +2299,10 @@ again:
2294 atomic_inc(&ref->refs); 2299 atomic_inc(&ref->refs);
2295 2300
2296 spin_unlock(&delayed_refs->lock); 2301 spin_unlock(&delayed_refs->lock);
2302 /*
2303 * Mutex was contended, block until it's
2304 * released and try again
2305 */
2297 mutex_lock(&head->mutex); 2306 mutex_lock(&head->mutex);
2298 mutex_unlock(&head->mutex); 2307 mutex_unlock(&head->mutex);
2299 2308
@@ -2358,8 +2367,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2358 atomic_inc(&head->node.refs); 2367 atomic_inc(&head->node.refs);
2359 spin_unlock(&delayed_refs->lock); 2368 spin_unlock(&delayed_refs->lock);
2360 2369
2361 btrfs_release_path(root->fs_info->extent_root, path); 2370 btrfs_release_path(path);
2362 2371
2372 /*
2373 * Mutex was contended, block until it's released and let
2374 * caller try again
2375 */
2363 mutex_lock(&head->mutex); 2376 mutex_lock(&head->mutex);
2364 mutex_unlock(&head->mutex); 2377 mutex_unlock(&head->mutex);
2365 btrfs_put_delayed_ref(&head->node); 2378 btrfs_put_delayed_ref(&head->node);
@@ -2507,126 +2520,6 @@ out:
2507 return ret; 2520 return ret;
2508} 2521}
2509 2522
2510#if 0
2511int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2512 struct extent_buffer *buf, u32 nr_extents)
2513{
2514 struct btrfs_key key;
2515 struct btrfs_file_extent_item *fi;
2516 u64 root_gen;
2517 u32 nritems;
2518 int i;
2519 int level;
2520 int ret = 0;
2521 int shared = 0;
2522
2523 if (!root->ref_cows)
2524 return 0;
2525
2526 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2527 shared = 0;
2528 root_gen = root->root_key.offset;
2529 } else {
2530 shared = 1;
2531 root_gen = trans->transid - 1;
2532 }
2533
2534 level = btrfs_header_level(buf);
2535 nritems = btrfs_header_nritems(buf);
2536
2537 if (level == 0) {
2538 struct btrfs_leaf_ref *ref;
2539 struct btrfs_extent_info *info;
2540
2541 ref = btrfs_alloc_leaf_ref(root, nr_extents);
2542 if (!ref) {
2543 ret = -ENOMEM;
2544 goto out;
2545 }
2546
2547 ref->root_gen = root_gen;
2548 ref->bytenr = buf->start;
2549 ref->owner = btrfs_header_owner(buf);
2550 ref->generation = btrfs_header_generation(buf);
2551 ref->nritems = nr_extents;
2552 info = ref->extents;
2553
2554 for (i = 0; nr_extents > 0 && i < nritems; i++) {
2555 u64 disk_bytenr;
2556 btrfs_item_key_to_cpu(buf, &key, i);
2557 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2558 continue;
2559 fi = btrfs_item_ptr(buf, i,
2560 struct btrfs_file_extent_item);
2561 if (btrfs_file_extent_type(buf, fi) ==
2562 BTRFS_FILE_EXTENT_INLINE)
2563 continue;
2564 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2565 if (disk_bytenr == 0)
2566 continue;
2567
2568 info->bytenr = disk_bytenr;
2569 info->num_bytes =
2570 btrfs_file_extent_disk_num_bytes(buf, fi);
2571 info->objectid = key.objectid;
2572 info->offset = key.offset;
2573 info++;
2574 }
2575
2576 ret = btrfs_add_leaf_ref(root, ref, shared);
2577 if (ret == -EEXIST && shared) {
2578 struct btrfs_leaf_ref *old;
2579 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2580 BUG_ON(!old);
2581 btrfs_remove_leaf_ref(root, old);
2582 btrfs_free_leaf_ref(root, old);
2583 ret = btrfs_add_leaf_ref(root, ref, shared);
2584 }
2585 WARN_ON(ret);
2586 btrfs_free_leaf_ref(root, ref);
2587 }
2588out:
2589 return ret;
2590}
2591
2592/* when a block goes through cow, we update the reference counts of
2593 * everything that block points to. The internal pointers of the block
2594 * can be in just about any order, and it is likely to have clusters of
2595 * things that are close together and clusters of things that are not.
2596 *
2597 * To help reduce the seeks that come with updating all of these reference
2598 * counts, sort them by byte number before actual updates are done.
2599 *
2600 * struct refsort is used to match byte number to slot in the btree block.
2601 * we sort based on the byte number and then use the slot to actually
2602 * find the item.
2603 *
2604 * struct refsort is smaller than struct btrfs_item and smaller than
2605 * struct btrfs_key_ptr. Since we're currently limited to the page size
2606 * for a btree block, there's no way for a kmalloc of refsorts for a
2607 * single node to be bigger than a page.
2608 */
2609struct refsort {
2610 u64 bytenr;
2611 u32 slot;
2612};
2613
2614/*
2615 * for passing into sort()
2616 */
2617static int refsort_cmp(const void *a_void, const void *b_void)
2618{
2619 const struct refsort *a = a_void;
2620 const struct refsort *b = b_void;
2621
2622 if (a->bytenr < b->bytenr)
2623 return -1;
2624 if (a->bytenr > b->bytenr)
2625 return 1;
2626 return 0;
2627}
2628#endif
2629
2630static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2523static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2631 struct btrfs_root *root, 2524 struct btrfs_root *root,
2632 struct extent_buffer *buf, 2525 struct extent_buffer *buf,
@@ -2729,7 +2622,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2729 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2622 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2730 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2623 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2731 btrfs_mark_buffer_dirty(leaf); 2624 btrfs_mark_buffer_dirty(leaf);
2732 btrfs_release_path(extent_root, path); 2625 btrfs_release_path(path);
2733fail: 2626fail:
2734 if (ret) 2627 if (ret)
2735 return ret; 2628 return ret;
@@ -2782,7 +2675,7 @@ again:
2782 inode = lookup_free_space_inode(root, block_group, path); 2675 inode = lookup_free_space_inode(root, block_group, path);
2783 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2676 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2784 ret = PTR_ERR(inode); 2677 ret = PTR_ERR(inode);
2785 btrfs_release_path(root, path); 2678 btrfs_release_path(path);
2786 goto out; 2679 goto out;
2787 } 2680 }
2788 2681
@@ -2851,7 +2744,7 @@ again:
2851out_put: 2744out_put:
2852 iput(inode); 2745 iput(inode);
2853out_free: 2746out_free:
2854 btrfs_release_path(root, path); 2747 btrfs_release_path(path);
2855out: 2748out:
2856 spin_lock(&block_group->lock); 2749 spin_lock(&block_group->lock);
2857 block_group->disk_cache_state = dcs; 2750 block_group->disk_cache_state = dcs;
@@ -3141,7 +3034,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3141 /* make sure bytes are sectorsize aligned */ 3034 /* make sure bytes are sectorsize aligned */
3142 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3035 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3143 3036
3144 if (root == root->fs_info->tree_root) { 3037 if (root == root->fs_info->tree_root ||
3038 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3145 alloc_chunk = 0; 3039 alloc_chunk = 0;
3146 committed = 1; 3040 committed = 1;
3147 } 3041 }
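btrfs_check_data_free_space() now treats writes to the free-ino cache inode (objectid BTRFS_FREE_INO_OBJECTID) the same way it treats tree-root I/O: no chunk allocation, and count the space as committed, since this data is written out as part of the commit itself. The predicate could be read as (hypothetical helper, same logic as the hunk):

    static int is_commit_critical(struct btrfs_root *root, struct inode *inode)
    {
            return root == root->fs_info->tree_root ||
                   BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID;
    }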
@@ -3208,18 +3102,6 @@ commit_trans:
3208 goto again; 3102 goto again;
3209 } 3103 }
3210 3104
3211#if 0 /* I hope we never need this code again, just in case */
3212 printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
3213 "%llu bytes_reserved, " "%llu bytes_pinned, "
3214 "%llu bytes_readonly, %llu may use %llu total\n",
3215 (unsigned long long)bytes,
3216 (unsigned long long)data_sinfo->bytes_used,
3217 (unsigned long long)data_sinfo->bytes_reserved,
3218 (unsigned long long)data_sinfo->bytes_pinned,
3219 (unsigned long long)data_sinfo->bytes_readonly,
3220 (unsigned long long)data_sinfo->bytes_may_use,
3221 (unsigned long long)data_sinfo->total_bytes);
3222#endif
3223 return -ENOSPC; 3105 return -ENOSPC;
3224 } 3106 }
3225 data_sinfo->bytes_may_use += bytes; 3107 data_sinfo->bytes_may_use += bytes;
@@ -3652,8 +3534,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3652 spin_unlock(&block_rsv->lock); 3534 spin_unlock(&block_rsv->lock);
3653} 3535}
3654 3536
3655void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3537static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3656 struct btrfs_block_rsv *dest, u64 num_bytes) 3538 struct btrfs_block_rsv *dest, u64 num_bytes)
3657{ 3539{
3658 struct btrfs_space_info *space_info = block_rsv->space_info; 3540 struct btrfs_space_info *space_info = block_rsv->space_info;
3659 3541
@@ -3856,23 +3738,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3856 u64 meta_used; 3738 u64 meta_used;
3857 u64 data_used; 3739 u64 data_used;
3858 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3740 int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3859#if 0
3860 /*
3861 * per tree used space accounting can be inaccurate, so we
3862 * can't rely on it.
3863 */
3864 spin_lock(&fs_info->extent_root->accounting_lock);
3865 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3866 spin_unlock(&fs_info->extent_root->accounting_lock);
3867 3741
3868 spin_lock(&fs_info->csum_root->accounting_lock);
3869 num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3870 spin_unlock(&fs_info->csum_root->accounting_lock);
3871
3872 spin_lock(&fs_info->tree_root->accounting_lock);
3873 num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3874 spin_unlock(&fs_info->tree_root->accounting_lock);
3875#endif
3876 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3742 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3877 spin_lock(&sinfo->lock); 3743 spin_lock(&sinfo->lock);
3878 data_used = sinfo->bytes_used; 3744 data_used = sinfo->bytes_used;
@@ -3925,10 +3791,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3925 block_rsv->reserved = block_rsv->size; 3791 block_rsv->reserved = block_rsv->size;
3926 block_rsv->full = 1; 3792 block_rsv->full = 1;
3927 } 3793 }
3928#if 0 3794
3929 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3930 block_rsv->size, block_rsv->reserved);
3931#endif
3932 spin_unlock(&sinfo->lock); 3795 spin_unlock(&sinfo->lock);
3933 spin_unlock(&block_rsv->lock); 3796 spin_unlock(&block_rsv->lock);
3934} 3797}
@@ -3974,12 +3837,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3974 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3975} 3838}
3976 3839
3977static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3978{
3979 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3980 3 * num_items;
3981}
3982
3983int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3984 struct btrfs_root *root, 3841 struct btrfs_root *root,
3985 int num_items) 3842 int num_items)
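calc_trans_metadata_size() becomes btrfs_calc_trans_metadata_size() and moves out of this file so the delayed-inode code can use the same worst-case estimate. Per the body removed above, the estimate assumes each item may CoW one full path, a leaf plus a node for every remaining level, and pads that with a factor of three:

    /* the removed body, kept here for reference */
    static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
    {
            return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
                    3 * num_items;
    }
    /* e.g. 4K leaves and nodes, BTRFS_MAX_LEVEL == 8:
     * (4096 + 4096 * 7) * 3 = 98304 bytes reserved per item */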
@@ -3990,7 +3847,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3990 if (num_items == 0 || root->fs_info->chunk_root == root) 3847 if (num_items == 0 || root->fs_info->chunk_root == root)
3991 return 0; 3848 return 0;
3992 3849
3993 num_bytes = calc_trans_metadata_size(root, num_items); 3850 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
3994 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3851 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3995 num_bytes); 3852 num_bytes);
3996 if (!ret) { 3853 if (!ret) {
@@ -4029,14 +3886,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4029 * If all of the metadata space is used, we can commit 3886 * If all of the metadata space is used, we can commit
4030 * transaction and use space it freed. 3887 * transaction and use space it freed.
4031 */ 3888 */
4032 u64 num_bytes = calc_trans_metadata_size(root, 4); 3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
4033 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4034} 3891}
4035 3892
4036void btrfs_orphan_release_metadata(struct inode *inode) 3893void btrfs_orphan_release_metadata(struct inode *inode)
4037{ 3894{
4038 struct btrfs_root *root = BTRFS_I(inode)->root; 3895 struct btrfs_root *root = BTRFS_I(inode)->root;
4039 u64 num_bytes = calc_trans_metadata_size(root, 4); 3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
4040 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4041} 3898}
4042 3899
@@ -4050,7 +3907,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4050 * two for root back/forward refs, two for directory entries 3907 * two for root back/forward refs, two for directory entries
4051 * and one for root of the snapshot. 3908 * and one for root of the snapshot.
4052 */ 3909 */
4053 u64 num_bytes = calc_trans_metadata_size(root, 5); 3910 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
4054 dst_rsv->space_info = src_rsv->space_info; 3911 dst_rsv->space_info = src_rsv->space_info;
4055 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3912 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4056} 3913}
@@ -4079,7 +3936,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4079 3936
4080 if (nr_extents > reserved_extents) { 3937 if (nr_extents > reserved_extents) {
4081 nr_extents -= reserved_extents; 3938 nr_extents -= reserved_extents;
4082 to_reserve = calc_trans_metadata_size(root, nr_extents); 3939 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4083 } else { 3940 } else {
4084 nr_extents = 0; 3941 nr_extents = 0;
4085 to_reserve = 0; 3942 to_reserve = 0;
@@ -4133,7 +3990,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4133 3990
4134 to_free = calc_csum_metadata_size(inode, num_bytes); 3991 to_free = calc_csum_metadata_size(inode, num_bytes);
4135 if (nr_extents > 0) 3992 if (nr_extents > 0)
4136 to_free += calc_trans_metadata_size(root, nr_extents); 3993 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
4137 3994
4138 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 3995 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4139 to_free); 3996 to_free);
@@ -4542,7 +4399,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4542 NULL, refs_to_drop, 4399 NULL, refs_to_drop,
4543 is_data); 4400 is_data);
4544 BUG_ON(ret); 4401 BUG_ON(ret);
4545 btrfs_release_path(extent_root, path); 4402 btrfs_release_path(path);
4546 path->leave_spinning = 1; 4403 path->leave_spinning = 1;
4547 4404
4548 key.objectid = bytenr; 4405 key.objectid = bytenr;
@@ -4581,7 +4438,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4581 owner_objectid, 0); 4438 owner_objectid, 0);
4582 BUG_ON(ret < 0); 4439 BUG_ON(ret < 0);
4583 4440
4584 btrfs_release_path(extent_root, path); 4441 btrfs_release_path(path);
4585 path->leave_spinning = 1; 4442 path->leave_spinning = 1;
4586 4443
4587 key.objectid = bytenr; 4444 key.objectid = bytenr;
@@ -4651,7 +4508,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4651 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4508 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4652 num_to_del); 4509 num_to_del);
4653 BUG_ON(ret); 4510 BUG_ON(ret);
4654 btrfs_release_path(extent_root, path); 4511 btrfs_release_path(path);
4655 4512
4656 if (is_data) { 4513 if (is_data) {
4657 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4514 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4894,7 +4751,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4894 return 0; 4751 return 0;
4895 4752
4896 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4753 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4897 (cache->free_space >= num_bytes)); 4754 (cache->free_space_ctl->free_space >= num_bytes));
4898 4755
4899 put_caching_control(caching_ctl); 4756 put_caching_control(caching_ctl);
4900 return 0; 4757 return 0;
@@ -6481,7 +6338,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6481 trans->block_rsv = block_rsv; 6338 trans->block_rsv = block_rsv;
6482 } 6339 }
6483 } 6340 }
6484 btrfs_release_path(root, path); 6341 btrfs_release_path(path);
6485 BUG_ON(err); 6342 BUG_ON(err);
6486 6343
6487 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6344 ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6585,1514 +6442,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6585 return ret; 6442 return ret;
6586} 6443}
6587 6444
6588#if 0
6589static unsigned long calc_ra(unsigned long start, unsigned long last,
6590 unsigned long nr)
6591{
6592 return min(last, start + nr - 1);
6593}
6594
6595static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6596 u64 len)
6597{
6598 u64 page_start;
6599 u64 page_end;
6600 unsigned long first_index;
6601 unsigned long last_index;
6602 unsigned long i;
6603 struct page *page;
6604 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6605 struct file_ra_state *ra;
6606 struct btrfs_ordered_extent *ordered;
6607 unsigned int total_read = 0;
6608 unsigned int total_dirty = 0;
6609 int ret = 0;
6610
6611 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6612 if (!ra)
6613 return -ENOMEM;
6614
6615 mutex_lock(&inode->i_mutex);
6616 first_index = start >> PAGE_CACHE_SHIFT;
6617 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6618
6619 /* make sure the dirty trick played by the caller works */
6620 ret = invalidate_inode_pages2_range(inode->i_mapping,
6621 first_index, last_index);
6622 if (ret)
6623 goto out_unlock;
6624
6625 file_ra_state_init(ra, inode->i_mapping);
6626
6627 for (i = first_index ; i <= last_index; i++) {
6628 if (total_read % ra->ra_pages == 0) {
6629 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6630 calc_ra(i, last_index, ra->ra_pages));
6631 }
6632 total_read++;
6633again:
6634 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6635 BUG_ON(1);
6636 page = grab_cache_page(inode->i_mapping, i);
6637 if (!page) {
6638 ret = -ENOMEM;
6639 goto out_unlock;
6640 }
6641 if (!PageUptodate(page)) {
6642 btrfs_readpage(NULL, page);
6643 lock_page(page);
6644 if (!PageUptodate(page)) {
6645 unlock_page(page);
6646 page_cache_release(page);
6647 ret = -EIO;
6648 goto out_unlock;
6649 }
6650 }
6651 wait_on_page_writeback(page);
6652
6653 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6654 page_end = page_start + PAGE_CACHE_SIZE - 1;
6655 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6656
6657 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6658 if (ordered) {
6659 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6660 unlock_page(page);
6661 page_cache_release(page);
6662 btrfs_start_ordered_extent(inode, ordered, 1);
6663 btrfs_put_ordered_extent(ordered);
6664 goto again;
6665 }
6666 set_page_extent_mapped(page);
6667
6668 if (i == first_index)
6669 set_extent_bits(io_tree, page_start, page_end,
6670 EXTENT_BOUNDARY, GFP_NOFS);
6671 btrfs_set_extent_delalloc(inode, page_start, page_end);
6672
6673 set_page_dirty(page);
6674 total_dirty++;
6675
6676 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6677 unlock_page(page);
6678 page_cache_release(page);
6679 }
6680
6681out_unlock:
6682 kfree(ra);
6683 mutex_unlock(&inode->i_mutex);
6684 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6685 return ret;
6686}
6687
6688static noinline int relocate_data_extent(struct inode *reloc_inode,
6689 struct btrfs_key *extent_key,
6690 u64 offset)
6691{
6692 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6693 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6694 struct extent_map *em;
6695 u64 start = extent_key->objectid - offset;
6696 u64 end = start + extent_key->offset - 1;
6697
6698 em = alloc_extent_map(GFP_NOFS);
6699 BUG_ON(!em);
6700
6701 em->start = start;
6702 em->len = extent_key->offset;
6703 em->block_len = extent_key->offset;
6704 em->block_start = extent_key->objectid;
6705 em->bdev = root->fs_info->fs_devices->latest_bdev;
6706 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6707
6708 /* setup extent map to cheat btrfs_readpage */
6709 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6710 while (1) {
6711 int ret;
6712 write_lock(&em_tree->lock);
6713 ret = add_extent_mapping(em_tree, em);
6714 write_unlock(&em_tree->lock);
6715 if (ret != -EEXIST) {
6716 free_extent_map(em);
6717 break;
6718 }
6719 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6720 }
6721 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6722
6723 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6724}
6725
6726struct btrfs_ref_path {
6727 u64 extent_start;
6728 u64 nodes[BTRFS_MAX_LEVEL];
6729 u64 root_objectid;
6730 u64 root_generation;
6731 u64 owner_objectid;
6732 u32 num_refs;
6733 int lowest_level;
6734 int current_level;
6735 int shared_level;
6736
6737 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6738 u64 new_nodes[BTRFS_MAX_LEVEL];
6739};
6740
6741struct disk_extent {
6742 u64 ram_bytes;
6743 u64 disk_bytenr;
6744 u64 disk_num_bytes;
6745 u64 offset;
6746 u64 num_bytes;
6747 u8 compression;
6748 u8 encryption;
6749 u16 other_encoding;
6750};
6751
6752static int is_cowonly_root(u64 root_objectid)
6753{
6754 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6755 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6756 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6757 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6758 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6759 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6760 return 1;
6761 return 0;
6762}
6763
6764static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6765 struct btrfs_root *extent_root,
6766 struct btrfs_ref_path *ref_path,
6767 int first_time)
6768{
6769 struct extent_buffer *leaf;
6770 struct btrfs_path *path;
6771 struct btrfs_extent_ref *ref;
6772 struct btrfs_key key;
6773 struct btrfs_key found_key;
6774 u64 bytenr;
6775 u32 nritems;
6776 int level;
6777 int ret = 1;
6778
6779 path = btrfs_alloc_path();
6780 if (!path)
6781 return -ENOMEM;
6782
6783 if (first_time) {
6784 ref_path->lowest_level = -1;
6785 ref_path->current_level = -1;
6786 ref_path->shared_level = -1;
6787 goto walk_up;
6788 }
6789walk_down:
6790 level = ref_path->current_level - 1;
6791 while (level >= -1) {
6792 u64 parent;
6793 if (level < ref_path->lowest_level)
6794 break;
6795
6796 if (level >= 0)
6797 bytenr = ref_path->nodes[level];
6798 else
6799 bytenr = ref_path->extent_start;
6800 BUG_ON(bytenr == 0);
6801
6802 parent = ref_path->nodes[level + 1];
6803 ref_path->nodes[level + 1] = 0;
6804 ref_path->current_level = level;
6805 BUG_ON(parent == 0);
6806
6807 key.objectid = bytenr;
6808 key.offset = parent + 1;
6809 key.type = BTRFS_EXTENT_REF_KEY;
6810
6811 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6812 if (ret < 0)
6813 goto out;
6814 BUG_ON(ret == 0);
6815
6816 leaf = path->nodes[0];
6817 nritems = btrfs_header_nritems(leaf);
6818 if (path->slots[0] >= nritems) {
6819 ret = btrfs_next_leaf(extent_root, path);
6820 if (ret < 0)
6821 goto out;
6822 if (ret > 0)
6823 goto next;
6824 leaf = path->nodes[0];
6825 }
6826
6827 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6828 if (found_key.objectid == bytenr &&
6829 found_key.type == BTRFS_EXTENT_REF_KEY) {
6830 if (level < ref_path->shared_level)
6831 ref_path->shared_level = level;
6832 goto found;
6833 }
6834next:
6835 level--;
6836 btrfs_release_path(extent_root, path);
6837 cond_resched();
6838 }
6839 /* reached lowest level */
6840 ret = 1;
6841 goto out;
6842walk_up:
6843 level = ref_path->current_level;
6844 while (level < BTRFS_MAX_LEVEL - 1) {
6845 u64 ref_objectid;
6846
6847 if (level >= 0)
6848 bytenr = ref_path->nodes[level];
6849 else
6850 bytenr = ref_path->extent_start;
6851
6852 BUG_ON(bytenr == 0);
6853
6854 key.objectid = bytenr;
6855 key.offset = 0;
6856 key.type = BTRFS_EXTENT_REF_KEY;
6857
6858 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6859 if (ret < 0)
6860 goto out;
6861
6862 leaf = path->nodes[0];
6863 nritems = btrfs_header_nritems(leaf);
6864 if (path->slots[0] >= nritems) {
6865 ret = btrfs_next_leaf(extent_root, path);
6866 if (ret < 0)
6867 goto out;
6868 if (ret > 0) {
6869 /* the extent was freed by someone */
6870 if (ref_path->lowest_level == level)
6871 goto out;
6872 btrfs_release_path(extent_root, path);
6873 goto walk_down;
6874 }
6875 leaf = path->nodes[0];
6876 }
6877
6878 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6879 if (found_key.objectid != bytenr ||
6880 found_key.type != BTRFS_EXTENT_REF_KEY) {
6881 /* the extent was freed by someone */
6882 if (ref_path->lowest_level == level) {
6883 ret = 1;
6884 goto out;
6885 }
6886 btrfs_release_path(extent_root, path);
6887 goto walk_down;
6888 }
6889found:
6890 ref = btrfs_item_ptr(leaf, path->slots[0],
6891 struct btrfs_extent_ref);
6892 ref_objectid = btrfs_ref_objectid(leaf, ref);
6893 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6894 if (first_time) {
6895 level = (int)ref_objectid;
6896 BUG_ON(level >= BTRFS_MAX_LEVEL);
6897 ref_path->lowest_level = level;
6898 ref_path->current_level = level;
6899 ref_path->nodes[level] = bytenr;
6900 } else {
6901 WARN_ON(ref_objectid != level);
6902 }
6903 } else {
6904 WARN_ON(level != -1);
6905 }
6906 first_time = 0;
6907
6908 if (ref_path->lowest_level == level) {
6909 ref_path->owner_objectid = ref_objectid;
6910 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6911 }
6912
6913 /*
6914	 * the block is a tree root or the block isn't in a
6915	 * reference counted tree.
6916 */
6917 if (found_key.objectid == found_key.offset ||
6918 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6919 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6920 ref_path->root_generation =
6921 btrfs_ref_generation(leaf, ref);
6922 if (level < 0) {
6923 /* special reference from the tree log */
6924 ref_path->nodes[0] = found_key.offset;
6925 ref_path->current_level = 0;
6926 }
6927 ret = 0;
6928 goto out;
6929 }
6930
6931 level++;
6932 BUG_ON(ref_path->nodes[level] != 0);
6933 ref_path->nodes[level] = found_key.offset;
6934 ref_path->current_level = level;
6935
6936 /*
6937 * the reference was created in the running transaction,
6938 * no need to continue walking up.
6939 */
6940 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6941 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6942 ref_path->root_generation =
6943 btrfs_ref_generation(leaf, ref);
6944 ret = 0;
6945 goto out;
6946 }
6947
6948 btrfs_release_path(extent_root, path);
6949 cond_resched();
6950 }
6951 /* reached max tree level, but no tree root found. */
6952 BUG();
6953out:
6954 btrfs_free_path(path);
6955 return ret;
6956}
6957
6958static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6959 struct btrfs_root *extent_root,
6960 struct btrfs_ref_path *ref_path,
6961 u64 extent_start)
6962{
6963 memset(ref_path, 0, sizeof(*ref_path));
6964 ref_path->extent_start = extent_start;
6965
6966 return __next_ref_path(trans, extent_root, ref_path, 1);
6967}
6968
6969static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
6970 struct btrfs_root *extent_root,
6971 struct btrfs_ref_path *ref_path)
6972{
6973 return __next_ref_path(trans, extent_root, ref_path, 0);
6974}
6975
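btrfs_first_ref_path() and btrfs_next_ref_path() form a simple iterator over the back references of a single extent: a return of 0 means ref_path now describes one reference path, a positive return means the paths are exhausted, and a negative return is an error. A minimal sketch of the calling convention (error handling elided; process_one_path() is a hypothetical consumer, and relocate_one_extent() below is the real one):

	struct btrfs_ref_path *ref_path;
	int ret;

	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
	ret = btrfs_first_ref_path(trans, extent_root, ref_path, extent_start);
	while (ret == 0) {
		/* ref_path describes one path from the extent up to a root */
		process_one_path(ref_path);	/* hypothetical consumer */
		ret = btrfs_next_ref_path(trans, extent_root, ref_path);
	}
	kfree(ref_path);	/* ret > 0: done; ret < 0: error */
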
6976static noinline int get_new_locations(struct inode *reloc_inode,
6977 struct btrfs_key *extent_key,
6978 u64 offset, int no_fragment,
6979 struct disk_extent **extents,
6980 int *nr_extents)
6981{
6982 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6983 struct btrfs_path *path;
6984 struct btrfs_file_extent_item *fi;
6985 struct extent_buffer *leaf;
6986 struct disk_extent *exts = *extents;
6987 struct btrfs_key found_key;
6988 u64 cur_pos;
6989 u64 last_byte;
6990 u32 nritems;
6991 int nr = 0;
6992 int max = *nr_extents;
6993 int ret;
6994
6995 WARN_ON(!no_fragment && *extents);
6996 if (!exts) {
6997 max = 1;
6998 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6999 if (!exts)
7000 return -ENOMEM;
7001 }
7002
7003 path = btrfs_alloc_path();
7004 if (!path) {
7005 if (exts != *extents)
7006 kfree(exts);
7007 return -ENOMEM;
7008 }
7009
7010 cur_pos = extent_key->objectid - offset;
7011 last_byte = extent_key->objectid + extent_key->offset;
7012 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
7013 cur_pos, 0);
7014 if (ret < 0)
7015 goto out;
7016 if (ret > 0) {
7017 ret = -ENOENT;
7018 goto out;
7019 }
7020
7021 while (1) {
7022 leaf = path->nodes[0];
7023 nritems = btrfs_header_nritems(leaf);
7024 if (path->slots[0] >= nritems) {
7025 ret = btrfs_next_leaf(root, path);
7026 if (ret < 0)
7027 goto out;
7028 if (ret > 0)
7029 break;
7030 leaf = path->nodes[0];
7031 }
7032
7033 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7034 if (found_key.offset != cur_pos ||
7035 found_key.type != BTRFS_EXTENT_DATA_KEY ||
7036 found_key.objectid != reloc_inode->i_ino)
7037 break;
7038
7039 fi = btrfs_item_ptr(leaf, path->slots[0],
7040 struct btrfs_file_extent_item);
7041 if (btrfs_file_extent_type(leaf, fi) !=
7042 BTRFS_FILE_EXTENT_REG ||
7043 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7044 break;
7045
7046 if (nr == max) {
7047 struct disk_extent *old = exts;
7048 max *= 2;
7049 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7050 if (!exts) {
7051 ret = -ENOMEM;
7052 goto out;
7053 }
7054 memcpy(exts, old, sizeof(*exts) * nr);
7055 if (old != *extents)
7056 kfree(old);
7057 }
7058
7059 exts[nr].disk_bytenr =
7060 btrfs_file_extent_disk_bytenr(leaf, fi);
7061 exts[nr].disk_num_bytes =
7062 btrfs_file_extent_disk_num_bytes(leaf, fi);
7063 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
7064 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7065 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7066 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
7067 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
7068 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
7069 fi);
7070 BUG_ON(exts[nr].offset > 0);
7071 BUG_ON(exts[nr].compression || exts[nr].encryption);
7072 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
7073
7074 cur_pos += exts[nr].num_bytes;
7075 nr++;
7076
7077 if (cur_pos + offset >= last_byte)
7078 break;
7079
7080 if (no_fragment) {
7081 ret = 1;
7082 goto out;
7083 }
7084 path->slots[0]++;
7085 }
7086
7087 BUG_ON(cur_pos + offset > last_byte);
7088 if (cur_pos + offset < last_byte) {
7089 ret = -ENOENT;
7090 goto out;
7091 }
7092 ret = 0;
7093out:
7094 btrfs_free_path(path);
7095 if (ret) {
7096 if (exts != *extents)
7097 kfree(exts);
7098 } else {
7099 *extents = exts;
7100 *nr_extents = nr;
7101 }
7102 return ret;
7103}
7104
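get_new_locations() grows its result array by doubling: it allocates a fresh, twice-as-large buffer, copies the old entries across, and frees the old buffer only if it was not the caller-supplied *extents array. The guard matters because the caller's buffer must survive an error return. The pattern in isolation (a sketch; krealloc() would be the idiomatic alternative when no caller-owned buffer needs protecting):

	if (nr == max) {
		struct disk_extent *old = exts;

		max *= 2;
		exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
		if (!exts)
			return -ENOMEM;
		memcpy(exts, old, sizeof(*exts) * nr);
		if (old != *extents)	/* never free the caller's array */
			kfree(old);
	}
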
7105static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
7106 struct btrfs_root *root,
7107 struct btrfs_path *path,
7108 struct btrfs_key *extent_key,
7109 struct btrfs_key *leaf_key,
7110 struct btrfs_ref_path *ref_path,
7111 struct disk_extent *new_extents,
7112 int nr_extents)
7113{
7114 struct extent_buffer *leaf;
7115 struct btrfs_file_extent_item *fi;
7116 struct inode *inode = NULL;
7117 struct btrfs_key key;
7118 u64 lock_start = 0;
7119 u64 lock_end = 0;
7120 u64 num_bytes;
7121 u64 ext_offset;
7122 u64 search_end = (u64)-1;
7123 u32 nritems;
7124 int nr_scaned = 0;
7125 int extent_locked = 0;
7126 int extent_type;
7127 int ret;
7128
7129 memcpy(&key, leaf_key, sizeof(key));
7130 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7131 if (key.objectid < ref_path->owner_objectid ||
7132 (key.objectid == ref_path->owner_objectid &&
7133 key.type < BTRFS_EXTENT_DATA_KEY)) {
7134 key.objectid = ref_path->owner_objectid;
7135 key.type = BTRFS_EXTENT_DATA_KEY;
7136 key.offset = 0;
7137 }
7138 }
7139
7140 while (1) {
7141 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7142 if (ret < 0)
7143 goto out;
7144
7145 leaf = path->nodes[0];
7146 nritems = btrfs_header_nritems(leaf);
7147next:
7148 if (extent_locked && ret > 0) {
7149 /*
7150 * the file extent item was modified by someone
7151 * before the extent got locked.
7152 */
7153 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7154 lock_end, GFP_NOFS);
7155 extent_locked = 0;
7156 }
7157
7158 if (path->slots[0] >= nritems) {
7159 if (++nr_scaned > 2)
7160 break;
7161
7162 BUG_ON(extent_locked);
7163 ret = btrfs_next_leaf(root, path);
7164 if (ret < 0)
7165 goto out;
7166 if (ret > 0)
7167 break;
7168 leaf = path->nodes[0];
7169 nritems = btrfs_header_nritems(leaf);
7170 }
7171
7172 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7173
7174 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7175 if ((key.objectid > ref_path->owner_objectid) ||
7176 (key.objectid == ref_path->owner_objectid &&
7177 key.type > BTRFS_EXTENT_DATA_KEY) ||
7178 key.offset >= search_end)
7179 break;
7180 }
7181
7182 if (inode && key.objectid != inode->i_ino) {
7183 BUG_ON(extent_locked);
7184 btrfs_release_path(root, path);
7185 mutex_unlock(&inode->i_mutex);
7186 iput(inode);
7187 inode = NULL;
7188 continue;
7189 }
7190
7191 if (key.type != BTRFS_EXTENT_DATA_KEY) {
7192 path->slots[0]++;
7193 ret = 1;
7194 goto next;
7195 }
7196 fi = btrfs_item_ptr(leaf, path->slots[0],
7197 struct btrfs_file_extent_item);
7198 extent_type = btrfs_file_extent_type(leaf, fi);
7199 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
7200 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
7201 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
7202 extent_key->objectid)) {
7203 path->slots[0]++;
7204 ret = 1;
7205 goto next;
7206 }
7207
7208 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7209 ext_offset = btrfs_file_extent_offset(leaf, fi);
7210
7211 if (search_end == (u64)-1) {
7212 search_end = key.offset - ext_offset +
7213 btrfs_file_extent_ram_bytes(leaf, fi);
7214 }
7215
7216 if (!extent_locked) {
7217 lock_start = key.offset;
7218 lock_end = lock_start + num_bytes - 1;
7219 } else {
7220 if (lock_start > key.offset ||
7221 lock_end + 1 < key.offset + num_bytes) {
7222 unlock_extent(&BTRFS_I(inode)->io_tree,
7223 lock_start, lock_end, GFP_NOFS);
7224 extent_locked = 0;
7225 }
7226 }
7227
7228 if (!inode) {
7229 btrfs_release_path(root, path);
7230
7231 inode = btrfs_iget_locked(root->fs_info->sb,
7232 key.objectid, root);
7233 if (inode->i_state & I_NEW) {
7234 BTRFS_I(inode)->root = root;
7235 BTRFS_I(inode)->location.objectid =
7236 key.objectid;
7237 BTRFS_I(inode)->location.type =
7238 BTRFS_INODE_ITEM_KEY;
7239 BTRFS_I(inode)->location.offset = 0;
7240 btrfs_read_locked_inode(inode);
7241 unlock_new_inode(inode);
7242 }
7243 /*
7244	 * some code calls btrfs_commit_transaction while
7245 * holding the i_mutex, so we can't use mutex_lock
7246 * here.
7247 */
7248 if (is_bad_inode(inode) ||
7249 !mutex_trylock(&inode->i_mutex)) {
7250 iput(inode);
7251 inode = NULL;
7252 key.offset = (u64)-1;
7253 goto skip;
7254 }
7255 }
7256
7257 if (!extent_locked) {
7258 struct btrfs_ordered_extent *ordered;
7259
7260 btrfs_release_path(root, path);
7261
7262 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7263 lock_end, GFP_NOFS);
7264 ordered = btrfs_lookup_first_ordered_extent(inode,
7265 lock_end);
7266 if (ordered &&
7267 ordered->file_offset <= lock_end &&
7268 ordered->file_offset + ordered->len > lock_start) {
7269 unlock_extent(&BTRFS_I(inode)->io_tree,
7270 lock_start, lock_end, GFP_NOFS);
7271 btrfs_start_ordered_extent(inode, ordered, 1);
7272 btrfs_put_ordered_extent(ordered);
7273 key.offset += num_bytes;
7274 goto skip;
7275 }
7276 if (ordered)
7277 btrfs_put_ordered_extent(ordered);
7278
7279 extent_locked = 1;
7280 continue;
7281 }
7282
7283 if (nr_extents == 1) {
7284 /* update extent pointer in place */
7285 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7286 new_extents[0].disk_bytenr);
7287 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7288 new_extents[0].disk_num_bytes);
7289 btrfs_mark_buffer_dirty(leaf);
7290
7291 btrfs_drop_extent_cache(inode, key.offset,
7292 key.offset + num_bytes - 1, 0);
7293
7294 ret = btrfs_inc_extent_ref(trans, root,
7295 new_extents[0].disk_bytenr,
7296 new_extents[0].disk_num_bytes,
7297 leaf->start,
7298 root->root_key.objectid,
7299 trans->transid,
7300 key.objectid);
7301 BUG_ON(ret);
7302
7303 ret = btrfs_free_extent(trans, root,
7304 extent_key->objectid,
7305 extent_key->offset,
7306 leaf->start,
7307 btrfs_header_owner(leaf),
7308 btrfs_header_generation(leaf),
7309 key.objectid, 0);
7310 BUG_ON(ret);
7311
7312 btrfs_release_path(root, path);
7313 key.offset += num_bytes;
7314 } else {
7315 BUG_ON(1);
7316#if 0
7317 u64 alloc_hint;
7318 u64 extent_len;
7319 int i;
7320 /*
7321	 * drop the old extent pointer first, then insert the
7322	 * new pointers one by one
7323 */
7324 btrfs_release_path(root, path);
7325 ret = btrfs_drop_extents(trans, root, inode, key.offset,
7326 key.offset + num_bytes,
7327 key.offset, &alloc_hint);
7328 BUG_ON(ret);
7329
7330 for (i = 0; i < nr_extents; i++) {
7331 if (ext_offset >= new_extents[i].num_bytes) {
7332 ext_offset -= new_extents[i].num_bytes;
7333 continue;
7334 }
7335 extent_len = min(new_extents[i].num_bytes -
7336 ext_offset, num_bytes);
7337
7338 ret = btrfs_insert_empty_item(trans, root,
7339 path, &key,
7340 sizeof(*fi));
7341 BUG_ON(ret);
7342
7343 leaf = path->nodes[0];
7344 fi = btrfs_item_ptr(leaf, path->slots[0],
7345 struct btrfs_file_extent_item);
7346 btrfs_set_file_extent_generation(leaf, fi,
7347 trans->transid);
7348 btrfs_set_file_extent_type(leaf, fi,
7349 BTRFS_FILE_EXTENT_REG);
7350 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7351 new_extents[i].disk_bytenr);
7352 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7353 new_extents[i].disk_num_bytes);
7354 btrfs_set_file_extent_ram_bytes(leaf, fi,
7355 new_extents[i].ram_bytes);
7356
7357 btrfs_set_file_extent_compression(leaf, fi,
7358 new_extents[i].compression);
7359 btrfs_set_file_extent_encryption(leaf, fi,
7360 new_extents[i].encryption);
7361 btrfs_set_file_extent_other_encoding(leaf, fi,
7362 new_extents[i].other_encoding);
7363
7364 btrfs_set_file_extent_num_bytes(leaf, fi,
7365 extent_len);
7366 ext_offset += new_extents[i].offset;
7367 btrfs_set_file_extent_offset(leaf, fi,
7368 ext_offset);
7369 btrfs_mark_buffer_dirty(leaf);
7370
7371 btrfs_drop_extent_cache(inode, key.offset,
7372 key.offset + extent_len - 1, 0);
7373
7374 ret = btrfs_inc_extent_ref(trans, root,
7375 new_extents[i].disk_bytenr,
7376 new_extents[i].disk_num_bytes,
7377 leaf->start,
7378 root->root_key.objectid,
7379 trans->transid, key.objectid);
7380 BUG_ON(ret);
7381 btrfs_release_path(root, path);
7382
7383 inode_add_bytes(inode, extent_len);
7384
7385 ext_offset = 0;
7386 num_bytes -= extent_len;
7387 key.offset += extent_len;
7388
7389 if (num_bytes == 0)
7390 break;
7391 }
7392 BUG_ON(i >= nr_extents);
7393#endif
7394 }
7395
7396 if (extent_locked) {
7397 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7398 lock_end, GFP_NOFS);
7399 extent_locked = 0;
7400 }
7401skip:
7402 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
7403 key.offset >= search_end)
7404 break;
7405
7406 cond_resched();
7407 }
7408 ret = 0;
7409out:
7410 btrfs_release_path(root, path);
7411 if (inode) {
7412 mutex_unlock(&inode->i_mutex);
7413 if (extent_locked) {
7414 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7415 lock_end, GFP_NOFS);
7416 }
7417 iput(inode);
7418 }
7419 return ret;
7420}
7421
7422int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7423 struct btrfs_root *root,
7424 struct extent_buffer *buf, u64 orig_start)
7425{
7426 int level;
7427 int ret;
7428
7429 BUG_ON(btrfs_header_generation(buf) != trans->transid);
7430 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7431
7432 level = btrfs_header_level(buf);
7433 if (level == 0) {
7434 struct btrfs_leaf_ref *ref;
7435 struct btrfs_leaf_ref *orig_ref;
7436
7437 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7438 if (!orig_ref)
7439 return -ENOENT;
7440
7441 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7442 if (!ref) {
7443 btrfs_free_leaf_ref(root, orig_ref);
7444 return -ENOMEM;
7445 }
7446
7447 ref->nritems = orig_ref->nritems;
7448 memcpy(ref->extents, orig_ref->extents,
7449 sizeof(ref->extents[0]) * ref->nritems);
7450
7451 btrfs_free_leaf_ref(root, orig_ref);
7452
7453 ref->root_gen = trans->transid;
7454 ref->bytenr = buf->start;
7455 ref->owner = btrfs_header_owner(buf);
7456 ref->generation = btrfs_header_generation(buf);
7457
7458 ret = btrfs_add_leaf_ref(root, ref, 0);
7459 WARN_ON(ret);
7460 btrfs_free_leaf_ref(root, ref);
7461 }
7462 return 0;
7463}
7464
7465static noinline int invalidate_extent_cache(struct btrfs_root *root,
7466 struct extent_buffer *leaf,
7467 struct btrfs_block_group_cache *group,
7468 struct btrfs_root *target_root)
7469{
7470 struct btrfs_key key;
7471 struct inode *inode = NULL;
7472 struct btrfs_file_extent_item *fi;
7473 struct extent_state *cached_state = NULL;
7474 u64 num_bytes;
7475 u64 skip_objectid = 0;
7476 u32 nritems;
7477 u32 i;
7478
7479 nritems = btrfs_header_nritems(leaf);
7480 for (i = 0; i < nritems; i++) {
7481 btrfs_item_key_to_cpu(leaf, &key, i);
7482 if (key.objectid == skip_objectid ||
7483 key.type != BTRFS_EXTENT_DATA_KEY)
7484 continue;
7485 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7486 if (btrfs_file_extent_type(leaf, fi) ==
7487 BTRFS_FILE_EXTENT_INLINE)
7488 continue;
7489 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7490 continue;
7491 if (!inode || inode->i_ino != key.objectid) {
7492 iput(inode);
7493 inode = btrfs_ilookup(target_root->fs_info->sb,
7494 key.objectid, target_root, 1);
7495 }
7496 if (!inode) {
7497 skip_objectid = key.objectid;
7498 continue;
7499 }
7500 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7501
7502 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
7503 key.offset + num_bytes - 1, 0, &cached_state,
7504 GFP_NOFS);
7505 btrfs_drop_extent_cache(inode, key.offset,
7506 key.offset + num_bytes - 1, 1);
7507 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
7508 key.offset + num_bytes - 1, &cached_state,
7509 GFP_NOFS);
7510 cond_resched();
7511 }
7512 iput(inode);
7513 return 0;
7514}
7515
7516static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7517 struct btrfs_root *root,
7518 struct extent_buffer *leaf,
7519 struct btrfs_block_group_cache *group,
7520 struct inode *reloc_inode)
7521{
7522 struct btrfs_key key;
7523 struct btrfs_key extent_key;
7524 struct btrfs_file_extent_item *fi;
7525 struct btrfs_leaf_ref *ref;
7526 struct disk_extent *new_extent;
7527 u64 bytenr;
7528 u64 num_bytes;
7529 u32 nritems;
7530 u32 i;
7531 int ext_index;
7532 int nr_extent;
7533 int ret;
7534
7535 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7536 if (!new_extent)
7537 return -ENOMEM;
7538
7539 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7540 BUG_ON(!ref);
7541
7542 ext_index = -1;
7543 nritems = btrfs_header_nritems(leaf);
7544 for (i = 0; i < nritems; i++) {
7545 btrfs_item_key_to_cpu(leaf, &key, i);
7546 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
7547 continue;
7548 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7549 if (btrfs_file_extent_type(leaf, fi) ==
7550 BTRFS_FILE_EXTENT_INLINE)
7551 continue;
7552 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7553 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7554 if (bytenr == 0)
7555 continue;
7556
7557 ext_index++;
7558 if (bytenr >= group->key.objectid + group->key.offset ||
7559 bytenr + num_bytes <= group->key.objectid)
7560 continue;
7561
7562 extent_key.objectid = bytenr;
7563 extent_key.offset = num_bytes;
7564 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
7565 nr_extent = 1;
7566 ret = get_new_locations(reloc_inode, &extent_key,
7567 group->key.objectid, 1,
7568 &new_extent, &nr_extent);
7569 if (ret > 0)
7570 continue;
7571 BUG_ON(ret < 0);
7572
7573 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
7574 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
7575 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
7576 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
7577
7578 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7579 new_extent->disk_bytenr);
7580 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7581 new_extent->disk_num_bytes);
7582 btrfs_mark_buffer_dirty(leaf);
7583
7584 ret = btrfs_inc_extent_ref(trans, root,
7585 new_extent->disk_bytenr,
7586 new_extent->disk_num_bytes,
7587 leaf->start,
7588 root->root_key.objectid,
7589 trans->transid, key.objectid);
7590 BUG_ON(ret);
7591
7592 ret = btrfs_free_extent(trans, root,
7593 bytenr, num_bytes, leaf->start,
7594 btrfs_header_owner(leaf),
7595 btrfs_header_generation(leaf),
7596 key.objectid, 0);
7597 BUG_ON(ret);
7598 cond_resched();
7599 }
7600 kfree(new_extent);
7601 BUG_ON(ext_index + 1 != ref->nritems);
7602 btrfs_free_leaf_ref(root, ref);
7603 return 0;
7604}
7605
7606int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7607 struct btrfs_root *root)
7608{
7609 struct btrfs_root *reloc_root;
7610 int ret;
7611
7612 if (root->reloc_root) {
7613 reloc_root = root->reloc_root;
7614 root->reloc_root = NULL;
7615 list_add(&reloc_root->dead_list,
7616 &root->fs_info->dead_reloc_roots);
7617
7618 btrfs_set_root_bytenr(&reloc_root->root_item,
7619 reloc_root->node->start);
7620 btrfs_set_root_level(&root->root_item,
7621 btrfs_header_level(reloc_root->node));
7622 memset(&reloc_root->root_item.drop_progress, 0,
7623 sizeof(struct btrfs_disk_key));
7624 reloc_root->root_item.drop_level = 0;
7625
7626 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7627 &reloc_root->root_key,
7628 &reloc_root->root_item);
7629 BUG_ON(ret);
7630 }
7631 return 0;
7632}
7633
7634int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7635{
7636 struct btrfs_trans_handle *trans;
7637 struct btrfs_root *reloc_root;
7638 struct btrfs_root *prev_root = NULL;
7639 struct list_head dead_roots;
7640 int ret;
7641 unsigned long nr;
7642
7643 INIT_LIST_HEAD(&dead_roots);
7644 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
7645
7646 while (!list_empty(&dead_roots)) {
7647 reloc_root = list_entry(dead_roots.prev,
7648 struct btrfs_root, dead_list);
7649 list_del_init(&reloc_root->dead_list);
7650
7651 BUG_ON(reloc_root->commit_root != NULL);
7652 while (1) {
7653 trans = btrfs_join_transaction(root, 1);
7654 BUG_ON(IS_ERR(trans));
7655
7656 mutex_lock(&root->fs_info->drop_mutex);
7657 ret = btrfs_drop_snapshot(trans, reloc_root);
7658 if (ret != -EAGAIN)
7659 break;
7660 mutex_unlock(&root->fs_info->drop_mutex);
7661
7662 nr = trans->blocks_used;
7663 ret = btrfs_end_transaction(trans, root);
7664 BUG_ON(ret);
7665 btrfs_btree_balance_dirty(root, nr);
7666 }
7667
7668 free_extent_buffer(reloc_root->node);
7669
7670 ret = btrfs_del_root(trans, root->fs_info->tree_root,
7671 &reloc_root->root_key);
7672 BUG_ON(ret);
7673 mutex_unlock(&root->fs_info->drop_mutex);
7674
7675 nr = trans->blocks_used;
7676 ret = btrfs_end_transaction(trans, root);
7677 BUG_ON(ret);
7678 btrfs_btree_balance_dirty(root, nr);
7679
7680 kfree(prev_root);
7681 prev_root = reloc_root;
7682 }
7683 if (prev_root) {
7684 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
7685 kfree(prev_root);
7686 }
7687 return 0;
7688}
7689
7690int btrfs_add_dead_reloc_root(struct btrfs_root *root)
7691{
7692 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
7693 return 0;
7694}
7695
7696int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7697{
7698 struct btrfs_root *reloc_root;
7699 struct btrfs_trans_handle *trans;
7700 struct btrfs_key location;
7701 int found;
7702 int ret;
7703
7704 mutex_lock(&root->fs_info->tree_reloc_mutex);
7705 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
7706 BUG_ON(ret);
7707 found = !list_empty(&root->fs_info->dead_reloc_roots);
7708 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7709
7710 if (found) {
7711 trans = btrfs_start_transaction(root, 1);
7712 BUG_ON(IS_ERR(trans));
7713 ret = btrfs_commit_transaction(trans, root);
7714 BUG_ON(ret);
7715 }
7716
7717 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7718 location.offset = (u64)-1;
7719 location.type = BTRFS_ROOT_ITEM_KEY;
7720
7721 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7722 BUG_ON(!reloc_root);
7723 ret = btrfs_orphan_cleanup(reloc_root);
7724 BUG_ON(ret);
7725 return 0;
7726}
7727
7728static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7729 struct btrfs_root *root)
7730{
7731 struct btrfs_root *reloc_root;
7732 struct extent_buffer *eb;
7733 struct btrfs_root_item *root_item;
7734 struct btrfs_key root_key;
7735 int ret;
7736
7737 BUG_ON(!root->ref_cows);
7738 if (root->reloc_root)
7739 return 0;
7740
7741 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7742 if (!root_item)
7743 return -ENOMEM;
7744
7745 ret = btrfs_copy_root(trans, root, root->commit_root,
7746 &eb, BTRFS_TREE_RELOC_OBJECTID);
7747 BUG_ON(ret);
7748
7749 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7750 root_key.offset = root->root_key.objectid;
7751 root_key.type = BTRFS_ROOT_ITEM_KEY;
7752
7753 memcpy(root_item, &root->root_item, sizeof(root_item));
7754 btrfs_set_root_refs(root_item, 0);
7755 btrfs_set_root_bytenr(root_item, eb->start);
7756 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7757 btrfs_set_root_generation(root_item, trans->transid);
7758
7759 btrfs_tree_unlock(eb);
7760 free_extent_buffer(eb);
7761
7762 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7763 &root_key, root_item);
7764 BUG_ON(ret);
7765 kfree(root_item);
7766
7767 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7768 &root_key);
7769 BUG_ON(IS_ERR(reloc_root));
7770 reloc_root->last_trans = trans->transid;
7771 reloc_root->commit_root = NULL;
7772 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7773
7774 root->reloc_root = reloc_root;
7775 return 0;
7776}
7777
7778/*
7779 * Core function of space balance.
7780 *
7781 * The idea is to use reloc trees to relocate tree blocks in reference
7782 * counted roots. There is one reloc tree for each subvol, and all
7783 * reloc trees share the same root key objectid. Reloc trees are
7784 * snapshots of the latest committed roots of subvols (root->commit_root).
7785 *
7786 * To relocate a tree block referenced by a subvol, there are two steps:
7787 * COW the block through the subvol's reloc tree, then update the block
7788 * pointer in the subvol to point to the new block. Since all reloc
7789 * trees share the same root key objectid, special handling for tree
7790 * blocks owned by them is easy. Once a tree block has been COWed in one
7791 * reloc tree, we can use the resulting new block directly when the same
7792 * block needs to be COWed again through another reloc tree. In this way,
7793 * relocated tree blocks are shared between reloc trees, so they are
7794 * also shared between subvols.
7795 */
7796static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
7797 struct btrfs_root *root,
7798 struct btrfs_path *path,
7799 struct btrfs_key *first_key,
7800 struct btrfs_ref_path *ref_path,
7801 struct btrfs_block_group_cache *group,
7802 struct inode *reloc_inode)
7803{
7804 struct btrfs_root *reloc_root;
7805 struct extent_buffer *eb = NULL;
7806 struct btrfs_key *keys;
7807 u64 *nodes;
7808 int level;
7809 int shared_level;
7810 int lowest_level = 0;
7811 int ret;
7812
7813 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
7814 lowest_level = ref_path->owner_objectid;
7815
7816 if (!root->ref_cows) {
7817 path->lowest_level = lowest_level;
7818 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
7819 BUG_ON(ret < 0);
7820 path->lowest_level = 0;
7821 btrfs_release_path(root, path);
7822 return 0;
7823 }
7824
7825 mutex_lock(&root->fs_info->tree_reloc_mutex);
7826 ret = init_reloc_tree(trans, root);
7827 BUG_ON(ret);
7828 reloc_root = root->reloc_root;
7829
7830 shared_level = ref_path->shared_level;
7831 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
7832
7833 keys = ref_path->node_keys;
7834 nodes = ref_path->new_nodes;
7835 memset(&keys[shared_level + 1], 0,
7836 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
7837 memset(&nodes[shared_level + 1], 0,
7838 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
7839
7840 if (nodes[lowest_level] == 0) {
7841 path->lowest_level = lowest_level;
7842 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7843 0, 1);
7844 BUG_ON(ret);
7845 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
7846 eb = path->nodes[level];
7847 if (!eb || eb == reloc_root->node)
7848 break;
7849 nodes[level] = eb->start;
7850 if (level == 0)
7851 btrfs_item_key_to_cpu(eb, &keys[level], 0);
7852 else
7853 btrfs_node_key_to_cpu(eb, &keys[level], 0);
7854 }
7855 if (nodes[0] &&
7856 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7857 eb = path->nodes[0];
7858 ret = replace_extents_in_leaf(trans, reloc_root, eb,
7859 group, reloc_inode);
7860 BUG_ON(ret);
7861 }
7862 btrfs_release_path(reloc_root, path);
7863 } else {
7864 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
7865 lowest_level);
7866 BUG_ON(ret);
7867 }
7868
7869 /*
7870 * replace tree blocks in the fs tree with tree blocks in
7871 * the reloc tree.
7872 */
7873 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
7874 BUG_ON(ret < 0);
7875
7876 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7877 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7878 0, 0);
7879 BUG_ON(ret);
7880 extent_buffer_get(path->nodes[0]);
7881 eb = path->nodes[0];
7882 btrfs_release_path(reloc_root, path);
7883 ret = invalidate_extent_cache(reloc_root, eb, group, root);
7884 BUG_ON(ret);
7885 free_extent_buffer(eb);
7886 }
7887
7888 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7889 path->lowest_level = 0;
7890 return 0;
7891}
7892
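Condensing the comment above relocate_one_path(): for each reference path the relocation is a two-step dance. A rough outline under the simplest conditions (the real function also handles shared levels, leaf extent replacement, and the !ref_cows case):

	/* step 1: COW the block through the subvol's reloc tree */
	ret = init_reloc_tree(trans, root);
	ret = btrfs_search_slot(trans, root->reloc_root, first_key, path, 0, 1);

	/* step 2: swap the COWed blocks into the fs tree */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
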
7893static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7894 struct btrfs_root *root,
7895 struct btrfs_path *path,
7896 struct btrfs_key *first_key,
7897 struct btrfs_ref_path *ref_path)
7898{
7899 int ret;
7900
7901 ret = relocate_one_path(trans, root, path, first_key,
7902 ref_path, NULL, NULL);
7903 BUG_ON(ret);
7904
7905 return 0;
7906}
7907
7908static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7909 struct btrfs_root *extent_root,
7910 struct btrfs_path *path,
7911 struct btrfs_key *extent_key)
7912{
7913 int ret;
7914
7915 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7916 if (ret)
7917 goto out;
7918 ret = btrfs_del_item(trans, extent_root, path);
7919out:
7920 btrfs_release_path(extent_root, path);
7921 return ret;
7922}
7923
7924static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7925 struct btrfs_ref_path *ref_path)
7926{
7927 struct btrfs_key root_key;
7928
7929 root_key.objectid = ref_path->root_objectid;
7930 root_key.type = BTRFS_ROOT_ITEM_KEY;
7931 if (is_cowonly_root(ref_path->root_objectid))
7932 root_key.offset = 0;
7933 else
7934 root_key.offset = (u64)-1;
7935
7936 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7937}
7938
7939static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7940 struct btrfs_path *path,
7941 struct btrfs_key *extent_key,
7942 struct btrfs_block_group_cache *group,
7943 struct inode *reloc_inode, int pass)
7944{
7945 struct btrfs_trans_handle *trans;
7946 struct btrfs_root *found_root;
7947 struct btrfs_ref_path *ref_path = NULL;
7948 struct disk_extent *new_extents = NULL;
7949 int nr_extents = 0;
7950 int loops;
7951 int ret;
7952 int level;
7953 struct btrfs_key first_key;
7954 u64 prev_block = 0;
7955
7956
7957 trans = btrfs_start_transaction(extent_root, 1);
7958 BUG_ON(IS_ERR(trans));
7959
7960 if (extent_key->objectid == 0) {
7961 ret = del_extent_zero(trans, extent_root, path, extent_key);
7962 goto out;
7963 }
7964
7965 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7966 if (!ref_path) {
7967 ret = -ENOMEM;
7968 goto out;
7969 }
7970
7971 for (loops = 0; ; loops++) {
7972 if (loops == 0) {
7973 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
7974 extent_key->objectid);
7975 } else {
7976 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
7977 }
7978 if (ret < 0)
7979 goto out;
7980 if (ret > 0)
7981 break;
7982
7983 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
7984 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
7985 continue;
7986
7987 found_root = read_ref_root(extent_root->fs_info, ref_path);
7988 BUG_ON(!found_root);
7989 /*
7990	 * for reference counted trees, only process reference paths
7991 * rooted at the latest committed root.
7992 */
7993 if (found_root->ref_cows &&
7994 ref_path->root_generation != found_root->root_key.offset)
7995 continue;
7996
7997 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7998 if (pass == 0) {
7999 /*
8000 * copy data extents to new locations
8001 */
8002 u64 group_start = group->key.objectid;
8003 ret = relocate_data_extent(reloc_inode,
8004 extent_key,
8005 group_start);
8006 if (ret < 0)
8007 goto out;
8008 break;
8009 }
8010 level = 0;
8011 } else {
8012 level = ref_path->owner_objectid;
8013 }
8014
8015 if (prev_block != ref_path->nodes[level]) {
8016 struct extent_buffer *eb;
8017 u64 block_start = ref_path->nodes[level];
8018 u64 block_size = btrfs_level_size(found_root, level);
8019
8020 eb = read_tree_block(found_root, block_start,
8021 block_size, 0);
8022 if (!eb) {
8023 ret = -EIO;
8024 goto out;
8025 }
8026 btrfs_tree_lock(eb);
8027 BUG_ON(level != btrfs_header_level(eb));
8028
8029 if (level == 0)
8030 btrfs_item_key_to_cpu(eb, &first_key, 0);
8031 else
8032 btrfs_node_key_to_cpu(eb, &first_key, 0);
8033
8034 btrfs_tree_unlock(eb);
8035 free_extent_buffer(eb);
8036 prev_block = block_start;
8037 }
8038
8039 mutex_lock(&extent_root->fs_info->trans_mutex);
8040 btrfs_record_root_in_trans(found_root);
8041 mutex_unlock(&extent_root->fs_info->trans_mutex);
8042 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8043 /*
8044 * try to update data extent references while
8045 * keeping metadata shared between snapshots.
8046 */
8047 if (pass == 1) {
8048 ret = relocate_one_path(trans, found_root,
8049 path, &first_key, ref_path,
8050 group, reloc_inode);
8051 if (ret < 0)
8052 goto out;
8053 continue;
8054 }
8055 /*
8056	 * use the fallback method to process the remaining
8057 * references.
8058 */
8059 if (!new_extents) {
8060 u64 group_start = group->key.objectid;
8061 new_extents = kmalloc(sizeof(*new_extents),
8062 GFP_NOFS);
8063 if (!new_extents) {
8064 ret = -ENOMEM;
8065 goto out;
8066 }
8067 nr_extents = 1;
8068 ret = get_new_locations(reloc_inode,
8069 extent_key,
8070 group_start, 1,
8071 &new_extents,
8072 &nr_extents);
8073 if (ret)
8074 goto out;
8075 }
8076 ret = replace_one_extent(trans, found_root,
8077 path, extent_key,
8078 &first_key, ref_path,
8079 new_extents, nr_extents);
8080 } else {
8081 ret = relocate_tree_block(trans, found_root, path,
8082 &first_key, ref_path);
8083 }
8084 if (ret < 0)
8085 goto out;
8086 }
8087 ret = 0;
8088out:
8089 btrfs_end_transaction(trans, extent_root);
8090 kfree(new_extents);
8091 kfree(ref_path);
8092 return ret;
8093}
8094#endif
8095
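relocate_one_extent() above is meant to be called in passes over a block group: on pass 0, data extents are copied into the relocation inode via relocate_data_extent(); on pass 1, file extent pointers are rewritten with relocate_one_path() where metadata sharing allows, falling back to get_new_locations() plus replace_one_extent() for the rest, while tree blocks always go through relocate_tree_block(). A simplified, hypothetical sketch of the driver (the real caller walks every extent item in the group within each pass):

	for (pass = 0; pass < 2; pass++) {
		/* iterate every extent item inside the block group */
		ret = relocate_one_extent(extent_root, path, &extent_key,
					  group, reloc_inode, pass);
		if (ret < 0)
			break;
	}
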
8096static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6445static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8097{ 6446{
8098 u64 num_devices; 6447 u64 num_devices;
@@ -8556,10 +6905,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8556 ret = -ENOMEM; 6905 ret = -ENOMEM;
8557 goto error; 6906 goto error;
8558 } 6907 }
6908 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6909 GFP_NOFS);
6910 if (!cache->free_space_ctl) {
6911 kfree(cache);
6912 ret = -ENOMEM;
6913 goto error;
6914 }
8559 6915
8560 atomic_set(&cache->count, 1); 6916 atomic_set(&cache->count, 1);
8561 spin_lock_init(&cache->lock); 6917 spin_lock_init(&cache->lock);
8562 spin_lock_init(&cache->tree_lock);
8563 cache->fs_info = info; 6918 cache->fs_info = info;
8564 INIT_LIST_HEAD(&cache->list); 6919 INIT_LIST_HEAD(&cache->list);
8565 INIT_LIST_HEAD(&cache->cluster_list); 6920 INIT_LIST_HEAD(&cache->cluster_list);
@@ -8567,24 +6922,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8567 if (need_clear) 6922 if (need_clear)
8568 cache->disk_cache_state = BTRFS_DC_CLEAR; 6923 cache->disk_cache_state = BTRFS_DC_CLEAR;
8569 6924
8570 /*
8571 * we only want to have 32k of ram per block group for keeping
8572 * track of free space, and if we pass 1/2 of that we want to
8573 * start converting things over to using bitmaps
8574 */
8575 cache->extents_thresh = ((1024 * 32) / 2) /
8576 sizeof(struct btrfs_free_space);
8577
8578 read_extent_buffer(leaf, &cache->item, 6925 read_extent_buffer(leaf, &cache->item,
8579 btrfs_item_ptr_offset(leaf, path->slots[0]), 6926 btrfs_item_ptr_offset(leaf, path->slots[0]),
8580 sizeof(cache->item)); 6927 sizeof(cache->item));
8581 memcpy(&cache->key, &found_key, sizeof(found_key)); 6928 memcpy(&cache->key, &found_key, sizeof(found_key));
8582 6929
8583 key.objectid = found_key.objectid + found_key.offset; 6930 key.objectid = found_key.objectid + found_key.offset;
8584 btrfs_release_path(root, path); 6931 btrfs_release_path(path);
8585 cache->flags = btrfs_block_group_flags(&cache->item); 6932 cache->flags = btrfs_block_group_flags(&cache->item);
8586 cache->sectorsize = root->sectorsize; 6933 cache->sectorsize = root->sectorsize;
8587 6934
6935 btrfs_init_free_space_ctl(cache);
6936
8588 /* 6937 /*
8589 * We need to exclude the super stripes now so that the space 6938 * We need to exclude the super stripes now so that the space
8590 * info has super bytes accounted for, otherwise we'll think 6939 * info has super bytes accounted for, otherwise we'll think
@@ -8671,6 +7020,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8671 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7020 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8672 if (!cache) 7021 if (!cache)
8673 return -ENOMEM; 7022 return -ENOMEM;
7023 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7024 GFP_NOFS);
7025 if (!cache->free_space_ctl) {
7026 kfree(cache);
7027 return -ENOMEM;
7028 }
8674 7029
8675 cache->key.objectid = chunk_offset; 7030 cache->key.objectid = chunk_offset;
8676 cache->key.offset = size; 7031 cache->key.offset = size;
@@ -8678,19 +7033,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8678 cache->sectorsize = root->sectorsize; 7033 cache->sectorsize = root->sectorsize;
8679 cache->fs_info = root->fs_info; 7034 cache->fs_info = root->fs_info;
8680 7035
8681 /*
8682 * we only want to have 32k of ram per block group for keeping track
8683 * of free space, and if we pass 1/2 of that we want to start
8684 * converting things over to using bitmaps
8685 */
8686 cache->extents_thresh = ((1024 * 32) / 2) /
8687 sizeof(struct btrfs_free_space);
8688 atomic_set(&cache->count, 1); 7036 atomic_set(&cache->count, 1);
8689 spin_lock_init(&cache->lock); 7037 spin_lock_init(&cache->lock);
8690 spin_lock_init(&cache->tree_lock);
8691 INIT_LIST_HEAD(&cache->list); 7038 INIT_LIST_HEAD(&cache->list);
8692 INIT_LIST_HEAD(&cache->cluster_list); 7039 INIT_LIST_HEAD(&cache->cluster_list);
8693 7040
7041 btrfs_init_free_space_ctl(cache);
7042
8694 btrfs_set_block_group_used(&cache->item, bytes_used); 7043 btrfs_set_block_group_used(&cache->item, bytes_used);
8695 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7044 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8696 cache->flags = type; 7045 cache->flags = type;
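Both btrfs_read_block_groups() and btrfs_make_block_group() now allocate a separate free_space_ctl alongside the block group and defer the free-space policy (the old 32k-of-ram extents_thresh computation) to btrfs_init_free_space_ctl() in free-space-cache.c. The shared shape of the new setup, as a sketch (error unwinding differs per caller):

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;
	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
					GFP_NOFS);
	if (!cache->free_space_ctl) {
		kfree(cache);
		return -ENOMEM;
	}
	/* fill in key, flags, sectorsize, lists, locks */
	btrfs_init_free_space_ctl(cache);
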
@@ -8803,12 +7152,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8803 if (ret < 0) 7152 if (ret < 0)
8804 goto out; 7153 goto out;
8805 if (ret > 0) 7154 if (ret > 0)
8806 btrfs_release_path(tree_root, path); 7155 btrfs_release_path(path);
8807 if (ret == 0) { 7156 if (ret == 0) {
8808 ret = btrfs_del_item(trans, tree_root, path); 7157 ret = btrfs_del_item(trans, tree_root, path);
8809 if (ret) 7158 if (ret)
8810 goto out; 7159 goto out;
8811 btrfs_release_path(tree_root, path); 7160 btrfs_release_path(path);
8812 } 7161 }
8813 7162
8814 spin_lock(&root->fs_info->block_group_cache_lock); 7163 spin_lock(&root->fs_info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ebfff5b44752..a90c4a12556b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -101,7 +101,7 @@ void extent_io_exit(void)
101} 101}
102 102
103void extent_io_tree_init(struct extent_io_tree *tree, 103void extent_io_tree_init(struct extent_io_tree *tree,
104 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping)
105{ 105{
106 tree->state = RB_ROOT; 106 tree->state = RB_ROOT;
107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); 107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
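extent_io_tree_init() drops its gfp_t parameter; the allocation flags are now fixed inside extent_io.c rather than chosen per caller, so every call site shrinks to two arguments. Expected new call shape (a sketch):

	extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping);
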
@@ -965,13 +965,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
965 NULL, mask); 965 NULL, mask);
966} 966}
967 967
968static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
969 gfp_t mask)
970{
971 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
972 NULL, mask);
973}
974
975int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 968int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
976 struct extent_state **cached_state, gfp_t mask) 969 struct extent_state **cached_state, gfp_t mask)
977{ 970{
@@ -987,11 +980,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
987 cached_state, mask); 980 cached_state, mask);
988} 981}
989 982
990int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
991{
992 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
993}
994
995/* 983/*
996 * either insert or lock the state struct between start and end; use mask to 984 * either insert or lock the state struct between start and end; use mask to
997 * tell us if waiting is desired. 985 * tell us if waiting is desired.
@@ -1052,25 +1040,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1052} 1040}
1053 1041
1054/* 1042/*
1055 * helper function to set pages and extents in the tree dirty
1056 */
1057int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
1058{
1059 unsigned long index = start >> PAGE_CACHE_SHIFT;
1060 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1061 struct page *page;
1062
1063 while (index <= end_index) {
1064 page = find_get_page(tree->mapping, index);
1065 BUG_ON(!page);
1066 __set_page_dirty_nobuffers(page);
1067 page_cache_release(page);
1068 index++;
1069 }
1070 return 0;
1071}
1072
1073/*
1074 * helper function to set both pages and extents in the tree writeback 1043 * helper function to set both pages and extents in the tree writeback
1075 */ 1044 */
1076static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1045static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1843,46 +1812,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1843 bio_put(bio); 1812 bio_put(bio);
1844} 1813}
1845 1814
1846/*
1847 * IO done from prepare_write is pretty simple, we just unlock
1848 * the structs in the extent tree when done, and set the uptodate bits
1849 * as appropriate.
1850 */
1851static void end_bio_extent_preparewrite(struct bio *bio, int err)
1852{
1853 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1854 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1855 struct extent_io_tree *tree;
1856 u64 start;
1857 u64 end;
1858
1859 do {
1860 struct page *page = bvec->bv_page;
1861 struct extent_state *cached = NULL;
1862 tree = &BTRFS_I(page->mapping->host)->io_tree;
1863
1864 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1865 bvec->bv_offset;
1866 end = start + bvec->bv_len - 1;
1867
1868 if (--bvec >= bio->bi_io_vec)
1869 prefetchw(&bvec->bv_page->flags);
1870
1871 if (uptodate) {
1872 set_extent_uptodate(tree, start, end, &cached,
1873 GFP_ATOMIC);
1874 } else {
1875 ClearPageUptodate(page);
1876 SetPageError(page);
1877 }
1878
1879 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1880
1881 } while (bvec >= bio->bi_io_vec);
1882
1883 bio_put(bio);
1884}
1885
1886struct bio * 1815struct bio *
1887btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1816btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1888 gfp_t gfp_flags) 1817 gfp_t gfp_flags)
@@ -2031,7 +1960,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2031 struct btrfs_ordered_extent *ordered; 1960 struct btrfs_ordered_extent *ordered;
2032 int ret; 1961 int ret;
2033 int nr = 0; 1962 int nr = 0;
2034 size_t page_offset = 0; 1963 size_t pg_offset = 0;
2035 size_t iosize; 1964 size_t iosize;
2036 size_t disk_io_size; 1965 size_t disk_io_size;
2037 size_t blocksize = inode->i_sb->s_blocksize; 1966 size_t blocksize = inode->i_sb->s_blocksize;
@@ -2067,9 +1996,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2067 char *userpage; 1996 char *userpage;
2068 struct extent_state *cached = NULL; 1997 struct extent_state *cached = NULL;
2069 1998
2070 iosize = PAGE_CACHE_SIZE - page_offset; 1999 iosize = PAGE_CACHE_SIZE - pg_offset;
2071 userpage = kmap_atomic(page, KM_USER0); 2000 userpage = kmap_atomic(page, KM_USER0);
2072 memset(userpage + page_offset, 0, iosize); 2001 memset(userpage + pg_offset, 0, iosize);
2073 flush_dcache_page(page); 2002 flush_dcache_page(page);
2074 kunmap_atomic(userpage, KM_USER0); 2003 kunmap_atomic(userpage, KM_USER0);
2075 set_extent_uptodate(tree, cur, cur + iosize - 1, 2004 set_extent_uptodate(tree, cur, cur + iosize - 1,
@@ -2078,9 +2007,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2078 &cached, GFP_NOFS); 2007 &cached, GFP_NOFS);
2079 break; 2008 break;
2080 } 2009 }
2081 em = get_extent(inode, page, page_offset, cur, 2010 em = get_extent(inode, page, pg_offset, cur,
2082 end - cur + 1, 0); 2011 end - cur + 1, 0);
2083 if (IS_ERR(em) || !em) { 2012 if (IS_ERR_OR_NULL(em)) {
2084 SetPageError(page); 2013 SetPageError(page);
2085 unlock_extent(tree, cur, end, GFP_NOFS); 2014 unlock_extent(tree, cur, end, GFP_NOFS);
2086 break; 2015 break;
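The recurring open-coded check `if (IS_ERR(em) || !em)` collapses into IS_ERR_OR_NULL() from <linux/err.h>, which performs exactly the same test in one call:

	/* IS_ERR_OR_NULL(p) is equivalent to (!p || IS_ERR(p)) */
	em = get_extent(inode, page, pg_offset, cur, end - cur + 1, 0);
	if (IS_ERR_OR_NULL(em)) {
		SetPageError(page);
		break;
	}
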
@@ -2118,7 +2047,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2118 struct extent_state *cached = NULL; 2047 struct extent_state *cached = NULL;
2119 2048
2120 userpage = kmap_atomic(page, KM_USER0); 2049 userpage = kmap_atomic(page, KM_USER0);
2121 memset(userpage + page_offset, 0, iosize); 2050 memset(userpage + pg_offset, 0, iosize);
2122 flush_dcache_page(page); 2051 flush_dcache_page(page);
2123 kunmap_atomic(userpage, KM_USER0); 2052 kunmap_atomic(userpage, KM_USER0);
2124 2053
@@ -2127,7 +2056,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2127 unlock_extent_cached(tree, cur, cur + iosize - 1, 2056 unlock_extent_cached(tree, cur, cur + iosize - 1,
2128 &cached, GFP_NOFS); 2057 &cached, GFP_NOFS);
2129 cur = cur + iosize; 2058 cur = cur + iosize;
2130 page_offset += iosize; 2059 pg_offset += iosize;
2131 continue; 2060 continue;
2132 } 2061 }
2133 /* the get_extent function already copied into the page */ 2062 /* the get_extent function already copied into the page */
@@ -2136,7 +2065,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2136 check_page_uptodate(tree, page); 2065 check_page_uptodate(tree, page);
2137 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2066 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2138 cur = cur + iosize; 2067 cur = cur + iosize;
2139 page_offset += iosize; 2068 pg_offset += iosize;
2140 continue; 2069 continue;
2141 } 2070 }
2142 /* we have an inline extent but it didn't get marked up 2071 /* we have an inline extent but it didn't get marked up
@@ -2146,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2146 SetPageError(page); 2075 SetPageError(page);
2147 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2076 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2148 cur = cur + iosize; 2077 cur = cur + iosize;
2149 page_offset += iosize; 2078 pg_offset += iosize;
2150 continue; 2079 continue;
2151 } 2080 }
2152 2081
@@ -2159,7 +2088,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2159 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2088 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2160 pnr -= page->index; 2089 pnr -= page->index;
2161 ret = submit_extent_page(READ, tree, page, 2090 ret = submit_extent_page(READ, tree, page,
2162 sector, disk_io_size, page_offset, 2091 sector, disk_io_size, pg_offset,
2163 bdev, bio, pnr, 2092 bdev, bio, pnr,
2164 end_bio_extent_readpage, mirror_num, 2093 end_bio_extent_readpage, mirror_num,
2165 *bio_flags, 2094 *bio_flags,
@@ -2170,7 +2099,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2170 if (ret) 2099 if (ret)
2171 SetPageError(page); 2100 SetPageError(page);
2172 cur = cur + iosize; 2101 cur = cur + iosize;
2173 page_offset += iosize; 2102 pg_offset += iosize;
2174 } 2103 }
2175 if (!nr) { 2104 if (!nr) {
2176 if (!PageError(page)) 2105 if (!PageError(page))
@@ -2365,7 +2294,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2365 } 2294 }
2366 em = epd->get_extent(inode, page, pg_offset, cur, 2295 em = epd->get_extent(inode, page, pg_offset, cur,
2367 end - cur + 1, 1); 2296 end - cur + 1, 1);
2368 if (IS_ERR(em) || !em) { 2297 if (IS_ERR_OR_NULL(em)) {
2369 SetPageError(page); 2298 SetPageError(page);
2370 break; 2299 break;
2371 } 2300 }
@@ -2744,128 +2673,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2744} 2673}
2745 2674
2746/* 2675/*
2747 * simple commit_write call, set_range_dirty is used to mark both
2748 * the pages and the extent records as dirty
2749 */
2750int extent_commit_write(struct extent_io_tree *tree,
2751 struct inode *inode, struct page *page,
2752 unsigned from, unsigned to)
2753{
2754 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2755
2756 set_page_extent_mapped(page);
2757 set_page_dirty(page);
2758
2759 if (pos > inode->i_size) {
2760 i_size_write(inode, pos);
2761 mark_inode_dirty(inode);
2762 }
2763 return 0;
2764}
2765
2766int extent_prepare_write(struct extent_io_tree *tree,
2767 struct inode *inode, struct page *page,
2768 unsigned from, unsigned to, get_extent_t *get_extent)
2769{
2770 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2771 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2772 u64 block_start;
2773 u64 orig_block_start;
2774 u64 block_end;
2775 u64 cur_end;
2776 struct extent_map *em;
2777 unsigned blocksize = 1 << inode->i_blkbits;
2778 size_t page_offset = 0;
2779 size_t block_off_start;
2780 size_t block_off_end;
2781 int err = 0;
2782 int iocount = 0;
2783 int ret = 0;
2784 int isnew;
2785
2786 set_page_extent_mapped(page);
2787
2788 block_start = (page_start + from) & ~((u64)blocksize - 1);
2789 block_end = (page_start + to - 1) | (blocksize - 1);
2790 orig_block_start = block_start;
2791
2792 lock_extent(tree, page_start, page_end, GFP_NOFS);
2793 while (block_start <= block_end) {
2794 em = get_extent(inode, page, page_offset, block_start,
2795 block_end - block_start + 1, 1);
2796 if (IS_ERR(em) || !em)
2797 goto err;
2798
2799 cur_end = min(block_end, extent_map_end(em) - 1);
2800 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2801 block_off_end = block_off_start + blocksize;
2802 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2803
2804 if (!PageUptodate(page) && isnew &&
2805 (block_off_end > to || block_off_start < from)) {
2806 void *kaddr;
2807
2808 kaddr = kmap_atomic(page, KM_USER0);
2809 if (block_off_end > to)
2810 memset(kaddr + to, 0, block_off_end - to);
2811 if (block_off_start < from)
2812 memset(kaddr + block_off_start, 0,
2813 from - block_off_start);
2814 flush_dcache_page(page);
2815 kunmap_atomic(kaddr, KM_USER0);
2816 }
2817 if ((em->block_start != EXTENT_MAP_HOLE &&
2818 em->block_start != EXTENT_MAP_INLINE) &&
2819 !isnew && !PageUptodate(page) &&
2820 (block_off_end > to || block_off_start < from) &&
2821 !test_range_bit(tree, block_start, cur_end,
2822 EXTENT_UPTODATE, 1, NULL)) {
2823 u64 sector;
2824 u64 extent_offset = block_start - em->start;
2825 size_t iosize;
2826 sector = (em->block_start + extent_offset) >> 9;
2827 iosize = (cur_end - block_start + blocksize) &
2828 ~((u64)blocksize - 1);
2829 /*
2830 * we've already got the extent locked, but we
2831 * need to split the state such that our end_bio
2832 * handler can clear the lock.
2833 */
2834 set_extent_bit(tree, block_start,
2835 block_start + iosize - 1,
2836 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2837 ret = submit_extent_page(READ, tree, page,
2838 sector, iosize, page_offset, em->bdev,
2839 NULL, 1,
2840 end_bio_extent_preparewrite, 0,
2841 0, 0);
2842 if (ret && !err)
2843 err = ret;
2844 iocount++;
2845 block_start = block_start + iosize;
2846 } else {
2847 struct extent_state *cached = NULL;
2848
2849 set_extent_uptodate(tree, block_start, cur_end, &cached,
2850 GFP_NOFS);
2851 unlock_extent_cached(tree, block_start, cur_end,
2852 &cached, GFP_NOFS);
2853 block_start = cur_end + 1;
2854 }
2855 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2856 free_extent_map(em);
2857 }
2858 if (iocount) {
2859 wait_extent_bit(tree, orig_block_start,
2860 block_end, EXTENT_LOCKED);
2861 }
2862 check_page_uptodate(tree, page);
2863err:
2864 /* FIXME, zero out newly allocated blocks on error */
2865 return err;
2866}
2867
2868/*
2869 * a helper for releasepage, this tests for areas of the page that 2676 * a helper for releasepage, this tests for areas of the page that
2870 * are locked or under IO and drops the related state bits if it is safe 2677 * are locked or under IO and drops the related state bits if it is safe
2871 * to drop the page. 2678 * to drop the page.
@@ -2923,7 +2730,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2923 len = end - start + 1; 2730 len = end - start + 1;
2924 write_lock(&map->lock); 2731 write_lock(&map->lock);
2925 em = lookup_extent_mapping(map, start, len); 2732 em = lookup_extent_mapping(map, start, len);
2926 if (!em || IS_ERR(em)) { 2733 if (IS_ERR_OR_NULL(em)) {
2927 write_unlock(&map->lock); 2734 write_unlock(&map->lock);
2928 break; 2735 break;
2929 } 2736 }
@@ -2951,33 +2758,6 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2951 return try_release_extent_state(map, tree, page, mask); 2758 return try_release_extent_state(map, tree, page, mask);
2952} 2759}
2953 2760
2954sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2955 get_extent_t *get_extent)
2956{
2957 struct inode *inode = mapping->host;
2958 struct extent_state *cached_state = NULL;
2959 u64 start = iblock << inode->i_blkbits;
2960 sector_t sector = 0;
2961 size_t blksize = (1 << inode->i_blkbits);
2962 struct extent_map *em;
2963
2964 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2965 0, &cached_state, GFP_NOFS);
2966 em = get_extent(inode, NULL, 0, start, blksize, 0);
2967 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2968 start + blksize - 1, &cached_state, GFP_NOFS);
2969 if (!em || IS_ERR(em))
2970 return 0;
2971
2972 if (em->block_start > EXTENT_MAP_LAST_BYTE)
2973 goto out;
2974
2975 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2976out:
2977 free_extent_map(em);
2978 return sector;
2979}
2980
2981/* 2761/*
2982 * helper function for fiemap, which doesn't want to see any holes. 2762 * helper function for fiemap, which doesn't want to see any holes.
2983 * This maps until we find something past 'last' 2763 * This maps until we find something past 'last'
@@ -3000,7 +2780,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
3000 break; 2780 break;
3001 len = (len + sectorsize - 1) & ~(sectorsize - 1); 2781 len = (len + sectorsize - 1) & ~(sectorsize - 1);
3002 em = get_extent(inode, NULL, 0, offset, len, 0); 2782 em = get_extent(inode, NULL, 0, offset, len, 0);
3003 if (!em || IS_ERR(em)) 2783 if (IS_ERR_OR_NULL(em))
3004 return em; 2784 return em;
3005 2785
3006 /* if this isn't a hole return it */ 2786 /* if this isn't a hole return it */
@@ -3054,7 +2834,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3054 * because there might be preallocation past i_size 2834 * because there might be preallocation past i_size
3055 */ 2835 */
3056 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2836 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
3057 path, inode->i_ino, -1, 0); 2837 path, btrfs_ino(inode), -1, 0);
3058 if (ret < 0) { 2838 if (ret < 0) {
3059 btrfs_free_path(path); 2839 btrfs_free_path(path);
3060 return ret; 2840 return ret;
@@ -3067,7 +2847,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3067 found_type = btrfs_key_type(&found_key); 2847 found_type = btrfs_key_type(&found_key);
3068 2848
3069 /* No extents, but there might be delalloc bits */ 2849 /* No extents, but there might be delalloc bits */
3070 if (found_key.objectid != inode->i_ino || 2850 if (found_key.objectid != btrfs_ino(inode) ||
3071 found_type != BTRFS_EXTENT_DATA_KEY) { 2851 found_type != BTRFS_EXTENT_DATA_KEY) {
3072 /* have to trust i_size as the end */ 2852 /* have to trust i_size as the end */
3073 last = (u64)-1; 2853 last = (u64)-1;
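Direct inode->i_ino reads give way to btrfs_ino(); with the per-tree free-ino work, the VFS inode number can differ from the btrfs objectid, so the helper derives the objectid from the inode's location key instead. A simplified sketch of what btrfs_ino() amounts to (not the exact definition):

	static inline u64 btrfs_ino(struct inode *inode)
	{
		/* objectid of the inode item in its subvolume tree */
		return BTRFS_I(inode)->location.objectid;
	}
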
@@ -3290,8 +3070,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3290 3070
3291struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3071struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3292 u64 start, unsigned long len, 3072 u64 start, unsigned long len,
3293 struct page *page0, 3073 struct page *page0)
3294 gfp_t mask)
3295{ 3074{
3296 unsigned long num_pages = num_extent_pages(start, len); 3075 unsigned long num_pages = num_extent_pages(start, len);
3297 unsigned long i; 3076 unsigned long i;
@@ -3312,7 +3091,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3312 } 3091 }
3313 rcu_read_unlock(); 3092 rcu_read_unlock();
3314 3093
3315 eb = __alloc_extent_buffer(tree, start, len, mask); 3094 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
3316 if (!eb) 3095 if (!eb)
3317 return NULL; 3096 return NULL;
3318 3097
@@ -3329,7 +3108,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3329 i = 0; 3108 i = 0;
3330 } 3109 }
3331 for (; i < num_pages; i++, index++) { 3110 for (; i < num_pages; i++, index++) {
3332 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3111 p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
3333 if (!p) { 3112 if (!p) {
3334 WARN_ON(1); 3113 WARN_ON(1);
3335 goto free_eb; 3114 goto free_eb;
@@ -3401,8 +3180,7 @@ free_eb:
3401} 3180}
3402 3181
3403struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3182struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3404 u64 start, unsigned long len, 3183 u64 start, unsigned long len)
3405 gfp_t mask)
3406{ 3184{
3407 struct extent_buffer *eb; 3185 struct extent_buffer *eb;
3408 3186
@@ -3463,13 +3241,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3463 return 0; 3241 return 0;
3464} 3242}
3465 3243
3466int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3467 struct extent_buffer *eb)
3468{
3469 return wait_on_extent_writeback(tree, eb->start,
3470 eb->start + eb->len - 1);
3471}
3472
3473int set_extent_buffer_dirty(struct extent_io_tree *tree, 3244int set_extent_buffer_dirty(struct extent_io_tree *tree,
3474 struct extent_buffer *eb) 3245 struct extent_buffer *eb)
3475{ 3246{
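Note: the gfp_t plumbing is dropped from the extent-buffer API; allocation is unconditionally GFP_NOFS inside the helpers now, and page0 is the only optional argument left. A caller under the new prototypes looks roughly like this (sketch, names per the hunks above):

	struct extent_buffer *eb;

	eb = find_extent_buffer(tree, start, len);	/* was: ..., len, GFP_NOFS */
	if (!eb)
		eb = alloc_extent_buffer(tree, start, len, NULL);
	if (!eb)
		return NULL;	/* allocation failed under memory pressure */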
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index af2d7179c372..4e8445a4757c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -153,23 +153,14 @@ static inline int extent_compress_type(unsigned long bio_flags)
153 153
154struct extent_map_tree; 154struct extent_map_tree;
155 155
156static inline struct extent_state *extent_state_next(struct extent_state *state)
157{
158 struct rb_node *node;
159 node = rb_next(&state->rb_node);
160 if (!node)
161 return NULL;
162 return rb_entry(node, struct extent_state, rb_node);
163}
164
165typedef struct extent_map *(get_extent_t)(struct inode *inode, 156typedef struct extent_map *(get_extent_t)(struct inode *inode,
166 struct page *page, 157 struct page *page,
167 size_t page_offset, 158 size_t pg_offset,
168 u64 start, u64 len, 159 u64 start, u64 len,
169 int create); 160 int create);
170 161
171void extent_io_tree_init(struct extent_io_tree *tree, 162void extent_io_tree_init(struct extent_io_tree *tree,
172 struct address_space *mapping, gfp_t mask); 163 struct address_space *mapping);
173int try_release_extent_mapping(struct extent_map_tree *map, 164int try_release_extent_mapping(struct extent_map_tree *map,
174 struct extent_io_tree *tree, struct page *page, 165 struct extent_io_tree *tree, struct page *page,
175 gfp_t mask); 166 gfp_t mask);
@@ -215,14 +206,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
215 gfp_t mask); 206 gfp_t mask);
216int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 207int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
217 gfp_t mask); 208 gfp_t mask);
218int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
219 gfp_t mask);
220int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
221 u64 end, gfp_t mask);
222int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 209int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
223 struct extent_state **cached_state, gfp_t mask); 210 struct extent_state **cached_state, gfp_t mask);
224int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
225 gfp_t mask);
226int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 211int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
227 u64 *start_ret, u64 *end_ret, int bits); 212 u64 *start_ret, u64 *end_ret, int bits);
228struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 213struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -243,28 +228,17 @@ int extent_readpages(struct extent_io_tree *tree,
243 struct address_space *mapping, 228 struct address_space *mapping,
244 struct list_head *pages, unsigned nr_pages, 229 struct list_head *pages, unsigned nr_pages,
245 get_extent_t get_extent); 230 get_extent_t get_extent);
246int extent_prepare_write(struct extent_io_tree *tree,
247 struct inode *inode, struct page *page,
248 unsigned from, unsigned to, get_extent_t *get_extent);
249int extent_commit_write(struct extent_io_tree *tree,
250 struct inode *inode, struct page *page,
251 unsigned from, unsigned to);
252sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
253 get_extent_t *get_extent);
254int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 231int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
255 __u64 start, __u64 len, get_extent_t *get_extent); 232 __u64 start, __u64 len, get_extent_t *get_extent);
256int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
257int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 233int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
258int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 234int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
259void set_page_extent_mapped(struct page *page); 235void set_page_extent_mapped(struct page *page);
260 236
261struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 237struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
262 u64 start, unsigned long len, 238 u64 start, unsigned long len,
263 struct page *page0, 239 struct page *page0);
264 gfp_t mask);
265struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 240struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
266 u64 start, unsigned long len, 241 u64 start, unsigned long len);
267 gfp_t mask);
268void free_extent_buffer(struct extent_buffer *eb); 242void free_extent_buffer(struct extent_buffer *eb);
269int read_extent_buffer_pages(struct extent_io_tree *tree, 243int read_extent_buffer_pages(struct extent_io_tree *tree,
270 struct extent_buffer *eb, u64 start, int wait, 244 struct extent_buffer *eb, u64 start, int wait,
@@ -292,16 +266,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
292 unsigned long src_offset, unsigned long len); 266 unsigned long src_offset, unsigned long len);
293void memset_extent_buffer(struct extent_buffer *eb, char c, 267void memset_extent_buffer(struct extent_buffer *eb, char c,
294 unsigned long start, unsigned long len); 268 unsigned long start, unsigned long len);
295int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
296 struct extent_buffer *eb);
297int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
298int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); 269int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
299int clear_extent_buffer_dirty(struct extent_io_tree *tree, 270int clear_extent_buffer_dirty(struct extent_io_tree *tree,
300 struct extent_buffer *eb); 271 struct extent_buffer *eb);
301int set_extent_buffer_dirty(struct extent_io_tree *tree, 272int set_extent_buffer_dirty(struct extent_io_tree *tree,
302 struct extent_buffer *eb); 273 struct extent_buffer *eb);
303int test_extent_buffer_dirty(struct extent_io_tree *tree,
304 struct extent_buffer *eb);
305int set_extent_buffer_uptodate(struct extent_io_tree *tree, 274int set_extent_buffer_uptodate(struct extent_io_tree *tree,
306 struct extent_buffer *eb); 275 struct extent_buffer *eb);
307int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 276int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -319,7 +288,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
319 unsigned long *map_start, 288 unsigned long *map_start,
320 unsigned long *map_len, int km); 289 unsigned long *map_len, int km);
321void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); 290void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
322int release_extent_buffer_tail_pages(struct extent_buffer *eb);
323int extent_range_uptodate(struct extent_io_tree *tree, 291int extent_range_uptodate(struct extent_io_tree *tree,
324 u64 start, u64 end); 292 u64 start, u64 end);
325int extent_clear_unlock_delalloc(struct inode *inode, 293int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a24a3f2fa13e..2d0410344ea3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -28,12 +28,11 @@ void extent_map_exit(void)
28/** 28/**
29 * extent_map_tree_init - initialize extent map tree 29 * extent_map_tree_init - initialize extent map tree
30 * @tree: tree to initialize 30 * @tree: tree to initialize
31 * @mask: flags for memory allocations during tree operations
32 * 31 *
33 * Initialize the extent tree @tree. Should be called for each new inode 32 * Initialize the extent tree @tree. Should be called for each new inode
34 * or other user of the extent_map interface. 33 * or other user of the extent_map interface.
35 */ 34 */
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 35void extent_map_tree_init(struct extent_map_tree *tree)
37{ 36{
38 tree->map = RB_ROOT; 37 tree->map = RB_ROOT;
39 rwlock_init(&tree->lock); 38 rwlock_init(&tree->lock);
@@ -41,16 +40,15 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
41 40
42/** 41/**
43 * alloc_extent_map - allocate new extent map structure 42 * alloc_extent_map - allocate new extent map structure
44 * @mask: memory allocation flags
45 * 43 *
46 * Allocate a new extent_map structure. The new structure is 44 * Allocate a new extent_map structure. The new structure is
47 * returned with a reference count of one and needs to be 45 * returned with a reference count of one and needs to be
48 * freed using free_extent_map() 46 * freed using free_extent_map()
49 */ 47 */
50struct extent_map *alloc_extent_map(gfp_t mask) 48struct extent_map *alloc_extent_map(void)
51{ 49{
52 struct extent_map *em; 50 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 51 em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
54 if (!em) 52 if (!em)
55 return NULL; 53 return NULL;
56 em->in_tree = 0; 54 em->in_tree = 0;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 28b44dbd1e35..33a7890b1f40 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -49,14 +49,14 @@ static inline u64 extent_map_block_end(struct extent_map *em)
49 return em->block_start + em->block_len; 49 return em->block_start + em->block_len;
50} 50}
51 51
52void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); 52void extent_map_tree_init(struct extent_map_tree *tree);
53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
54 u64 start, u64 len); 54 u64 start, u64 len);
55int add_extent_mapping(struct extent_map_tree *tree, 55int add_extent_mapping(struct extent_map_tree *tree,
56 struct extent_map *em); 56 struct extent_map *em);
57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
58 58
59struct extent_map *alloc_extent_map(gfp_t mask); 59struct extent_map *alloc_extent_map(void);
60void free_extent_map(struct extent_map *em); 60void free_extent_map(struct extent_map *em);
61int __init extent_map_init(void); 61int __init extent_map_init(void);
62void extent_map_exit(void); 62void extent_map_exit(void);
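Note: extent_map allocation follows the same pattern; the mask parameters are gone and GFP_NOFS is hard-coded, matching what callers such as btrfs_drop_extent_cache() were passing anyway. Typical usage after the change (sketch):

	struct extent_map_tree em_tree;
	struct extent_map *em;

	extent_map_tree_init(&em_tree);	/* was: extent_map_tree_init(&em_tree, GFP_NOFS) */
	em = alloc_extent_map();	/* was: alloc_extent_map(GFP_NOFS) */
	if (!em)
		return -ENOMEM;
	/* ... fill in em->start, em->len, em->block_start ... */
	free_extent_map(em);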
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index fb9b02667e75..90d4ee52cd45 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -193,7 +193,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
193 u32 item_size; 193 u32 item_size;
194 194
195 if (item) 195 if (item)
196 btrfs_release_path(root, path); 196 btrfs_release_path(path);
197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, 197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
198 path, disk_bytenr, 0); 198 path, disk_bytenr, 0);
199 if (IS_ERR(item)) { 199 if (IS_ERR(item)) {
@@ -208,12 +208,13 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
208 EXTENT_NODATASUM, GFP_NOFS); 208 EXTENT_NODATASUM, GFP_NOFS);
209 } else { 209 } else {
210 printk(KERN_INFO "btrfs no csum found " 210 printk(KERN_INFO "btrfs no csum found "
211 "for inode %lu start %llu\n", 211 "for inode %llu start %llu\n",
212 inode->i_ino, 212 (unsigned long long)
213 btrfs_ino(inode),
213 (unsigned long long)offset); 214 (unsigned long long)offset);
214 } 215 }
215 item = NULL; 216 item = NULL;
216 btrfs_release_path(root, path); 217 btrfs_release_path(path);
217 goto found; 218 goto found;
218 } 219 }
219 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 220 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
@@ -266,7 +267,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
266} 267}
267 268
268int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 269int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
269 struct list_head *list) 270 struct list_head *list, int search_commit)
270{ 271{
271 struct btrfs_key key; 272 struct btrfs_key key;
272 struct btrfs_path *path; 273 struct btrfs_path *path;
@@ -283,6 +284,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
283 path = btrfs_alloc_path(); 284 path = btrfs_alloc_path();
284 BUG_ON(!path); 285 BUG_ON(!path);
285 286
287 if (search_commit) {
288 path->skip_locking = 1;
289 path->reada = 2;
290 path->search_commit_root = 1;
291 }
292
286 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 293 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
287 key.offset = start; 294 key.offset = start;
288 key.type = BTRFS_EXTENT_CSUM_KEY; 295 key.type = BTRFS_EXTENT_CSUM_KEY;
@@ -630,7 +637,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
630 if (key.offset < bytenr) 637 if (key.offset < bytenr)
631 break; 638 break;
632 } 639 }
633 btrfs_release_path(root, path); 640 btrfs_release_path(path);
634 } 641 }
635 ret = 0; 642 ret = 0;
636out: 643out:
@@ -722,7 +729,7 @@ again:
722 * at this point, we know the tree has an item, but it isn't big 729 * at this point, we know the tree has an item, but it isn't big
723 * enough yet to put our csum in. Grow it 730 * enough yet to put our csum in. Grow it
724 */ 731 */
725 btrfs_release_path(root, path); 732 btrfs_release_path(path);
726 ret = btrfs_search_slot(trans, root, &file_key, path, 733 ret = btrfs_search_slot(trans, root, &file_key, path,
727 csum_size, 1); 734 csum_size, 1);
728 if (ret < 0) 735 if (ret < 0)
@@ -765,7 +772,7 @@ again:
765 } 772 }
766 773
767insert: 774insert:
768 btrfs_release_path(root, path); 775 btrfs_release_path(path);
769 csum_offset = 0; 776 csum_offset = 0;
770 if (found_next) { 777 if (found_next) {
771 u64 tmp = total_bytes + root->sectorsize; 778 u64 tmp = total_bytes + root->sectorsize;
@@ -849,7 +856,7 @@ next_sector:
849 } 856 }
850 btrfs_mark_buffer_dirty(path->nodes[0]); 857 btrfs_mark_buffer_dirty(path->nodes[0]);
851 if (total_bytes < sums->len) { 858 if (total_bytes < sums->len) {
852 btrfs_release_path(root, path); 859 btrfs_release_path(path);
853 cond_resched(); 860 cond_resched();
854 goto again; 861 goto again;
855 } 862 }
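Note: btrfs_lookup_csums_range() gains a search_commit flag; when set, the helper flips the path to the commit root with locking skipped and readahead enabled (see the hunk above), which suits a read-only scanner like the scrub code added elsewhere in this series. A call collecting checksums for a range might look like (sketch; csum_list collects struct btrfs_ordered_sum entries):

	LIST_HEAD(csum_list);
	int ret;

	/* search_commit = 1 sets path->search_commit_root, skip_locking
	 * and reada inside the helper */
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root,
				       start, end, &csum_list, 1);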
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75899a01dded..58ddc4442159 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -191,9 +191,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
191 } 191 }
192 while (1) { 192 while (1) {
193 if (!split) 193 if (!split)
194 split = alloc_extent_map(GFP_NOFS); 194 split = alloc_extent_map();
195 if (!split2) 195 if (!split2)
196 split2 = alloc_extent_map(GFP_NOFS); 196 split2 = alloc_extent_map();
197 BUG_ON(!split || !split2); 197 BUG_ON(!split || !split2);
198 198
199 write_lock(&em_tree->lock); 199 write_lock(&em_tree->lock);
@@ -298,6 +298,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
298 struct btrfs_path *path; 298 struct btrfs_path *path;
299 struct btrfs_key key; 299 struct btrfs_key key;
300 struct btrfs_key new_key; 300 struct btrfs_key new_key;
301 u64 ino = btrfs_ino(inode);
301 u64 search_start = start; 302 u64 search_start = start;
302 u64 disk_bytenr = 0; 303 u64 disk_bytenr = 0;
303 u64 num_bytes = 0; 304 u64 num_bytes = 0;
@@ -318,14 +319,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
318 319
319 while (1) { 320 while (1) {
320 recow = 0; 321 recow = 0;
321 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 322 ret = btrfs_lookup_file_extent(trans, root, path, ino,
322 search_start, -1); 323 search_start, -1);
323 if (ret < 0) 324 if (ret < 0)
324 break; 325 break;
325 if (ret > 0 && path->slots[0] > 0 && search_start == start) { 326 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
326 leaf = path->nodes[0]; 327 leaf = path->nodes[0];
327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 328 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
328 if (key.objectid == inode->i_ino && 329 if (key.objectid == ino &&
329 key.type == BTRFS_EXTENT_DATA_KEY) 330 key.type == BTRFS_EXTENT_DATA_KEY)
330 path->slots[0]--; 331 path->slots[0]--;
331 } 332 }
@@ -346,7 +347,7 @@ next_slot:
346 } 347 }
347 348
348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 349 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
349 if (key.objectid > inode->i_ino || 350 if (key.objectid > ino ||
350 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 351 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
351 break; 352 break;
352 353
@@ -376,7 +377,7 @@ next_slot:
376 377
377 search_start = max(key.offset, start); 378 search_start = max(key.offset, start);
378 if (recow) { 379 if (recow) {
379 btrfs_release_path(root, path); 380 btrfs_release_path(path);
380 continue; 381 continue;
381 } 382 }
382 383
@@ -393,7 +394,7 @@ next_slot:
393 ret = btrfs_duplicate_item(trans, root, path, 394 ret = btrfs_duplicate_item(trans, root, path,
394 &new_key); 395 &new_key);
395 if (ret == -EAGAIN) { 396 if (ret == -EAGAIN) {
396 btrfs_release_path(root, path); 397 btrfs_release_path(path);
397 continue; 398 continue;
398 } 399 }
399 if (ret < 0) 400 if (ret < 0)
@@ -516,7 +517,7 @@ next_slot:
516 del_nr = 0; 517 del_nr = 0;
517 del_slot = 0; 518 del_slot = 0;
518 519
519 btrfs_release_path(root, path); 520 btrfs_release_path(path);
520 continue; 521 continue;
521 } 522 }
522 523
@@ -592,6 +593,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
592 int del_slot = 0; 593 int del_slot = 0;
593 int recow; 594 int recow;
594 int ret; 595 int ret;
596 u64 ino = btrfs_ino(inode);
595 597
596 btrfs_drop_extent_cache(inode, start, end - 1, 0); 598 btrfs_drop_extent_cache(inode, start, end - 1, 0);
597 599
@@ -600,7 +602,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
600again: 602again:
601 recow = 0; 603 recow = 0;
602 split = start; 604 split = start;
603 key.objectid = inode->i_ino; 605 key.objectid = ino;
604 key.type = BTRFS_EXTENT_DATA_KEY; 606 key.type = BTRFS_EXTENT_DATA_KEY;
605 key.offset = split; 607 key.offset = split;
606 608
@@ -612,8 +614,7 @@ again:
612 614
613 leaf = path->nodes[0]; 615 leaf = path->nodes[0];
614 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 616 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
615 BUG_ON(key.objectid != inode->i_ino || 617 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
616 key.type != BTRFS_EXTENT_DATA_KEY);
617 fi = btrfs_item_ptr(leaf, path->slots[0], 618 fi = btrfs_item_ptr(leaf, path->slots[0],
618 struct btrfs_file_extent_item); 619 struct btrfs_file_extent_item);
619 BUG_ON(btrfs_file_extent_type(leaf, fi) != 620 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +631,7 @@ again:
630 other_start = 0; 631 other_start = 0;
631 other_end = start; 632 other_end = start;
632 if (extent_mergeable(leaf, path->slots[0] - 1, 633 if (extent_mergeable(leaf, path->slots[0] - 1,
633 inode->i_ino, bytenr, orig_offset, 634 ino, bytenr, orig_offset,
634 &other_start, &other_end)) { 635 &other_start, &other_end)) {
635 new_key.offset = end; 636 new_key.offset = end;
636 btrfs_set_item_key_safe(trans, root, path, &new_key); 637 btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +654,7 @@ again:
653 other_start = end; 654 other_start = end;
654 other_end = 0; 655 other_end = 0;
655 if (extent_mergeable(leaf, path->slots[0] + 1, 656 if (extent_mergeable(leaf, path->slots[0] + 1,
656 inode->i_ino, bytenr, orig_offset, 657 ino, bytenr, orig_offset,
657 &other_start, &other_end)) { 658 &other_start, &other_end)) {
658 fi = btrfs_item_ptr(leaf, path->slots[0], 659 fi = btrfs_item_ptr(leaf, path->slots[0],
659 struct btrfs_file_extent_item); 660 struct btrfs_file_extent_item);
@@ -681,7 +682,7 @@ again:
681 new_key.offset = split; 682 new_key.offset = split;
682 ret = btrfs_duplicate_item(trans, root, path, &new_key); 683 ret = btrfs_duplicate_item(trans, root, path, &new_key);
683 if (ret == -EAGAIN) { 684 if (ret == -EAGAIN) {
684 btrfs_release_path(root, path); 685 btrfs_release_path(path);
685 goto again; 686 goto again;
686 } 687 }
687 BUG_ON(ret < 0); 688 BUG_ON(ret < 0);
@@ -702,7 +703,7 @@ again:
702 703
703 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 704 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
704 root->root_key.objectid, 705 root->root_key.objectid,
705 inode->i_ino, orig_offset); 706 ino, orig_offset);
706 BUG_ON(ret); 707 BUG_ON(ret);
707 708
708 if (split == start) { 709 if (split == start) {
@@ -718,10 +719,10 @@ again:
718 other_start = end; 719 other_start = end;
719 other_end = 0; 720 other_end = 0;
720 if (extent_mergeable(leaf, path->slots[0] + 1, 721 if (extent_mergeable(leaf, path->slots[0] + 1,
721 inode->i_ino, bytenr, orig_offset, 722 ino, bytenr, orig_offset,
722 &other_start, &other_end)) { 723 &other_start, &other_end)) {
723 if (recow) { 724 if (recow) {
724 btrfs_release_path(root, path); 725 btrfs_release_path(path);
725 goto again; 726 goto again;
726 } 727 }
727 extent_end = other_end; 728 extent_end = other_end;
@@ -729,16 +730,16 @@ again:
729 del_nr++; 730 del_nr++;
730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 731 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
731 0, root->root_key.objectid, 732 0, root->root_key.objectid,
732 inode->i_ino, orig_offset); 733 ino, orig_offset);
733 BUG_ON(ret); 734 BUG_ON(ret);
734 } 735 }
735 other_start = 0; 736 other_start = 0;
736 other_end = start; 737 other_end = start;
737 if (extent_mergeable(leaf, path->slots[0] - 1, 738 if (extent_mergeable(leaf, path->slots[0] - 1,
738 inode->i_ino, bytenr, orig_offset, 739 ino, bytenr, orig_offset,
739 &other_start, &other_end)) { 740 &other_start, &other_end)) {
740 if (recow) { 741 if (recow) {
741 btrfs_release_path(root, path); 742 btrfs_release_path(path);
742 goto again; 743 goto again;
743 } 744 }
744 key.offset = other_start; 745 key.offset = other_start;
@@ -746,7 +747,7 @@ again:
746 del_nr++; 747 del_nr++;
747 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 748 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
748 0, root->root_key.objectid, 749 0, root->root_key.objectid,
749 inode->i_ino, orig_offset); 750 ino, orig_offset);
750 BUG_ON(ret); 751 BUG_ON(ret);
751 } 752 }
752 if (del_nr == 0) { 753 if (del_nr == 0) {
@@ -1375,7 +1376,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1375 while (1) { 1376 while (1) {
1376 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1377 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1377 alloc_end - cur_offset, 0); 1378 alloc_end - cur_offset, 0);
1378 BUG_ON(IS_ERR(em) || !em); 1379 BUG_ON(IS_ERR_OR_NULL(em));
1379 last_byte = min(extent_map_end(em), alloc_end); 1380 last_byte = min(extent_map_end(em), alloc_end);
1380 last_byte = (last_byte + mask) & ~mask; 1381 last_byte = (last_byte + mask) & ~mask;
1381 if (em->block_start == EXTENT_MAP_HOLE || 1382 if (em->block_start == EXTENT_MAP_HOLE ||
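Note: throughout file.c, inode->i_ino gives way to btrfs_ino(inode), which returns the 64-bit btrfs objectid; with the per-root inode-number handling introduced in this series, the VFS i_ino (an unsigned long) is no longer a safe stand-in for tree keys. btrfs_drop_extents() and btrfs_mark_extent_written() also hoist the value into a local u64 once, so their loops stop re-evaluating it (sketch of the pattern):

	u64 ino = btrfs_ino(inode);	/* 64-bit objectid, not inode->i_ino */
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = search_start;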
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 63731a1fb0a1..70d45795d758 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,18 +25,17 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "inode-map.h"
28 29
29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 30#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 31#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
31 32
32static void recalculate_thresholds(struct btrfs_block_group_cache 33static int link_free_space(struct btrfs_free_space_ctl *ctl,
33 *block_group);
34static int link_free_space(struct btrfs_block_group_cache *block_group,
35 struct btrfs_free_space *info); 34 struct btrfs_free_space *info);
36 35
37struct inode *lookup_free_space_inode(struct btrfs_root *root, 36static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_block_group_cache 37 struct btrfs_path *path,
39 *block_group, struct btrfs_path *path) 38 u64 offset)
40{ 39{
41 struct btrfs_key key; 40 struct btrfs_key key;
42 struct btrfs_key location; 41 struct btrfs_key location;
@@ -46,22 +45,15 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
46 struct inode *inode = NULL; 45 struct inode *inode = NULL;
47 int ret; 46 int ret;
48 47
49 spin_lock(&block_group->lock);
50 if (block_group->inode)
51 inode = igrab(block_group->inode);
52 spin_unlock(&block_group->lock);
53 if (inode)
54 return inode;
55
56 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 48 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
57 key.offset = block_group->key.objectid; 49 key.offset = offset;
58 key.type = 0; 50 key.type = 0;
59 51
60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 52 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
61 if (ret < 0) 53 if (ret < 0)
62 return ERR_PTR(ret); 54 return ERR_PTR(ret);
63 if (ret > 0) { 55 if (ret > 0) {
64 btrfs_release_path(root, path); 56 btrfs_release_path(path);
65 return ERR_PTR(-ENOENT); 57 return ERR_PTR(-ENOENT);
66 } 58 }
67 59
@@ -70,7 +62,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
70 struct btrfs_free_space_header); 62 struct btrfs_free_space_header);
71 btrfs_free_space_key(leaf, header, &disk_key); 63 btrfs_free_space_key(leaf, header, &disk_key);
72 btrfs_disk_key_to_cpu(&location, &disk_key); 64 btrfs_disk_key_to_cpu(&location, &disk_key);
73 btrfs_release_path(root, path); 65 btrfs_release_path(path);
74 66
75 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 67 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
76 if (!inode) 68 if (!inode)
@@ -84,6 +76,27 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 76
85 inode->i_mapping->flags &= ~__GFP_FS; 77 inode->i_mapping->flags &= ~__GFP_FS;
86 78
79 return inode;
80}
81
82struct inode *lookup_free_space_inode(struct btrfs_root *root,
83 struct btrfs_block_group_cache
84 *block_group, struct btrfs_path *path)
85{
86 struct inode *inode = NULL;
87
88 spin_lock(&block_group->lock);
89 if (block_group->inode)
90 inode = igrab(block_group->inode);
91 spin_unlock(&block_group->lock);
92 if (inode)
93 return inode;
94
95 inode = __lookup_free_space_inode(root, path,
96 block_group->key.objectid);
97 if (IS_ERR(inode))
98 return inode;
99
87 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
88 if (!root->fs_info->closing) { 101 if (!root->fs_info->closing) {
89 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
@@ -94,24 +107,18 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
94 return inode; 107 return inode;
95} 108}
96 109
97int create_free_space_inode(struct btrfs_root *root, 110int __create_free_space_inode(struct btrfs_root *root,
98 struct btrfs_trans_handle *trans, 111 struct btrfs_trans_handle *trans,
99 struct btrfs_block_group_cache *block_group, 112 struct btrfs_path *path, u64 ino, u64 offset)
100 struct btrfs_path *path)
101{ 113{
102 struct btrfs_key key; 114 struct btrfs_key key;
103 struct btrfs_disk_key disk_key; 115 struct btrfs_disk_key disk_key;
104 struct btrfs_free_space_header *header; 116 struct btrfs_free_space_header *header;
105 struct btrfs_inode_item *inode_item; 117 struct btrfs_inode_item *inode_item;
106 struct extent_buffer *leaf; 118 struct extent_buffer *leaf;
107 u64 objectid;
108 int ret; 119 int ret;
109 120
110 ret = btrfs_find_free_objectid(trans, root, 0, &objectid); 121 ret = btrfs_insert_empty_inode(trans, root, path, ino);
111 if (ret < 0)
112 return ret;
113
114 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
115 if (ret) 122 if (ret)
116 return ret; 123 return ret;
117 124
@@ -131,19 +138,18 @@ int create_free_space_inode(struct btrfs_root *root,
131 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 138 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
132 btrfs_set_inode_nlink(leaf, inode_item, 1); 139 btrfs_set_inode_nlink(leaf, inode_item, 1);
133 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 140 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
134 btrfs_set_inode_block_group(leaf, inode_item, 141 btrfs_set_inode_block_group(leaf, inode_item, offset);
135 block_group->key.objectid);
136 btrfs_mark_buffer_dirty(leaf); 142 btrfs_mark_buffer_dirty(leaf);
137 btrfs_release_path(root, path); 143 btrfs_release_path(path);
138 144
139 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 145 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
140 key.offset = block_group->key.objectid; 146 key.offset = offset;
141 key.type = 0; 147 key.type = 0;
142 148
143 ret = btrfs_insert_empty_item(trans, root, path, &key, 149 ret = btrfs_insert_empty_item(trans, root, path, &key,
144 sizeof(struct btrfs_free_space_header)); 150 sizeof(struct btrfs_free_space_header));
145 if (ret < 0) { 151 if (ret < 0) {
146 btrfs_release_path(root, path); 152 btrfs_release_path(path);
147 return ret; 153 return ret;
148 } 154 }
149 leaf = path->nodes[0]; 155 leaf = path->nodes[0];
@@ -152,11 +158,27 @@ int create_free_space_inode(struct btrfs_root *root,
152 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 158 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
153 btrfs_set_free_space_key(leaf, header, &disk_key); 159 btrfs_set_free_space_key(leaf, header, &disk_key);
154 btrfs_mark_buffer_dirty(leaf); 160 btrfs_mark_buffer_dirty(leaf);
155 btrfs_release_path(root, path); 161 btrfs_release_path(path);
156 162
157 return 0; 163 return 0;
158} 164}
159 165
166int create_free_space_inode(struct btrfs_root *root,
167 struct btrfs_trans_handle *trans,
168 struct btrfs_block_group_cache *block_group,
169 struct btrfs_path *path)
170{
171 int ret;
172 u64 ino;
173
174 ret = btrfs_find_free_objectid(root, &ino);
175 if (ret < 0)
176 return ret;
177
178 return __create_free_space_inode(root, trans, path, ino,
179 block_group->key.objectid);
180}
181
160int btrfs_truncate_free_space_cache(struct btrfs_root *root, 182int btrfs_truncate_free_space_cache(struct btrfs_root *root,
161 struct btrfs_trans_handle *trans, 183 struct btrfs_trans_handle *trans,
162 struct btrfs_path *path, 184 struct btrfs_path *path,
@@ -187,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
187 return ret; 209 return ret;
188 } 210 }
189 211
190 return btrfs_update_inode(trans, root, inode); 212 ret = btrfs_update_inode(trans, root, inode);
213 return ret;
191} 214}
192 215
193static int readahead_cache(struct inode *inode) 216static int readahead_cache(struct inode *inode)
@@ -209,15 +232,13 @@ static int readahead_cache(struct inode *inode)
209 return 0; 232 return 0;
210} 233}
211 234
212int load_free_space_cache(struct btrfs_fs_info *fs_info, 235int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
213 struct btrfs_block_group_cache *block_group) 236 struct btrfs_free_space_ctl *ctl,
237 struct btrfs_path *path, u64 offset)
214{ 238{
215 struct btrfs_root *root = fs_info->tree_root;
216 struct inode *inode;
217 struct btrfs_free_space_header *header; 239 struct btrfs_free_space_header *header;
218 struct extent_buffer *leaf; 240 struct extent_buffer *leaf;
219 struct page *page; 241 struct page *page;
220 struct btrfs_path *path;
221 u32 *checksums = NULL, *crc; 242 u32 *checksums = NULL, *crc;
222 char *disk_crcs = NULL; 243 char *disk_crcs = NULL;
223 struct btrfs_key key; 244 struct btrfs_key key;
@@ -225,76 +246,47 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
225 u64 num_entries; 246 u64 num_entries;
226 u64 num_bitmaps; 247 u64 num_bitmaps;
227 u64 generation; 248 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
229 u32 cur_crc = ~(u32)0; 249 u32 cur_crc = ~(u32)0;
230 pgoff_t index = 0; 250 pgoff_t index = 0;
231 unsigned long first_page_offset; 251 unsigned long first_page_offset;
232 int num_checksums; 252 int num_checksums;
233 int ret = 0; 253 int ret = 0, ret2;
234
235 /*
236 * If we're unmounting then just return, since this does a search on the
237 * normal root and not the commit root and we could deadlock.
238 */
239 smp_mb();
240 if (fs_info->closing)
241 return 0;
242
243 /*
244 * If this block group has been marked to be cleared for one reason or
245 * another then we can't trust the on disk cache, so just return.
246 */
247 spin_lock(&block_group->lock);
248 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
249 spin_unlock(&block_group->lock);
250 return 0;
251 }
252 spin_unlock(&block_group->lock);
253 254
254 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
255 256
256 path = btrfs_alloc_path();
257 if (!path)
258 return 0;
259
260 inode = lookup_free_space_inode(root, block_group, path);
261 if (IS_ERR(inode)) {
262 btrfs_free_path(path);
263 return 0;
264 }
265
266 /* Nothing in the space cache, goodbye */ 257 /* Nothing in the space cache, goodbye */
267 if (!i_size_read(inode)) { 258 if (!i_size_read(inode))
268 btrfs_free_path(path);
269 goto out; 259 goto out;
270 }
271 260
272 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 261 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
273 key.offset = block_group->key.objectid; 262 key.offset = offset;
274 key.type = 0; 263 key.type = 0;
275 264
276 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 265 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
277 if (ret) { 266 if (ret < 0)
278 btrfs_free_path(path); 267 goto out;
268 else if (ret > 0) {
269 btrfs_release_path(path);
270 ret = 0;
279 goto out; 271 goto out;
280 } 272 }
281 273
274 ret = -1;
275
282 leaf = path->nodes[0]; 276 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0], 277 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header); 278 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header); 279 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 280 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header); 281 generation = btrfs_free_space_generation(leaf, header);
288 btrfs_free_path(path); 282 btrfs_release_path(path);
289 283
290 if (BTRFS_I(inode)->generation != generation) { 284 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 285 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292 " not match free space cache generation (%llu) for " 286 " not match free space cache generation (%llu)\n",
293 "block group %llu\n",
294 (unsigned long long)BTRFS_I(inode)->generation, 287 (unsigned long long)BTRFS_I(inode)->generation,
295 (unsigned long long)generation, 288 (unsigned long long)generation);
296 (unsigned long long)block_group->key.objectid); 289 goto out;
297 goto free_cache;
298 } 290 }
299 291
300 if (!num_entries) 292 if (!num_entries)
@@ -311,10 +303,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
311 goto out; 303 goto out;
312 304
313 ret = readahead_cache(inode); 305 ret = readahead_cache(inode);
314 if (ret) { 306 if (ret)
315 ret = 0;
316 goto out; 307 goto out;
317 }
318 308
319 while (1) { 309 while (1) {
320 struct btrfs_free_space_entry *entry; 310 struct btrfs_free_space_entry *entry;
@@ -333,10 +323,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
333 } 323 }
334 324
335 page = grab_cache_page(inode->i_mapping, index); 325 page = grab_cache_page(inode->i_mapping, index);
336 if (!page) { 326 if (!page)
337 ret = 0;
338 goto free_cache; 327 goto free_cache;
339 }
340 328
341 if (!PageUptodate(page)) { 329 if (!PageUptodate(page)) {
342 btrfs_readpage(NULL, page); 330 btrfs_readpage(NULL, page);
@@ -345,9 +333,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
345 unlock_page(page); 333 unlock_page(page);
346 page_cache_release(page); 334 page_cache_release(page);
347 printk(KERN_ERR "btrfs: error reading free " 335 printk(KERN_ERR "btrfs: error reading free "
348 "space cache: %llu\n", 336 "space cache\n");
349 (unsigned long long)
350 block_group->key.objectid);
351 goto free_cache; 337 goto free_cache;
352 } 338 }
353 } 339 }
@@ -360,13 +346,10 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
360 gen = addr + (sizeof(u32) * num_checksums); 346 gen = addr + (sizeof(u32) * num_checksums);
361 if (*gen != BTRFS_I(inode)->generation) { 347 if (*gen != BTRFS_I(inode)->generation) {
362 printk(KERN_ERR "btrfs: space cache generation" 348 printk(KERN_ERR "btrfs: space cache generation"
363 " (%llu) does not match inode (%llu) " 349 " (%llu) does not match inode (%llu)\n",
364 "for block group %llu\n",
365 (unsigned long long)*gen, 350 (unsigned long long)*gen,
366 (unsigned long long) 351 (unsigned long long)
367 BTRFS_I(inode)->generation, 352 BTRFS_I(inode)->generation);
368 (unsigned long long)
369 block_group->key.objectid);
370 kunmap(page); 353 kunmap(page);
371 unlock_page(page); 354 unlock_page(page);
372 page_cache_release(page); 355 page_cache_release(page);
@@ -382,9 +365,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
382 PAGE_CACHE_SIZE - start_offset); 365 PAGE_CACHE_SIZE - start_offset);
383 btrfs_csum_final(cur_crc, (char *)&cur_crc); 366 btrfs_csum_final(cur_crc, (char *)&cur_crc);
384 if (cur_crc != *crc) { 367 if (cur_crc != *crc) {
385 printk(KERN_ERR "btrfs: crc mismatch for page %lu in " 368 printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
386 "block group %llu\n", index, 369 index);
387 (unsigned long long)block_group->key.objectid);
388 kunmap(page); 370 kunmap(page);
389 unlock_page(page); 371 unlock_page(page);
390 page_cache_release(page); 372 page_cache_release(page);
@@ -417,9 +399,9 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
417 } 399 }
418 400
419 if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 401 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
420 spin_lock(&block_group->tree_lock); 402 spin_lock(&ctl->tree_lock);
421 ret = link_free_space(block_group, e); 403 ret = link_free_space(ctl, e);
422 spin_unlock(&block_group->tree_lock); 404 spin_unlock(&ctl->tree_lock);
423 BUG_ON(ret); 405 BUG_ON(ret);
424 } else { 406 } else {
425 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
@@ -431,11 +413,11 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
431 page_cache_release(page); 413 page_cache_release(page);
432 goto free_cache; 414 goto free_cache;
433 } 415 }
434 spin_lock(&block_group->tree_lock); 416 spin_lock(&ctl->tree_lock);
435 ret = link_free_space(block_group, e); 417 ret2 = link_free_space(ctl, e);
436 block_group->total_bitmaps++; 418 ctl->total_bitmaps++;
437 recalculate_thresholds(block_group); 419 ctl->op->recalc_thresholds(ctl);
438 spin_unlock(&block_group->tree_lock); 420 spin_unlock(&ctl->tree_lock);
439 list_add_tail(&e->list, &bitmaps); 421 list_add_tail(&e->list, &bitmaps);
440 } 422 }
441 423
@@ -471,41 +453,97 @@ next:
471 index++; 453 index++;
472 } 454 }
473 455
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
485 ret = 1; 456 ret = 1;
486out: 457out:
487 kfree(checksums); 458 kfree(checksums);
488 kfree(disk_crcs); 459 kfree(disk_crcs);
489 iput(inode);
490 return ret; 460 return ret;
491
492free_cache: 461free_cache:
493 /* This cache is bogus, make sure it gets cleared */ 462 __btrfs_remove_free_space_cache(ctl);
463 goto out;
464}
465
466int load_free_space_cache(struct btrfs_fs_info *fs_info,
467 struct btrfs_block_group_cache *block_group)
468{
469 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
470 struct btrfs_root *root = fs_info->tree_root;
471 struct inode *inode;
472 struct btrfs_path *path;
473 int ret;
474 bool matched;
475 u64 used = btrfs_block_group_used(&block_group->item);
476
477 /*
478 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock.
480 */
481 smp_mb();
482 if (fs_info->closing)
483 return 0;
484
485 /*
486 * If this block group has been marked to be cleared for one reason or
487 * another then we can't trust the on disk cache, so just return.
488 */
494 spin_lock(&block_group->lock); 489 spin_lock(&block_group->lock);
495 block_group->disk_cache_state = BTRFS_DC_CLEAR; 490 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
491 spin_unlock(&block_group->lock);
492 return 0;
493 }
496 spin_unlock(&block_group->lock); 494 spin_unlock(&block_group->lock);
497 btrfs_remove_free_space_cache(block_group); 495
498 goto out; 496 path = btrfs_alloc_path();
497 if (!path)
498 return 0;
499
500 inode = lookup_free_space_inode(root, block_group, path);
501 if (IS_ERR(inode)) {
502 btrfs_free_path(path);
503 return 0;
504 }
505
506 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
507 path, block_group->key.objectid);
508 btrfs_free_path(path);
509 if (ret <= 0)
510 goto out;
511
512 spin_lock(&ctl->tree_lock);
513 matched = (ctl->free_space == (block_group->key.offset - used -
514 block_group->bytes_super));
515 spin_unlock(&ctl->tree_lock);
516
517 if (!matched) {
518 __btrfs_remove_free_space_cache(ctl);
 519 printk(KERN_ERR "block group %llu has the wrong amount of free "
520 "space\n", block_group->key.objectid);
521 ret = -1;
522 }
523out:
524 if (ret < 0) {
525 /* This cache is bogus, make sure it gets cleared */
526 spin_lock(&block_group->lock);
527 block_group->disk_cache_state = BTRFS_DC_CLEAR;
528 spin_unlock(&block_group->lock);
529 ret = 0;
530
531 printk(KERN_ERR "btrfs: failed to load free space cache "
532 "for block group %llu\n", block_group->key.objectid);
533 }
534
535 iput(inode);
536 return ret;
499} 537}
500 538
501int btrfs_write_out_cache(struct btrfs_root *root, 539int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
502 struct btrfs_trans_handle *trans, 540 struct btrfs_free_space_ctl *ctl,
503 struct btrfs_block_group_cache *block_group, 541 struct btrfs_block_group_cache *block_group,
504 struct btrfs_path *path) 542 struct btrfs_trans_handle *trans,
543 struct btrfs_path *path, u64 offset)
505{ 544{
506 struct btrfs_free_space_header *header; 545 struct btrfs_free_space_header *header;
507 struct extent_buffer *leaf; 546 struct extent_buffer *leaf;
508 struct inode *inode;
509 struct rb_node *node; 547 struct rb_node *node;
510 struct list_head *pos, *n; 548 struct list_head *pos, *n;
511 struct page **pages; 549 struct page **pages;
@@ -522,35 +560,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
522 int index = 0, num_pages = 0; 560 int index = 0, num_pages = 0;
523 int entries = 0; 561 int entries = 0;
524 int bitmaps = 0; 562 int bitmaps = 0;
525 int ret = 0; 563 int ret = -1;
526 bool next_page = false; 564 bool next_page = false;
527 bool out_of_space = false; 565 bool out_of_space = false;
528 566
529 root = root->fs_info->tree_root;
530
531 INIT_LIST_HEAD(&bitmap_list); 567 INIT_LIST_HEAD(&bitmap_list);
532 568
533 spin_lock(&block_group->lock); 569 node = rb_first(&ctl->free_space_offset);
534 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 570 if (!node)
535 spin_unlock(&block_group->lock);
536 return 0;
537 }
538 spin_unlock(&block_group->lock);
539
540 inode = lookup_free_space_inode(root, block_group, path);
541 if (IS_ERR(inode))
542 return 0;
543
544 if (!i_size_read(inode)) {
545 iput(inode);
546 return 0; 571 return 0;
547 }
548 572
549 node = rb_first(&block_group->free_space_offset); 573 if (!i_size_read(inode))
550 if (!node) { 574 return -1;
551 iput(inode);
552 return 0;
553 }
554 575
555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT; 577 PAGE_CACHE_SHIFT;
@@ -560,16 +581,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
560 581
561 /* We need a checksum per page. */ 582 /* We need a checksum per page. */
562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
563 if (!crc) { 584 if (!crc)
564 iput(inode); 585 return -1;
565 return 0;
566 }
567 586
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 587 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) { 588 if (!pages) {
570 kfree(crc); 589 kfree(crc);
571 iput(inode); 590 return -1;
572 return 0;
573 } 591 }
574 592
575 /* Since the first page has all of our checksums and our generation we 593 /* Since the first page has all of our checksums and our generation we
@@ -579,7 +597,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); 597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580 598
581 /* Get the cluster for this block_group if it exists */ 599 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list)) 600 if (block_group && !list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next, 601 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster, 602 struct btrfs_free_cluster,
585 block_group_list); 603 block_group_list);
@@ -621,7 +639,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
621 * When searching for pinned extents, we need to start at our start 639 * When searching for pinned extents, we need to start at our start
622 * offset. 640 * offset.
623 */ 641 */
624 start = block_group->key.objectid; 642 if (block_group)
643 start = block_group->key.objectid;
625 644
626 /* Write out the extent entries */ 645 /* Write out the extent entries */
627 do { 646 do {
@@ -679,8 +698,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
679 * We want to add any pinned extents to our free space cache 698 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space 699 * so we don't leak the space
681 */ 700 */
682 while (!next_page && (start < block_group->key.objectid + 701 while (block_group && !next_page &&
683 block_group->key.offset)) { 702 (start < block_group->key.objectid +
703 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end, 704 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY); 705 EXTENT_DIRTY);
686 if (ret) { 706 if (ret) {
@@ -798,12 +818,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
798 filemap_write_and_wait(inode->i_mapping); 818 filemap_write_and_wait(inode->i_mapping);
799 819
800 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 820 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
801 key.offset = block_group->key.objectid; 821 key.offset = offset;
802 key.type = 0; 822 key.type = 0;
803 823
804 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 824 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
805 if (ret < 0) { 825 if (ret < 0) {
806 ret = 0; 826 ret = -1;
807 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 827 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
808 EXTENT_DIRTY | EXTENT_DELALLOC | 828 EXTENT_DIRTY | EXTENT_DELALLOC |
809 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 829 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
@@ -816,13 +836,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
816 path->slots[0]--; 836 path->slots[0]--;
817 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 837 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
818 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 838 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
819 found_key.offset != block_group->key.objectid) { 839 found_key.offset != offset) {
820 ret = 0; 840 ret = -1;
821 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 841 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
822 EXTENT_DIRTY | EXTENT_DELALLOC | 842 EXTENT_DIRTY | EXTENT_DELALLOC |
823 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 843 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
824 GFP_NOFS); 844 GFP_NOFS);
825 btrfs_release_path(root, path); 845 btrfs_release_path(path);
826 goto out_free; 846 goto out_free;
827 } 847 }
828 } 848 }
@@ -832,49 +852,83 @@ int btrfs_write_out_cache(struct btrfs_root *root,
832 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 852 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
833 btrfs_set_free_space_generation(leaf, header, trans->transid); 853 btrfs_set_free_space_generation(leaf, header, trans->transid);
834 btrfs_mark_buffer_dirty(leaf); 854 btrfs_mark_buffer_dirty(leaf);
835 btrfs_release_path(root, path); 855 btrfs_release_path(path);
836 856
837 ret = 1; 857 ret = 1;
838 858
839out_free: 859out_free:
840 if (ret == 0) { 860 if (ret != 1) {
841 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 861 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
842 spin_lock(&block_group->lock);
843 block_group->disk_cache_state = BTRFS_DC_ERROR;
844 spin_unlock(&block_group->lock);
845 BTRFS_I(inode)->generation = 0; 862 BTRFS_I(inode)->generation = 0;
846 } 863 }
847 kfree(checksums); 864 kfree(checksums);
848 kfree(pages); 865 kfree(pages);
849 btrfs_update_inode(trans, root, inode); 866 btrfs_update_inode(trans, root, inode);
867 return ret;
868}
869
870int btrfs_write_out_cache(struct btrfs_root *root,
871 struct btrfs_trans_handle *trans,
872 struct btrfs_block_group_cache *block_group,
873 struct btrfs_path *path)
874{
875 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
876 struct inode *inode;
877 int ret = 0;
878
879 root = root->fs_info->tree_root;
880
881 spin_lock(&block_group->lock);
882 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
883 spin_unlock(&block_group->lock);
884 return 0;
885 }
886 spin_unlock(&block_group->lock);
887
888 inode = lookup_free_space_inode(root, block_group, path);
889 if (IS_ERR(inode))
890 return 0;
891
892 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
893 path, block_group->key.objectid);
894 if (ret < 0) {
895 spin_lock(&block_group->lock);
896 block_group->disk_cache_state = BTRFS_DC_ERROR;
897 spin_unlock(&block_group->lock);
898 ret = 0;
899
 900 printk(KERN_ERR "btrfs: failed to write free space cache "
901 "for block group %llu\n", block_group->key.objectid);
902 }
903
850 iput(inode); 904 iput(inode);
851 return ret; 905 return ret;
852} 906}
853 907
854static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 908static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
855 u64 offset) 909 u64 offset)
856{ 910{
857 BUG_ON(offset < bitmap_start); 911 BUG_ON(offset < bitmap_start);
858 offset -= bitmap_start; 912 offset -= bitmap_start;
859 return (unsigned long)(div64_u64(offset, sectorsize)); 913 return (unsigned long)(div_u64(offset, unit));
860} 914}
861 915
862static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 916static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
863{ 917{
864 return (unsigned long)(div64_u64(bytes, sectorsize)); 918 return (unsigned long)(div_u64(bytes, unit));
865} 919}
866 920
867static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 921static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
868 u64 offset) 922 u64 offset)
869{ 923{
870 u64 bitmap_start; 924 u64 bitmap_start;
871 u64 bytes_per_bitmap; 925 u64 bytes_per_bitmap;
872 926
873 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 927 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
874 bitmap_start = offset - block_group->key.objectid; 928 bitmap_start = offset - ctl->start;
875 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 929 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
876 bitmap_start *= bytes_per_bitmap; 930 bitmap_start *= bytes_per_bitmap;
877 bitmap_start += block_group->key.objectid; 931 bitmap_start += ctl->start;
878 932
879 return bitmap_start; 933 return bitmap_start;
880} 934}
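Note: the bitmap helpers now work in terms of ctl->unit (bytes per bit) and ctl->start rather than a block group's sectorsize and objectid, so any btrfs_free_space_ctl user can pick its own granularity. A worked example, assuming 4 KiB pages and a block-group ctl with unit == 4096:

	/* BITS_PER_BITMAP  = PAGE_CACHE_SIZE * 8 = 32768 bits
	 * bytes_per_bitmap = 32768 * ctl->unit   = 128 MiB      */
	u64 off = ctl->start + 200ULL * 1024 * 1024;	/* 200 MiB in */
	u64 bm = offset_to_bitmap(ctl, off);		/* ctl->start + 128 MiB */
	unsigned long bit = offset_to_bit(bm, ctl->unit, off);
							/* 72 MiB / 4096 = 18432 */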
@@ -932,10 +986,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
932 * offset. 986 * offset.
933 */ 987 */
934static struct btrfs_free_space * 988static struct btrfs_free_space *
935tree_search_offset(struct btrfs_block_group_cache *block_group, 989tree_search_offset(struct btrfs_free_space_ctl *ctl,
936 u64 offset, int bitmap_only, int fuzzy) 990 u64 offset, int bitmap_only, int fuzzy)
937{ 991{
938 struct rb_node *n = block_group->free_space_offset.rb_node; 992 struct rb_node *n = ctl->free_space_offset.rb_node;
939 struct btrfs_free_space *entry, *prev = NULL; 993 struct btrfs_free_space *entry, *prev = NULL;
940 994
941 /* find entry that is closest to the 'offset' */ 995 /* find entry that is closest to the 'offset' */
@@ -1031,8 +1085,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1031 break; 1085 break;
1032 } 1086 }
1033 } 1087 }
1034 if (entry->offset + BITS_PER_BITMAP * 1088 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1035 block_group->sectorsize > offset)
1036 return entry; 1089 return entry;
1037 } else if (entry->offset + entry->bytes > offset) 1090 } else if (entry->offset + entry->bytes > offset)
1038 return entry; 1091 return entry;
@@ -1043,7 +1096,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1043 while (1) { 1096 while (1) {
1044 if (entry->bitmap) { 1097 if (entry->bitmap) {
1045 if (entry->offset + BITS_PER_BITMAP * 1098 if (entry->offset + BITS_PER_BITMAP *
1046 block_group->sectorsize > offset) 1099 ctl->unit > offset)
1047 break; 1100 break;
1048 } else { 1101 } else {
1049 if (entry->offset + entry->bytes > offset) 1102 if (entry->offset + entry->bytes > offset)
@@ -1059,42 +1112,47 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1059} 1112}
1060 1113
1061static inline void 1114static inline void
1062__unlink_free_space(struct btrfs_block_group_cache *block_group, 1115__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1063 struct btrfs_free_space *info) 1116 struct btrfs_free_space *info)
1064{ 1117{
1065 rb_erase(&info->offset_index, &block_group->free_space_offset); 1118 rb_erase(&info->offset_index, &ctl->free_space_offset);
1066 block_group->free_extents--; 1119 ctl->free_extents--;
1067} 1120}
1068 1121
1069static void unlink_free_space(struct btrfs_block_group_cache *block_group, 1122static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1070 struct btrfs_free_space *info) 1123 struct btrfs_free_space *info)
1071{ 1124{
1072 __unlink_free_space(block_group, info); 1125 __unlink_free_space(ctl, info);
1073 block_group->free_space -= info->bytes; 1126 ctl->free_space -= info->bytes;
1074} 1127}
1075 1128
1076static int link_free_space(struct btrfs_block_group_cache *block_group, 1129static int link_free_space(struct btrfs_free_space_ctl *ctl,
1077 struct btrfs_free_space *info) 1130 struct btrfs_free_space *info)
1078{ 1131{
1079 int ret = 0; 1132 int ret = 0;
1080 1133
1081 BUG_ON(!info->bitmap && !info->bytes); 1134 BUG_ON(!info->bitmap && !info->bytes);
1082 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 1135 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1083 &info->offset_index, (info->bitmap != NULL)); 1136 &info->offset_index, (info->bitmap != NULL));
1084 if (ret) 1137 if (ret)
1085 return ret; 1138 return ret;
1086 1139
1087 block_group->free_space += info->bytes; 1140 ctl->free_space += info->bytes;
1088 block_group->free_extents++; 1141 ctl->free_extents++;
1089 return ret; 1142 return ret;
1090} 1143}
1091 1144
1092static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 1145static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1093{ 1146{
1147 struct btrfs_block_group_cache *block_group = ctl->private;
1094 u64 max_bytes; 1148 u64 max_bytes;
1095 u64 bitmap_bytes; 1149 u64 bitmap_bytes;
1096 u64 extent_bytes; 1150 u64 extent_bytes;
1097 u64 size = block_group->key.offset; 1151 u64 size = block_group->key.offset;
1152 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1153 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1154
1155 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1098 1156
1099 /* 1157 /*
1100 * The goal is to keep the total amount of memory used per 1gb of space 1158 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1112,10 +1170,10 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1112 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1170 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1113 * we add more bitmaps. 1171 * we add more bitmaps.
1114 */ 1172 */
1115 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1173 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1116 1174
1117 if (bitmap_bytes >= max_bytes) { 1175 if (bitmap_bytes >= max_bytes) {
1118 block_group->extents_thresh = 0; 1176 ctl->extents_thresh = 0;
1119 return; 1177 return;
1120 } 1178 }
1121 1179
@@ -1126,47 +1184,43 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1126 extent_bytes = max_bytes - bitmap_bytes; 1184 extent_bytes = max_bytes - bitmap_bytes;
1127 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1185 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1128 1186
1129 block_group->extents_thresh = 1187 ctl->extents_thresh =
1130 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1188 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1131} 1189}
1132 1190
1133static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 1191static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1134 struct btrfs_free_space *info, u64 offset, 1192 struct btrfs_free_space *info, u64 offset,
1135 u64 bytes) 1193 u64 bytes)
1136{ 1194{
1137 unsigned long start, end; 1195 unsigned long start, count;
1138 unsigned long i;
1139 1196
1140 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1197 start = offset_to_bit(info->offset, ctl->unit, offset);
1141 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1198 count = bytes_to_bits(bytes, ctl->unit);
1142 BUG_ON(end > BITS_PER_BITMAP); 1199 BUG_ON(start + count > BITS_PER_BITMAP);
1143 1200
1144 for (i = start; i < end; i++) 1201 bitmap_clear(info->bitmap, start, count);
1145 clear_bit(i, info->bitmap);
1146 1202
1147 info->bytes -= bytes; 1203 info->bytes -= bytes;
1148 block_group->free_space -= bytes; 1204 ctl->free_space -= bytes;
1149} 1205}
1150 1206
1151static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 1207static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1152 struct btrfs_free_space *info, u64 offset, 1208 struct btrfs_free_space *info, u64 offset,
1153 u64 bytes) 1209 u64 bytes)
1154{ 1210{
1155 unsigned long start, end; 1211 unsigned long start, count;
1156 unsigned long i;
1157 1212
1158 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1213 start = offset_to_bit(info->offset, ctl->unit, offset);
1159 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1214 count = bytes_to_bits(bytes, ctl->unit);
1160 BUG_ON(end > BITS_PER_BITMAP); 1215 BUG_ON(start + count > BITS_PER_BITMAP);
1161 1216
1162 for (i = start; i < end; i++) 1217 bitmap_set(info->bitmap, start, count);
1163 set_bit(i, info->bitmap);
1164 1218
1165 info->bytes += bytes; 1219 info->bytes += bytes;
1166 block_group->free_space += bytes; 1220 ctl->free_space += bytes;
1167} 1221}
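
Besides the ctl conversion, bitmap_clear_bits() and bitmap_set_bits() drop their per-bit clear_bit()/set_bit() loops for the range helpers from <linux/bitmap.h>; those also skip the per-bit atomic operations, which is safe here because ctl->tree_lock already serializes all updates. Naive standalone equivalents, for illustration only (the kernel helpers work a word at a time):

/*
 * Illustration only: simplistic equivalents of the <linux/bitmap.h>
 * range helpers used above. The real ones fill whole words at once and
 * are non-atomic; that is fine under ctl->tree_lock.
 */
#include <limits.h>
#include <stdio.h>

#define LONG_BITS (sizeof(unsigned long) * CHAR_BIT)

static void bitmap_set(unsigned long *map, unsigned int start, unsigned int count)
{
	for (unsigned int i = start; i < start + count; i++)
		map[i / LONG_BITS] |= 1UL << (i % LONG_BITS);
}

static void bitmap_clear(unsigned long *map, unsigned int start, unsigned int count)
{
	for (unsigned int i = start; i < start + count; i++)
		map[i / LONG_BITS] &= ~(1UL << (i % LONG_BITS));
}

int main(void)
{
	unsigned long map[4] = { 0 };

	bitmap_set(map, 5, 70);		/* one call marks bits 5..74 */
	bitmap_clear(map, 8, 4);	/* one call frees bits 8..11 */
	printf("%lx %lx\n", map[1], map[0]);
	return 0;
}
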
1168 1222
1169static int search_bitmap(struct btrfs_block_group_cache *block_group, 1223static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1170 struct btrfs_free_space *bitmap_info, u64 *offset, 1224 struct btrfs_free_space *bitmap_info, u64 *offset,
1171 u64 *bytes) 1225 u64 *bytes)
1172{ 1226{
@@ -1174,9 +1228,9 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1174 unsigned long bits, i; 1228 unsigned long bits, i;
1175 unsigned long next_zero; 1229 unsigned long next_zero;
1176 1230
1177 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 1231 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1178 max_t(u64, *offset, bitmap_info->offset)); 1232 max_t(u64, *offset, bitmap_info->offset));
1179 bits = bytes_to_bits(*bytes, block_group->sectorsize); 1233 bits = bytes_to_bits(*bytes, ctl->unit);
1180 1234
1181 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 1235 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1182 i < BITS_PER_BITMAP; 1236 i < BITS_PER_BITMAP;
@@ -1191,29 +1245,25 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1191 } 1245 }
1192 1246
1193 if (found_bits) { 1247 if (found_bits) {
1194 *offset = (u64)(i * block_group->sectorsize) + 1248 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1195 bitmap_info->offset; 1249 *bytes = (u64)(found_bits) * ctl->unit;
1196 *bytes = (u64)(found_bits) * block_group->sectorsize;
1197 return 0; 1250 return 0;
1198 } 1251 }
1199 1252
1200 return -1; 1253 return -1;
1201} 1254}
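
Apart from the ctl->unit conversion, search_bitmap() is unchanged: it walks runs of set bits until it finds one at least 'bits' long. A userspace model of that scan, with find_next_bit()/find_next_zero_bit() reimplemented naively for self-containment:

/*
 * Model of the search_bitmap() scan. The find helpers here are naive
 * bit-at-a-time loops; the kernel versions work a word at a time.
 */
#include <stdbool.h>
#include <stdio.h>

#define NBITS 64

static bool test(const unsigned char *map, unsigned int i)
{
	return map[i / 8] & (1 << (i % 8));
}

static unsigned int find_next(const unsigned char *map, unsigned int from, bool want)
{
	while (from < NBITS && test(map, from) != want)
		from++;
	return from;
}

static int search_bitmap(const unsigned char *map, unsigned int bits,
			 unsigned int *out)
{
	for (unsigned int i = find_next(map, 0, true); i < NBITS;
	     i = find_next(map, i, true)) {
		unsigned int next_zero = find_next(map, i, false);

		if (next_zero - i >= bits) {
			*out = i;	/* first run long enough wins */
			return 0;
		}
		i = next_zero;		/* skip past this short run */
	}
	return -1;
}

int main(void)
{
	unsigned char map[8] = { 0x0f, 0xff, 0x01 };	/* bits 0-3 and 8-16 set */
	unsigned int start;

	if (!search_bitmap(map, 6, &start))
		printf("run of >= 6 bits at %u\n", start);	/* prints 8 */
	return 0;
}
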
1202 1255
1203static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 1256static struct btrfs_free_space *
1204 *block_group, u64 *offset, 1257find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1205 u64 *bytes, int debug)
1206{ 1258{
1207 struct btrfs_free_space *entry; 1259 struct btrfs_free_space *entry;
1208 struct rb_node *node; 1260 struct rb_node *node;
1209 int ret; 1261 int ret;
1210 1262
1211 if (!block_group->free_space_offset.rb_node) 1263 if (!ctl->free_space_offset.rb_node)
1212 return NULL; 1264 return NULL;
1213 1265
1214 entry = tree_search_offset(block_group, 1266 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1215 offset_to_bitmap(block_group, *offset),
1216 0, 1);
1217 if (!entry) 1267 if (!entry)
1218 return NULL; 1268 return NULL;
1219 1269
@@ -1223,7 +1273,7 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1223 continue; 1273 continue;
1224 1274
1225 if (entry->bitmap) { 1275 if (entry->bitmap) {
1226 ret = search_bitmap(block_group, entry, offset, bytes); 1276 ret = search_bitmap(ctl, entry, offset, bytes);
1227 if (!ret) 1277 if (!ret)
1228 return entry; 1278 return entry;
1229 continue; 1279 continue;
@@ -1237,33 +1287,28 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1237 return NULL; 1287 return NULL;
1238} 1288}
1239 1289
1240static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 1290static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1241 struct btrfs_free_space *info, u64 offset) 1291 struct btrfs_free_space *info, u64 offset)
1242{ 1292{
1243 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1293 info->offset = offset_to_bitmap(ctl, offset);
1244 int max_bitmaps = (int)div64_u64(block_group->key.offset +
1245 bytes_per_bg - 1, bytes_per_bg);
1246 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1247
1248 info->offset = offset_to_bitmap(block_group, offset);
1249 info->bytes = 0; 1294 info->bytes = 0;
1250 link_free_space(block_group, info); 1295 link_free_space(ctl, info);
1251 block_group->total_bitmaps++; 1296 ctl->total_bitmaps++;
1252 1297
1253 recalculate_thresholds(block_group); 1298 ctl->op->recalc_thresholds(ctl);
1254} 1299}
1255 1300
1256static void free_bitmap(struct btrfs_block_group_cache *block_group, 1301static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1257 struct btrfs_free_space *bitmap_info) 1302 struct btrfs_free_space *bitmap_info)
1258{ 1303{
1259 unlink_free_space(block_group, bitmap_info); 1304 unlink_free_space(ctl, bitmap_info);
1260 kfree(bitmap_info->bitmap); 1305 kfree(bitmap_info->bitmap);
1261 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1306 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1262 block_group->total_bitmaps--; 1307 ctl->total_bitmaps--;
1263 recalculate_thresholds(block_group); 1308 ctl->op->recalc_thresholds(ctl);
1264} 1309}
1265 1310
1266static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1311static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1267 struct btrfs_free_space *bitmap_info, 1312 struct btrfs_free_space *bitmap_info,
1268 u64 *offset, u64 *bytes) 1313 u64 *offset, u64 *bytes)
1269{ 1314{
@@ -1272,8 +1317,7 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
1272 int ret; 1317 int ret;
1273 1318
1274again: 1319again:
1275 end = bitmap_info->offset + 1320 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1276 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
1277 1321
1278 /* 1322 /*
1279 * XXX - this can go away after a few releases. 1323 * XXX - this can go away after a few releases.
@@ -1288,24 +1332,22 @@ again:
1288 search_start = *offset; 1332 search_start = *offset;
1289 search_bytes = *bytes; 1333 search_bytes = *bytes;
1290 search_bytes = min(search_bytes, end - search_start + 1); 1334 search_bytes = min(search_bytes, end - search_start + 1);
1291 ret = search_bitmap(block_group, bitmap_info, &search_start, 1335 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1292 &search_bytes);
1293 BUG_ON(ret < 0 || search_start != *offset); 1336 BUG_ON(ret < 0 || search_start != *offset);
1294 1337
1295 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1338 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1296 bitmap_clear_bits(block_group, bitmap_info, *offset, 1339 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1297 end - *offset + 1);
1298 *bytes -= end - *offset + 1; 1340 *bytes -= end - *offset + 1;
1299 *offset = end + 1; 1341 *offset = end + 1;
1300 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1342 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1301 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 1343 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1302 *bytes = 0; 1344 *bytes = 0;
1303 } 1345 }
1304 1346
1305 if (*bytes) { 1347 if (*bytes) {
1306 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1348 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1307 if (!bitmap_info->bytes) 1349 if (!bitmap_info->bytes)
1308 free_bitmap(block_group, bitmap_info); 1350 free_bitmap(ctl, bitmap_info);
1309 1351
1310 /* 1352 /*
1311 * no entry after this bitmap, but we still have bytes to 1353 * no entry after this bitmap, but we still have bytes to
@@ -1332,31 +1374,28 @@ again:
1332 */ 1374 */
1333 search_start = *offset; 1375 search_start = *offset;
1334 search_bytes = *bytes; 1376 search_bytes = *bytes;
1335 ret = search_bitmap(block_group, bitmap_info, &search_start, 1377 ret = search_bitmap(ctl, bitmap_info, &search_start,
1336 &search_bytes); 1378 &search_bytes);
1337 if (ret < 0 || search_start != *offset) 1379 if (ret < 0 || search_start != *offset)
1338 return -EAGAIN; 1380 return -EAGAIN;
1339 1381
1340 goto again; 1382 goto again;
1341 } else if (!bitmap_info->bytes) 1383 } else if (!bitmap_info->bytes)
1342 free_bitmap(block_group, bitmap_info); 1384 free_bitmap(ctl, bitmap_info);
1343 1385
1344 return 0; 1386 return 0;
1345} 1387}
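
remove_from_bitmap() clamps a removal to the end of the current bitmap window and loops on the remainder against the next entry. A sketch of just that clamping, assuming a 128 MiB window (BITS_PER_BITMAP times a 4 KiB unit) and a cache starting at offset 0; the real code computes windows relative to ctl->start:

/*
 * Sketch of the remove_from_bitmap() clamp-and-retry loop, under the
 * stated assumptions (window size, start == 0).
 */
#include <stdint.h>
#include <stdio.h>

#define WINDOW (128ULL << 20)	/* assumed bytes covered per bitmap */

static void remove_range(uint64_t offset, uint64_t bytes)
{
	while (bytes) {
		uint64_t win_start = offset - (offset % WINDOW);
		uint64_t end = win_start + WINDOW - 1;
		uint64_t chunk = bytes;

		if (offset + bytes - 1 > end)	/* clamp to this window */
			chunk = end - offset + 1;

		printf("clear [%llu, +%llu) in window %llu\n",
		       (unsigned long long)offset, (unsigned long long)chunk,
		       (unsigned long long)win_start);

		offset += chunk;	/* remainder goes to the next window */
		bytes -= chunk;
	}
}

int main(void)
{
	/* a removal straddling two 128 MiB windows gets split in two */
	remove_range(WINDOW - 4096, 3 * 4096);
	return 0;
}
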
1346 1388
1347static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1348 struct btrfs_free_space *info) 1390 struct btrfs_free_space *info)
1349{ 1391{
1350 struct btrfs_free_space *bitmap_info; 1392 struct btrfs_block_group_cache *block_group = ctl->private;
1351 int added = 0;
1352 u64 bytes, offset, end;
1353 int ret;
1354 1393
1355 /* 1394 /*
1356 * If we are below the extents threshold then we can add this as an 1395 * If we are below the extents threshold then we can add this as an
1357 * extent, and don't have to deal with the bitmap 1396 * extent, and don't have to deal with the bitmap
1358 */ 1397 */
1359 if (block_group->free_extents < block_group->extents_thresh) { 1398 if (ctl->free_extents < ctl->extents_thresh) {
1360 /* 1399 /*
1361 * If this block group has some small extents we don't want to 1400 * If this block group has some small extents we don't want to
1362 * use up all of our free slots in the cache with them, we want 1401 * use up all of our free slots in the cache with them, we want
@@ -1365,11 +1404,10 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1365 * the overhead of a bitmap if we don't have to. 1404 * the overhead of a bitmap if we don't have to.
1366 */ 1405 */
1367 if (info->bytes <= block_group->sectorsize * 4) { 1406 if (info->bytes <= block_group->sectorsize * 4) {
1368 if (block_group->free_extents * 2 <= 1407 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1369 block_group->extents_thresh) 1408 return false;
1370 return 0;
1371 } else { 1409 } else {
1372 return 0; 1410 return false;
1373 } 1411 }
1374 } 1412 }
1375 1413
@@ -1379,31 +1417,42 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1379 */ 1417 */
1380 if (BITS_PER_BITMAP * block_group->sectorsize > 1418 if (BITS_PER_BITMAP * block_group->sectorsize >
1381 block_group->key.offset) 1419 block_group->key.offset)
1382 return 0; 1420 return false;
1421
1422 return true;
1423}
1424
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info)
1427{
1428 struct btrfs_free_space *bitmap_info;
1429 int added = 0;
1430 u64 bytes, offset, end;
1431 int ret;
1383 1432
1384 bytes = info->bytes; 1433 bytes = info->bytes;
1385 offset = info->offset; 1434 offset = info->offset;
1386 1435
1436 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0;
1438
1387again: 1439again:
1388 bitmap_info = tree_search_offset(block_group, 1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1389 offset_to_bitmap(block_group, offset),
1390 1, 0); 1441 1, 0);
1391 if (!bitmap_info) { 1442 if (!bitmap_info) {
1392 BUG_ON(added); 1443 BUG_ON(added);
1393 goto new_bitmap; 1444 goto new_bitmap;
1394 } 1445 }
1395 1446
1396 end = bitmap_info->offset + 1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1397 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
1398 1448
1399 if (offset >= bitmap_info->offset && offset + bytes > end) { 1449 if (offset >= bitmap_info->offset && offset + bytes > end) {
1400 bitmap_set_bits(block_group, bitmap_info, offset, 1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
1401 end - offset);
1402 bytes -= end - offset; 1451 bytes -= end - offset;
1403 offset = end; 1452 offset = end;
1404 added = 0; 1453 added = 0;
1405 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1406 bitmap_set_bits(block_group, bitmap_info, offset, bytes); 1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1407 bytes = 0; 1456 bytes = 0;
1408 } else { 1457 } else {
1409 BUG(); 1458 BUG();
@@ -1417,19 +1466,19 @@ again:
1417 1466
1418new_bitmap: 1467new_bitmap:
1419 if (info && info->bitmap) { 1468 if (info && info->bitmap) {
1420 add_new_bitmap(block_group, info, offset); 1469 add_new_bitmap(ctl, info, offset);
1421 added = 1; 1470 added = 1;
1422 info = NULL; 1471 info = NULL;
1423 goto again; 1472 goto again;
1424 } else { 1473 } else {
1425 spin_unlock(&block_group->tree_lock); 1474 spin_unlock(&ctl->tree_lock);
1426 1475
1427 /* no pre-allocated info, allocate a new one */ 1476 /* no pre-allocated info, allocate a new one */
1428 if (!info) { 1477 if (!info) {
1429 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1478 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1430 GFP_NOFS); 1479 GFP_NOFS);
1431 if (!info) { 1480 if (!info) {
1432 spin_lock(&block_group->tree_lock); 1481 spin_lock(&ctl->tree_lock);
1433 ret = -ENOMEM; 1482 ret = -ENOMEM;
1434 goto out; 1483 goto out;
1435 } 1484 }
@@ -1437,7 +1486,7 @@ new_bitmap:
1437 1486
1438 /* allocate the bitmap */ 1487 /* allocate the bitmap */
1439 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1488 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1440 spin_lock(&block_group->tree_lock); 1489 spin_lock(&ctl->tree_lock);
1441 if (!info->bitmap) { 1490 if (!info->bitmap) {
1442 ret = -ENOMEM; 1491 ret = -ENOMEM;
1443 goto out; 1492 goto out;
@@ -1455,7 +1504,7 @@ out:
1455 return ret; 1504 return ret;
1456} 1505}
1457 1506
1458bool try_merge_free_space(struct btrfs_block_group_cache *block_group, 1507static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1459 struct btrfs_free_space *info, bool update_stat) 1508 struct btrfs_free_space *info, bool update_stat)
1460{ 1509{
1461 struct btrfs_free_space *left_info; 1510 struct btrfs_free_space *left_info;
@@ -1469,18 +1518,18 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1469 * are adding, if there is remove that struct and add a new one to 1518 * are adding, if there is remove that struct and add a new one to
1470 * cover the entire range 1519 * cover the entire range
1471 */ 1520 */
1472 right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 1521 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1473 if (right_info && rb_prev(&right_info->offset_index)) 1522 if (right_info && rb_prev(&right_info->offset_index))
1474 left_info = rb_entry(rb_prev(&right_info->offset_index), 1523 left_info = rb_entry(rb_prev(&right_info->offset_index),
1475 struct btrfs_free_space, offset_index); 1524 struct btrfs_free_space, offset_index);
1476 else 1525 else
1477 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1526 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1478 1527
1479 if (right_info && !right_info->bitmap) { 1528 if (right_info && !right_info->bitmap) {
1480 if (update_stat) 1529 if (update_stat)
1481 unlink_free_space(block_group, right_info); 1530 unlink_free_space(ctl, right_info);
1482 else 1531 else
1483 __unlink_free_space(block_group, right_info); 1532 __unlink_free_space(ctl, right_info);
1484 info->bytes += right_info->bytes; 1533 info->bytes += right_info->bytes;
1485 kmem_cache_free(btrfs_free_space_cachep, right_info); 1534 kmem_cache_free(btrfs_free_space_cachep, right_info);
1486 merged = true; 1535 merged = true;
@@ -1489,9 +1538,9 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1489 if (left_info && !left_info->bitmap && 1538 if (left_info && !left_info->bitmap &&
1490 left_info->offset + left_info->bytes == offset) { 1539 left_info->offset + left_info->bytes == offset) {
1491 if (update_stat) 1540 if (update_stat)
1492 unlink_free_space(block_group, left_info); 1541 unlink_free_space(ctl, left_info);
1493 else 1542 else
1494 __unlink_free_space(block_group, left_info); 1543 __unlink_free_space(ctl, left_info);
1495 info->offset = left_info->offset; 1544 info->offset = left_info->offset;
1496 info->bytes += left_info->bytes; 1545 info->bytes += left_info->bytes;
1497 kmem_cache_free(btrfs_free_space_cachep, left_info); 1546 kmem_cache_free(btrfs_free_space_cachep, left_info);
@@ -1501,8 +1550,8 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1501 return merged; 1550 return merged;
1502} 1551}
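
try_merge_free_space() absorbs a right neighbour that starts exactly at offset + bytes and a left neighbour that ends exactly at offset. The same coalescing on plain structs, with the rb-tree lookups and unlinking elided:

/*
 * Minimal model of try_merge_free_space(): pointers stand in for the
 * rb-tree neighbours found via tree_search_offset()/rb_prev().
 */
#include <stdint.h>
#include <stdio.h>

struct fspace { uint64_t offset, bytes; };

static int merge(struct fspace *info, struct fspace *left, struct fspace *right)
{
	int merged = 0;

	if (right && right->offset == info->offset + info->bytes) {
		info->bytes += right->bytes;	/* absorb right neighbour */
		right->bytes = 0;
		merged = 1;
	}
	if (left && left->offset + left->bytes == info->offset) {
		info->offset = left->offset;	/* absorb left neighbour */
		info->bytes += left->bytes;
		left->bytes = 0;
		merged = 1;
	}
	return merged;
}

int main(void)
{
	struct fspace left = { 0, 4096 }, right = { 8192, 4096 };
	struct fspace info = { 4096, 4096 };

	if (merge(&info, &left, &right))
		printf("merged: [%llu, +%llu)\n",	/* [0, +12288) */
		       (unsigned long long)info.offset,
		       (unsigned long long)info.bytes);
	return 0;
}
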
1503 1552
1504int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1553int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1505 u64 offset, u64 bytes) 1554 u64 offset, u64 bytes)
1506{ 1555{
1507 struct btrfs_free_space *info; 1556 struct btrfs_free_space *info;
1508 int ret = 0; 1557 int ret = 0;
@@ -1514,9 +1563,9 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1514 info->offset = offset; 1563 info->offset = offset;
1515 info->bytes = bytes; 1564 info->bytes = bytes;
1516 1565
1517 spin_lock(&block_group->tree_lock); 1566 spin_lock(&ctl->tree_lock);
1518 1567
1519 if (try_merge_free_space(block_group, info, true)) 1568 if (try_merge_free_space(ctl, info, true))
1520 goto link; 1569 goto link;
1521 1570
1522 /* 1571 /*
@@ -1524,7 +1573,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1524 * extent then we know we're going to have to allocate a new extent, so 1573 * extent then we know we're going to have to allocate a new extent, so
1525 * before we do that see if we need to drop this into a bitmap 1574 * before we do that see if we need to drop this into a bitmap
1526 */ 1575 */
1527 ret = insert_into_bitmap(block_group, info); 1576 ret = insert_into_bitmap(ctl, info);
1528 if (ret < 0) { 1577 if (ret < 0) {
1529 goto out; 1578 goto out;
1530 } else if (ret) { 1579 } else if (ret) {
@@ -1532,11 +1581,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1532 goto out; 1581 goto out;
1533 } 1582 }
1534link: 1583link:
1535 ret = link_free_space(block_group, info); 1584 ret = link_free_space(ctl, info);
1536 if (ret) 1585 if (ret)
1537 kmem_cache_free(btrfs_free_space_cachep, info); 1586 kmem_cache_free(btrfs_free_space_cachep, info);
1538out: 1587out:
1539 spin_unlock(&block_group->tree_lock); 1588 spin_unlock(&ctl->tree_lock);
1540 1589
1541 if (ret) { 1590 if (ret) {
1542 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1591 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
@@ -1549,21 +1598,21 @@ out:
1549int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1598int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1550 u64 offset, u64 bytes) 1599 u64 offset, u64 bytes)
1551{ 1600{
1601 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1552 struct btrfs_free_space *info; 1602 struct btrfs_free_space *info;
1553 struct btrfs_free_space *next_info = NULL; 1603 struct btrfs_free_space *next_info = NULL;
1554 int ret = 0; 1604 int ret = 0;
1555 1605
1556 spin_lock(&block_group->tree_lock); 1606 spin_lock(&ctl->tree_lock);
1557 1607
1558again: 1608again:
1559 info = tree_search_offset(block_group, offset, 0, 0); 1609 info = tree_search_offset(ctl, offset, 0, 0);
1560 if (!info) { 1610 if (!info) {
1561 /* 1611 /*
1562 * oops didn't find an extent that matched the space we wanted 1612 * oops didn't find an extent that matched the space we wanted
1563 * to remove, look for a bitmap instead 1613 * to remove, look for a bitmap instead
1564 */ 1614 */
1565 info = tree_search_offset(block_group, 1615 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1566 offset_to_bitmap(block_group, offset),
1567 1, 0); 1616 1, 0);
1568 if (!info) { 1617 if (!info) {
1569 WARN_ON(1); 1618 WARN_ON(1);
@@ -1578,8 +1627,8 @@ again:
1578 offset_index); 1627 offset_index);
1579 1628
1580 if (next_info->bitmap) 1629 if (next_info->bitmap)
1581 end = next_info->offset + BITS_PER_BITMAP * 1630 end = next_info->offset +
1582 block_group->sectorsize - 1; 1631 BITS_PER_BITMAP * ctl->unit - 1;
1583 else 1632 else
1584 end = next_info->offset + next_info->bytes; 1633 end = next_info->offset + next_info->bytes;
1585 1634
@@ -1599,20 +1648,20 @@ again:
1599 } 1648 }
1600 1649
1601 if (info->bytes == bytes) { 1650 if (info->bytes == bytes) {
1602 unlink_free_space(block_group, info); 1651 unlink_free_space(ctl, info);
1603 if (info->bitmap) { 1652 if (info->bitmap) {
1604 kfree(info->bitmap); 1653 kfree(info->bitmap);
1605 block_group->total_bitmaps--; 1654 ctl->total_bitmaps--;
1606 } 1655 }
1607 kmem_cache_free(btrfs_free_space_cachep, info); 1656 kmem_cache_free(btrfs_free_space_cachep, info);
1608 goto out_lock; 1657 goto out_lock;
1609 } 1658 }
1610 1659
1611 if (!info->bitmap && info->offset == offset) { 1660 if (!info->bitmap && info->offset == offset) {
1612 unlink_free_space(block_group, info); 1661 unlink_free_space(ctl, info);
1613 info->offset += bytes; 1662 info->offset += bytes;
1614 info->bytes -= bytes; 1663 info->bytes -= bytes;
1615 link_free_space(block_group, info); 1664 link_free_space(ctl, info);
1616 goto out_lock; 1665 goto out_lock;
1617 } 1666 }
1618 1667
@@ -1626,13 +1675,13 @@ again:
1626 * first unlink the old info and then 1675 * first unlink the old info and then
1627 * insert it again after the hole we're creating 1676 * insert it again after the hole we're creating
1628 */ 1677 */
1629 unlink_free_space(block_group, info); 1678 unlink_free_space(ctl, info);
1630 if (offset + bytes < info->offset + info->bytes) { 1679 if (offset + bytes < info->offset + info->bytes) {
1631 u64 old_end = info->offset + info->bytes; 1680 u64 old_end = info->offset + info->bytes;
1632 1681
1633 info->offset = offset + bytes; 1682 info->offset = offset + bytes;
1634 info->bytes = old_end - info->offset; 1683 info->bytes = old_end - info->offset;
1635 ret = link_free_space(block_group, info); 1684 ret = link_free_space(ctl, info);
1636 WARN_ON(ret); 1685 WARN_ON(ret);
1637 if (ret) 1686 if (ret)
1638 goto out_lock; 1687 goto out_lock;
@@ -1642,7 +1691,7 @@ again:
1642 */ 1691 */
1643 kmem_cache_free(btrfs_free_space_cachep, info); 1692 kmem_cache_free(btrfs_free_space_cachep, info);
1644 } 1693 }
1645 spin_unlock(&block_group->tree_lock); 1694 spin_unlock(&ctl->tree_lock);
1646 1695
1647 /* step two, insert a new info struct to cover 1696 /* step two, insert a new info struct to cover
1648 * anything before the hole 1697 * anything before the hole
@@ -1653,12 +1702,12 @@ again:
1653 goto out; 1702 goto out;
1654 } 1703 }
1655 1704
1656 ret = remove_from_bitmap(block_group, info, &offset, &bytes); 1705 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1657 if (ret == -EAGAIN) 1706 if (ret == -EAGAIN)
1658 goto again; 1707 goto again;
1659 BUG_ON(ret); 1708 BUG_ON(ret);
1660out_lock: 1709out_lock:
1661 spin_unlock(&block_group->tree_lock); 1710 spin_unlock(&ctl->tree_lock);
1662out: 1711out:
1663 return ret; 1712 return ret;
1664} 1713}
@@ -1666,11 +1715,12 @@ out:
1666void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1715void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1667 u64 bytes) 1716 u64 bytes)
1668{ 1717{
1718 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1669 struct btrfs_free_space *info; 1719 struct btrfs_free_space *info;
1670 struct rb_node *n; 1720 struct rb_node *n;
1671 int count = 0; 1721 int count = 0;
1672 1722
1673 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { 1723 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1674 info = rb_entry(n, struct btrfs_free_space, offset_index); 1724 info = rb_entry(n, struct btrfs_free_space, offset_index);
1675 if (info->bytes >= bytes) 1725 if (info->bytes >= bytes)
1676 count++; 1726 count++;
@@ -1685,19 +1735,28 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1685 "\n", count); 1735 "\n", count);
1686} 1736}
1687 1737
1688u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) 1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1689{ 1744{
1690 struct btrfs_free_space *info; 1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1691 struct rb_node *n;
1692 u64 ret = 0;
1693 1746
1694 for (n = rb_first(&block_group->free_space_offset); n; 1747 spin_lock_init(&ctl->tree_lock);
1695 n = rb_next(n)) { 1748 ctl->unit = block_group->sectorsize;
1696 info = rb_entry(n, struct btrfs_free_space, offset_index); 1749 ctl->start = block_group->key.objectid;
1697 ret += info->bytes; 1750 ctl->private = block_group;
1698 } 1751 ctl->op = &free_space_op;
1699 1752
1700 return ret; 1753 /*
1754 * we only want to have 32k of ram per block group for keeping
1755 * track of free space, and if we pass 1/2 of that we want to
1756 * start converting things over to using bitmaps
1757 */
1758 ctl->extents_thresh = ((1024 * 32) / 2) /
1759 sizeof(struct btrfs_free_space);
1701} 1760}
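
The threshold set up in btrfs_init_free_space_ctl() comes from a 32 KiB memory budget per block group, with conversion to bitmaps starting once extent entries consume half of it. In concrete numbers (the 72-byte sizeof(struct btrfs_free_space) is an assumption for x86-64, not taken from this diff):

/*
 * The extents_thresh initialisation in plain arithmetic. The entry size
 * is assumed; the exact count depends on the real struct layout.
 */
#include <stdio.h>

int main(void)
{
	unsigned int budget = 32 * 1024;	/* 32 KiB of ram per block group */
	unsigned int entry = 72;		/* assumed sizeof(btrfs_free_space) */

	/* convert to bitmaps once extent entries use half the budget: ~227 */
	printf("extents_thresh ~ %u entries\n", (budget / 2) / entry);
	return 0;
}
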
1702 1761
1703/* 1762/*
@@ -1711,6 +1770,7 @@ __btrfs_return_cluster_to_free_space(
1711 struct btrfs_block_group_cache *block_group, 1770 struct btrfs_block_group_cache *block_group,
1712 struct btrfs_free_cluster *cluster) 1771 struct btrfs_free_cluster *cluster)
1713{ 1772{
1773 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1714 struct btrfs_free_space *entry; 1774 struct btrfs_free_space *entry;
1715 struct rb_node *node; 1775 struct rb_node *node;
1716 1776
@@ -1732,8 +1792,8 @@ __btrfs_return_cluster_to_free_space(
1732 1792
1733 bitmap = (entry->bitmap != NULL); 1793 bitmap = (entry->bitmap != NULL);
1734 if (!bitmap) 1794 if (!bitmap)
1735 try_merge_free_space(block_group, entry, false); 1795 try_merge_free_space(ctl, entry, false);
1736 tree_insert_offset(&block_group->free_space_offset, 1796 tree_insert_offset(&ctl->free_space_offset,
1737 entry->offset, &entry->offset_index, bitmap); 1797 entry->offset, &entry->offset_index, bitmap);
1738 } 1798 }
1739 cluster->root = RB_ROOT; 1799 cluster->root = RB_ROOT;
@@ -1744,14 +1804,38 @@ out:
1744 return 0; 1804 return 0;
1745} 1805}
1746 1806
1747void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 1807void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1748{ 1808{
1749 struct btrfs_free_space *info; 1809 struct btrfs_free_space *info;
1750 struct rb_node *node; 1810 struct rb_node *node;
1811
1812 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1813 info = rb_entry(node, struct btrfs_free_space, offset_index);
1814 unlink_free_space(ctl, info);
1815 kfree(info->bitmap);
1816 kmem_cache_free(btrfs_free_space_cachep, info);
1817 if (need_resched()) {
1818 spin_unlock(&ctl->tree_lock);
1819 cond_resched();
1820 spin_lock(&ctl->tree_lock);
1821 }
1822 }
1823}
1824
1825void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1826{
1827 spin_lock(&ctl->tree_lock);
1828 __btrfs_remove_free_space_cache_locked(ctl);
1829 spin_unlock(&ctl->tree_lock);
1830}
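
The new __btrfs_remove_free_space_cache_locked() keeps teardown preemption-friendly: it frees entries under the spinlock but drops it around cond_resched() whenever a reschedule is pending. The same shape in userspace, with a pthread mutex standing in for tree_lock and a counter standing in for need_resched():

/*
 * Shape of the lock-drop/resched/retake drain loop. kfree() is elided
 * because the nodes live in a static array here.
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

struct node { struct node *next; };

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static bool should_yield(unsigned int freed)
{
	return (freed & 1023) == 0;	/* stand-in for need_resched() */
}

static void drain_locked(void)		/* called with 'lock' held */
{
	unsigned int freed = 0;

	while (head) {
		head = head->next;	/* unlink and free one entry */

		if (should_yield(++freed)) {
			pthread_mutex_unlock(&lock);
			sched_yield();	/* cond_resched() equivalent */
			pthread_mutex_lock(&lock);
		}
	}
}

int main(void)
{
	static struct node nodes[4096];

	for (int i = 0; i < 4095; i++)
		nodes[i].next = &nodes[i + 1];
	head = nodes;

	pthread_mutex_lock(&lock);
	drain_locked();
	pthread_mutex_unlock(&lock);
	return 0;
}
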
1831
1832void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1833{
1834 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1751 struct btrfs_free_cluster *cluster; 1835 struct btrfs_free_cluster *cluster;
1752 struct list_head *head; 1836 struct list_head *head;
1753 1837
1754 spin_lock(&block_group->tree_lock); 1838 spin_lock(&ctl->tree_lock);
1755 while ((head = block_group->cluster_list.next) != 1839 while ((head = block_group->cluster_list.next) !=
1756 &block_group->cluster_list) { 1840 &block_group->cluster_list) {
1757 cluster = list_entry(head, struct btrfs_free_cluster, 1841 cluster = list_entry(head, struct btrfs_free_cluster,
@@ -1760,60 +1844,46 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1760 WARN_ON(cluster->block_group != block_group); 1844 WARN_ON(cluster->block_group != block_group);
1761 __btrfs_return_cluster_to_free_space(block_group, cluster); 1845 __btrfs_return_cluster_to_free_space(block_group, cluster);
1762 if (need_resched()) { 1846 if (need_resched()) {
1763 spin_unlock(&block_group->tree_lock); 1847 spin_unlock(&ctl->tree_lock);
1764 cond_resched(); 1848 cond_resched();
1765 spin_lock(&block_group->tree_lock); 1849 spin_lock(&ctl->tree_lock);
1766 } 1850 }
1767 } 1851 }
1852 __btrfs_remove_free_space_cache_locked(ctl);
1853 spin_unlock(&ctl->tree_lock);
1768 1854
1769 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1770 info = rb_entry(node, struct btrfs_free_space, offset_index);
1771 if (!info->bitmap) {
1772 unlink_free_space(block_group, info);
1773 kmem_cache_free(btrfs_free_space_cachep, info);
1774 } else {
1775 free_bitmap(block_group, info);
1776 }
1777
1778 if (need_resched()) {
1779 spin_unlock(&block_group->tree_lock);
1780 cond_resched();
1781 spin_lock(&block_group->tree_lock);
1782 }
1783 }
1784
1785 spin_unlock(&block_group->tree_lock);
1786} 1855}
1787 1856
1788u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 1857u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1789 u64 offset, u64 bytes, u64 empty_size) 1858 u64 offset, u64 bytes, u64 empty_size)
1790{ 1859{
1860 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1791 struct btrfs_free_space *entry = NULL; 1861 struct btrfs_free_space *entry = NULL;
1792 u64 bytes_search = bytes + empty_size; 1862 u64 bytes_search = bytes + empty_size;
1793 u64 ret = 0; 1863 u64 ret = 0;
1794 1864
1795 spin_lock(&block_group->tree_lock); 1865 spin_lock(&ctl->tree_lock);
1796 entry = find_free_space(block_group, &offset, &bytes_search, 0); 1866 entry = find_free_space(ctl, &offset, &bytes_search);
1797 if (!entry) 1867 if (!entry)
1798 goto out; 1868 goto out;
1799 1869
1800 ret = offset; 1870 ret = offset;
1801 if (entry->bitmap) { 1871 if (entry->bitmap) {
1802 bitmap_clear_bits(block_group, entry, offset, bytes); 1872 bitmap_clear_bits(ctl, entry, offset, bytes);
1803 if (!entry->bytes) 1873 if (!entry->bytes)
1804 free_bitmap(block_group, entry); 1874 free_bitmap(ctl, entry);
1805 } else { 1875 } else {
1806 unlink_free_space(block_group, entry); 1876 unlink_free_space(ctl, entry);
1807 entry->offset += bytes; 1877 entry->offset += bytes;
1808 entry->bytes -= bytes; 1878 entry->bytes -= bytes;
1809 if (!entry->bytes) 1879 if (!entry->bytes)
1810 kmem_cache_free(btrfs_free_space_cachep, entry); 1880 kmem_cache_free(btrfs_free_space_cachep, entry);
1811 else 1881 else
1812 link_free_space(block_group, entry); 1882 link_free_space(ctl, entry);
1813 } 1883 }
1814 1884
1815out: 1885out:
1816 spin_unlock(&block_group->tree_lock); 1886 spin_unlock(&ctl->tree_lock);
1817 1887
1818 return ret; 1888 return ret;
1819} 1889}
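
In btrfs_find_space_for_alloc(), an extent entry is consumed from the front: unlink, advance offset, shrink bytes, then relink or free the entry if nothing is left. That branch in isolation, with the tree bookkeeping elided:

/*
 * The extent branch of btrfs_find_space_for_alloc() on a bare struct.
 * Returning 0 signals failure, as in the kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct fspace { uint64_t offset, bytes; };

static uint64_t alloc_front(struct fspace *entry, uint64_t bytes)
{
	uint64_t ret;

	if (entry->bytes < bytes)
		return 0;

	ret = entry->offset;
	entry->offset += bytes;		/* shrink from the front */
	entry->bytes -= bytes;
	return ret;
}

int main(void)
{
	struct fspace e = { 1 << 20, 64 * 1024 };

	printf("got %llu, %llu bytes left\n",
	       (unsigned long long)alloc_front(&e, 16 * 1024),
	       (unsigned long long)e.bytes);
	return 0;
}
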
@@ -1830,6 +1900,7 @@ int btrfs_return_cluster_to_free_space(
1830 struct btrfs_block_group_cache *block_group, 1900 struct btrfs_block_group_cache *block_group,
1831 struct btrfs_free_cluster *cluster) 1901 struct btrfs_free_cluster *cluster)
1832{ 1902{
1903 struct btrfs_free_space_ctl *ctl;
1833 int ret; 1904 int ret;
1834 1905
1835 /* first, get a safe pointer to the block group */ 1906 /* first, get a safe pointer to the block group */
@@ -1848,10 +1919,12 @@ int btrfs_return_cluster_to_free_space(
1848 atomic_inc(&block_group->count); 1919 atomic_inc(&block_group->count);
1849 spin_unlock(&cluster->lock); 1920 spin_unlock(&cluster->lock);
1850 1921
1922 ctl = block_group->free_space_ctl;
1923
1851 /* now return any extents the cluster had on it */ 1924 /* now return any extents the cluster had on it */
1852 spin_lock(&block_group->tree_lock); 1925 spin_lock(&ctl->tree_lock);
1853 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 1926 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1854 spin_unlock(&block_group->tree_lock); 1927 spin_unlock(&ctl->tree_lock);
1855 1928
1856 /* finally drop our ref */ 1929 /* finally drop our ref */
1857 btrfs_put_block_group(block_group); 1930 btrfs_put_block_group(block_group);
@@ -1863,6 +1936,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1863 struct btrfs_free_space *entry, 1936 struct btrfs_free_space *entry,
1864 u64 bytes, u64 min_start) 1937 u64 bytes, u64 min_start)
1865{ 1938{
1939 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1866 int err; 1940 int err;
1867 u64 search_start = cluster->window_start; 1941 u64 search_start = cluster->window_start;
1868 u64 search_bytes = bytes; 1942 u64 search_bytes = bytes;
@@ -1871,13 +1945,12 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1871 search_start = min_start; 1945 search_start = min_start;
1872 search_bytes = bytes; 1946 search_bytes = bytes;
1873 1947
1874 err = search_bitmap(block_group, entry, &search_start, 1948 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1875 &search_bytes);
1876 if (err) 1949 if (err)
1877 return 0; 1950 return 0;
1878 1951
1879 ret = search_start; 1952 ret = search_start;
1880 bitmap_clear_bits(block_group, entry, ret, bytes); 1953 bitmap_clear_bits(ctl, entry, ret, bytes);
1881 1954
1882 return ret; 1955 return ret;
1883} 1956}
@@ -1891,6 +1964,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1891 struct btrfs_free_cluster *cluster, u64 bytes, 1964 struct btrfs_free_cluster *cluster, u64 bytes,
1892 u64 min_start) 1965 u64 min_start)
1893{ 1966{
1967 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1894 struct btrfs_free_space *entry = NULL; 1968 struct btrfs_free_space *entry = NULL;
1895 struct rb_node *node; 1969 struct rb_node *node;
1896 u64 ret = 0; 1970 u64 ret = 0;
@@ -1910,8 +1984,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1910 while(1) { 1984 while(1) {
1911 if (entry->bytes < bytes || 1985 if (entry->bytes < bytes ||
1912 (!entry->bitmap && entry->offset < min_start)) { 1986 (!entry->bitmap && entry->offset < min_start)) {
1913 struct rb_node *node;
1914
1915 node = rb_next(&entry->offset_index); 1987 node = rb_next(&entry->offset_index);
1916 if (!node) 1988 if (!node)
1917 break; 1989 break;
@@ -1925,7 +1997,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1925 cluster, entry, bytes, 1997 cluster, entry, bytes,
1926 min_start); 1998 min_start);
1927 if (ret == 0) { 1999 if (ret == 0) {
1928 struct rb_node *node;
1929 node = rb_next(&entry->offset_index); 2000 node = rb_next(&entry->offset_index);
1930 if (!node) 2001 if (!node)
1931 break; 2002 break;
@@ -1951,20 +2022,20 @@ out:
1951 if (!ret) 2022 if (!ret)
1952 return 0; 2023 return 0;
1953 2024
1954 spin_lock(&block_group->tree_lock); 2025 spin_lock(&ctl->tree_lock);
1955 2026
1956 block_group->free_space -= bytes; 2027 ctl->free_space -= bytes;
1957 if (entry->bytes == 0) { 2028 if (entry->bytes == 0) {
1958 block_group->free_extents--; 2029 ctl->free_extents--;
1959 if (entry->bitmap) { 2030 if (entry->bitmap) {
1960 kfree(entry->bitmap); 2031 kfree(entry->bitmap);
1961 block_group->total_bitmaps--; 2032 ctl->total_bitmaps--;
1962 recalculate_thresholds(block_group); 2033 ctl->op->recalc_thresholds(ctl);
1963 } 2034 }
1964 kmem_cache_free(btrfs_free_space_cachep, entry); 2035 kmem_cache_free(btrfs_free_space_cachep, entry);
1965 } 2036 }
1966 2037
1967 spin_unlock(&block_group->tree_lock); 2038 spin_unlock(&ctl->tree_lock);
1968 2039
1969 return ret; 2040 return ret;
1970} 2041}
@@ -1974,6 +2045,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1974 struct btrfs_free_cluster *cluster, 2045 struct btrfs_free_cluster *cluster,
1975 u64 offset, u64 bytes, u64 min_bytes) 2046 u64 offset, u64 bytes, u64 min_bytes)
1976{ 2047{
2048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1977 unsigned long next_zero; 2049 unsigned long next_zero;
1978 unsigned long i; 2050 unsigned long i;
1979 unsigned long search_bits; 2051 unsigned long search_bits;
@@ -2028,7 +2100,7 @@ again:
2028 2100
2029 cluster->window_start = start * block_group->sectorsize + 2101 cluster->window_start = start * block_group->sectorsize +
2030 entry->offset; 2102 entry->offset;
2031 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2103 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2032 ret = tree_insert_offset(&cluster->root, entry->offset, 2104 ret = tree_insert_offset(&cluster->root, entry->offset,
2033 &entry->offset_index, 1); 2105 &entry->offset_index, 1);
2034 BUG_ON(ret); 2106 BUG_ON(ret);
@@ -2043,6 +2115,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2043 struct btrfs_free_cluster *cluster, 2115 struct btrfs_free_cluster *cluster,
2044 u64 offset, u64 bytes, u64 min_bytes) 2116 u64 offset, u64 bytes, u64 min_bytes)
2045{ 2117{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2046 struct btrfs_free_space *first = NULL; 2119 struct btrfs_free_space *first = NULL;
2047 struct btrfs_free_space *entry = NULL; 2120 struct btrfs_free_space *entry = NULL;
2048 struct btrfs_free_space *prev = NULL; 2121 struct btrfs_free_space *prev = NULL;
@@ -2053,7 +2126,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2053 u64 max_extent; 2126 u64 max_extent;
2054 u64 max_gap = 128 * 1024; 2127 u64 max_gap = 128 * 1024;
2055 2128
2056 entry = tree_search_offset(block_group, offset, 0, 1); 2129 entry = tree_search_offset(ctl, offset, 0, 1);
2057 if (!entry) 2130 if (!entry)
2058 return -ENOSPC; 2131 return -ENOSPC;
2059 2132
@@ -2119,7 +2192,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2119 if (entry->bitmap) 2192 if (entry->bitmap)
2120 continue; 2193 continue;
2121 2194
2122 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2195 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2123 ret = tree_insert_offset(&cluster->root, entry->offset, 2196 ret = tree_insert_offset(&cluster->root, entry->offset,
2124 &entry->offset_index, 0); 2197 &entry->offset_index, 0);
2125 BUG_ON(ret); 2198 BUG_ON(ret);
@@ -2138,16 +2211,15 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2138 struct btrfs_free_cluster *cluster, 2211 struct btrfs_free_cluster *cluster,
2139 u64 offset, u64 bytes, u64 min_bytes) 2212 u64 offset, u64 bytes, u64 min_bytes)
2140{ 2213{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2141 struct btrfs_free_space *entry; 2215 struct btrfs_free_space *entry;
2142 struct rb_node *node; 2216 struct rb_node *node;
2143 int ret = -ENOSPC; 2217 int ret = -ENOSPC;
2144 2218
2145 if (block_group->total_bitmaps == 0) 2219 if (ctl->total_bitmaps == 0)
2146 return -ENOSPC; 2220 return -ENOSPC;
2147 2221
2148 entry = tree_search_offset(block_group, 2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2149 offset_to_bitmap(block_group, offset),
2150 0, 1);
2151 if (!entry) 2223 if (!entry)
2152 return -ENOSPC; 2224 return -ENOSPC;
2153 2225
@@ -2180,6 +2252,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2180 struct btrfs_free_cluster *cluster, 2252 struct btrfs_free_cluster *cluster,
2181 u64 offset, u64 bytes, u64 empty_size) 2253 u64 offset, u64 bytes, u64 empty_size)
2182{ 2254{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2183 u64 min_bytes; 2256 u64 min_bytes;
2184 int ret; 2257 int ret;
2185 2258
@@ -2199,14 +2272,14 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2199 } else 2272 } else
2200 min_bytes = max(bytes, (bytes + empty_size) >> 2); 2273 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2201 2274
2202 spin_lock(&block_group->tree_lock); 2275 spin_lock(&ctl->tree_lock);
2203 2276
2204 /* 2277 /*
2205 * If we know we don't have enough space to make a cluster don't even 2278 * If we know we don't have enough space to make a cluster don't even
2206 * bother doing all the work to try and find one. 2279 * bother doing all the work to try and find one.
2207 */ 2280 */
2208 if (block_group->free_space < min_bytes) { 2281 if (ctl->free_space < min_bytes) {
2209 spin_unlock(&block_group->tree_lock); 2282 spin_unlock(&ctl->tree_lock);
2210 return -ENOSPC; 2283 return -ENOSPC;
2211 } 2284 }
2212 2285
@@ -2232,7 +2305,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2232 } 2305 }
2233out: 2306out:
2234 spin_unlock(&cluster->lock); 2307 spin_unlock(&cluster->lock);
2235 spin_unlock(&block_group->tree_lock); 2308 spin_unlock(&ctl->tree_lock);
2236 2309
2237 return ret; 2310 return ret;
2238} 2311}
@@ -2253,6 +2326,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2253int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2326int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2254 u64 *trimmed, u64 start, u64 end, u64 minlen) 2327 u64 *trimmed, u64 start, u64 end, u64 minlen)
2255{ 2328{
2329 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2256 struct btrfs_free_space *entry = NULL; 2330 struct btrfs_free_space *entry = NULL;
2257 struct btrfs_fs_info *fs_info = block_group->fs_info; 2331 struct btrfs_fs_info *fs_info = block_group->fs_info;
2258 u64 bytes = 0; 2332 u64 bytes = 0;
@@ -2262,52 +2336,50 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2262 *trimmed = 0; 2336 *trimmed = 0;
2263 2337
2264 while (start < end) { 2338 while (start < end) {
2265 spin_lock(&block_group->tree_lock); 2339 spin_lock(&ctl->tree_lock);
2266 2340
2267 if (block_group->free_space < minlen) { 2341 if (ctl->free_space < minlen) {
2268 spin_unlock(&block_group->tree_lock); 2342 spin_unlock(&ctl->tree_lock);
2269 break; 2343 break;
2270 } 2344 }
2271 2345
2272 entry = tree_search_offset(block_group, start, 0, 1); 2346 entry = tree_search_offset(ctl, start, 0, 1);
2273 if (!entry) 2347 if (!entry)
2274 entry = tree_search_offset(block_group, 2348 entry = tree_search_offset(ctl,
2275 offset_to_bitmap(block_group, 2349 offset_to_bitmap(ctl, start),
2276 start),
2277 1, 1); 2350 1, 1);
2278 2351
2279 if (!entry || entry->offset >= end) { 2352 if (!entry || entry->offset >= end) {
2280 spin_unlock(&block_group->tree_lock); 2353 spin_unlock(&ctl->tree_lock);
2281 break; 2354 break;
2282 } 2355 }
2283 2356
2284 if (entry->bitmap) { 2357 if (entry->bitmap) {
2285 ret = search_bitmap(block_group, entry, &start, &bytes); 2358 ret = search_bitmap(ctl, entry, &start, &bytes);
2286 if (!ret) { 2359 if (!ret) {
2287 if (start >= end) { 2360 if (start >= end) {
2288 spin_unlock(&block_group->tree_lock); 2361 spin_unlock(&ctl->tree_lock);
2289 break; 2362 break;
2290 } 2363 }
2291 bytes = min(bytes, end - start); 2364 bytes = min(bytes, end - start);
2292 bitmap_clear_bits(block_group, entry, 2365 bitmap_clear_bits(ctl, entry, start, bytes);
2293 start, bytes);
2294 if (entry->bytes == 0) 2366 if (entry->bytes == 0)
2295 free_bitmap(block_group, entry); 2367 free_bitmap(ctl, entry);
2296 } else { 2368 } else {
2297 start = entry->offset + BITS_PER_BITMAP * 2369 start = entry->offset + BITS_PER_BITMAP *
2298 block_group->sectorsize; 2370 block_group->sectorsize;
2299 spin_unlock(&block_group->tree_lock); 2371 spin_unlock(&ctl->tree_lock);
2300 ret = 0; 2372 ret = 0;
2301 continue; 2373 continue;
2302 } 2374 }
2303 } else { 2375 } else {
2304 start = entry->offset; 2376 start = entry->offset;
2305 bytes = min(entry->bytes, end - start); 2377 bytes = min(entry->bytes, end - start);
2306 unlink_free_space(block_group, entry); 2378 unlink_free_space(ctl, entry);
2307 kmem_cache_free(btrfs_free_space_cachep, entry); 2379 kmem_cache_free(btrfs_free_space_cachep, entry);
2308 } 2380 }
2309 2381
2310 spin_unlock(&block_group->tree_lock); 2382 spin_unlock(&ctl->tree_lock);
2311 2383
2312 if (bytes >= minlen) { 2384 if (bytes >= minlen) {
2313 int update_ret; 2385 int update_ret;
@@ -2319,8 +2391,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2319 bytes, 2391 bytes,
2320 &actually_trimmed); 2392 &actually_trimmed);
2321 2393
2322 btrfs_add_free_space(block_group, 2394 btrfs_add_free_space(block_group, start, bytes);
2323 start, bytes);
2324 if (!update_ret) 2395 if (!update_ret)
2325 btrfs_update_reserved_bytes(block_group, 2396 btrfs_update_reserved_bytes(block_group,
2326 bytes, 0, 1); 2397 bytes, 0, 1);
@@ -2342,3 +2413,145 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2342 2413
2343 return ret; 2414 return ret;
2344} 2415}
2416
2417/*
2418 * Find the left-most item in the cache tree, and then return the
2419 * smallest inode number in the item.
2420 *
2421 * Note: the returned inode number may not be the smallest one in
2422 * the tree, if the left-most item is a bitmap.
2423 */
2424u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2425{
2426 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2427 struct btrfs_free_space *entry = NULL;
2428 u64 ino = 0;
2429
2430 spin_lock(&ctl->tree_lock);
2431
2432 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2433 goto out;
2434
2435 entry = rb_entry(rb_first(&ctl->free_space_offset),
2436 struct btrfs_free_space, offset_index);
2437
2438 if (!entry->bitmap) {
2439 ino = entry->offset;
2440
2441 unlink_free_space(ctl, entry);
2442 entry->offset++;
2443 entry->bytes--;
2444 if (!entry->bytes)
2445 kmem_cache_free(btrfs_free_space_cachep, entry);
2446 else
2447 link_free_space(ctl, entry);
2448 } else {
2449 u64 offset = 0;
2450 u64 count = 1;
2451 int ret;
2452
2453 ret = search_bitmap(ctl, entry, &offset, &count);
2454 BUG_ON(ret);
2455
2456 ino = offset;
2457 bitmap_clear_bits(ctl, entry, offset, 1);
2458 if (entry->bytes == 0)
2459 free_bitmap(ctl, entry);
2460 }
2461out:
2462 spin_unlock(&ctl->tree_lock);
2463
2464 return ino;
2465}
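
This is the first user of the generic ctl outside block groups: inode numbers are treated as a free-space pool whose unit is 1, so allocating an ino is taking one unit off the front of the left-most entry. Modelled on a plain range:

/*
 * The inode pool in miniature: unit == 1, so handing out an ino shrinks
 * the left-most entry by exactly one. Returns 0 when the pool is empty
 * (the caller then starts or waits on the caching thread).
 */
#include <stdint.h>
#include <stdio.h>

struct fspace { uint64_t offset, bytes; };

static uint64_t take_ino(struct fspace *leftmost)
{
	uint64_t ino;

	if (!leftmost->bytes)
		return 0;

	ino = leftmost->offset;
	leftmost->offset++;
	leftmost->bytes--;
	return ino;
}

int main(void)
{
	struct fspace free_inos = { 256, 1000 };	/* inos 256..1255 free */

	printf("%llu\n", (unsigned long long)take_ino(&free_inos));	/* 256 */
	printf("%llu\n", (unsigned long long)take_ino(&free_inos));	/* 257 */
	return 0;
}
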
2466
2467struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2468 struct btrfs_path *path)
2469{
2470 struct inode *inode = NULL;
2471
2472 spin_lock(&root->cache_lock);
2473 if (root->cache_inode)
2474 inode = igrab(root->cache_inode);
2475 spin_unlock(&root->cache_lock);
2476 if (inode)
2477 return inode;
2478
2479 inode = __lookup_free_space_inode(root, path, 0);
2480 if (IS_ERR(inode))
2481 return inode;
2482
2483 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing)
2485 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock);
2487
2488 return inode;
2489}
2490
2491int create_free_ino_inode(struct btrfs_root *root,
2492 struct btrfs_trans_handle *trans,
2493 struct btrfs_path *path)
2494{
2495 return __create_free_space_inode(root, trans, path,
2496 BTRFS_FREE_INO_OBJECTID, 0);
2497}
2498
2499int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2500{
2501 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2502 struct btrfs_path *path;
2503 struct inode *inode;
2504 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item);
2506
2507 /*
2508 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock.
2510 */
2511 smp_mb();
2512 if (fs_info->closing)
2513 return 0;
2514
2515 path = btrfs_alloc_path();
2516 if (!path)
2517 return 0;
2518
2519 inode = lookup_free_ino_inode(root, path);
2520 if (IS_ERR(inode))
2521 goto out;
2522
2523 if (root_gen != BTRFS_I(inode)->generation)
2524 goto out_put;
2525
2526 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2527
2528 if (ret < 0)
2529 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2530 "root %llu\n", root->root_key.objectid);
2531out_put:
2532 iput(inode);
2533out:
2534 btrfs_free_path(path);
2535 return ret;
2536}
2537
2538int btrfs_write_out_ino_cache(struct btrfs_root *root,
2539 struct btrfs_trans_handle *trans,
2540 struct btrfs_path *path)
2541{
2542 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2543 struct inode *inode;
2544 int ret;
2545
2546 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode))
2548 return 0;
2549
2550 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2551 if (ret < 0)
2552 printk(KERN_ERR "btrfs: failed to write free ino cache "
2553 "for root %llu\n", root->root_key.objectid);
2554
2555 iput(inode);
2556 return ret;
2557}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 65c3b935289f..8f2613f779ed 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,25 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct btrfs_free_space_ctl {
31 spinlock_t tree_lock;
32 struct rb_root free_space_offset;
33 u64 free_space;
34 int extents_thresh;
35 int free_extents;
36 int total_bitmaps;
37 int unit;
38 u64 start;
39 struct btrfs_free_space_op *op;
40 void *private;
41};
42
43struct btrfs_free_space_op {
44 void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
45 bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
46 struct btrfs_free_space *info);
47};
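
These two structs are the heart of the refactoring: all of the rb-tree mechanics move into btrfs_free_space_ctl, and the only policy decisions, when to recalculate thresholds and whether an entry should go into a bitmap, sit behind btrfs_free_space_op. A miniature of the pattern; the callback bodies and the 227 threshold below are placeholders, not kernel code:

/*
 * The ctl/op split in miniature: generic code calls policy through a
 * small vtable, so block groups and the inode cache differ only in the
 * two callbacks they install.
 */
#include <stdbool.h>
#include <stdio.h>

struct ctl;

struct free_space_op {
	void (*recalc_thresholds)(struct ctl *ctl);
	bool (*use_bitmap)(struct ctl *ctl, unsigned int bytes);
};

struct ctl {
	const struct free_space_op *op;
	int free_extents;
	int extents_thresh;
};

/* block-group flavour: push entries into bitmaps once extents pile up */
static void bg_recalc(struct ctl *ctl)
{
	ctl->extents_thresh = 227;	/* ~32 KiB / 2 / sizeof(entry) */
}

static bool bg_use_bitmap(struct ctl *ctl, unsigned int bytes)
{
	(void)bytes;
	return ctl->free_extents >= ctl->extents_thresh;
}

static const struct free_space_op bg_ops = {
	.recalc_thresholds	= bg_recalc,
	.use_bitmap		= bg_use_bitmap,
};

int main(void)
{
	struct ctl ctl = { .op = &bg_ops };

	/* generic code never hard-codes policy; it calls through the ops */
	ctl.op->recalc_thresholds(&ctl);
	printf("use bitmap? %d\n", ctl.op->use_bitmap(&ctl, 4096));
	return 0;
}
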
48
30struct inode *lookup_free_space_inode(struct btrfs_root *root, 49struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache 50 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path); 51 *block_group, struct btrfs_path *path);
@@ -45,17 +64,38 @@ int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group, 65 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path); 66 struct btrfs_path *path);
48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 67
49 u64 bytenr, u64 size); 68struct inode *lookup_free_ino_inode(struct btrfs_root *root,
69 struct btrfs_path *path);
70int create_free_ino_inode(struct btrfs_root *root,
71 struct btrfs_trans_handle *trans,
72 struct btrfs_path *path);
73int load_free_ino_cache(struct btrfs_fs_info *fs_info,
74 struct btrfs_root *root);
75int btrfs_write_out_ino_cache(struct btrfs_root *root,
76 struct btrfs_trans_handle *trans,
77 struct btrfs_path *path);
78
79void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
80int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
81 u64 bytenr, u64 size);
82static inline int
83btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
84 u64 bytenr, u64 size)
85{
86 return __btrfs_add_free_space(block_group->free_space_ctl,
87 bytenr, size);
88}
50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 89int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
51 u64 bytenr, u64 size); 90 u64 bytenr, u64 size);
91void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
52void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 92void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
53 *block_group); 93 *block_group);
54u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 94u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
55 u64 offset, u64 bytes, u64 empty_size); 95 u64 offset, u64 bytes, u64 empty_size);
96u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
56void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 97void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
57 u64 bytes); 98 u64 bytes);
58u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
59int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 99int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 100 struct btrfs_root *root,
61 struct btrfs_block_group_cache *block_group, 101 struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c05a08f4c411..000970512624 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -16,11 +16,430 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/delay.h>
20#include <linux/kthread.h>
21#include <linux/pagemap.h>
22
19#include "ctree.h" 23#include "ctree.h"
20#include "disk-io.h" 24#include "disk-io.h"
25#include "free-space-cache.h"
26#include "inode-map.h"
21#include "transaction.h" 27#include "transaction.h"
22 28
23int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) 29static int caching_kthread(void *data)
30{
31 struct btrfs_root *root = data;
32 struct btrfs_fs_info *fs_info = root->fs_info;
33 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
34 struct btrfs_key key;
35 struct btrfs_path *path;
36 struct extent_buffer *leaf;
37 u64 last = (u64)-1;
38 int slot;
39 int ret;
40
41 path = btrfs_alloc_path();
42 if (!path)
43 return -ENOMEM;
44
45 /* Since the commit root is read-only, we can safely skip locking. */
46 path->skip_locking = 1;
47 path->search_commit_root = 1;
48 path->reada = 2;
49
50 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
51 key.offset = 0;
52 key.type = BTRFS_INODE_ITEM_KEY;
53again:
54 /* need to make sure the commit_root doesn't disappear */
55 mutex_lock(&root->fs_commit_mutex);
56
57 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
58 if (ret < 0)
59 goto out;
60
61 while (1) {
62 smp_mb();
63 if (fs_info->closing > 1)
64 goto out;
65
66 leaf = path->nodes[0];
67 slot = path->slots[0];
68 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
69 ret = btrfs_next_leaf(root, path);
70 if (ret < 0)
71 goto out;
72 else if (ret > 0)
73 break;
74
75 if (need_resched() ||
76 btrfs_transaction_in_commit(fs_info)) {
77 leaf = path->nodes[0];
78
79 if (btrfs_header_nritems(leaf) == 0) {
80 WARN_ON(1);
81 break;
82 }
83
84 /*
 85				 * Save the key so we can advance forward
86 * in the next search.
87 */
88 btrfs_item_key_to_cpu(leaf, &key, 0);
89 btrfs_release_path(path);
90 root->cache_progress = last;
91 mutex_unlock(&root->fs_commit_mutex);
92 schedule_timeout(1);
93 goto again;
94 } else
95 continue;
96 }
97
98 btrfs_item_key_to_cpu(leaf, &key, slot);
99
100 if (key.type != BTRFS_INODE_ITEM_KEY)
101 goto next;
102
103 if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
104 break;
105
106 if (last != (u64)-1 && last + 1 != key.objectid) {
107 __btrfs_add_free_space(ctl, last + 1,
108 key.objectid - last - 1);
109 wake_up(&root->cache_wait);
110 }
111
112 last = key.objectid;
113next:
114 path->slots[0]++;
115 }
116
117 if (last < BTRFS_LAST_FREE_OBJECTID - 1) {
118 __btrfs_add_free_space(ctl, last + 1,
119 BTRFS_LAST_FREE_OBJECTID - last - 1);
120 }
121
122 spin_lock(&root->cache_lock);
123 root->cached = BTRFS_CACHE_FINISHED;
124 spin_unlock(&root->cache_lock);
125
126 root->cache_progress = (u64)-1;
127 btrfs_unpin_free_ino(root);
128out:
129 wake_up(&root->cache_wait);
130 mutex_unlock(&root->fs_commit_mutex);
131
132 btrfs_free_path(path);
133
134 return ret;
135}
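
The kthread above walks the INODE_ITEM keys in the commit root and records every
numeric gap between consecutive objectids as a range of free inode numbers. A
minimal userspace sketch of that gap computation (not kernel code; the
last-objectid constant here is an assumption, not the real value):

#include <stdio.h>
#include <stdint.h>

#define LAST_FREE_OBJECTID ((1ULL << 48) - 256)	/* assumed, for illustration */

static void add_free_range(uint64_t start, uint64_t len)
{
	printf("free inode range: start=%llu len=%llu\n",
	       (unsigned long long)start, (unsigned long long)len);
}

int main(void)
{
	/* objectids of INODE_ITEM keys met while scanning, in order */
	uint64_t found[] = { 256, 257, 260, 300 };
	uint64_t last = (uint64_t)-1;
	size_t i;

	for (i = 0; i < sizeof(found) / sizeof(found[0]); i++) {
		/* a hole between consecutive inode numbers is free space */
		if (last != (uint64_t)-1 && last + 1 != found[i])
			add_free_range(last + 1, found[i] - last - 1);
		last = found[i];
	}
	/* everything past the highest inode number is free too */
	if (last < LAST_FREE_OBJECTID - 1)
		add_free_range(last + 1, LAST_FREE_OBJECTID - last - 1);
	return 0;
}
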
136
137static void start_caching(struct btrfs_root *root)
138{
139 struct task_struct *tsk;
140 int ret;
141
142 spin_lock(&root->cache_lock);
143 if (root->cached != BTRFS_CACHE_NO) {
144 spin_unlock(&root->cache_lock);
145 return;
146 }
147
148 root->cached = BTRFS_CACHE_STARTED;
149 spin_unlock(&root->cache_lock);
150
151 ret = load_free_ino_cache(root->fs_info, root);
152 if (ret == 1) {
153 spin_lock(&root->cache_lock);
154 root->cached = BTRFS_CACHE_FINISHED;
155 spin_unlock(&root->cache_lock);
156 return;
157 }
158
159 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
160 root->root_key.objectid);
161 BUG_ON(IS_ERR(tsk));
162}
163
164int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
165{
166again:
167 *objectid = btrfs_find_ino_for_alloc(root);
168
169 if (*objectid != 0)
170 return 0;
171
172 start_caching(root);
173
174 wait_event(root->cache_wait,
175 root->cached == BTRFS_CACHE_FINISHED ||
176 root->free_ino_ctl->free_space > 0);
177
178 if (root->cached == BTRFS_CACHE_FINISHED &&
179 root->free_ino_ctl->free_space == 0)
180 return -ENOSPC;
181 else
182 goto again;
183}
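
btrfs_find_free_ino() keeps retrying until the cache hands out a number, and
only reports -ENOSPC once caching has finished with nothing free. A simplified
standalone model of that loop (the cache structure and state transition here
are invented for illustration, not the kernel's types):

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

enum { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct ino_cache {
	int state;
	uint64_t next;		/* next free number */
	uint64_t remaining;	/* how many numbers are left */
};

static int find_free_ino(struct ino_cache *c, uint64_t *objectid)
{
	for (;;) {
		if (c->remaining) {
			*objectid = c->next++;
			c->remaining--;
			return 0;
		}
		if (c->state == CACHE_FINISHED)
			return -ENOSPC;	/* nothing free and nothing coming */
		c->state = CACHE_FINISHED;	/* pretend caching just completed */
	}
}

int main(void)
{
	struct ino_cache c = { CACHE_STARTED, 256, 2 };
	uint64_t ino;

	while (find_free_ino(&c, &ino) == 0)
		printf("allocated ino %llu\n", (unsigned long long)ino);
	printf("out of inode numbers\n");
	return 0;
}
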
184
185void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
186{
187 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
188 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
189again:
190 if (root->cached == BTRFS_CACHE_FINISHED) {
191 __btrfs_add_free_space(ctl, objectid, 1);
192 } else {
193 /*
 194		 * If we are in the process of caching free ino chunks,
 195		 * we'll leave the returned inode number in the pinned tree
 196		 * until a transaction is committed or the caching work is
 197		 * done, to avoid adding the same number to the free_ino
 198		 * tree twice across transactions.
199 */
200
201 mutex_lock(&root->fs_commit_mutex);
202 spin_lock(&root->cache_lock);
203 if (root->cached == BTRFS_CACHE_FINISHED) {
204 spin_unlock(&root->cache_lock);
205 mutex_unlock(&root->fs_commit_mutex);
206 goto again;
207 }
208 spin_unlock(&root->cache_lock);
209
210 start_caching(root);
211
212 if (objectid <= root->cache_progress)
213 __btrfs_add_free_space(ctl, objectid, 1);
214 else
215 __btrfs_add_free_space(pinned, objectid, 1);
216
217 mutex_unlock(&root->fs_commit_mutex);
218 }
219}
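
The routing decision above hinges on root->cache_progress: a returned number
the scanner has already passed can go straight to the free_ino tree, while a
later one must sit in the pinned tree. A small sketch of just that decision
(assumed values, not kernel code):

#include <stdio.h>
#include <stdint.h>

static void route_returned_ino(uint64_t cache_progress, uint64_t objectid)
{
	if (objectid <= cache_progress)
		printf("ino %llu -> free_ino tree\n",
		       (unsigned long long)objectid);
	else
		printf("ino %llu -> pinned tree (until commit/cache done)\n",
		       (unsigned long long)objectid);
}

int main(void)
{
	route_returned_ino(300, 260);	/* scanner already passed 260 */
	route_returned_ino(300, 500);	/* scanner has not reached 500 yet */
	return 0;
}
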
220
221/*
222 * When a transaction is committed, we'll move those inode numbers which
 223 * are smaller than root->cache_progress from the pinned tree to the free_ino tree,
224 * and others will just be dropped, because the commit root we were
225 * searching has changed.
226 *
227 * Must be called with root->fs_commit_mutex held
228 */
229void btrfs_unpin_free_ino(struct btrfs_root *root)
230{
231 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
232 struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
233 struct btrfs_free_space *info;
234 struct rb_node *n;
235 u64 count;
236
237 while (1) {
238 n = rb_first(rbroot);
239 if (!n)
240 break;
241
242 info = rb_entry(n, struct btrfs_free_space, offset_index);
243 BUG_ON(info->bitmap);
244
245 if (info->offset > root->cache_progress)
246 goto free;
247 else if (info->offset + info->bytes > root->cache_progress)
248 count = root->cache_progress - info->offset + 1;
249 else
250 count = info->bytes;
251
252 __btrfs_add_free_space(ctl, info->offset, count);
253free:
254 rb_erase(&info->offset_index, rbroot);
255 kfree(info);
256 }
257}
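
The partial-overlap case is the subtle one: a pinned extent that straddles
cache_progress only releases the numbers the scanner has already covered. A
worked standalone example of the count computation above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cache_progress = 300;
	uint64_t offset = 290, bytes = 20;	/* pinned range [290, 310) */
	uint64_t count;

	if (offset > cache_progress)
		count = 0;				/* dropped entirely */
	else if (offset + bytes > cache_progress)
		count = cache_progress - offset + 1;	/* partial: 11 numbers */
	else
		count = bytes;				/* fully covered */

	printf("release %llu of %llu pinned numbers\n",
	       (unsigned long long)count, (unsigned long long)bytes);
	return 0;
}
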
258
259#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
260#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
261
262/*
 263 * The goal is to keep the memory used by the free_ino tree from
 264 * exceeding what it would use if we used bitmaps only.
265 */
266static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
267{
268 struct btrfs_free_space *info;
269 struct rb_node *n;
270 int max_ino;
271 int max_bitmaps;
272
273 n = rb_last(&ctl->free_space_offset);
274 if (!n) {
275 ctl->extents_thresh = INIT_THRESHOLD;
276 return;
277 }
278 info = rb_entry(n, struct btrfs_free_space, offset_index);
279
280 /*
281 * Find the maximum inode number in the filesystem. Note we
282 * ignore the fact that this can be a bitmap, because we are
 284	 * not doing a precise calculation.
284 */
285 max_ino = info->bytes - 1;
286
287 max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
288 if (max_bitmaps <= ctl->total_bitmaps) {
289 ctl->extents_thresh = 0;
290 return;
291 }
292
293 ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
294 PAGE_CACHE_SIZE / sizeof(*info);
295}
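
In other words, the threshold is however many extent entries fit in the pages
we could still spend before a pure-bitmap representation would be cheaper. A
standalone sketch of the arithmetic, assuming 4K pages and a rough entry size
(both assumptions, not the kernel's exact values):

#include <stdio.h>

#define PAGE_SIZE_	4096
#define ENTRY_SIZE	48	/* rough sizeof(struct btrfs_free_space) */
#define INODES_PER_BITMAP (PAGE_SIZE_ * 8)

int main(void)
{
	unsigned long max_ino = 1000000;	/* highest inode number seen */
	int total_bitmaps = 5;			/* bitmaps already allocated */
	int max_bitmaps = (max_ino + INODES_PER_BITMAP - 1) / INODES_PER_BITMAP;
	int thresh = 0;

	/* entries are allowed as long as bitmaps alone would not be cheaper */
	if (max_bitmaps > total_bitmaps)
		thresh = (max_bitmaps - total_bitmaps) * PAGE_SIZE_ / ENTRY_SIZE;

	printf("max_bitmaps=%d extents_thresh=%d\n", max_bitmaps, thresh);
	return 0;
}
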
296
297/*
 298 * We don't fall back to a bitmap if we are below the extents threshold
 299 * or this chunk of inode numbers is a big one.
300 */
301static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
302 struct btrfs_free_space *info)
303{
304 if (ctl->free_extents < ctl->extents_thresh ||
305 info->bytes > INODES_PER_BITMAP / 10)
306 return false;
307
308 return true;
309}
310
311static struct btrfs_free_space_op free_ino_op = {
312 .recalc_thresholds = recalculate_thresholds,
313 .use_bitmap = use_bitmap,
314};
315
316static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
317{
318}
319
320static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
321 struct btrfs_free_space *info)
322{
323 /*
324 * We always use extents for two reasons:
325 *
 326	 * - The pinned tree is only used while the caching work is
 327	 *   running.
 328	 * - It keeps the code simpler. See btrfs_unpin_free_ino().
329 */
330 return false;
331}
332
333static struct btrfs_free_space_op pinned_free_ino_op = {
334 .recalc_thresholds = pinned_recalc_thresholds,
335 .use_bitmap = pinned_use_bitmap,
336};
337
338void btrfs_init_free_ino_ctl(struct btrfs_root *root)
339{
340 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
341 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
342
343 spin_lock_init(&ctl->tree_lock);
344 ctl->unit = 1;
345 ctl->start = 0;
346 ctl->private = NULL;
347 ctl->op = &free_ino_op;
348
349 /*
 350	 * Initially we allow 16K of RAM for caching chunks of
 351	 * inode numbers before we resort to bitmaps. This is somewhat
 352	 * arbitrary, but it will be adjusted at runtime.
353 */
354 ctl->extents_thresh = INIT_THRESHOLD;
355
356 spin_lock_init(&pinned->tree_lock);
357 pinned->unit = 1;
358 pinned->start = 0;
359 pinned->private = NULL;
360 pinned->extents_thresh = 0;
361 pinned->op = &pinned_free_ino_op;
362}
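
The INIT_THRESHOLD macro above encodes the "16K of RAM" policy: half of 32K
divided by the entry size. A one-liner sketch of that arithmetic (the entry
size is an assumption):

#include <stdio.h>

int main(void)
{
	int entry_size = 48;	/* assumed sizeof(struct btrfs_free_space) */
	int init_thresh = ((1024 * 32) / 2) / entry_size;

	printf("initial extents_thresh = %d entries (~16K of RAM)\n",
	       init_thresh);
	return 0;
}
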
363
364int btrfs_save_ino_cache(struct btrfs_root *root,
365 struct btrfs_trans_handle *trans)
366{
367 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
368 struct btrfs_path *path;
369 struct inode *inode;
370 u64 alloc_hint = 0;
371 int ret;
372 int prealloc;
373 bool retry = false;
374
375 path = btrfs_alloc_path();
376 if (!path)
377 return -ENOMEM;
378again:
379 inode = lookup_free_ino_inode(root, path);
380 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
381 ret = PTR_ERR(inode);
382 goto out;
383 }
384
385 if (IS_ERR(inode)) {
386 BUG_ON(retry);
387 retry = true;
388
389 ret = create_free_ino_inode(root, trans, path);
390 if (ret)
391 goto out;
392 goto again;
393 }
394
395 BTRFS_I(inode)->generation = 0;
396 ret = btrfs_update_inode(trans, root, inode);
397 WARN_ON(ret);
398
399 if (i_size_read(inode) > 0) {
400 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
401 if (ret)
402 goto out_put;
403 }
404
405 spin_lock(&root->cache_lock);
406 if (root->cached != BTRFS_CACHE_FINISHED) {
407 ret = -1;
408 spin_unlock(&root->cache_lock);
409 goto out_put;
410 }
411 spin_unlock(&root->cache_lock);
412
413 spin_lock(&ctl->tree_lock);
414 prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
415 prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
416 prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
417 spin_unlock(&ctl->tree_lock);
418
419 /* Just to make sure we have enough space */
420 prealloc += 8 * PAGE_CACHE_SIZE;
421
422 ret = btrfs_check_data_free_space(inode, prealloc);
423 if (ret)
424 goto out_put;
425
426 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
427 prealloc, prealloc, &alloc_hint);
428 if (ret)
429 goto out_put;
430 btrfs_free_reserved_data_space(inode, prealloc);
431
432out_put:
433 iput(inode);
434out:
435 if (ret == 0)
436 ret = btrfs_write_out_ino_cache(root, trans, path);
437
438 btrfs_free_path(path);
439 return ret;
440}
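
The preallocation above sizes the cache file pessimistically: extent entries
rounded up to whole pages, one page per bitmap, plus eight pages of slack. A
standalone sketch of the sizing (page and entry sizes are assumptions):

#include <stdio.h>

#define PAGE_SZ 4096
#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	int entry_size = 48;	/* assumed sizeof(struct btrfs_free_space) */
	int free_extents = 100;
	int total_bitmaps = 2;

	long prealloc = ALIGN_UP((long)entry_size * free_extents, PAGE_SZ);
	prealloc += (long)total_bitmaps * PAGE_SZ;	/* one page per bitmap */
	prealloc += 8L * PAGE_SZ;			/* "just to make sure" slack */

	printf("prealloc = %ld bytes\n", prealloc);
	return 0;
}
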
441
442static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
24{ 443{
25 struct btrfs_path *path; 444 struct btrfs_path *path;
26 int ret; 445 int ret;
@@ -55,15 +474,14 @@ error:
55 return ret; 474 return ret;
56} 475}
57 476
58int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 477int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
59 struct btrfs_root *root,
60 u64 dirid, u64 *objectid)
61{ 478{
62 int ret; 479 int ret;
63 mutex_lock(&root->objectid_mutex); 480 mutex_lock(&root->objectid_mutex);
64 481
65 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { 482 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
66 ret = btrfs_find_highest_inode(root, &root->highest_objectid); 483 ret = btrfs_find_highest_objectid(root,
484 &root->highest_objectid);
67 if (ret) 485 if (ret)
68 goto out; 486 goto out;
69 } 487 }
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h
new file mode 100644
index 000000000000..ddb347bfee23
--- /dev/null
+++ b/fs/btrfs/inode-map.h
@@ -0,0 +1,13 @@
1#ifndef __BTRFS_INODE_MAP
2#define __BTRFS_INODE_MAP
3
4void btrfs_init_free_ino_ctl(struct btrfs_root *root);
5void btrfs_unpin_free_ino(struct btrfs_root *root);
6void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
7int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
8int btrfs_save_ino_cache(struct btrfs_root *root,
9 struct btrfs_trans_handle *trans);
10
11int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
12
13#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 80fcd5177731..d378f8b70ef7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h>
40#include "compat.h" 41#include "compat.h"
41#include "ctree.h" 42#include "ctree.h"
42#include "disk-io.h" 43#include "disk-io.h"
@@ -51,6 +52,7 @@
51#include "compression.h" 52#include "compression.h"
52#include "locking.h" 53#include "locking.h"
53#include "free-space-cache.h" 54#include "free-space-cache.h"
55#include "inode-map.h"
54 56
55struct btrfs_iget_args { 57struct btrfs_iget_args {
56 u64 ino; 58 u64 ino;
@@ -138,7 +140,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 path->leave_spinning = 1; 140 path->leave_spinning = 1;
139 btrfs_set_trans_block_group(trans, inode); 141 btrfs_set_trans_block_group(trans, inode);
140 142
141 key.objectid = inode->i_ino; 143 key.objectid = btrfs_ino(inode);
142 key.offset = start; 144 key.offset = start;
143 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 145 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
144 datasize = btrfs_file_extent_calc_inline_size(cur_size); 146 datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -649,7 +651,7 @@ retry:
649 async_extent->start + 651 async_extent->start +
650 async_extent->ram_size - 1, 0); 652 async_extent->ram_size - 1, 0);
651 653
652 em = alloc_extent_map(GFP_NOFS); 654 em = alloc_extent_map();
653 BUG_ON(!em); 655 BUG_ON(!em);
654 em->start = async_extent->start; 656 em->start = async_extent->start;
655 em->len = async_extent->ram_size; 657 em->len = async_extent->ram_size;
@@ -745,6 +747,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
745 return alloc_hint; 747 return alloc_hint;
746} 748}
747 749
750static inline bool is_free_space_inode(struct btrfs_root *root,
751 struct inode *inode)
752{
753 if (root == root->fs_info->tree_root ||
754 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
755 return true;
756 return false;
757}
758
748/* 759/*
749 * when extent_io.c finds a delayed allocation range in the file, 760 * when extent_io.c finds a delayed allocation range in the file,
750 * the call backs end up in this code. The basic idea is to 761 * the call backs end up in this code. The basic idea is to
@@ -777,7 +788,7 @@ static noinline int cow_file_range(struct inode *inode,
777 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 788 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
778 int ret = 0; 789 int ret = 0;
779 790
780 BUG_ON(root == root->fs_info->tree_root); 791 BUG_ON(is_free_space_inode(root, inode));
781 trans = btrfs_join_transaction(root, 1); 792 trans = btrfs_join_transaction(root, 1);
782 BUG_ON(IS_ERR(trans)); 793 BUG_ON(IS_ERR(trans));
783 btrfs_set_trans_block_group(trans, inode); 794 btrfs_set_trans_block_group(trans, inode);
@@ -826,7 +837,7 @@ static noinline int cow_file_range(struct inode *inode,
826 (u64)-1, &ins, 1); 837 (u64)-1, &ins, 1);
827 BUG_ON(ret); 838 BUG_ON(ret);
828 839
829 em = alloc_extent_map(GFP_NOFS); 840 em = alloc_extent_map();
830 BUG_ON(!em); 841 BUG_ON(!em);
831 em->start = start; 842 em->start = start;
832 em->orig_start = em->start; 843 em->orig_start = em->start;
@@ -1008,7 +1019,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
1008 LIST_HEAD(list); 1019 LIST_HEAD(list);
1009 1020
1010 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, 1021 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1011 bytenr + num_bytes - 1, &list); 1022 bytenr + num_bytes - 1, &list, 0);
1012 if (ret == 0 && list_empty(&list)) 1023 if (ret == 0 && list_empty(&list))
1013 return 0; 1024 return 0;
1014 1025
@@ -1049,29 +1060,31 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1049 int type; 1060 int type;
1050 int nocow; 1061 int nocow;
1051 int check_prev = 1; 1062 int check_prev = 1;
1052 bool nolock = false; 1063 bool nolock;
1064 u64 ino = btrfs_ino(inode);
1053 1065
1054 path = btrfs_alloc_path(); 1066 path = btrfs_alloc_path();
1055 BUG_ON(!path); 1067 BUG_ON(!path);
1056 if (root == root->fs_info->tree_root) { 1068
1057 nolock = true; 1069 nolock = is_free_space_inode(root, inode);
1070
1071 if (nolock)
1058 trans = btrfs_join_transaction_nolock(root, 1); 1072 trans = btrfs_join_transaction_nolock(root, 1);
1059 } else { 1073 else
1060 trans = btrfs_join_transaction(root, 1); 1074 trans = btrfs_join_transaction(root, 1);
1061 }
1062 BUG_ON(IS_ERR(trans)); 1075 BUG_ON(IS_ERR(trans));
1063 1076
1064 cow_start = (u64)-1; 1077 cow_start = (u64)-1;
1065 cur_offset = start; 1078 cur_offset = start;
1066 while (1) { 1079 while (1) {
1067 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 1080 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1068 cur_offset, 0); 1081 cur_offset, 0);
1069 BUG_ON(ret < 0); 1082 BUG_ON(ret < 0);
1070 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1083 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1071 leaf = path->nodes[0]; 1084 leaf = path->nodes[0];
1072 btrfs_item_key_to_cpu(leaf, &found_key, 1085 btrfs_item_key_to_cpu(leaf, &found_key,
1073 path->slots[0] - 1); 1086 path->slots[0] - 1);
1074 if (found_key.objectid == inode->i_ino && 1087 if (found_key.objectid == ino &&
1075 found_key.type == BTRFS_EXTENT_DATA_KEY) 1088 found_key.type == BTRFS_EXTENT_DATA_KEY)
1076 path->slots[0]--; 1089 path->slots[0]--;
1077 } 1090 }
@@ -1092,7 +1105,7 @@ next_slot:
1092 num_bytes = 0; 1105 num_bytes = 0;
1093 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1106 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1094 1107
1095 if (found_key.objectid > inode->i_ino || 1108 if (found_key.objectid > ino ||
1096 found_key.type > BTRFS_EXTENT_DATA_KEY || 1109 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1097 found_key.offset > end) 1110 found_key.offset > end)
1098 break; 1111 break;
@@ -1127,7 +1140,7 @@ next_slot:
1127 goto out_check; 1140 goto out_check;
1128 if (btrfs_extent_readonly(root, disk_bytenr)) 1141 if (btrfs_extent_readonly(root, disk_bytenr))
1129 goto out_check; 1142 goto out_check;
1130 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1143 if (btrfs_cross_ref_exist(trans, root, ino,
1131 found_key.offset - 1144 found_key.offset -
1132 extent_offset, disk_bytenr)) 1145 extent_offset, disk_bytenr))
1133 goto out_check; 1146 goto out_check;
@@ -1164,7 +1177,7 @@ out_check:
1164 goto next_slot; 1177 goto next_slot;
1165 } 1178 }
1166 1179
1167 btrfs_release_path(root, path); 1180 btrfs_release_path(path);
1168 if (cow_start != (u64)-1) { 1181 if (cow_start != (u64)-1) {
1169 ret = cow_file_range(inode, locked_page, cow_start, 1182 ret = cow_file_range(inode, locked_page, cow_start,
1170 found_key.offset - 1, page_started, 1183 found_key.offset - 1, page_started,
@@ -1177,7 +1190,7 @@ out_check:
1177 struct extent_map *em; 1190 struct extent_map *em;
1178 struct extent_map_tree *em_tree; 1191 struct extent_map_tree *em_tree;
1179 em_tree = &BTRFS_I(inode)->extent_tree; 1192 em_tree = &BTRFS_I(inode)->extent_tree;
1180 em = alloc_extent_map(GFP_NOFS); 1193 em = alloc_extent_map();
1181 BUG_ON(!em); 1194 BUG_ON(!em);
1182 em->start = cur_offset; 1195 em->start = cur_offset;
1183 em->orig_start = em->start; 1196 em->orig_start = em->start;
@@ -1222,7 +1235,7 @@ out_check:
1222 if (cur_offset > end) 1235 if (cur_offset > end)
1223 break; 1236 break;
1224 } 1237 }
1225 btrfs_release_path(root, path); 1238 btrfs_release_path(path);
1226 1239
1227 if (cur_offset <= end && cow_start == (u64)-1) 1240 if (cur_offset <= end && cow_start == (u64)-1)
1228 cow_start = cur_offset; 1241 cow_start = cur_offset;
@@ -1316,8 +1329,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1316 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1329 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1317 struct btrfs_root *root = BTRFS_I(inode)->root; 1330 struct btrfs_root *root = BTRFS_I(inode)->root;
1318 u64 len = state->end + 1 - state->start; 1331 u64 len = state->end + 1 - state->start;
1319 int do_list = (root->root_key.objectid != 1332 bool do_list = !is_free_space_inode(root, inode);
1320 BTRFS_ROOT_TREE_OBJECTID);
1321 1333
1322 if (*bits & EXTENT_FIRST_DELALLOC) 1334 if (*bits & EXTENT_FIRST_DELALLOC)
1323 *bits &= ~EXTENT_FIRST_DELALLOC; 1335 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1350,8 +1362,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1350 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1362 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1351 struct btrfs_root *root = BTRFS_I(inode)->root; 1363 struct btrfs_root *root = BTRFS_I(inode)->root;
1352 u64 len = state->end + 1 - state->start; 1364 u64 len = state->end + 1 - state->start;
1353 int do_list = (root->root_key.objectid != 1365 bool do_list = !is_free_space_inode(root, inode);
1354 BTRFS_ROOT_TREE_OBJECTID);
1355 1366
1356 if (*bits & EXTENT_FIRST_DELALLOC) 1367 if (*bits & EXTENT_FIRST_DELALLOC)
1357 *bits &= ~EXTENT_FIRST_DELALLOC; 1368 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1458,7 +1469,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1458 1469
1459 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1470 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1460 1471
1461 if (root == root->fs_info->tree_root) 1472 if (is_free_space_inode(root, inode))
1462 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1473 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1463 else 1474 else
1464 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1475 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1644,7 +1655,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1644 &hint, 0); 1655 &hint, 0);
1645 BUG_ON(ret); 1656 BUG_ON(ret);
1646 1657
1647 ins.objectid = inode->i_ino; 1658 ins.objectid = btrfs_ino(inode);
1648 ins.offset = file_pos; 1659 ins.offset = file_pos;
1649 ins.type = BTRFS_EXTENT_DATA_KEY; 1660 ins.type = BTRFS_EXTENT_DATA_KEY;
1650 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1661 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
@@ -1675,7 +1686,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1675 ins.type = BTRFS_EXTENT_ITEM_KEY; 1686 ins.type = BTRFS_EXTENT_ITEM_KEY;
1676 ret = btrfs_alloc_reserved_file_extent(trans, root, 1687 ret = btrfs_alloc_reserved_file_extent(trans, root,
1677 root->root_key.objectid, 1688 root->root_key.objectid,
1678 inode->i_ino, file_pos, &ins); 1689 btrfs_ino(inode), file_pos, &ins);
1679 BUG_ON(ret); 1690 BUG_ON(ret);
1680 btrfs_free_path(path); 1691 btrfs_free_path(path);
1681 1692
@@ -1701,7 +1712,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1701 struct extent_state *cached_state = NULL; 1712 struct extent_state *cached_state = NULL;
1702 int compress_type = 0; 1713 int compress_type = 0;
1703 int ret; 1714 int ret;
1704 bool nolock = false; 1715 bool nolock;
1705 1716
1706 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1717 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1707 end - start + 1); 1718 end - start + 1);
@@ -1709,7 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1709 return 0; 1720 return 0;
1710 BUG_ON(!ordered_extent); 1721 BUG_ON(!ordered_extent);
1711 1722
1712 nolock = (root == root->fs_info->tree_root); 1723 nolock = is_free_space_inode(root, inode);
1713 1724
1714 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1725 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1715 BUG_ON(!list_empty(&ordered_extent->list)); 1726 BUG_ON(!list_empty(&ordered_extent->list));
@@ -1855,7 +1866,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1855 } 1866 }
1856 read_unlock(&em_tree->lock); 1867 read_unlock(&em_tree->lock);
1857 1868
1858 if (!em || IS_ERR(em)) { 1869 if (IS_ERR_OR_NULL(em)) {
1859 kfree(failrec); 1870 kfree(failrec);
1860 return -EIO; 1871 return -EIO;
1861 } 1872 }
@@ -2004,12 +2015,11 @@ good:
2004 return 0; 2015 return 0;
2005 2016
2006zeroit: 2017zeroit:
2007 if (printk_ratelimit()) { 2018 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
2008 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 2019 "private %llu\n",
2009 "private %llu\n", page->mapping->host->i_ino, 2020 (unsigned long long)btrfs_ino(page->mapping->host),
2010 (unsigned long long)start, csum, 2021 (unsigned long long)start, csum,
2011 (unsigned long long)private); 2022 (unsigned long long)private);
2012 }
2013 memset(kaddr + offset, 1, end - start + 1); 2023 memset(kaddr + offset, 1, end - start + 1);
2014 flush_dcache_page(page); 2024 flush_dcache_page(page);
2015 kunmap_atomic(kaddr, KM_USER0); 2025 kunmap_atomic(kaddr, KM_USER0);
@@ -2244,7 +2254,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2244 2254
2245 /* insert an orphan item to track this unlinked/truncated file */ 2255 /* insert an orphan item to track this unlinked/truncated file */
2246 if (insert >= 1) { 2256 if (insert >= 1) {
2247 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2257 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2248 BUG_ON(ret); 2258 BUG_ON(ret);
2249 } 2259 }
2250 2260
@@ -2281,7 +2291,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2281 spin_unlock(&root->orphan_lock); 2291 spin_unlock(&root->orphan_lock);
2282 2292
2283 if (trans && delete_item) { 2293 if (trans && delete_item) {
2284 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2294 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2285 BUG_ON(ret); 2295 BUG_ON(ret);
2286 } 2296 }
2287 2297
@@ -2346,7 +2356,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2346 break; 2356 break;
2347 2357
2348 /* release the path since we're done with it */ 2358 /* release the path since we're done with it */
2349 btrfs_release_path(root, path); 2359 btrfs_release_path(path);
2350 2360
2351 /* 2361 /*
2352 * this is where we are basically btrfs_lookup, without the 2362 * this is where we are basically btrfs_lookup, without the
@@ -2543,7 +2553,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2543 * try to precache a NULL acl entry for files that don't have 2553 * try to precache a NULL acl entry for files that don't have
2544 * any xattrs or acls 2554 * any xattrs or acls
2545 */ 2555 */
2546 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2556 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
2557 btrfs_ino(inode));
2547 if (!maybe_acls) 2558 if (!maybe_acls)
2548 cache_no_acl(inode); 2559 cache_no_acl(inode);
2549 2560
@@ -2647,11 +2658,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2647 struct extent_buffer *leaf; 2658 struct extent_buffer *leaf;
2648 int ret; 2659 int ret;
2649 2660
2661 /*
 2662	 * If the root is the tree root, this inode is used to
 2663	 * store free space information. Such inodes are updated
 2664	 * while committing the transaction, so their updates must
 2665	 * not be delayed, or a deadlock will occur.
2666 */
2667 if (!is_free_space_inode(root, inode)) {
2668 ret = btrfs_delayed_update_inode(trans, root, inode);
2669 if (!ret)
2670 btrfs_set_inode_last_trans(trans, inode);
2671 return ret;
2672 }
2673
2650 path = btrfs_alloc_path(); 2674 path = btrfs_alloc_path();
2651 BUG_ON(!path); 2675 if (!path)
2676 return -ENOMEM;
2677
2652 path->leave_spinning = 1; 2678 path->leave_spinning = 1;
2653 ret = btrfs_lookup_inode(trans, root, path, 2679 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
2654 &BTRFS_I(inode)->location, 1); 2680 1);
2655 if (ret) { 2681 if (ret) {
2656 if (ret > 0) 2682 if (ret > 0)
2657 ret = -ENOENT; 2683 ret = -ENOENT;
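
The new dispatch in btrfs_update_inode() is the key invariant of this patch:
ordinary inodes go through the delayed-inode machinery, while free-space
inodes, which are written while the transaction commits, must be updated
directly. A standalone model of that branch (booleans stand in for the real
root/objectid checks):

#include <stdio.h>
#include <stdbool.h>

static bool is_free_space_inode(bool root_is_tree_root, bool is_free_ino_inode)
{
	return root_is_tree_root || is_free_ino_inode;
}

static void update_inode(bool tree_root, bool free_ino)
{
	if (!is_free_space_inode(tree_root, free_ino))
		printf("delayed update (batched, written later)\n");
	else
		printf("direct update (delaying would deadlock at commit)\n");
}

int main(void)
{
	update_inode(false, false);	/* regular file inode */
	update_inode(true, false);	/* free-space cache inode */
	return 0;
}
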
@@ -2661,7 +2687,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2661 btrfs_unlock_up_safe(path, 1); 2687 btrfs_unlock_up_safe(path, 1);
2662 leaf = path->nodes[0]; 2688 leaf = path->nodes[0];
2663 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2689 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2664 struct btrfs_inode_item); 2690 struct btrfs_inode_item);
2665 2691
2666 fill_inode_item(trans, leaf, inode_item, inode); 2692 fill_inode_item(trans, leaf, inode_item, inode);
2667 btrfs_mark_buffer_dirty(leaf); 2693 btrfs_mark_buffer_dirty(leaf);
@@ -2672,7 +2698,6 @@ failed:
2672 return ret; 2698 return ret;
2673} 2699}
2674 2700
2675
2676/* 2701/*
2677 * unlink helper that gets used here in inode.c and in the tree logging 2702 * unlink helper that gets used here in inode.c and in the tree logging
2678 * recovery code. It remove a link in a directory with a given name, and 2703 * recovery code. It remove a link in a directory with a given name, and
@@ -2689,6 +2714,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2689 struct btrfs_dir_item *di; 2714 struct btrfs_dir_item *di;
2690 struct btrfs_key key; 2715 struct btrfs_key key;
2691 u64 index; 2716 u64 index;
2717 u64 ino = btrfs_ino(inode);
2718 u64 dir_ino = btrfs_ino(dir);
2692 2719
2693 path = btrfs_alloc_path(); 2720 path = btrfs_alloc_path();
2694 if (!path) { 2721 if (!path) {
@@ -2697,7 +2724,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2697 } 2724 }
2698 2725
2699 path->leave_spinning = 1; 2726 path->leave_spinning = 1;
2700 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2727 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2701 name, name_len, -1); 2728 name, name_len, -1);
2702 if (IS_ERR(di)) { 2729 if (IS_ERR(di)) {
2703 ret = PTR_ERR(di); 2730 ret = PTR_ERR(di);
@@ -2712,33 +2739,23 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2712 ret = btrfs_delete_one_dir_name(trans, root, path, di); 2739 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2713 if (ret) 2740 if (ret)
2714 goto err; 2741 goto err;
2715 btrfs_release_path(root, path); 2742 btrfs_release_path(path);
2716 2743
2717 ret = btrfs_del_inode_ref(trans, root, name, name_len, 2744 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
2718 inode->i_ino, 2745 dir_ino, &index);
2719 dir->i_ino, &index);
2720 if (ret) { 2746 if (ret) {
2721 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2747 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2722 "inode %lu parent %lu\n", name_len, name, 2748 "inode %llu parent %llu\n", name_len, name,
2723 inode->i_ino, dir->i_ino); 2749 (unsigned long long)ino, (unsigned long long)dir_ino);
2724 goto err; 2750 goto err;
2725 } 2751 }
2726 2752
2727 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 2753 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2728 index, name, name_len, -1); 2754 if (ret)
2729 if (IS_ERR(di)) {
2730 ret = PTR_ERR(di);
2731 goto err;
2732 }
2733 if (!di) {
2734 ret = -ENOENT;
2735 goto err; 2755 goto err;
2736 }
2737 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2738 btrfs_release_path(root, path);
2739 2756
2740 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2757 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2741 inode, dir->i_ino); 2758 inode, dir_ino);
2742 BUG_ON(ret != 0 && ret != -ENOENT); 2759 BUG_ON(ret != 0 && ret != -ENOENT);
2743 2760
2744 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2761 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
@@ -2816,12 +2833,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2816 int check_link = 1; 2833 int check_link = 1;
2817 int err = -ENOSPC; 2834 int err = -ENOSPC;
2818 int ret; 2835 int ret;
2836 u64 ino = btrfs_ino(inode);
2837 u64 dir_ino = btrfs_ino(dir);
2819 2838
2820 trans = btrfs_start_transaction(root, 10); 2839 trans = btrfs_start_transaction(root, 10);
2821 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 2840 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2822 return trans; 2841 return trans;
2823 2842
2824 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 2843 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2825 return ERR_PTR(-ENOSPC); 2844 return ERR_PTR(-ENOSPC);
2826 2845
2827 /* check if there is someone else holds reference */ 2846 /* check if there is someone else holds reference */
@@ -2862,7 +2881,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2862 } else { 2881 } else {
2863 check_link = 0; 2882 check_link = 0;
2864 } 2883 }
2865 btrfs_release_path(root, path); 2884 btrfs_release_path(path);
2866 2885
2867 ret = btrfs_lookup_inode(trans, root, path, 2886 ret = btrfs_lookup_inode(trans, root, path,
2868 &BTRFS_I(inode)->location, 0); 2887 &BTRFS_I(inode)->location, 0);
@@ -2876,11 +2895,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2876 } else { 2895 } else {
2877 check_link = 0; 2896 check_link = 0;
2878 } 2897 }
2879 btrfs_release_path(root, path); 2898 btrfs_release_path(path);
2880 2899
2881 if (ret == 0 && S_ISREG(inode->i_mode)) { 2900 if (ret == 0 && S_ISREG(inode->i_mode)) {
2882 ret = btrfs_lookup_file_extent(trans, root, path, 2901 ret = btrfs_lookup_file_extent(trans, root, path,
2883 inode->i_ino, (u64)-1, 0); 2902 ino, (u64)-1, 0);
2884 if (ret < 0) { 2903 if (ret < 0) {
2885 err = ret; 2904 err = ret;
2886 goto out; 2905 goto out;
@@ -2888,7 +2907,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2888 BUG_ON(ret == 0); 2907 BUG_ON(ret == 0);
2889 if (check_path_shared(root, path)) 2908 if (check_path_shared(root, path))
2890 goto out; 2909 goto out;
2891 btrfs_release_path(root, path); 2910 btrfs_release_path(path);
2892 } 2911 }
2893 2912
2894 if (!check_link) { 2913 if (!check_link) {
@@ -2896,7 +2915,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2896 goto out; 2915 goto out;
2897 } 2916 }
2898 2917
2899 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2918 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2900 dentry->d_name.name, dentry->d_name.len, 0); 2919 dentry->d_name.name, dentry->d_name.len, 0);
2901 if (IS_ERR(di)) { 2920 if (IS_ERR(di)) {
2902 err = PTR_ERR(di); 2921 err = PTR_ERR(di);
@@ -2909,11 +2928,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2909 err = 0; 2928 err = 0;
2910 goto out; 2929 goto out;
2911 } 2930 }
2912 btrfs_release_path(root, path); 2931 btrfs_release_path(path);
2913 2932
2914 ref = btrfs_lookup_inode_ref(trans, root, path, 2933 ref = btrfs_lookup_inode_ref(trans, root, path,
2915 dentry->d_name.name, dentry->d_name.len, 2934 dentry->d_name.name, dentry->d_name.len,
2916 inode->i_ino, dir->i_ino, 0); 2935 ino, dir_ino, 0);
2917 if (IS_ERR(ref)) { 2936 if (IS_ERR(ref)) {
2918 err = PTR_ERR(ref); 2937 err = PTR_ERR(ref);
2919 goto out; 2938 goto out;
@@ -2922,9 +2941,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2922 if (check_path_shared(root, path)) 2941 if (check_path_shared(root, path))
2923 goto out; 2942 goto out;
2924 index = btrfs_inode_ref_index(path->nodes[0], ref); 2943 index = btrfs_inode_ref_index(path->nodes[0], ref);
2925 btrfs_release_path(root, path); 2944 btrfs_release_path(path);
2926 2945
2927 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, 2946 /*
 2947	 * This is a commit root search, so if we can look up the inode item and
 2948	 * other related items in the commit root, the transaction that created
 2949	 * the dir/file has been committed, and the dir index item whose
 2950	 * insertion we delayed has also made it into the commit root. So
 2951	 * we needn't worry about the delayed insertion of the dir index item
2952 * here.
2953 */
2954 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
2928 dentry->d_name.name, dentry->d_name.len, 0); 2955 dentry->d_name.name, dentry->d_name.len, 0);
2929 if (IS_ERR(di)) { 2956 if (IS_ERR(di)) {
2930 err = PTR_ERR(di); 2957 err = PTR_ERR(di);
@@ -2999,54 +3026,47 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2999 struct btrfs_key key; 3026 struct btrfs_key key;
3000 u64 index; 3027 u64 index;
3001 int ret; 3028 int ret;
3029 u64 dir_ino = btrfs_ino(dir);
3002 3030
3003 path = btrfs_alloc_path(); 3031 path = btrfs_alloc_path();
3004 if (!path) 3032 if (!path)
3005 return -ENOMEM; 3033 return -ENOMEM;
3006 3034
3007 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 3035 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3008 name, name_len, -1); 3036 name, name_len, -1);
3009 BUG_ON(!di || IS_ERR(di)); 3037 BUG_ON(IS_ERR_OR_NULL(di));
3010 3038
3011 leaf = path->nodes[0]; 3039 leaf = path->nodes[0];
3012 btrfs_dir_item_key_to_cpu(leaf, di, &key); 3040 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3013 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 3041 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3014 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3042 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3015 BUG_ON(ret); 3043 BUG_ON(ret);
3016 btrfs_release_path(root, path); 3044 btrfs_release_path(path);
3017 3045
3018 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3046 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3019 objectid, root->root_key.objectid, 3047 objectid, root->root_key.objectid,
3020 dir->i_ino, &index, name, name_len); 3048 dir_ino, &index, name, name_len);
3021 if (ret < 0) { 3049 if (ret < 0) {
3022 BUG_ON(ret != -ENOENT); 3050 BUG_ON(ret != -ENOENT);
3023 di = btrfs_search_dir_index_item(root, path, dir->i_ino, 3051 di = btrfs_search_dir_index_item(root, path, dir_ino,
3024 name, name_len); 3052 name, name_len);
3025 BUG_ON(!di || IS_ERR(di)); 3053 BUG_ON(IS_ERR_OR_NULL(di));
3026 3054
3027 leaf = path->nodes[0]; 3055 leaf = path->nodes[0];
3028 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3056 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3029 btrfs_release_path(root, path); 3057 btrfs_release_path(path);
3030 index = key.offset; 3058 index = key.offset;
3031 } 3059 }
3060 btrfs_release_path(path);
3032 3061
3033 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 3062 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3034 index, name, name_len, -1);
3035 BUG_ON(!di || IS_ERR(di));
3036
3037 leaf = path->nodes[0];
3038 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3039 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3040 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3041 BUG_ON(ret); 3063 BUG_ON(ret);
3042 btrfs_release_path(root, path);
3043 3064
3044 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3065 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3045 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3066 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3046 ret = btrfs_update_inode(trans, root, dir); 3067 ret = btrfs_update_inode(trans, root, dir);
3047 BUG_ON(ret); 3068 BUG_ON(ret);
3048 3069
3049 btrfs_free_path(path);
3050 return 0; 3070 return 0;
3051} 3071}
3052 3072
@@ -3059,7 +3079,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3059 unsigned long nr = 0; 3079 unsigned long nr = 0;
3060 3080
3061 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || 3081 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
3062 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 3082 btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3063 return -ENOTEMPTY; 3083 return -ENOTEMPTY;
3064 3084
3065 trans = __unlink_start_trans(dir, dentry); 3085 trans = __unlink_start_trans(dir, dentry);
@@ -3068,7 +3088,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3068 3088
3069 btrfs_set_trans_block_group(trans, dir); 3089 btrfs_set_trans_block_group(trans, dir);
3070 3090
3071 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3091 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3072 err = btrfs_unlink_subvol(trans, root, dir, 3092 err = btrfs_unlink_subvol(trans, root, dir,
3073 BTRFS_I(inode)->location.objectid, 3093 BTRFS_I(inode)->location.objectid,
3074 dentry->d_name.name, 3094 dentry->d_name.name,
@@ -3093,178 +3113,6 @@ out:
3093 return err; 3113 return err;
3094} 3114}
3095 3115
3096#if 0
3097/*
3098 * when truncating bytes in a file, it is possible to avoid reading
3099 * the leaves that contain only checksum items. This can be the
3100 * majority of the IO required to delete a large file, but it must
3101 * be done carefully.
3102 *
3103 * The keys in the level just above the leaves are checked to make sure
3104 * the lowest key in a given leaf is a csum key, and starts at an offset
3105 * after the new size.
3106 *
3107 * Then the key for the next leaf is checked to make sure it also has
3108 * a checksum item for the same file. If it does, we know our target leaf
3109 * contains only checksum items, and it can be safely freed without reading
3110 * it.
3111 *
3112 * This is just an optimization targeted at large files. It may do
3113 * nothing. It will return 0 unless things went badly.
3114 */
3115static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
3116 struct btrfs_root *root,
3117 struct btrfs_path *path,
3118 struct inode *inode, u64 new_size)
3119{
3120 struct btrfs_key key;
3121 int ret;
3122 int nritems;
3123 struct btrfs_key found_key;
3124 struct btrfs_key other_key;
3125 struct btrfs_leaf_ref *ref;
3126 u64 leaf_gen;
3127 u64 leaf_start;
3128
3129 path->lowest_level = 1;
3130 key.objectid = inode->i_ino;
3131 key.type = BTRFS_CSUM_ITEM_KEY;
3132 key.offset = new_size;
3133again:
3134 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3135 if (ret < 0)
3136 goto out;
3137
3138 if (path->nodes[1] == NULL) {
3139 ret = 0;
3140 goto out;
3141 }
3142 ret = 0;
3143 btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
3144 nritems = btrfs_header_nritems(path->nodes[1]);
3145
3146 if (!nritems)
3147 goto out;
3148
3149 if (path->slots[1] >= nritems)
3150 goto next_node;
3151
3152 /* did we find a key greater than anything we want to delete? */
3153 if (found_key.objectid > inode->i_ino ||
3154 (found_key.objectid == inode->i_ino && found_key.type > key.type))
3155 goto out;
3156
 3157	/* we check the next key in the node to make sure the leaf contains
3158 * only checksum items. This comparison doesn't work if our
3159 * leaf is the last one in the node
3160 */
3161 if (path->slots[1] + 1 >= nritems) {
3162next_node:
3163 /* search forward from the last key in the node, this
3164 * will bring us into the next node in the tree
3165 */
3166 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
3167
3168 /* unlikely, but we inc below, so check to be safe */
3169 if (found_key.offset == (u64)-1)
3170 goto out;
3171
3172 /* search_forward needs a path with locks held, do the
3173 * search again for the original key. It is possible
3174 * this will race with a balance and return a path that
3175 * we could modify, but this drop is just an optimization
3176 * and is allowed to miss some leaves.
3177 */
3178 btrfs_release_path(root, path);
3179 found_key.offset++;
3180
3181 /* setup a max key for search_forward */
3182 other_key.offset = (u64)-1;
3183 other_key.type = key.type;
3184 other_key.objectid = key.objectid;
3185
3186 path->keep_locks = 1;
3187 ret = btrfs_search_forward(root, &found_key, &other_key,
3188 path, 0, 0);
3189 path->keep_locks = 0;
3190 if (ret || found_key.objectid != key.objectid ||
3191 found_key.type != key.type) {
3192 ret = 0;
3193 goto out;
3194 }
3195
3196 key.offset = found_key.offset;
3197 btrfs_release_path(root, path);
3198 cond_resched();
3199 goto again;
3200 }
3201
3202 /* we know there's one more slot after us in the tree,
3203 * read that key so we can verify it is also a checksum item
3204 */
3205 btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
3206
3207 if (found_key.objectid < inode->i_ino)
3208 goto next_key;
3209
3210 if (found_key.type != key.type || found_key.offset < new_size)
3211 goto next_key;
3212
3213 /*
3214 * if the key for the next leaf isn't a csum key from this objectid,
3215 * we can't be sure there aren't good items inside this leaf.
3216 * Bail out
3217 */
3218 if (other_key.objectid != inode->i_ino || other_key.type != key.type)
3219 goto out;
3220
3221 leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
3222 leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
3223 /*
3224 * it is safe to delete this leaf, it contains only
3225 * csum items from this inode at an offset >= new_size
3226 */
3227 ret = btrfs_del_leaf(trans, root, path, leaf_start);
3228 BUG_ON(ret);
3229
3230 if (root->ref_cows && leaf_gen < trans->transid) {
3231 ref = btrfs_alloc_leaf_ref(root, 0);
3232 if (ref) {
3233 ref->root_gen = root->root_key.offset;
3234 ref->bytenr = leaf_start;
3235 ref->owner = 0;
3236 ref->generation = leaf_gen;
3237 ref->nritems = 0;
3238
3239 btrfs_sort_leaf_ref(ref);
3240
3241 ret = btrfs_add_leaf_ref(root, ref, 0);
3242 WARN_ON(ret);
3243 btrfs_free_leaf_ref(root, ref);
3244 } else {
3245 WARN_ON(1);
3246 }
3247 }
3248next_key:
3249 btrfs_release_path(root, path);
3250
3251 if (other_key.objectid == inode->i_ino &&
3252 other_key.type == key.type && other_key.offset > key.offset) {
3253 key.offset = other_key.offset;
3254 cond_resched();
3255 goto again;
3256 }
3257 ret = 0;
3258out:
3259 /* fixup any changes we've made to the path */
3260 path->lowest_level = 0;
3261 path->keep_locks = 0;
3262 btrfs_release_path(root, path);
3263 return ret;
3264}
3265
3266#endif
3267
3268/* 3116/*
3269 * this can truncate away extent items, csum items and directory items. 3117 * this can truncate away extent items, csum items and directory items.
3270 * It starts at a high offset and removes keys until it can't find 3118 * It starts at a high offset and removes keys until it can't find
@@ -3300,17 +3148,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3300 int encoding; 3148 int encoding;
3301 int ret; 3149 int ret;
3302 int err = 0; 3150 int err = 0;
3151 u64 ino = btrfs_ino(inode);
3303 3152
3304 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3153 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3305 3154
3306 if (root->ref_cows || root == root->fs_info->tree_root) 3155 if (root->ref_cows || root == root->fs_info->tree_root)
3307 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3156 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3308 3157
3158 /*
3159 * This function is also used to drop the items in the log tree before
 3160	 * we relog the inode, so if root != BTRFS_I(inode)->root, we are
 3161	 * dropping the logged items. In that case we shouldn't kill the
 3162	 * delayed items.
3163 */
3164 if (min_type == 0 && root == BTRFS_I(inode)->root)
3165 btrfs_kill_delayed_inode_items(inode);
3166
3309 path = btrfs_alloc_path(); 3167 path = btrfs_alloc_path();
3310 BUG_ON(!path); 3168 BUG_ON(!path);
3311 path->reada = -1; 3169 path->reada = -1;
3312 3170
3313 key.objectid = inode->i_ino; 3171 key.objectid = ino;
3314 key.offset = (u64)-1; 3172 key.offset = (u64)-1;
3315 key.type = (u8)-1; 3173 key.type = (u8)-1;
3316 3174
@@ -3338,7 +3196,7 @@ search_again:
3338 found_type = btrfs_key_type(&found_key); 3196 found_type = btrfs_key_type(&found_key);
3339 encoding = 0; 3197 encoding = 0;
3340 3198
3341 if (found_key.objectid != inode->i_ino) 3199 if (found_key.objectid != ino)
3342 break; 3200 break;
3343 3201
3344 if (found_type < min_type) 3202 if (found_type < min_type)
@@ -3456,7 +3314,7 @@ delete:
3456 ret = btrfs_free_extent(trans, root, extent_start, 3314 ret = btrfs_free_extent(trans, root, extent_start,
3457 extent_num_bytes, 0, 3315 extent_num_bytes, 0,
3458 btrfs_header_owner(leaf), 3316 btrfs_header_owner(leaf),
3459 inode->i_ino, extent_offset); 3317 ino, extent_offset);
3460 BUG_ON(ret); 3318 BUG_ON(ret);
3461 } 3319 }
3462 3320
@@ -3465,7 +3323,9 @@ delete:
3465 3323
3466 if (path->slots[0] == 0 || 3324 if (path->slots[0] == 0 ||
3467 path->slots[0] != pending_del_slot) { 3325 path->slots[0] != pending_del_slot) {
3468 if (root->ref_cows) { 3326 if (root->ref_cows &&
3327 BTRFS_I(inode)->location.objectid !=
3328 BTRFS_FREE_INO_OBJECTID) {
3469 err = -EAGAIN; 3329 err = -EAGAIN;
3470 goto out; 3330 goto out;
3471 } 3331 }
@@ -3476,7 +3336,7 @@ delete:
3476 BUG_ON(ret); 3336 BUG_ON(ret);
3477 pending_del_nr = 0; 3337 pending_del_nr = 0;
3478 } 3338 }
3479 btrfs_release_path(root, path); 3339 btrfs_release_path(path);
3480 goto search_again; 3340 goto search_again;
3481 } else { 3341 } else {
3482 path->slots[0]--; 3342 path->slots[0]--;
@@ -3634,7 +3494,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3634 while (1) { 3494 while (1) {
3635 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3495 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
3636 block_end - cur_offset, 0); 3496 block_end - cur_offset, 0);
3637 BUG_ON(IS_ERR(em) || !em); 3497 BUG_ON(IS_ERR_OR_NULL(em));
3638 last_byte = min(extent_map_end(em), block_end); 3498 last_byte = min(extent_map_end(em), block_end);
3639 last_byte = (last_byte + mask) & ~mask; 3499 last_byte = (last_byte + mask) & ~mask;
3640 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3500 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3655,7 +3515,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3655 break; 3515 break;
3656 3516
3657 err = btrfs_insert_file_extent(trans, root, 3517 err = btrfs_insert_file_extent(trans, root,
3658 inode->i_ino, cur_offset, 0, 3518 btrfs_ino(inode), cur_offset, 0,
3659 0, hole_size, 0, hole_size, 3519 0, hole_size, 0, hole_size,
3660 0, 0, 0); 3520 0, 0, 0);
3661 if (err) 3521 if (err)
@@ -3757,7 +3617,7 @@ void btrfs_evict_inode(struct inode *inode)
3757 3617
3758 truncate_inode_pages(&inode->i_data, 0); 3618 truncate_inode_pages(&inode->i_data, 0);
3759 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3619 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3760 root == root->fs_info->tree_root)) 3620 is_free_space_inode(root, inode)))
3761 goto no_delete; 3621 goto no_delete;
3762 3622
3763 if (is_bad_inode(inode)) { 3623 if (is_bad_inode(inode)) {
@@ -3810,6 +3670,10 @@ void btrfs_evict_inode(struct inode *inode)
3810 BUG_ON(ret); 3670 BUG_ON(ret);
3811 } 3671 }
3812 3672
3673 if (!(root == root->fs_info->tree_root ||
3674 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
3675 btrfs_return_ino(root, btrfs_ino(inode));
3676
3813 nr = trans->blocks_used; 3677 nr = trans->blocks_used;
3814 btrfs_end_transaction(trans, root); 3678 btrfs_end_transaction(trans, root);
3815 btrfs_btree_balance_dirty(root, nr); 3679 btrfs_btree_balance_dirty(root, nr);
@@ -3835,12 +3699,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3835 path = btrfs_alloc_path(); 3699 path = btrfs_alloc_path();
3836 BUG_ON(!path); 3700 BUG_ON(!path);
3837 3701
3838 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, 3702 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3839 namelen, 0); 3703 namelen, 0);
3840 if (IS_ERR(di)) 3704 if (IS_ERR(di))
3841 ret = PTR_ERR(di); 3705 ret = PTR_ERR(di);
3842 3706
3843 if (!di || IS_ERR(di)) 3707 if (IS_ERR_OR_NULL(di))
3844 goto out_err; 3708 goto out_err;
3845 3709
3846 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); 3710 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
@@ -3888,7 +3752,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3888 3752
3889 leaf = path->nodes[0]; 3753 leaf = path->nodes[0];
3890 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 3754 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
3891 if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || 3755 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
3892 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) 3756 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
3893 goto out; 3757 goto out;
3894 3758
@@ -3898,7 +3762,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3898 if (ret) 3762 if (ret)
3899 goto out; 3763 goto out;
3900 3764
3901 btrfs_release_path(root->fs_info->tree_root, path); 3765 btrfs_release_path(path);
3902 3766
3903 new_root = btrfs_read_fs_root_no_name(root->fs_info, location); 3767 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
3904 if (IS_ERR(new_root)) { 3768 if (IS_ERR(new_root)) {
@@ -3927,6 +3791,7 @@ static void inode_tree_add(struct inode *inode)
3927 struct btrfs_inode *entry; 3791 struct btrfs_inode *entry;
3928 struct rb_node **p; 3792 struct rb_node **p;
3929 struct rb_node *parent; 3793 struct rb_node *parent;
3794 u64 ino = btrfs_ino(inode);
3930again: 3795again:
3931 p = &root->inode_tree.rb_node; 3796 p = &root->inode_tree.rb_node;
3932 parent = NULL; 3797 parent = NULL;
@@ -3939,9 +3804,9 @@ again:
3939 parent = *p; 3804 parent = *p;
3940 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3805 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3941 3806
3942 if (inode->i_ino < entry->vfs_inode.i_ino) 3807 if (ino < btrfs_ino(&entry->vfs_inode))
3943 p = &parent->rb_left; 3808 p = &parent->rb_left;
3944 else if (inode->i_ino > entry->vfs_inode.i_ino) 3809 else if (ino > btrfs_ino(&entry->vfs_inode))
3945 p = &parent->rb_right; 3810 p = &parent->rb_right;
3946 else { 3811 else {
3947 WARN_ON(!(entry->vfs_inode.i_state & 3812 WARN_ON(!(entry->vfs_inode.i_state &
@@ -4005,9 +3870,9 @@ again:
4005 prev = node; 3870 prev = node;
4006 entry = rb_entry(node, struct btrfs_inode, rb_node); 3871 entry = rb_entry(node, struct btrfs_inode, rb_node);
4007 3872
4008 if (objectid < entry->vfs_inode.i_ino) 3873 if (objectid < btrfs_ino(&entry->vfs_inode))
4009 node = node->rb_left; 3874 node = node->rb_left;
4010 else if (objectid > entry->vfs_inode.i_ino) 3875 else if (objectid > btrfs_ino(&entry->vfs_inode))
4011 node = node->rb_right; 3876 node = node->rb_right;
4012 else 3877 else
4013 break; 3878 break;
@@ -4015,7 +3880,7 @@ again:
4015 if (!node) { 3880 if (!node) {
4016 while (prev) { 3881 while (prev) {
4017 entry = rb_entry(prev, struct btrfs_inode, rb_node); 3882 entry = rb_entry(prev, struct btrfs_inode, rb_node);
4018 if (objectid <= entry->vfs_inode.i_ino) { 3883 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
4019 node = prev; 3884 node = prev;
4020 break; 3885 break;
4021 } 3886 }
@@ -4024,7 +3889,7 @@ again:
4024 } 3889 }
4025 while (node) { 3890 while (node) {
4026 entry = rb_entry(node, struct btrfs_inode, rb_node); 3891 entry = rb_entry(node, struct btrfs_inode, rb_node);
4027 objectid = entry->vfs_inode.i_ino + 1; 3892 objectid = btrfs_ino(&entry->vfs_inode) + 1;
4028 inode = igrab(&entry->vfs_inode); 3893 inode = igrab(&entry->vfs_inode);
4029 if (inode) { 3894 if (inode) {
4030 spin_unlock(&root->inode_lock); 3895 spin_unlock(&root->inode_lock);
@@ -4062,7 +3927,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
4062static int btrfs_find_actor(struct inode *inode, void *opaque) 3927static int btrfs_find_actor(struct inode *inode, void *opaque)
4063{ 3928{
4064 struct btrfs_iget_args *args = opaque; 3929 struct btrfs_iget_args *args = opaque;
4065 return args->ino == inode->i_ino && 3930 return args->ino == btrfs_ino(inode) &&
4066 args->root == BTRFS_I(inode)->root; 3931 args->root == BTRFS_I(inode)->root;
4067} 3932}
4068 3933
@@ -4207,7 +4072,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4207 return d_splice_alias(inode, dentry); 4072 return d_splice_alias(inode, dentry);
4208} 4073}
4209 4074
4210static unsigned char btrfs_filetype_table[] = { 4075unsigned char btrfs_filetype_table[] = {
4211 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 4076 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
4212}; 4077};
4213 4078
@@ -4221,6 +4086,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4221 struct btrfs_key key; 4086 struct btrfs_key key;
4222 struct btrfs_key found_key; 4087 struct btrfs_key found_key;
4223 struct btrfs_path *path; 4088 struct btrfs_path *path;
4089 struct list_head ins_list;
4090 struct list_head del_list;
4224 int ret; 4091 int ret;
4225 struct extent_buffer *leaf; 4092 struct extent_buffer *leaf;
4226 int slot; 4093 int slot;
@@ -4233,6 +4100,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4233 char tmp_name[32]; 4100 char tmp_name[32];
4234 char *name_ptr; 4101 char *name_ptr;
4235 int name_len; 4102 int name_len;
4103 int is_curr = 0; /* filp->f_pos points to the current index? */
4236 4104
4237 /* FIXME, use a real flag for deciding about the key type */ 4105 /* FIXME, use a real flag for deciding about the key type */
4238 if (root->fs_info->tree_root == root) 4106 if (root->fs_info->tree_root == root)
@@ -4240,9 +4108,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4240 4108
4241 /* special case for "." */ 4109 /* special case for "." */
4242 if (filp->f_pos == 0) { 4110 if (filp->f_pos == 0) {
4243 over = filldir(dirent, ".", 1, 4111 over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
4244 1, inode->i_ino,
4245 DT_DIR);
4246 if (over) 4112 if (over)
4247 return 0; 4113 return 0;
4248 filp->f_pos = 1; 4114 filp->f_pos = 1;
@@ -4257,11 +4123,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4257 filp->f_pos = 2; 4123 filp->f_pos = 2;
4258 } 4124 }
4259 path = btrfs_alloc_path(); 4125 path = btrfs_alloc_path();
4126 if (!path)
4127 return -ENOMEM;
4260 path->reada = 2; 4128 path->reada = 2;
4261 4129
4130 if (key_type == BTRFS_DIR_INDEX_KEY) {
4131 INIT_LIST_HEAD(&ins_list);
4132 INIT_LIST_HEAD(&del_list);
4133 btrfs_get_delayed_items(inode, &ins_list, &del_list);
4134 }
4135
4262 btrfs_set_key_type(&key, key_type); 4136 btrfs_set_key_type(&key, key_type);
4263 key.offset = filp->f_pos; 4137 key.offset = filp->f_pos;
4264 key.objectid = inode->i_ino; 4138 key.objectid = btrfs_ino(inode);
4265 4139
4266 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4140 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4267 if (ret < 0) 4141 if (ret < 0)
@@ -4288,8 +4162,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4288 break; 4162 break;
4289 if (found_key.offset < filp->f_pos) 4163 if (found_key.offset < filp->f_pos)
4290 goto next; 4164 goto next;
4165 if (key_type == BTRFS_DIR_INDEX_KEY &&
4166 btrfs_should_delete_dir_index(&del_list,
4167 found_key.offset))
4168 goto next;
4291 4169
4292 filp->f_pos = found_key.offset; 4170 filp->f_pos = found_key.offset;
4171 is_curr = 1;
4293 4172
4294 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 4173 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
4295 di_cur = 0; 4174 di_cur = 0;
@@ -4344,6 +4223,15 @@ next:
4344 path->slots[0]++; 4223 path->slots[0]++;
4345 } 4224 }
4346 4225
4226 if (key_type == BTRFS_DIR_INDEX_KEY) {
4227 if (is_curr)
4228 filp->f_pos++;
4229 ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
4230 &ins_list);
4231 if (ret)
4232 goto nopos;
4233 }
4234
4347 /* Reached end of directory/root. Bump pos past the last item. */ 4235 /* Reached end of directory/root. Bump pos past the last item. */
4348 if (key_type == BTRFS_DIR_INDEX_KEY) 4236 if (key_type == BTRFS_DIR_INDEX_KEY)
4349 /* 4237 /*
@@ -4356,6 +4244,8 @@ next:
4356nopos: 4244nopos:
4357 ret = 0; 4245 ret = 0;
4358err: 4246err:
4247 if (key_type == BTRFS_DIR_INDEX_KEY)
4248 btrfs_put_delayed_items(&ins_list, &del_list);
4359 btrfs_free_path(path); 4249 btrfs_free_path(path);
4360 return ret; 4250 return ret;
4361} 4251}
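[reviewer note] The readdir hunks above weave three delayed-inode calls into the on-disk walk: btrfs_get_delayed_items() fetches the in-memory insert/delete lists up front, btrfs_should_delete_dir_index() skips on-disk index items already unlinked in memory, and btrfs_readdir_delayed_dir_index() emits the queued insertions after the tree walk, with btrfs_put_delayed_items() dropping the lists on exit. A self-contained userspace model of that merge; the arrays and names below are illustrative, not the kernel API:

    #include <stdio.h>

    /* on-disk dir index items, sorted by index */
    static const unsigned long long disk[] = { 2, 3, 5, 8 };
    /* delayed deletions: still on disk, already unlinked in memory */
    static const unsigned long long del_list[] = { 3 };
    /* delayed insertions: created in memory, not yet on disk */
    static const unsigned long long ins_list[] = { 9, 11 };

    #define N(a) (sizeof(a) / sizeof((a)[0]))

    static int should_delete(unsigned long long index)
    {
            for (size_t i = 0; i < N(del_list); i++)
                    if (del_list[i] == index)
                            return 1;
            return 0;
    }

    int main(void)
    {
            /* walk the on-disk items, skipping delayed deletions ... */
            for (size_t i = 0; i < N(disk); i++) {
                    if (should_delete(disk[i]))
                            continue;
                    printf("emit on-disk index %llu\n", disk[i]);
            }
            /* ... then emit the insertions the tree walk could not see */
            for (size_t i = 0; i < N(ins_list); i++)
                    printf("emit delayed index %llu\n", ins_list[i]);
            return 0;
    }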
@@ -4371,7 +4261,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4371 return 0; 4261 return 0;
4372 4262
4373 smp_mb(); 4263 smp_mb();
4374 nolock = (root->fs_info->closing && root == root->fs_info->tree_root); 4264 if (root->fs_info->closing && is_free_space_inode(root, inode))
4265 nolock = true;
4375 4266
4376 if (wbc->sync_mode == WB_SYNC_ALL) { 4267 if (wbc->sync_mode == WB_SYNC_ALL) {
4377 if (nolock) 4268 if (nolock)
@@ -4414,25 +4305,25 @@ void btrfs_dirty_inode(struct inode *inode)
4414 btrfs_end_transaction(trans, root); 4305 btrfs_end_transaction(trans, root);
4415 trans = btrfs_start_transaction(root, 1); 4306 trans = btrfs_start_transaction(root, 1);
4416 if (IS_ERR(trans)) { 4307 if (IS_ERR(trans)) {
4417 if (printk_ratelimit()) { 4308 printk_ratelimited(KERN_ERR "btrfs: fail to "
4418 printk(KERN_ERR "btrfs: fail to " 4309 "dirty inode %llu error %ld\n",
4419 "dirty inode %lu error %ld\n", 4310 (unsigned long long)btrfs_ino(inode),
4420 inode->i_ino, PTR_ERR(trans)); 4311 PTR_ERR(trans));
4421 }
4422 return; 4312 return;
4423 } 4313 }
4424 btrfs_set_trans_block_group(trans, inode); 4314 btrfs_set_trans_block_group(trans, inode);
4425 4315
4426 ret = btrfs_update_inode(trans, root, inode); 4316 ret = btrfs_update_inode(trans, root, inode);
4427 if (ret) { 4317 if (ret) {
4428 if (printk_ratelimit()) { 4318 printk_ratelimited(KERN_ERR "btrfs: fail to "
4429 printk(KERN_ERR "btrfs: fail to " 4319 "dirty inode %llu error %d\n",
4430 "dirty inode %lu error %d\n", 4320 (unsigned long long)btrfs_ino(inode),
4431 inode->i_ino, ret); 4321 ret);
4432 }
4433 } 4322 }
4434 } 4323 }
4435 btrfs_end_transaction(trans, root); 4324 btrfs_end_transaction(trans, root);
4325 if (BTRFS_I(inode)->delayed_node)
4326 btrfs_balance_delayed_items(root);
4436} 4327}
4437 4328
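[reviewer note] The btrfs_dirty_inode hunks also fold the open-coded `if (printk_ratelimit()) printk(...)` pairs into printk_ratelimited(). Besides being shorter, the helper keeps a ratelimit state private to each call site, whereas printk_ratelimit() gates on one shared global state. The pattern in isolation:

    /* before: gate on the shared global ratelimit state */
    if (printk_ratelimit())
            printk(KERN_ERR "btrfs: fail to dirty inode %llu error %d\n",
                   (unsigned long long)btrfs_ino(inode), ret);

    /* after: one call, per-callsite ratelimit state */
    printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %llu error %d\n",
                       (unsigned long long)btrfs_ino(inode), ret);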
4438/* 4329/*
@@ -4448,7 +4339,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4448 struct extent_buffer *leaf; 4339 struct extent_buffer *leaf;
4449 int ret; 4340 int ret;
4450 4341
4451 key.objectid = inode->i_ino; 4342 key.objectid = btrfs_ino(inode);
4452 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); 4343 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
4453 key.offset = (u64)-1; 4344 key.offset = (u64)-1;
4454 4345
@@ -4480,7 +4371,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4480 leaf = path->nodes[0]; 4371 leaf = path->nodes[0];
4481 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4372 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4482 4373
4483 if (found_key.objectid != inode->i_ino || 4374 if (found_key.objectid != btrfs_ino(inode) ||
4484 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { 4375 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
4485 BTRFS_I(inode)->index_cnt = 2; 4376 BTRFS_I(inode)->index_cnt = 2;
4486 goto out; 4377 goto out;
@@ -4501,9 +4392,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
4501 int ret = 0; 4392 int ret = 0;
4502 4393
4503 if (BTRFS_I(dir)->index_cnt == (u64)-1) { 4394 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
4504 ret = btrfs_set_inode_index_count(dir); 4395 ret = btrfs_inode_delayed_dir_index_count(dir);
4505 if (ret) 4396 if (ret) {
4506 return ret; 4397 ret = btrfs_set_inode_index_count(dir);
4398 if (ret)
4399 return ret;
4400 }
4507 } 4401 }
4508 4402
4509 *index = BTRFS_I(dir)->index_cnt; 4403 *index = BTRFS_I(dir)->index_cnt;
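[reviewer note] btrfs_set_inode_index() now tries the delayed node first and only falls back to the on-disk scan when that fails; when a delayed node already tracks the directory's highest index, the b-tree search in btrfs_set_inode_index_count() is skipped entirely. Compressed view of the fallback, with the return convention read off the hunk (nonzero meaning the delayed count was unavailable):

    if (BTRFS_I(dir)->index_cnt == (u64)-1) {
            /* cheap path: the delayed node may already know the count */
            ret = btrfs_inode_delayed_dir_index_count(dir);
            if (ret) {
                    /* fall back to scanning the dir index items on disk */
                    ret = btrfs_set_inode_index_count(dir);
                    if (ret)
                            return ret;
            }
    }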
@@ -4539,6 +4433,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4539 return ERR_PTR(-ENOMEM); 4433 return ERR_PTR(-ENOMEM);
4540 } 4434 }
4541 4435
4436 /*
4437 * we have to initialize this early, so we can reclaim the inode
4438 * number if we fail afterwards in this function.
4439 */
4440 inode->i_ino = objectid;
4441
4542 if (dir) { 4442 if (dir) {
4543 trace_btrfs_inode_request(dir); 4443 trace_btrfs_inode_request(dir);
4544 4444
@@ -4584,7 +4484,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4584 goto fail; 4484 goto fail;
4585 4485
4586 inode_init_owner(inode, dir, mode); 4486 inode_init_owner(inode, dir, mode);
4587 inode->i_ino = objectid;
4588 inode_set_bytes(inode, 0); 4487 inode_set_bytes(inode, 0);
4589 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4488 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
4590 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4489 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -4648,29 +4547,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4648 int ret = 0; 4547 int ret = 0;
4649 struct btrfs_key key; 4548 struct btrfs_key key;
4650 struct btrfs_root *root = BTRFS_I(parent_inode)->root; 4549 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
4550 u64 ino = btrfs_ino(inode);
4551 u64 parent_ino = btrfs_ino(parent_inode);
4651 4552
4652 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4553 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4653 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); 4554 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
4654 } else { 4555 } else {
4655 key.objectid = inode->i_ino; 4556 key.objectid = ino;
4656 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 4557 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
4657 key.offset = 0; 4558 key.offset = 0;
4658 } 4559 }
4659 4560
4660 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4561 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4661 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 4562 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
4662 key.objectid, root->root_key.objectid, 4563 key.objectid, root->root_key.objectid,
4663 parent_inode->i_ino, 4564 parent_ino, index, name, name_len);
4664 index, name, name_len);
4665 } else if (add_backref) { 4565 } else if (add_backref) {
4666 ret = btrfs_insert_inode_ref(trans, root, 4566 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
4667 name, name_len, inode->i_ino, 4567 parent_ino, index);
4668 parent_inode->i_ino, index);
4669 } 4568 }
4670 4569
4671 if (ret == 0) { 4570 if (ret == 0) {
4672 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4571 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4673 parent_inode->i_ino, &key, 4572 parent_inode, &key,
4674 btrfs_inode_type(inode), index); 4573 btrfs_inode_type(inode), index);
4675 BUG_ON(ret); 4574 BUG_ON(ret);
4676 4575
@@ -4713,10 +4612,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4713 if (!new_valid_dev(rdev)) 4612 if (!new_valid_dev(rdev))
4714 return -EINVAL; 4613 return -EINVAL;
4715 4614
4716 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4717 if (err)
4718 return err;
4719
4720 /* 4615 /*
4721 * 2 for inode item and ref 4616 * 2 for inode item and ref
4722 * 2 for dir items 4617 * 2 for dir items
@@ -4728,8 +4623,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4728 4623
4729 btrfs_set_trans_block_group(trans, dir); 4624 btrfs_set_trans_block_group(trans, dir);
4730 4625
4626 err = btrfs_find_free_ino(root, &objectid);
4627 if (err)
4628 goto out_unlock;
4629
4731 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4630 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4732 dentry->d_name.len, dir->i_ino, objectid, 4631 dentry->d_name.len, btrfs_ino(dir), objectid,
4733 BTRFS_I(dir)->block_group, mode, &index); 4632 BTRFS_I(dir)->block_group, mode, &index);
4734 if (IS_ERR(inode)) { 4633 if (IS_ERR(inode)) {
4735 err = PTR_ERR(inode); 4634 err = PTR_ERR(inode);
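[reviewer note] The same reshuffle repeats in create, mkdir and symlink below: objectid allocation moves from before the transaction starts (where a failure could simply return) to after it, so a failure now has to unwind through out_unlock and end the transaction. Skeleton of the new ordering; the label is assumed to reach btrfs_end_transaction() as in the existing error paths:

    trans = btrfs_start_transaction(root, 5);
    if (IS_ERR(trans))
            return PTR_ERR(trans);

    err = btrfs_find_free_ino(root, &objectid);
    if (err)
            goto out_unlock;        /* must not leak the running transaction */

    inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                            dentry->d_name.len, btrfs_ino(dir), objectid,
                            BTRFS_I(dir)->block_group, mode, &index);
    /* ... dir items, xattrs, etc. ... */

    out_unlock:
            btrfs_end_transaction(trans, root);
            return err;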
@@ -4776,9 +4675,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4776 u64 objectid; 4675 u64 objectid;
4777 u64 index = 0; 4676 u64 index = 0;
4778 4677
4779 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4780 if (err)
4781 return err;
4782 /* 4678 /*
4783 * 2 for inode item and ref 4679 * 2 for inode item and ref
4784 * 2 for dir items 4680 * 2 for dir items
@@ -4790,8 +4686,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4790 4686
4791 btrfs_set_trans_block_group(trans, dir); 4687 btrfs_set_trans_block_group(trans, dir);
4792 4688
4689 err = btrfs_find_free_ino(root, &objectid);
4690 if (err)
4691 goto out_unlock;
4692
4793 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4693 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4794 dentry->d_name.len, dir->i_ino, objectid, 4694 dentry->d_name.len, btrfs_ino(dir), objectid,
4795 BTRFS_I(dir)->block_group, mode, &index); 4695 BTRFS_I(dir)->block_group, mode, &index);
4796 if (IS_ERR(inode)) { 4696 if (IS_ERR(inode)) {
4797 err = PTR_ERR(inode); 4697 err = PTR_ERR(inode);
@@ -4902,10 +4802,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4902 u64 index = 0; 4802 u64 index = 0;
4903 unsigned long nr = 1; 4803 unsigned long nr = 1;
4904 4804
4905 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4906 if (err)
4907 return err;
4908
4909 /* 4805 /*
4910 * 2 items for inode and ref 4806 * 2 items for inode and ref
4911 * 2 items for dir items 4807 * 2 items for dir items
@@ -4916,8 +4812,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4916 return PTR_ERR(trans); 4812 return PTR_ERR(trans);
4917 btrfs_set_trans_block_group(trans, dir); 4813 btrfs_set_trans_block_group(trans, dir);
4918 4814
4815 err = btrfs_find_free_ino(root, &objectid);
4816 if (err)
4817 goto out_fail;
4818
4919 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4819 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4920 dentry->d_name.len, dir->i_ino, objectid, 4820 dentry->d_name.len, btrfs_ino(dir), objectid,
4921 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4821 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4922 &index); 4822 &index);
4923 if (IS_ERR(inode)) { 4823 if (IS_ERR(inode)) {
@@ -5040,7 +4940,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
5040 u64 bytenr; 4940 u64 bytenr;
5041 u64 extent_start = 0; 4941 u64 extent_start = 0;
5042 u64 extent_end = 0; 4942 u64 extent_end = 0;
5043 u64 objectid = inode->i_ino; 4943 u64 objectid = btrfs_ino(inode);
5044 u32 found_type; 4944 u32 found_type;
5045 struct btrfs_path *path = NULL; 4945 struct btrfs_path *path = NULL;
5046 struct btrfs_root *root = BTRFS_I(inode)->root; 4946 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5068,7 +4968,7 @@ again:
5068 else 4968 else
5069 goto out; 4969 goto out;
5070 } 4970 }
5071 em = alloc_extent_map(GFP_NOFS); 4971 em = alloc_extent_map();
5072 if (!em) { 4972 if (!em) {
5073 err = -ENOMEM; 4973 err = -ENOMEM;
5074 goto out; 4974 goto out;
@@ -5222,7 +5122,7 @@ again:
5222 kunmap(page); 5122 kunmap(page);
5223 free_extent_map(em); 5123 free_extent_map(em);
5224 em = NULL; 5124 em = NULL;
5225 btrfs_release_path(root, path); 5125 btrfs_release_path(path);
5226 trans = btrfs_join_transaction(root, 1); 5126 trans = btrfs_join_transaction(root, 1);
5227 if (IS_ERR(trans)) 5127 if (IS_ERR(trans))
5228 return ERR_CAST(trans); 5128 return ERR_CAST(trans);
@@ -5248,7 +5148,7 @@ not_found_em:
5248 em->block_start = EXTENT_MAP_HOLE; 5148 em->block_start = EXTENT_MAP_HOLE;
5249 set_bit(EXTENT_FLAG_VACANCY, &em->flags); 5149 set_bit(EXTENT_FLAG_VACANCY, &em->flags);
5250insert: 5150insert:
5251 btrfs_release_path(root, path); 5151 btrfs_release_path(path);
5252 if (em->start > start || extent_map_end(em) <= start) { 5152 if (em->start > start || extent_map_end(em) <= start) {
5253 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " 5153 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
5254 "[%llu %llu]\n", (unsigned long long)em->start, 5154 "[%llu %llu]\n", (unsigned long long)em->start,
@@ -5381,7 +5281,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5381 u64 hole_start = start; 5281 u64 hole_start = start;
5382 u64 hole_len = len; 5282 u64 hole_len = len;
5383 5283
5384 em = alloc_extent_map(GFP_NOFS); 5284 em = alloc_extent_map();
5385 if (!em) { 5285 if (!em) {
5386 err = -ENOMEM; 5286 err = -ENOMEM;
5387 goto out; 5287 goto out;
@@ -5482,7 +5382,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5482 } 5382 }
5483 5383
5484 if (!em) { 5384 if (!em) {
5485 em = alloc_extent_map(GFP_NOFS); 5385 em = alloc_extent_map();
5486 if (!em) { 5386 if (!em) {
5487 em = ERR_PTR(-ENOMEM); 5387 em = ERR_PTR(-ENOMEM);
5488 goto out; 5388 goto out;
@@ -5548,7 +5448,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5548 if (!path) 5448 if (!path)
5549 return -ENOMEM; 5449 return -ENOMEM;
5550 5450
5551 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 5451 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
5552 offset, 0); 5452 offset, 0);
5553 if (ret < 0) 5453 if (ret < 0)
5554 goto out; 5454 goto out;
@@ -5565,7 +5465,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5565 ret = 0; 5465 ret = 0;
5566 leaf = path->nodes[0]; 5466 leaf = path->nodes[0];
5567 btrfs_item_key_to_cpu(leaf, &key, slot); 5467 btrfs_item_key_to_cpu(leaf, &key, slot);
5568 if (key.objectid != inode->i_ino || 5468 if (key.objectid != btrfs_ino(inode) ||
5569 key.type != BTRFS_EXTENT_DATA_KEY) { 5469 key.type != BTRFS_EXTENT_DATA_KEY) {
5570 /* not our file or wrong item type, must cow */ 5470 /* not our file or wrong item type, must cow */
5571 goto out; 5471 goto out;
@@ -5599,7 +5499,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5599 * look for other files referencing this extent, if we 5499 * look for other files referencing this extent, if we
5600 * find any we must cow 5500 * find any we must cow
5601 */ 5501 */
5602 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 5502 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
5603 key.offset - backref_offset, disk_bytenr)) 5503 key.offset - backref_offset, disk_bytenr))
5604 goto out; 5504 goto out;
5605 5505
@@ -5789,9 +5689,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5789 5689
5790 flush_dcache_page(bvec->bv_page); 5690 flush_dcache_page(bvec->bv_page);
5791 if (csum != *private) { 5691 if (csum != *private) {
5792 printk(KERN_ERR "btrfs csum failed ino %lu off" 5692 printk(KERN_ERR "btrfs csum failed ino %llu off"
5793 " %llu csum %u private %u\n", 5693 " %llu csum %u private %u\n",
5794 inode->i_ino, (unsigned long long)start, 5694 (unsigned long long)btrfs_ino(inode),
5695 (unsigned long long)start,
5795 csum, *private); 5696 csum, *private);
5796 err = -EIO; 5697 err = -EIO;
5797 } 5698 }
@@ -5938,9 +5839,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5938 struct btrfs_dio_private *dip = bio->bi_private; 5839 struct btrfs_dio_private *dip = bio->bi_private;
5939 5840
5940 if (err) { 5841 if (err) {
5941 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5842 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
5942 "sector %#Lx len %u err no %d\n", 5843 "sector %#Lx len %u err no %d\n",
5943 dip->inode->i_ino, bio->bi_rw, 5844 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
5944 (unsigned long long)bio->bi_sector, bio->bi_size, err); 5845 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5945 dip->errors = 1; 5846 dip->errors = 1;
5946 5847
@@ -6783,10 +6684,12 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6783 ei->dummy_inode = 0; 6684 ei->dummy_inode = 0;
6784 ei->force_compress = BTRFS_COMPRESS_NONE; 6685 ei->force_compress = BTRFS_COMPRESS_NONE;
6785 6686
6687 ei->delayed_node = NULL;
6688
6786 inode = &ei->vfs_inode; 6689 inode = &ei->vfs_inode;
6787 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); 6690 extent_map_tree_init(&ei->extent_tree);
6788 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); 6691 extent_io_tree_init(&ei->io_tree, &inode->i_data);
6789 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); 6692 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
6790 mutex_init(&ei->log_mutex); 6693 mutex_init(&ei->log_mutex);
6791 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6694 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6792 INIT_LIST_HEAD(&ei->i_orphan); 6695 INIT_LIST_HEAD(&ei->i_orphan);
@@ -6850,8 +6753,8 @@ void btrfs_destroy_inode(struct inode *inode)
6850 6753
6851 spin_lock(&root->orphan_lock); 6754 spin_lock(&root->orphan_lock);
6852 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6755 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6853 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6756 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6854 inode->i_ino); 6757 (unsigned long long)btrfs_ino(inode));
6855 list_del_init(&BTRFS_I(inode)->i_orphan); 6758 list_del_init(&BTRFS_I(inode)->i_orphan);
6856 } 6759 }
6857 spin_unlock(&root->orphan_lock); 6760 spin_unlock(&root->orphan_lock);
@@ -6873,6 +6776,7 @@ void btrfs_destroy_inode(struct inode *inode)
6873 inode_tree_del(inode); 6776 inode_tree_del(inode);
6874 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6777 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6875free: 6778free:
6779 btrfs_remove_delayed_node(inode);
6876 call_rcu(&inode->i_rcu, btrfs_i_callback); 6780 call_rcu(&inode->i_rcu, btrfs_i_callback);
6877} 6781}
6878 6782
@@ -6881,7 +6785,7 @@ int btrfs_drop_inode(struct inode *inode)
6881 struct btrfs_root *root = BTRFS_I(inode)->root; 6785 struct btrfs_root *root = BTRFS_I(inode)->root;
6882 6786
6883 if (btrfs_root_refs(&root->root_item) == 0 && 6787 if (btrfs_root_refs(&root->root_item) == 0 &&
6884 root != root->fs_info->tree_root) 6788 !is_free_space_inode(root, inode))
6885 return 1; 6789 return 1;
6886 else 6790 else
6887 return generic_drop_inode(inode); 6791 return generic_drop_inode(inode);
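[reviewer note] Two hunks above (btrfs_write_inode and btrfs_drop_inode) switch from comparing against fs_info->tree_root to is_free_space_inode(). The predicate is added elsewhere in this series; a plausible shape, offered strictly as an assumption, is that it matches both the per-block-group space-cache inodes and the new free-ino cache inode:

    /* assumed implementation -- not quoted from this patch */
    static inline bool is_free_space_inode(struct btrfs_root *root,
                                           struct inode *inode)
    {
            if (root == root->fs_info->tree_root ||
                BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
                    return true;
            return false;
    }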
@@ -6990,16 +6894,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6990 u64 index = 0; 6894 u64 index = 0;
6991 u64 root_objectid; 6895 u64 root_objectid;
6992 int ret; 6896 int ret;
6897 u64 old_ino = btrfs_ino(old_inode);
6993 6898
6994 if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 6899 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
6995 return -EPERM; 6900 return -EPERM;
6996 6901
6997 /* we only allow rename subvolume link between subvolumes */ 6902 /* we only allow rename subvolume link between subvolumes */
6998 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) 6903 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
6999 return -EXDEV; 6904 return -EXDEV;
7000 6905
7001 if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || 6906 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
7002 (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) 6907 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
7003 return -ENOTEMPTY; 6908 return -ENOTEMPTY;
7004 6909
7005 if (S_ISDIR(old_inode->i_mode) && new_inode && 6910 if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -7015,7 +6920,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7015 filemap_flush(old_inode->i_mapping); 6920 filemap_flush(old_inode->i_mapping);
7016 6921
7017 /* close the racy window with snapshot create/destroy ioctl */ 6922 /* close the racy window with snapshot create/destroy ioctl */
7018 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6923 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7019 down_read(&root->fs_info->subvol_sem); 6924 down_read(&root->fs_info->subvol_sem);
7020 /* 6925 /*
7021 * We want to reserve the absolute worst case amount of items. So if 6926 * We want to reserve the absolute worst case amount of items. So if
@@ -7040,15 +6945,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7040 if (ret) 6945 if (ret)
7041 goto out_fail; 6946 goto out_fail;
7042 6947
7043 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6948 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7044 /* force full log commit if subvolume involved. */ 6949 /* force full log commit if subvolume involved. */
7045 root->fs_info->last_trans_log_full_commit = trans->transid; 6950 root->fs_info->last_trans_log_full_commit = trans->transid;
7046 } else { 6951 } else {
7047 ret = btrfs_insert_inode_ref(trans, dest, 6952 ret = btrfs_insert_inode_ref(trans, dest,
7048 new_dentry->d_name.name, 6953 new_dentry->d_name.name,
7049 new_dentry->d_name.len, 6954 new_dentry->d_name.len,
7050 old_inode->i_ino, 6955 old_ino,
7051 new_dir->i_ino, index); 6956 btrfs_ino(new_dir), index);
7052 if (ret) 6957 if (ret)
7053 goto out_fail; 6958 goto out_fail;
7054 /* 6959 /*
@@ -7064,10 +6969,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7064 * make sure the inode gets flushed if it is replacing 6969 * make sure the inode gets flushed if it is replacing
7065 * something. 6970 * something.
7066 */ 6971 */
7067 if (new_inode && new_inode->i_size && 6972 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
7068 old_inode && S_ISREG(old_inode->i_mode)) {
7069 btrfs_add_ordered_operation(trans, root, old_inode); 6973 btrfs_add_ordered_operation(trans, root, old_inode);
7070 }
7071 6974
7072 old_dir->i_ctime = old_dir->i_mtime = ctime; 6975 old_dir->i_ctime = old_dir->i_mtime = ctime;
7073 new_dir->i_ctime = new_dir->i_mtime = ctime; 6976 new_dir->i_ctime = new_dir->i_mtime = ctime;
@@ -7076,7 +6979,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7076 if (old_dentry->d_parent != new_dentry->d_parent) 6979 if (old_dentry->d_parent != new_dentry->d_parent)
7077 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 6980 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
7078 6981
7079 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6982 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7080 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 6983 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
7081 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 6984 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
7082 old_dentry->d_name.name, 6985 old_dentry->d_name.name,
@@ -7093,7 +6996,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7093 6996
7094 if (new_inode) { 6997 if (new_inode) {
7095 new_inode->i_ctime = CURRENT_TIME; 6998 new_inode->i_ctime = CURRENT_TIME;
7096 if (unlikely(new_inode->i_ino == 6999 if (unlikely(btrfs_ino(new_inode) ==
7097 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7000 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
7098 root_objectid = BTRFS_I(new_inode)->location.objectid; 7001 root_objectid = BTRFS_I(new_inode)->location.objectid;
7099 ret = btrfs_unlink_subvol(trans, dest, new_dir, 7002 ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -7121,7 +7024,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7121 new_dentry->d_name.len, 0, index); 7024 new_dentry->d_name.len, 0, index);
7122 BUG_ON(ret); 7025 BUG_ON(ret);
7123 7026
7124 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 7027 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7125 struct dentry *parent = dget_parent(new_dentry); 7028 struct dentry *parent = dget_parent(new_dentry);
7126 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7029 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7127 dput(parent); 7030 dput(parent);
@@ -7130,7 +7033,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7130out_fail: 7033out_fail:
7131 btrfs_end_transaction_throttle(trans, root); 7034 btrfs_end_transaction_throttle(trans, root);
7132out_notrans: 7035out_notrans:
7133 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7036 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7134 up_read(&root->fs_info->subvol_sem); 7037 up_read(&root->fs_info->subvol_sem);
7135 7038
7136 return ret; 7039 return ret;
@@ -7184,58 +7087,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7184 return 0; 7087 return 0;
7185} 7088}
7186 7089
7187int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
7188 int sync)
7189{
7190 struct btrfs_inode *binode;
7191 struct inode *inode = NULL;
7192
7193 spin_lock(&root->fs_info->delalloc_lock);
7194 while (!list_empty(&root->fs_info->delalloc_inodes)) {
7195 binode = list_entry(root->fs_info->delalloc_inodes.next,
7196 struct btrfs_inode, delalloc_inodes);
7197 inode = igrab(&binode->vfs_inode);
7198 if (inode) {
7199 list_move_tail(&binode->delalloc_inodes,
7200 &root->fs_info->delalloc_inodes);
7201 break;
7202 }
7203
7204 list_del_init(&binode->delalloc_inodes);
7205 cond_resched_lock(&root->fs_info->delalloc_lock);
7206 }
7207 spin_unlock(&root->fs_info->delalloc_lock);
7208
7209 if (inode) {
7210 if (sync) {
7211 filemap_write_and_wait(inode->i_mapping);
7212 /*
7213 * We have to do this because compression doesn't
7214 * actually set PG_writeback until it submits the pages
7215 * for IO, which happens in an async thread, so we could
7216 * race and not actually wait for any writeback pages
7217 * because they've not been submitted yet. Technically
7218 * this could still be the case for the ordered stuff
7219 * since the async thread may not have started to do its
7220 * work yet. If this becomes the case then we need to
7221 * figure out a way to make sure that in writepage we
7222 * wait for any async pages to be submitted before
7223 * returning so that fdatawait does what its supposed to
7224 * do.
7225 */
7226 btrfs_wait_ordered_range(inode, 0, (u64)-1);
7227 } else {
7228 filemap_flush(inode->i_mapping);
7229 }
7230 if (delay_iput)
7231 btrfs_add_delayed_iput(inode);
7232 else
7233 iput(inode);
7234 return 1;
7235 }
7236 return 0;
7237}
7238
7239static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 7090static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7240 const char *symname) 7091 const char *symname)
7241{ 7092{
@@ -7259,9 +7110,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7259 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 7110 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
7260 return -ENAMETOOLONG; 7111 return -ENAMETOOLONG;
7261 7112
7262 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
7263 if (err)
7264 return err;
7265 /* 7113 /*
7266 * 2 items for inode item and ref 7114 * 2 items for inode item and ref
7267 * 2 items for dir items 7115 * 2 items for dir items
@@ -7273,8 +7121,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7273 7121
7274 btrfs_set_trans_block_group(trans, dir); 7122 btrfs_set_trans_block_group(trans, dir);
7275 7123
7124 err = btrfs_find_free_ino(root, &objectid);
7125 if (err)
7126 goto out_unlock;
7127
7276 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7128 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7277 dentry->d_name.len, dir->i_ino, objectid, 7129 dentry->d_name.len, btrfs_ino(dir), objectid,
7278 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7130 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
7279 &index); 7131 &index);
7280 if (IS_ERR(inode)) { 7132 if (IS_ERR(inode)) {
@@ -7306,7 +7158,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7306 7158
7307 path = btrfs_alloc_path(); 7159 path = btrfs_alloc_path();
7308 BUG_ON(!path); 7160 BUG_ON(!path);
7309 key.objectid = inode->i_ino; 7161 key.objectid = btrfs_ino(inode);
7310 key.offset = 0; 7162 key.offset = 0;
7311 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7163 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
7312 datasize = btrfs_file_extent_calc_inline_size(name_len); 7164 datasize = btrfs_file_extent_calc_inline_size(name_len);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de71feb8e1c..c4f17e4e2c9c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,6 +50,7 @@
50#include "print-tree.h" 50#include "print-tree.h"
51#include "volumes.h" 51#include "volumes.h"
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h"
53 54
54/* Mask out flags that are inappropriate for the given type of inode. */ 55/* Mask out flags that are inappropriate for the given type of inode. */
55static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -330,8 +331,7 @@ static noinline int create_subvol(struct btrfs_root *root,
330 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 331 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
331 u64 index = 0; 332 u64 index = 0;
332 333
333 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
334 0, &objectid);
335 if (ret) { 335 if (ret) {
336 dput(parent); 336 dput(parent);
337 return ret; 337 return ret;
@@ -423,7 +423,7 @@ static noinline int create_subvol(struct btrfs_root *root,
423 BUG_ON(ret); 423 BUG_ON(ret);
424 424
425 ret = btrfs_insert_dir_item(trans, root, 425 ret = btrfs_insert_dir_item(trans, root,
426 name, namelen, dir->i_ino, &key, 426 name, namelen, dir, &key,
427 BTRFS_FT_DIR, index); 427 BTRFS_FT_DIR, index);
428 if (ret) 428 if (ret)
429 goto fail; 429 goto fail;
@@ -434,7 +434,7 @@ static noinline int create_subvol(struct btrfs_root *root,
434 434
435 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 435 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
436 objectid, root->root_key.objectid, 436 objectid, root->root_key.objectid,
437 dir->i_ino, index, name, namelen); 437 btrfs_ino(dir), index, name, namelen);
438 438
439 BUG_ON(ret); 439 BUG_ON(ret);
440 440
@@ -1130,7 +1130,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1130 int ret = 0; 1130 int ret = 0;
1131 u64 flags = 0; 1131 u64 flags = 0;
1132 1132
1133 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1133 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1134 return -EINVAL; 1134 return -EINVAL;
1135 1135
1136 down_read(&root->fs_info->subvol_sem); 1136 down_read(&root->fs_info->subvol_sem);
@@ -1157,7 +1157,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1157 if (root->fs_info->sb->s_flags & MS_RDONLY) 1157 if (root->fs_info->sb->s_flags & MS_RDONLY)
1158 return -EROFS; 1158 return -EROFS;
1159 1159
1160 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1160 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1161 return -EINVAL; 1161 return -EINVAL;
1162 1162
1163 if (copy_from_user(&flags, arg, sizeof(flags))) 1163 if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1401,7 +1401,7 @@ static noinline int search_ioctl(struct inode *inode,
1401 } 1401 }
1402 ret = copy_to_sk(root, path, &key, sk, args->buf, 1402 ret = copy_to_sk(root, path, &key, sk, args->buf,
1403 &sk_offset, &num_found); 1403 &sk_offset, &num_found);
1404 btrfs_release_path(root, path); 1404 btrfs_release_path(path);
1405 if (ret || num_found >= sk->nr_items) 1405 if (ret || num_found >= sk->nr_items)
1406 break; 1406 break;
1407 1407
@@ -1508,7 +1508,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1508 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1508 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1509 break; 1509 break;
1510 1510
1511 btrfs_release_path(root, path); 1511 btrfs_release_path(path);
1512 key.objectid = key.offset; 1512 key.objectid = key.offset;
1513 key.offset = (u64)-1; 1513 key.offset = (u64)-1;
1514 dirid = key.objectid; 1514 dirid = key.objectid;
@@ -1638,7 +1638,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1638 goto out_dput; 1638 goto out_dput;
1639 } 1639 }
1640 1640
1641 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1641 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1642 err = -EINVAL; 1642 err = -EINVAL;
1643 goto out_dput; 1643 goto out_dput;
1644 } 1644 }
@@ -1808,6 +1808,75 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
1808 return ret; 1808 return ret;
1809} 1809}
1810 1810
1811static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
1812{
1813 struct btrfs_ioctl_fs_info_args fi_args;
1814 struct btrfs_device *device;
1815 struct btrfs_device *next;
1816 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1817
1818 if (!capable(CAP_SYS_ADMIN))
1819 return -EPERM;
1820
1821 fi_args.num_devices = fs_devices->num_devices;
1822 fi_args.max_id = 0;
1823 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
1824
1825 mutex_lock(&fs_devices->device_list_mutex);
1826 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
1827 if (device->devid > fi_args.max_id)
1828 fi_args.max_id = device->devid;
1829 }
1830 mutex_unlock(&fs_devices->device_list_mutex);
1831
1832 if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
1833 return -EFAULT;
1834
1835 return 0;
1836}
1837
1838static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
1839{
1840 struct btrfs_ioctl_dev_info_args *di_args;
1841 struct btrfs_device *dev;
1842 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1843 int ret = 0;
1844 char *s_uuid = NULL;
1845 char empty_uuid[BTRFS_UUID_SIZE] = {0};
1846
1847 if (!capable(CAP_SYS_ADMIN))
1848 return -EPERM;
1849
1850 di_args = memdup_user(arg, sizeof(*di_args));
1851 if (IS_ERR(di_args))
1852 return PTR_ERR(di_args);
1853
1854 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
1855 s_uuid = di_args->uuid;
1856
1857 mutex_lock(&fs_devices->device_list_mutex);
1858 dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
1859 mutex_unlock(&fs_devices->device_list_mutex);
1860
1861 if (!dev) {
1862 ret = -ENODEV;
1863 goto out;
1864 }
1865
1866 di_args->devid = dev->devid;
1867 di_args->bytes_used = dev->bytes_used;
1868 di_args->total_bytes = dev->total_bytes;
1869 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
1870 strncpy(di_args->path, dev->name, sizeof(di_args->path));
1871
1872out:
1873 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
1874 ret = -EFAULT;
1875
1876 kfree(di_args);
1877 return ret;
1878}
1879
1811static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 1880static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1812 u64 off, u64 olen, u64 destoff) 1881 u64 off, u64 olen, u64 destoff)
1813{ 1882{
@@ -1924,7 +1993,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1924 } 1993 }
1925 1994
1926 /* clone data */ 1995 /* clone data */
1927 key.objectid = src->i_ino; 1996 key.objectid = btrfs_ino(src);
1928 key.type = BTRFS_EXTENT_DATA_KEY; 1997 key.type = BTRFS_EXTENT_DATA_KEY;
1929 key.offset = 0; 1998 key.offset = 0;
1930 1999
@@ -1951,7 +2020,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1951 2020
1952 btrfs_item_key_to_cpu(leaf, &key, slot); 2021 btrfs_item_key_to_cpu(leaf, &key, slot);
1953 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2022 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1954 key.objectid != src->i_ino) 2023 key.objectid != btrfs_ino(src))
1955 break; 2024 break;
1956 2025
1957 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2026 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
@@ -1987,14 +2056,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1987 datal = btrfs_file_extent_ram_bytes(leaf, 2056 datal = btrfs_file_extent_ram_bytes(leaf,
1988 extent); 2057 extent);
1989 } 2058 }
1990 btrfs_release_path(root, path); 2059 btrfs_release_path(path);
1991 2060
1992 if (key.offset + datal <= off || 2061 if (key.offset + datal <= off ||
1993 key.offset >= off+len) 2062 key.offset >= off+len)
1994 goto next; 2063 goto next;
1995 2064
1996 memcpy(&new_key, &key, sizeof(new_key)); 2065 memcpy(&new_key, &key, sizeof(new_key));
1997 new_key.objectid = inode->i_ino; 2066 new_key.objectid = btrfs_ino(inode);
1998 if (off <= key.offset) 2067 if (off <= key.offset)
1999 new_key.offset = key.offset + destoff - off; 2068 new_key.offset = key.offset + destoff - off;
2000 else 2069 else
@@ -2048,7 +2117,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2048 ret = btrfs_inc_extent_ref(trans, root, 2117 ret = btrfs_inc_extent_ref(trans, root,
2049 disko, diskl, 0, 2118 disko, diskl, 0,
2050 root->root_key.objectid, 2119 root->root_key.objectid,
2051 inode->i_ino, 2120 btrfs_ino(inode),
2052 new_key.offset - datao); 2121 new_key.offset - datao);
2053 BUG_ON(ret); 2122 BUG_ON(ret);
2054 } 2123 }
@@ -2097,7 +2166,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2097 } 2166 }
2098 2167
2099 btrfs_mark_buffer_dirty(leaf); 2168 btrfs_mark_buffer_dirty(leaf);
2100 btrfs_release_path(root, path); 2169 btrfs_release_path(path);
2101 2170
2102 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2171 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2103 2172
@@ -2118,12 +2187,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2118 btrfs_end_transaction(trans, root); 2187 btrfs_end_transaction(trans, root);
2119 } 2188 }
2120next: 2189next:
2121 btrfs_release_path(root, path); 2190 btrfs_release_path(path);
2122 key.offset++; 2191 key.offset++;
2123 } 2192 }
2124 ret = 0; 2193 ret = 0;
2125out: 2194out:
2126 btrfs_release_path(root, path); 2195 btrfs_release_path(path);
2127 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2196 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
2128out_unlock: 2197out_unlock:
2129 mutex_unlock(&src->i_mutex); 2198 mutex_unlock(&src->i_mutex);
@@ -2470,6 +2539,58 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2470 return btrfs_wait_for_commit(root, transid); 2539 return btrfs_wait_for_commit(root, transid);
2471} 2540}
2472 2541
2542static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
2543{
2544 int ret;
2545 struct btrfs_ioctl_scrub_args *sa;
2546
2547 if (!capable(CAP_SYS_ADMIN))
2548 return -EPERM;
2549
2550 sa = memdup_user(arg, sizeof(*sa));
2551 if (IS_ERR(sa))
2552 return PTR_ERR(sa);
2553
2554 ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
2555 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
2556
2557 if (copy_to_user(arg, sa, sizeof(*sa)))
2558 ret = -EFAULT;
2559
2560 kfree(sa);
2561 return ret;
2562}
2563
2564static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
2565{
2566 if (!capable(CAP_SYS_ADMIN))
2567 return -EPERM;
2568
2569 return btrfs_scrub_cancel(root);
2570}
2571
2572static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
2573 void __user *arg)
2574{
2575 struct btrfs_ioctl_scrub_args *sa;
2576 int ret;
2577
2578 if (!capable(CAP_SYS_ADMIN))
2579 return -EPERM;
2580
2581 sa = memdup_user(arg, sizeof(*sa));
2582 if (IS_ERR(sa))
2583 return PTR_ERR(sa);
2584
2585 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
2586
2587 if (copy_to_user(arg, sa, sizeof(*sa)))
2588 ret = -EFAULT;
2589
2590 kfree(sa);
2591 return ret;
2592}
2593
2473long btrfs_ioctl(struct file *file, unsigned int 2594long btrfs_ioctl(struct file *file, unsigned int
2474 cmd, unsigned long arg) 2595 cmd, unsigned long arg)
2475{ 2596{
@@ -2509,6 +2630,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2509 return btrfs_ioctl_add_dev(root, argp); 2630 return btrfs_ioctl_add_dev(root, argp);
2510 case BTRFS_IOC_RM_DEV: 2631 case BTRFS_IOC_RM_DEV:
2511 return btrfs_ioctl_rm_dev(root, argp); 2632 return btrfs_ioctl_rm_dev(root, argp);
2633 case BTRFS_IOC_FS_INFO:
2634 return btrfs_ioctl_fs_info(root, argp);
2635 case BTRFS_IOC_DEV_INFO:
2636 return btrfs_ioctl_dev_info(root, argp);
2512 case BTRFS_IOC_BALANCE: 2637 case BTRFS_IOC_BALANCE:
2513 return btrfs_balance(root->fs_info->dev_root); 2638 return btrfs_balance(root->fs_info->dev_root);
2514 case BTRFS_IOC_CLONE: 2639 case BTRFS_IOC_CLONE:
@@ -2532,6 +2657,12 @@ long btrfs_ioctl(struct file *file, unsigned int
2532 return btrfs_ioctl_start_sync(file, argp); 2657 return btrfs_ioctl_start_sync(file, argp);
2533 case BTRFS_IOC_WAIT_SYNC: 2658 case BTRFS_IOC_WAIT_SYNC:
2534 return btrfs_ioctl_wait_sync(file, argp); 2659 return btrfs_ioctl_wait_sync(file, argp);
2660 case BTRFS_IOC_SCRUB:
2661 return btrfs_ioctl_scrub(root, argp);
2662 case BTRFS_IOC_SCRUB_CANCEL:
2663 return btrfs_ioctl_scrub_cancel(root, argp);
2664 case BTRFS_IOC_SCRUB_PROGRESS:
2665 return btrfs_ioctl_scrub_progress(root, argp);
2535 } 2666 }
2536 2667
2537 return -ENOTTY; 2668 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8fb382167b13..e5e0ee2cad4e 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -32,6 +32,8 @@ struct btrfs_ioctl_vol_args {
32 32
33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34#define BTRFS_SUBVOL_RDONLY (1ULL << 1) 34#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
35#define BTRFS_FSID_SIZE 16
36#define BTRFS_UUID_SIZE 16
35 37
36#define BTRFS_SUBVOL_NAME_MAX 4039 38#define BTRFS_SUBVOL_NAME_MAX 4039
37struct btrfs_ioctl_vol_args_v2 { 39struct btrfs_ioctl_vol_args_v2 {
@@ -42,6 +44,71 @@ struct btrfs_ioctl_vol_args_v2 {
42 char name[BTRFS_SUBVOL_NAME_MAX + 1]; 44 char name[BTRFS_SUBVOL_NAME_MAX + 1];
43}; 45};
44 46
47/*
48 * structure to report errors and progress to userspace, either as a
49 * result of a finished scrub, a canceled scrub or a progress inquiry
50 */
51struct btrfs_scrub_progress {
52 __u64 data_extents_scrubbed; /* # of data extents scrubbed */
53 __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
54 __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
55 __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
56 __u64 read_errors; /* # of read errors encountered (EIO) */
57 __u64 csum_errors; /* # of failed csum checks */
 58 __u64 verify_errors; /* # of occurrences where the metadata
59 * of a tree block did not match the
60 * expected values, like generation or
61 * logical */
 62 __u64 no_csum; /* # of 4k data blocks for which no csum
63 * is present, probably the result of
64 * data written with nodatasum */
 65 __u64 csum_discards; /* # of csums for which no data was found
66 * in the extent tree. */
67 __u64 super_errors; /* # of bad super blocks encountered */
68 __u64 malloc_errors; /* # of internal kmalloc errors. These
69 * will likely cause an incomplete
70 * scrub */
71 __u64 uncorrectable_errors; /* # of errors where either no intact
72 * copy was found or the writeback
73 * failed */
74 __u64 corrected_errors; /* # of errors corrected */
75 __u64 last_physical; /* last physical address scrubbed. In
76 * case a scrub was aborted, this can
77 * be used to restart the scrub */
 78 __u64 unverified_errors; /* # of occurrences where a read for a
79 * full (64k) bio failed, but the re-
80 * check succeeded for each 4k piece.
81 * Intermittent error. */
82};
83
84#define BTRFS_SCRUB_READONLY 1
85struct btrfs_ioctl_scrub_args {
86 __u64 devid; /* in */
87 __u64 start; /* in */
88 __u64 end; /* in */
89 __u64 flags; /* in */
90 struct btrfs_scrub_progress progress; /* out */
91 /* pad to 1k */
92 __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
93};
94
95#define BTRFS_DEVICE_PATH_NAME_MAX 1024
96struct btrfs_ioctl_dev_info_args {
97 __u64 devid; /* in/out */
98 __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
99 __u64 bytes_used; /* out */
100 __u64 total_bytes; /* out */
101 __u64 unused[379]; /* pad to 4k */
102 __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
103};
104
105struct btrfs_ioctl_fs_info_args {
106 __u64 max_id; /* out */
107 __u64 num_devices; /* out */
108 __u8 fsid[BTRFS_FSID_SIZE]; /* out */
109 __u64 reserved[124]; /* pad to 1k */
110};
111
45#define BTRFS_INO_LOOKUP_PATH_MAX 4080 112#define BTRFS_INO_LOOKUP_PATH_MAX 4080
46struct btrfs_ioctl_ino_lookup_args { 113struct btrfs_ioctl_ino_lookup_args {
47 __u64 treeid; 114 __u64 treeid;
@@ -203,4 +270,13 @@ struct btrfs_ioctl_space_args {
203 struct btrfs_ioctl_vol_args_v2) 270 struct btrfs_ioctl_vol_args_v2)
204#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 271#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
205#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 272#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
273#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
274 struct btrfs_ioctl_scrub_args)
275#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
276#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
277 struct btrfs_ioctl_scrub_args)
278#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
279 struct btrfs_ioctl_dev_info_args)
280#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
281 struct btrfs_ioctl_fs_info_args)
206#endif 282#endif
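[reviewer note] For completeness, a minimal userspace caller for the scrub ioctl defined above. This is runnable only against a kernel carrying this series; the include path and the /mnt/btrfs mount point are assumptions, not part of the patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>
    #include "ioctl.h"              /* the btrfs ioctl header patched above */

    int main(void)
    {
            struct btrfs_ioctl_scrub_args sa;
            int fd = open("/mnt/btrfs", O_RDONLY); /* any path on the fs */

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            memset(&sa, 0, sizeof(sa));
            sa.devid = 1;                   /* first device of the fs */
            sa.start = 0;
            sa.end = (__u64)-1;             /* scrub the whole device */
            sa.flags = 0;                   /* or BTRFS_SCRUB_READONLY */
            if (ioctl(fd, BTRFS_IOC_SCRUB, &sa) < 0) {
                    perror("BTRFS_IOC_SCRUB");
                    return 1;
            }
            printf("corrected %llu, uncorrectable %llu, last_physical %llu\n",
                   (unsigned long long)sa.progress.corrected_errors,
                   (unsigned long long)sa.progress.uncorrectable_errors,
                   (unsigned long long)sa.progress.last_physical);
            return 0;
    }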
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 6151f2ea38bb..66fa43dc3f0f 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -185,31 +185,6 @@ sleep:
185 return 0; 185 return 0;
186} 186}
187 187
188/*
189 * Very quick trylock, this does not spin or schedule. It returns
190 * 1 with the spinlock held if it was able to take the lock, or it
191 * returns zero if it was unable to take the lock.
192 *
193 * After this call, scheduling is not safe without first calling
194 * btrfs_set_lock_blocking()
195 */
196int btrfs_try_tree_lock(struct extent_buffer *eb)
197{
198 if (spin_trylock(&eb->lock)) {
199 if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
200 /*
201 * we've got the spinlock, but the real owner is
202 * blocking. Drop the spinlock and return failure
203 */
204 spin_unlock(&eb->lock);
205 return 0;
206 }
207 return 1;
208 }
209 /* someone else has the spinlock giveup */
210 return 0;
211}
212
213int btrfs_tree_unlock(struct extent_buffer *eb) 188int btrfs_tree_unlock(struct extent_buffer *eb)
214{ 189{
215 /* 190 /*
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6c4ce457168c..5c33a560a2f1 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,8 +21,6 @@
21 21
22int btrfs_tree_lock(struct extent_buffer *eb); 22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb); 23int btrfs_tree_unlock(struct extent_buffer *eb);
24
25int btrfs_try_tree_lock(struct extent_buffer *eb);
26int btrfs_try_spin_lock(struct extent_buffer *eb); 24int btrfs_try_spin_lock(struct extent_buffer *eb);
27 25
28void btrfs_set_lock_blocking(struct extent_buffer *eb); 26void btrfs_set_lock_blocking(struct extent_buffer *eb);
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index a97314cf6bd6..82d569cb6267 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -23,56 +23,6 @@
23#include "ref-cache.h" 23#include "ref-cache.h"
24#include "transaction.h" 24#include "transaction.h"
25 25
26/*
27 * leaf refs are used to cache the information about which extents
28 * a given leaf has references on. This allows us to process that leaf
29 * in btrfs_drop_snapshot without needing to read it back from disk.
30 */
31
32/*
33 * kmalloc a leaf reference struct and update the counters for the
34 * total ref cache size
35 */
36struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
37 int nr_extents)
38{
39 struct btrfs_leaf_ref *ref;
40 size_t size = btrfs_leaf_ref_size(nr_extents);
41
42 ref = kmalloc(size, GFP_NOFS);
43 if (ref) {
44 spin_lock(&root->fs_info->ref_cache_lock);
45 root->fs_info->total_ref_cache_size += size;
46 spin_unlock(&root->fs_info->ref_cache_lock);
47
48 memset(ref, 0, sizeof(*ref));
49 atomic_set(&ref->usage, 1);
50 INIT_LIST_HEAD(&ref->list);
51 }
52 return ref;
53}
54
55/*
56 * free a leaf reference struct and update the counters for the
57 * total ref cache size
58 */
59void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
60{
61 if (!ref)
62 return;
63 WARN_ON(atomic_read(&ref->usage) == 0);
64 if (atomic_dec_and_test(&ref->usage)) {
65 size_t size = btrfs_leaf_ref_size(ref->nritems);
66
67 BUG_ON(ref->in_tree);
68 kfree(ref);
69
70 spin_lock(&root->fs_info->ref_cache_lock);
71 root->fs_info->total_ref_cache_size -= size;
72 spin_unlock(&root->fs_info->ref_cache_lock);
73 }
74}
75
76static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 26static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
77 struct rb_node *node) 27 struct rb_node *node)
78{ 28{
@@ -116,117 +66,3 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
116 } 66 }
117 return NULL; 67 return NULL;
118} 68}
119
120int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
121 int shared)
122{
123 struct btrfs_leaf_ref *ref = NULL;
124 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
125
126 if (shared)
127 tree = &root->fs_info->shared_ref_tree;
128 if (!tree)
129 return 0;
130
131 spin_lock(&tree->lock);
132 while (!list_empty(&tree->list)) {
133 ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
134 BUG_ON(ref->tree != tree);
135 if (ref->root_gen > max_root_gen)
136 break;
137 if (!xchg(&ref->in_tree, 0)) {
138 cond_resched_lock(&tree->lock);
139 continue;
140 }
141
142 rb_erase(&ref->rb_node, &tree->root);
143 list_del_init(&ref->list);
144
145 spin_unlock(&tree->lock);
146 btrfs_free_leaf_ref(root, ref);
147 cond_resched();
148 spin_lock(&tree->lock);
149 }
150 spin_unlock(&tree->lock);
151 return 0;
152}
153
154/*
155 * find the leaf ref for a given extent. This returns the ref struct with
156 * a usage reference incremented
157 */
158struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
159 u64 bytenr)
160{
161 struct rb_node *rb;
162 struct btrfs_leaf_ref *ref = NULL;
163 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
164again:
165 if (tree) {
166 spin_lock(&tree->lock);
167 rb = tree_search(&tree->root, bytenr);
168 if (rb)
169 ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
170 if (ref)
171 atomic_inc(&ref->usage);
172 spin_unlock(&tree->lock);
173 if (ref)
174 return ref;
175 }
176 if (tree != &root->fs_info->shared_ref_tree) {
177 tree = &root->fs_info->shared_ref_tree;
178 goto again;
179 }
180 return NULL;
181}
182
183/*
184 * add a fully filled in leaf ref struct
185 * remove all the refs older than a given root generation
186 */
187int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
188 int shared)
189{
190 int ret = 0;
191 struct rb_node *rb;
192 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
193
194 if (shared)
195 tree = &root->fs_info->shared_ref_tree;
196
197 spin_lock(&tree->lock);
198 rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
199 if (rb) {
200 ret = -EEXIST;
201 } else {
202 atomic_inc(&ref->usage);
203 ref->tree = tree;
204 ref->in_tree = 1;
205 list_add_tail(&ref->list, &tree->list);
206 }
207 spin_unlock(&tree->lock);
208 return ret;
209}
210
211/*
212 * remove a single leaf ref from the tree. This drops the ref held by the tree
213 * only
214 */
215int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
216{
217 struct btrfs_leaf_ref_tree *tree;
218
219 if (!xchg(&ref->in_tree, 0))
220 return 0;
221
222 tree = ref->tree;
223 spin_lock(&tree->lock);
224
225 rb_erase(&ref->rb_node, &tree->root);
226 list_del_init(&ref->list);
227
228 spin_unlock(&tree->lock);
229
230 btrfs_free_leaf_ref(root, ref);
231 return 0;
232}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
index e2a55cb2072b..24f7001f6387 100644
--- a/fs/btrfs/ref-cache.h
+++ b/fs/btrfs/ref-cache.h
@@ -49,28 +49,4 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents)
49 return sizeof(struct btrfs_leaf_ref) + 49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents; 50 sizeof(struct btrfs_extent_info) * nr_extents;
51} 51}
52
53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
54{
55 tree->root = RB_ROOT;
56 INIT_LIST_HEAD(&tree->list);
57 spin_lock_init(&tree->lock);
58}
59
60static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
61{
62 return RB_EMPTY_ROOT(&tree->root);
63}
64
65void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
66struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
67 int nr_extents);
68void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
69struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
70 u64 bytenr);
71int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
72 int shared);
73int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
74 int shared);
75int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
76#endif 52#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..fa2c5d87f219 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -30,6 +30,7 @@
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h"
33 34
34/* 35/*
35 * backref_node, mapping_node and tree_block start with this 36 * backref_node, mapping_node and tree_block start with this
@@ -507,6 +508,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
507 return 1; 508 return 1;
508} 509}
509 510
511
510static int should_ignore_root(struct btrfs_root *root) 512static int should_ignore_root(struct btrfs_root *root)
511{ 513{
512 struct btrfs_root *reloc_root; 514 struct btrfs_root *reloc_root;
@@ -529,7 +531,6 @@ static int should_ignore_root(struct btrfs_root *root)
529 */ 531 */
530 return 1; 532 return 1;
531} 533}
532
533/* 534/*
534 * find reloc tree by address of tree root 535 * find reloc tree by address of tree root
535 */ 536 */
@@ -961,7 +962,7 @@ again:
961 lower = upper; 962 lower = upper;
962 upper = NULL; 963 upper = NULL;
963 } 964 }
964 btrfs_release_path(root, path2); 965 btrfs_release_path(path2);
965next: 966next:
966 if (ptr < end) { 967 if (ptr < end) {
967 ptr += btrfs_extent_inline_ref_size(key.type); 968 ptr += btrfs_extent_inline_ref_size(key.type);
@@ -974,7 +975,7 @@ next:
974 if (ptr >= end) 975 if (ptr >= end)
975 path1->slots[0]++; 976 path1->slots[0]++;
976 } 977 }
977 btrfs_release_path(rc->extent_root, path1); 978 btrfs_release_path(path1);
978 979
979 cur->checked = 1; 980 cur->checked = 1;
980 WARN_ON(exist); 981 WARN_ON(exist);
@@ -1409,9 +1410,9 @@ again:
1409 prev = node; 1410 prev = node;
1410 entry = rb_entry(node, struct btrfs_inode, rb_node); 1411 entry = rb_entry(node, struct btrfs_inode, rb_node);
1411 1412
1412 if (objectid < entry->vfs_inode.i_ino) 1413 if (objectid < btrfs_ino(&entry->vfs_inode))
1413 node = node->rb_left; 1414 node = node->rb_left;
1414 else if (objectid > entry->vfs_inode.i_ino) 1415 else if (objectid > btrfs_ino(&entry->vfs_inode))
1415 node = node->rb_right; 1416 node = node->rb_right;
1416 else 1417 else
1417 break; 1418 break;
@@ -1419,7 +1420,7 @@ again:
1419 if (!node) { 1420 if (!node) {
1420 while (prev) { 1421 while (prev) {
1421 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1422 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1422 if (objectid <= entry->vfs_inode.i_ino) { 1423 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
1423 node = prev; 1424 node = prev;
1424 break; 1425 break;
1425 } 1426 }
@@ -1434,7 +1435,7 @@ again:
1434 return inode; 1435 return inode;
1435 } 1436 }
1436 1437
1437 objectid = entry->vfs_inode.i_ino + 1; 1438 objectid = btrfs_ino(&entry->vfs_inode) + 1;
1438 if (cond_resched_lock(&root->inode_lock)) 1439 if (cond_resched_lock(&root->inode_lock))
1439 goto again; 1440 goto again;
1440 1441
@@ -1470,7 +1471,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1470 return -ENOMEM; 1471 return -ENOMEM;
1471 1472
1472 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1473 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1473 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1474 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
1474 bytenr, 0); 1475 bytenr, 0);
1475 if (ret < 0) 1476 if (ret < 0)
1476 goto out; 1477 goto out;
@@ -1558,11 +1559,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1558 if (first) { 1559 if (first) {
1559 inode = find_next_inode(root, key.objectid); 1560 inode = find_next_inode(root, key.objectid);
1560 first = 0; 1561 first = 0;
1561 } else if (inode && inode->i_ino < key.objectid) { 1562 } else if (inode && btrfs_ino(inode) < key.objectid) {
1562 btrfs_add_delayed_iput(inode); 1563 btrfs_add_delayed_iput(inode);
1563 inode = find_next_inode(root, key.objectid); 1564 inode = find_next_inode(root, key.objectid);
1564 } 1565 }
1565 if (inode && inode->i_ino == key.objectid) { 1566 if (inode && btrfs_ino(inode) == key.objectid) {
1566 end = key.offset + 1567 end = key.offset +
1567 btrfs_file_extent_num_bytes(leaf, fi); 1568 btrfs_file_extent_num_bytes(leaf, fi);
1568 WARN_ON(!IS_ALIGNED(key.offset, 1569 WARN_ON(!IS_ALIGNED(key.offset,
@@ -1749,7 +1750,7 @@ again:
1749 1750
1750 btrfs_node_key_to_cpu(path->nodes[level], &key, 1751 btrfs_node_key_to_cpu(path->nodes[level], &key,
1751 path->slots[level]); 1752 path->slots[level]);
1752 btrfs_release_path(src, path); 1753 btrfs_release_path(path);
1753 1754
1754 path->lowest_level = level; 1755 path->lowest_level = level;
1755 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1756 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
@@ -1893,6 +1894,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1893 struct inode *inode = NULL; 1894 struct inode *inode = NULL;
1894 u64 objectid; 1895 u64 objectid;
1895 u64 start, end; 1896 u64 start, end;
1897 u64 ino;
1896 1898
1897 objectid = min_key->objectid; 1899 objectid = min_key->objectid;
1898 while (1) { 1900 while (1) {
@@ -1905,17 +1907,18 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1905 inode = find_next_inode(root, objectid); 1907 inode = find_next_inode(root, objectid);
1906 if (!inode) 1908 if (!inode)
1907 break; 1909 break;
1910 ino = btrfs_ino(inode);
1908 1911
1909 if (inode->i_ino > max_key->objectid) { 1912 if (ino > max_key->objectid) {
1910 iput(inode); 1913 iput(inode);
1911 break; 1914 break;
1912 } 1915 }
1913 1916
1914 objectid = inode->i_ino + 1; 1917 objectid = ino + 1;
1915 if (!S_ISREG(inode->i_mode)) 1918 if (!S_ISREG(inode->i_mode))
1916 continue; 1919 continue;
1917 1920
1918 if (unlikely(min_key->objectid == inode->i_ino)) { 1921 if (unlikely(min_key->objectid == ino)) {
1919 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1922 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1920 continue; 1923 continue;
1921 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1924 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
@@ -1928,7 +1931,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1928 start = 0; 1931 start = 0;
1929 } 1932 }
1930 1933
1931 if (unlikely(max_key->objectid == inode->i_ino)) { 1934 if (unlikely(max_key->objectid == ino)) {
1932 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1935 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1933 continue; 1936 continue;
1934 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1937 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
@@ -2496,7 +2499,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2496 path->locks[upper->level] = 0; 2499 path->locks[upper->level] = 0;
2497 2500
2498 slot = path->slots[upper->level]; 2501 slot = path->slots[upper->level];
2499 btrfs_release_path(NULL, path); 2502 btrfs_release_path(path);
2500 } else { 2503 } else {
2501 ret = btrfs_bin_search(upper->eb, key, upper->level, 2504 ret = btrfs_bin_search(upper->eb, key, upper->level,
2502 &slot); 2505 &slot);
@@ -2737,7 +2740,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2737 } else { 2740 } else {
2738 path->lowest_level = node->level; 2741 path->lowest_level = node->level;
2739 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2742 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2740 btrfs_release_path(root, path); 2743 btrfs_release_path(path);
2741 if (ret > 0) 2744 if (ret > 0)
2742 ret = 0; 2745 ret = 0;
2743 } 2746 }
@@ -2870,7 +2873,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2870 struct extent_map *em; 2873 struct extent_map *em;
2871 int ret = 0; 2874 int ret = 0;
2872 2875
2873 em = alloc_extent_map(GFP_NOFS); 2876 em = alloc_extent_map();
2874 if (!em) 2877 if (!em)
2875 return -ENOMEM; 2878 return -ENOMEM;
2876 2879
@@ -3119,7 +3122,7 @@ static int add_tree_block(struct reloc_control *rc,
3119#endif 3122#endif
3120 } 3123 }
3121 3124
3122 btrfs_release_path(rc->extent_root, path); 3125 btrfs_release_path(path);
3123 3126
3124 BUG_ON(level == -1); 3127 BUG_ON(level == -1);
3125 3128
@@ -3220,7 +3223,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3220 key.offset = 0; 3223 key.offset = 0;
3221 3224
3222 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3225 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3223 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { 3226 if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
3224 if (inode && !IS_ERR(inode)) 3227 if (inode && !IS_ERR(inode))
3225 iput(inode); 3228 iput(inode);
3226 return -ENOENT; 3229 return -ENOENT;
@@ -3505,7 +3508,7 @@ int add_data_references(struct reloc_control *rc,
3505 } 3508 }
3506 path->slots[0]++; 3509 path->slots[0]++;
3507 } 3510 }
3508 btrfs_release_path(rc->extent_root, path); 3511 btrfs_release_path(path);
3509 if (err) 3512 if (err)
3510 free_block_list(blocks); 3513 free_block_list(blocks);
3511 return err; 3514 return err;
@@ -3568,7 +3571,7 @@ next:
3568 EXTENT_DIRTY); 3571 EXTENT_DIRTY);
3569 3572
3570 if (ret == 0 && start <= key.objectid) { 3573 if (ret == 0 && start <= key.objectid) {
3571 btrfs_release_path(rc->extent_root, path); 3574 btrfs_release_path(path);
3572 rc->search_start = end + 1; 3575 rc->search_start = end + 1;
3573 } else { 3576 } else {
3574 rc->search_start = key.objectid + key.offset; 3577 rc->search_start = key.objectid + key.offset;
@@ -3576,7 +3579,7 @@ next:
3576 return 0; 3579 return 0;
3577 } 3580 }
3578 } 3581 }
3579 btrfs_release_path(rc->extent_root, path); 3582 btrfs_release_path(path);
3580 return ret; 3583 return ret;
3581} 3584}
3582 3585
@@ -3713,7 +3716,7 @@ restart:
3713 flags = BTRFS_EXTENT_FLAG_DATA; 3716 flags = BTRFS_EXTENT_FLAG_DATA;
3714 3717
3715 if (path_change) { 3718 if (path_change) {
3716 btrfs_release_path(rc->extent_root, path); 3719 btrfs_release_path(path);
3717 3720
3718 path->search_commit_root = 1; 3721 path->search_commit_root = 1;
3719 path->skip_locking = 1; 3722 path->skip_locking = 1;
@@ -3736,7 +3739,7 @@ restart:
3736 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3739 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3737 ret = add_data_references(rc, &key, path, &blocks); 3740 ret = add_data_references(rc, &key, path, &blocks);
3738 } else { 3741 } else {
3739 btrfs_release_path(rc->extent_root, path); 3742 btrfs_release_path(path);
3740 ret = 0; 3743 ret = 0;
3741 } 3744 }
3742 if (ret < 0) { 3745 if (ret < 0) {
@@ -3799,7 +3802,7 @@ restart:
3799 } 3802 }
3800 } 3803 }
3801 3804
3802 btrfs_release_path(rc->extent_root, path); 3805 btrfs_release_path(path);
3803 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3806 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3804 GFP_NOFS); 3807 GFP_NOFS);
3805 3808
@@ -3867,7 +3870,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3867 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3870 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3868 BTRFS_INODE_PREALLOC); 3871 BTRFS_INODE_PREALLOC);
3869 btrfs_mark_buffer_dirty(leaf); 3872 btrfs_mark_buffer_dirty(leaf);
3870 btrfs_release_path(root, path); 3873 btrfs_release_path(path);
3871out: 3874out:
3872 btrfs_free_path(path); 3875 btrfs_free_path(path);
3873 return ret; 3876 return ret;
@@ -3897,7 +3900,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3897 if (IS_ERR(trans)) 3900 if (IS_ERR(trans))
3898 return ERR_CAST(trans); 3901 return ERR_CAST(trans);
3899 3902
3900 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3903 err = btrfs_find_free_objectid(root, &objectid);
3901 if (err) 3904 if (err)
3902 goto out; 3905 goto out;
3903 3906
@@ -3935,7 +3938,7 @@ static struct reloc_control *alloc_reloc_control(void)
3935 INIT_LIST_HEAD(&rc->reloc_roots); 3938 INIT_LIST_HEAD(&rc->reloc_roots);
3936 backref_cache_init(&rc->backref_cache); 3939 backref_cache_init(&rc->backref_cache);
3937 mapping_tree_init(&rc->reloc_root_tree); 3940 mapping_tree_init(&rc->reloc_root_tree);
3938 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3941 extent_io_tree_init(&rc->processed_blocks, NULL);
3939 return rc; 3942 return rc;
3940} 3943}
3941 3944
@@ -4109,7 +4112,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4109 } 4112 }
4110 leaf = path->nodes[0]; 4113 leaf = path->nodes[0];
4111 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4114 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4112 btrfs_release_path(root->fs_info->tree_root, path); 4115 btrfs_release_path(path);
4113 4116
4114 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4117 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4115 key.type != BTRFS_ROOT_ITEM_KEY) 4118 key.type != BTRFS_ROOT_ITEM_KEY)
@@ -4141,7 +4144,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4141 4144
4142 key.offset--; 4145 key.offset--;
4143 } 4146 }
4144 btrfs_release_path(root->fs_info->tree_root, path); 4147 btrfs_release_path(path);
4145 4148
4146 if (list_empty(&reloc_roots)) 4149 if (list_empty(&reloc_roots))
4147 goto out; 4150 goto out;
@@ -4242,7 +4245,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4242 4245
4243 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4246 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4244 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4247 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4245 disk_bytenr + len - 1, &list); 4248 disk_bytenr + len - 1, &list, 0);
4246 4249
4247 while (!list_empty(&list)) { 4250 while (!list_empty(&list)) {
4248 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4251 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2cf5f5142159..ebe45443de06 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -22,53 +22,6 @@
22#include "print-tree.h" 22#include "print-tree.h"
23 23
24/* 24/*
25 * search forward for a root, starting with objectid 'search_start'
26 * if a root key is found, the objectid we find is filled into 'found_objectid'
27 * and 0 is returned. < 0 is returned on error, 1 if there is nothing
28 * left in the tree.
29 */
30int btrfs_search_root(struct btrfs_root *root, u64 search_start,
31 u64 *found_objectid)
32{
33 struct btrfs_path *path;
34 struct btrfs_key search_key;
35 int ret;
36
37 root = root->fs_info->tree_root;
38 search_key.objectid = search_start;
39 search_key.type = (u8)-1;
40 search_key.offset = (u64)-1;
41
42 path = btrfs_alloc_path();
43 BUG_ON(!path);
44again:
45 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
46 if (ret < 0)
47 goto out;
48 if (ret == 0) {
49 ret = 1;
50 goto out;
51 }
52 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
53 ret = btrfs_next_leaf(root, path);
54 if (ret)
55 goto out;
56 }
57 btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
58 if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
59 search_key.offset++;
60 btrfs_release_path(root, path);
61 goto again;
62 }
63 ret = 0;
64 *found_objectid = search_key.objectid;
65
66out:
67 btrfs_free_path(path);
68 return ret;
69}
70
71/*
72 * lookup the root with the highest offset for a given objectid. The key we do 25 * lookup the root with the highest offset for a given objectid. The key we do
73 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 26 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0
74 * on error. 27 * on error.
@@ -230,7 +183,7 @@ again:
230 183
231 memcpy(&found_key, &key, sizeof(key)); 184 memcpy(&found_key, &key, sizeof(key));
232 key.offset++; 185 key.offset++;
233 btrfs_release_path(root, path); 186 btrfs_release_path(path);
234 dead_root = 187 dead_root =
235 btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 188 btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
236 &found_key); 189 &found_key);
@@ -292,7 +245,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
292 } 245 }
293 246
294 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 247 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
295 btrfs_release_path(tree_root, path); 248 btrfs_release_path(path);
296 249
297 if (key.objectid != BTRFS_ORPHAN_OBJECTID || 250 if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
298 key.type != BTRFS_ORPHAN_ITEM_KEY) 251 key.type != BTRFS_ORPHAN_ITEM_KEY)
@@ -393,7 +346,7 @@ again:
393 err = -ENOENT; 346 err = -ENOENT;
394 347
395 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 348 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
396 btrfs_release_path(tree_root, path); 349 btrfs_release_path(path);
397 key.objectid = ref_id; 350 key.objectid = ref_id;
398 key.type = BTRFS_ROOT_REF_KEY; 351 key.type = BTRFS_ROOT_REF_KEY;
399 key.offset = root_id; 352 key.offset = root_id;
@@ -467,7 +420,7 @@ again:
467 btrfs_mark_buffer_dirty(leaf); 420 btrfs_mark_buffer_dirty(leaf);
468 421
469 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 422 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
470 btrfs_release_path(tree_root, path); 423 btrfs_release_path(path);
471 key.objectid = ref_id; 424 key.objectid = ref_id;
472 key.type = BTRFS_ROOT_REF_KEY; 425 key.type = BTRFS_ROOT_REF_KEY;
473 key.offset = root_id; 426 key.offset = root_id;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
new file mode 100644
index 000000000000..87a2f1273136
--- /dev/null
+++ b/fs/btrfs/scrub.c
@@ -0,0 +1,1368 @@
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h"
27#include "volumes.h"
28#include "disk-io.h"
29#include "ordered-data.h"
30
31/*
 32 * This is only the first step towards a full-featured scrub. It reads all
 33 * extents and super blocks and verifies the checksums. In case a bad checksum
34 * is found or the extent cannot be read, good data will be written back if
35 * any can be found.
36 *
37 * Future enhancements:
 38 * - To enhance performance, better read-ahead strategies for the
39 * extent-tree can be employed.
40 * - In case an unrepairable extent is encountered, track which files are
41 * affected and report them
42 * - In case of a read error on files with nodatasum, map the file and read
43 * the extent to trigger a writeback of the good copy
44 * - track and record media errors, throw out bad devices
45 * - add a mode to also read unallocated space
46 * - make the prefetch cancellable
47 */
48
49struct scrub_bio;
50struct scrub_page;
51struct scrub_dev;
52static void scrub_bio_end_io(struct bio *bio, int err);
53static void scrub_checksum(struct btrfs_work *work);
54static int scrub_checksum_data(struct scrub_dev *sdev,
55 struct scrub_page *spag, void *buffer);
56static int scrub_checksum_tree_block(struct scrub_dev *sdev,
57 struct scrub_page *spag, u64 logical,
58 void *buffer);
59static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
60static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
61static void scrub_fixup_end_io(struct bio *bio, int err);
62static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
63 struct page *page);
64static void scrub_fixup(struct scrub_bio *sbio, int ix);
65
66#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
67#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
68
69struct scrub_page {
70 u64 flags; /* extent flags */
71 u64 generation;
72 u64 mirror_num;
73 int have_csum;
74 u8 csum[BTRFS_CSUM_SIZE];
75};
76
77struct scrub_bio {
78 int index;
79 struct scrub_dev *sdev;
80 struct bio *bio;
81 int err;
82 u64 logical;
83 u64 physical;
84 struct scrub_page spag[SCRUB_PAGES_PER_BIO];
85 u64 count;
86 int next_free;
87 struct btrfs_work work;
88};
89
90struct scrub_dev {
91 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
92 struct btrfs_device *dev;
93 int first_free;
94 int curr;
95 atomic_t in_flight;
96 spinlock_t list_lock;
97 wait_queue_head_t list_wait;
98 u16 csum_size;
99 struct list_head csum_list;
100 atomic_t cancel_req;
101 int readonly;
102 /*
103 * statistics
104 */
105 struct btrfs_scrub_progress stat;
106 spinlock_t stat_lock;
107};
108
109static void scrub_free_csums(struct scrub_dev *sdev)
110{
111 while (!list_empty(&sdev->csum_list)) {
112 struct btrfs_ordered_sum *sum;
113 sum = list_first_entry(&sdev->csum_list,
114 struct btrfs_ordered_sum, list);
115 list_del(&sum->list);
116 kfree(sum);
117 }
118}
119
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{
122 int i;
123 int j;
124 struct page *last_page;
125
126 if (!sdev)
127 return;
128
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132
133 if (!sbio)
134 break;
135
136 bio = sbio->bio;
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio);
148 }
149
150 scrub_free_csums(sdev);
151 kfree(sdev);
152}
153
154static noinline_for_stack
155struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{
157 struct scrub_dev *sdev;
158 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
164 if (!sdev)
165 goto nomem;
166 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio;
170
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
172 if (!sbio)
173 goto nomem;
174 sdev->bios[i] = sbio;
175
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i;
181 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0;
184 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202
203 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1;
205 else
206 sdev->bios[i]->next_free = -1;
207 }
208 sdev->first_free = 0;
209 sdev->curr = -1;
210 atomic_set(&sdev->in_flight, 0);
211 atomic_set(&sdev->cancel_req, 0);
212 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
213 INIT_LIST_HEAD(&sdev->csum_list);
214
215 spin_lock_init(&sdev->list_lock);
216 spin_lock_init(&sdev->stat_lock);
217 init_waitqueue_head(&sdev->list_wait);
218 return sdev;
219
220nomem:
221 scrub_free_dev(sdev);
222 return ERR_PTR(-ENOMEM);
223}
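
The bios array set up here doubles as an intrusive free list: each slot's next_free holds the index of the next free slot, first_free heads the chain, and -1 terminates it. A standalone sketch of how scrub_page() later pops a slot and scrub_checksum() pushes it back (userspace model, hypothetical names):

#include <stdio.h>

#define NSLOTS 16

struct slot { int next_free; };

int main(void)
{
        struct slot slots[NSLOTS];
        int first_free = 0, i;

        /* same chain scrub_setup_dev builds: 0 -> 1 -> ... -> 15 -> -1 */
        for (i = 0; i < NSLOTS; i++)
                slots[i].next_free = (i != NSLOTS - 1) ? i + 1 : -1;

        /* pop two slots, as scrub_page() does under list_lock */
        int a = first_free; first_free = slots[a].next_free;
        int b = first_free; first_free = slots[b].next_free;

        /* push the first one back, as scrub_checksum() does on completion */
        slots[a].next_free = first_free;
        first_free = a;

        printf("popped %d and %d, free head now %d\n", a, b, first_free);
        return 0;
}
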
224
225/*
226 * scrub_recheck_error gets called when either verification of the page
227 * failed or the bio failed to read, e.g. with EIO. In the latter case,
228 * recheck_error gets called for every page in the bio, even though only
229 * one may be bad
230 */
231static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
232{
233 if (sbio->err) {
234 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
235 (sbio->physical + ix * PAGE_SIZE) >> 9,
236 sbio->bio->bi_io_vec[ix].bv_page) == 0) {
237 if (scrub_fixup_check(sbio, ix) == 0)
238 return;
239 }
240 }
241
242 scrub_fixup(sbio, ix);
243}
244
245static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
246{
247 int ret = 1;
248 struct page *page;
249 void *buffer;
250 u64 flags = sbio->spag[ix].flags;
251
252 page = sbio->bio->bi_io_vec[ix].bv_page;
253 buffer = kmap_atomic(page, KM_USER0);
254 if (flags & BTRFS_EXTENT_FLAG_DATA) {
255 ret = scrub_checksum_data(sbio->sdev,
256 sbio->spag + ix, buffer);
257 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
258 ret = scrub_checksum_tree_block(sbio->sdev,
259 sbio->spag + ix,
260 sbio->logical + ix * PAGE_SIZE,
261 buffer);
262 } else {
263 WARN_ON(1);
264 }
265 kunmap_atomic(buffer, KM_USER0);
266
267 return ret;
268}
269
270static void scrub_fixup_end_io(struct bio *bio, int err)
271{
272 complete((struct completion *)bio->bi_private);
273}
274
275static void scrub_fixup(struct scrub_bio *sbio, int ix)
276{
277 struct scrub_dev *sdev = sbio->sdev;
278 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
279 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
280 struct btrfs_multi_bio *multi = NULL;
281 u64 logical = sbio->logical + ix * PAGE_SIZE;
282 u64 length;
283 int i;
284 int ret;
285 DECLARE_COMPLETION_ONSTACK(complete);
286
287 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
288 (sbio->spag[ix].have_csum == 0)) {
289 /*
290 * nodatasum, don't try to fix anything
291 * FIXME: we can do better, open the inode and trigger a
292 * writeback
293 */
294 goto uncorrectable;
295 }
296
297 length = PAGE_SIZE;
298 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
299 &multi, 0);
300 if (ret || !multi || length < PAGE_SIZE) {
301 printk(KERN_ERR
302 "scrub_fixup: btrfs_map_block failed us for %llu\n",
303 (unsigned long long)logical);
304 WARN_ON(1);
305 return;
306 }
307
308 if (multi->num_stripes == 1)
309 /* there aren't any replicas */
310 goto uncorrectable;
311
312 /*
313 * first find a good copy
314 */
315 for (i = 0; i < multi->num_stripes; ++i) {
316 if (i == sbio->spag[ix].mirror_num)
317 continue;
318
319 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
320 multi->stripes[i].physical >> 9,
321 sbio->bio->bi_io_vec[ix].bv_page)) {
322 /* I/O-error, this is not a good copy */
323 continue;
324 }
325
326 if (scrub_fixup_check(sbio, ix) == 0)
327 break;
328 }
329 if (i == multi->num_stripes)
330 goto uncorrectable;
331
332 if (!sdev->readonly) {
333 /*
334 * bi_io_vec[ix].bv_page now contains good data, write it back
335 */
336 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
337 (sbio->physical + ix * PAGE_SIZE) >> 9,
338 sbio->bio->bi_io_vec[ix].bv_page)) {
339 /* I/O-error, writeback failed, give up */
340 goto uncorrectable;
341 }
342 }
343
344 kfree(multi);
345 spin_lock(&sdev->stat_lock);
346 ++sdev->stat.corrected_errors;
347 spin_unlock(&sdev->stat_lock);
348
349 if (printk_ratelimit())
350 printk(KERN_ERR "btrfs: fixed up at %llu\n",
351 (unsigned long long)logical);
352 return;
353
354uncorrectable:
355 kfree(multi);
356 spin_lock(&sdev->stat_lock);
357 ++sdev->stat.uncorrectable_errors;
358 spin_unlock(&sdev->stat_lock);
359
360 if (printk_ratelimit())
361 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
362 (unsigned long long)logical);
363}
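
The repair path above boils down to: skip the mirror that produced the error, take the first remaining copy that both reads cleanly and verifies, then write it back over the bad sectors unless the scrub is read-only. The same control flow with stubbed I/O and checksum results (all values invented for illustration):

#include <stdio.h>

static int read_ok(int mirror) { return mirror != 0; } /* mirror 0 is bad     */
static int csum_ok(int mirror) { return mirror == 2; } /* only mirror 2 valid */

int main(void)
{
        int bad_mirror = 0, num_stripes = 3, i;

        for (i = 0; i < num_stripes; i++) {
                if (i == bad_mirror)
                        continue;
                if (!read_ok(i))
                        continue;       /* I/O error: not a good copy */
                if (csum_ok(i))
                        break;          /* found a verified copy */
        }
        if (i == num_stripes)
                printf("uncorrectable\n");
        else
                printf("fixed up from mirror %d\n", i); /* mirror 2 */
        return 0;
}
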
364
365static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
366 struct page *page)
367{
368 struct bio *bio = NULL;
369 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete);
371
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev;
377 bio->bi_sector = sector;
378 bio_add_page(bio, page, PAGE_SIZE, 0);
379 bio->bi_end_io = scrub_fixup_end_io;
380 bio->bi_private = &complete;
381 submit_bio(rw, bio);
382
383 wait_for_completion(&complete);
384
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
386 bio_put(bio);
387 return ret;
388}
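
The >> 9 shifts here and in scrub_fixup convert byte offsets into the 512-byte sector units that bi_sector expects; a one-line check of the arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned long long physical = 4096 * 3;       /* byte offset on disk */
        unsigned long long sector   = physical >> 9;  /* == physical / 512   */
        printf("%llu bytes -> sector %llu\n", physical, sector); /* 12288 -> 24 */
        return 0;
}
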
389
390static void scrub_bio_end_io(struct bio *bio, int err)
391{
392 struct scrub_bio *sbio = bio->bi_private;
393 struct scrub_dev *sdev = sbio->sdev;
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395
396 sbio->err = err;
397
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399}
400
401static void scrub_checksum(struct btrfs_work *work)
402{
403 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
404 struct scrub_dev *sdev = sbio->sdev;
405 struct page *page;
406 void *buffer;
407 int i;
408 u64 flags;
409 u64 logical;
410 int ret;
411
412 if (sbio->err) {
413 for (i = 0; i < sbio->count; ++i)
414 scrub_recheck_error(sbio, i);
415
416 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
417 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
418 sbio->bio->bi_phys_segments = 0;
419 sbio->bio->bi_idx = 0;
420
421 for (i = 0; i < sbio->count; i++) {
422 struct bio_vec *bi;
423 bi = &sbio->bio->bi_io_vec[i];
424 bi->bv_offset = 0;
425 bi->bv_len = PAGE_SIZE;
426 }
427
428 spin_lock(&sdev->stat_lock);
429 ++sdev->stat.read_errors;
430 spin_unlock(&sdev->stat_lock);
431 goto out;
432 }
433 for (i = 0; i < sbio->count; ++i) {
434 page = sbio->bio->bi_io_vec[i].bv_page;
435 buffer = kmap_atomic(page, KM_USER0);
436 flags = sbio->spag[i].flags;
437 logical = sbio->logical + i * PAGE_SIZE;
438 ret = 0;
439 if (flags & BTRFS_EXTENT_FLAG_DATA) {
440 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
441 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
442 ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
443 logical, buffer);
444 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
445 BUG_ON(i);
446 (void)scrub_checksum_super(sbio, buffer);
447 } else {
448 WARN_ON(1);
449 }
450 kunmap_atomic(buffer, KM_USER0);
451 if (ret)
452 scrub_recheck_error(sbio, i);
453 }
454
455out:
456 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index;
459 spin_unlock(&sdev->list_lock);
460 atomic_dec(&sdev->in_flight);
461 wake_up(&sdev->list_wait);
462}
463
464static int scrub_checksum_data(struct scrub_dev *sdev,
465 struct scrub_page *spag, void *buffer)
466{
467 u8 csum[BTRFS_CSUM_SIZE];
468 u32 crc = ~(u32)0;
469 int fail = 0;
470 struct btrfs_root *root = sdev->dev->dev_root;
471
472 if (!spag->have_csum)
473 return 0;
474
475 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
476 btrfs_csum_final(crc, csum);
477 if (memcmp(csum, spag->csum, sdev->csum_size))
478 fail = 1;
479
480 spin_lock(&sdev->stat_lock);
481 ++sdev->stat.data_extents_scrubbed;
482 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
483 if (fail)
484 ++sdev->stat.csum_errors;
485 spin_unlock(&sdev->stat_lock);
486
487 return fail;
488}
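
scrub_checksum_data computes crc32c (the Castagnoli polynomial) over the whole page, seeded with ~0 and inverted by btrfs_csum_final, then compares the result to the checksum stored at write time. A self-contained model with a plain bitwise crc32c (the kernel uses an optimized implementation; this only shows the dataflow):

#include <stdint.h>
#include <stdio.h>

/* bitwise crc32c, reflected polynomial 0x82F63B78 */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
        const uint8_t *p = buf;
        while (len--) {
                crc ^= *p++;
                for (int i = 0; i < 8; i++)
                        crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
        }
        return crc;
}

int main(void)
{
        uint8_t page[4096] = { 0 };
        /* checksum as it would have been stored at write time */
        uint32_t stored = ~crc32c(~0u, page, sizeof(page));
        page[100] ^= 0x01;                       /* simulate on-disk corruption */
        uint32_t now = ~crc32c(~0u, page, sizeof(page));
        printf("%s\n", now == stored ? "ok" : "csum error"); /* csum error */
        return 0;
}
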
489
490static int scrub_checksum_tree_block(struct scrub_dev *sdev,
491 struct scrub_page *spag, u64 logical,
492 void *buffer)
493{
494 struct btrfs_header *h;
495 struct btrfs_root *root = sdev->dev->dev_root;
496 struct btrfs_fs_info *fs_info = root->fs_info;
497 u8 csum[BTRFS_CSUM_SIZE];
498 u32 crc = ~(u32)0;
499 int fail = 0;
500 int crc_fail = 0;
501
502 /*
503 * we don't use the getter functions here, as we
504 * a) don't have an extent buffer and
505 * b) the page is already kmapped
506 */
507 h = (struct btrfs_header *)buffer;
508
509 if (logical != le64_to_cpu(h->bytenr))
510 ++fail;
511
512 if (spag->generation != le64_to_cpu(h->generation))
513 ++fail;
514
515 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
516 ++fail;
517
518 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
519 BTRFS_UUID_SIZE))
520 ++fail;
521
522 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
523 PAGE_SIZE - BTRFS_CSUM_SIZE);
524 btrfs_csum_final(crc, csum);
525 if (memcmp(csum, h->csum, sdev->csum_size))
526 ++crc_fail;
527
528 spin_lock(&sdev->stat_lock);
529 ++sdev->stat.tree_extents_scrubbed;
530 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
531 if (crc_fail)
532 ++sdev->stat.csum_errors;
533 if (fail)
534 ++sdev->stat.verify_errors;
535 spin_unlock(&sdev->stat_lock);
536
537 return fail || crc_fail;
538}
539
540static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
541{
542 struct btrfs_super_block *s;
543 u64 logical;
544 struct scrub_dev *sdev = sbio->sdev;
545 struct btrfs_root *root = sdev->dev->dev_root;
546 struct btrfs_fs_info *fs_info = root->fs_info;
547 u8 csum[BTRFS_CSUM_SIZE];
548 u32 crc = ~(u32)0;
549 int fail = 0;
550
551 s = (struct btrfs_super_block *)buffer;
552 logical = sbio->logical;
553
554 if (logical != le64_to_cpu(s->bytenr))
555 ++fail;
556
557 if (sbio->spag[0].generation != le64_to_cpu(s->generation))
558 ++fail;
559
560 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
561 ++fail;
562
563 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
564 PAGE_SIZE - BTRFS_CSUM_SIZE);
565 btrfs_csum_final(crc, csum);
566 if (memcmp(csum, s->csum, sbio->sdev->csum_size))
567 ++fail;
568
569 if (fail) {
570 /*
571 * if we find an error in a super block, we just report it.
 572	 * Super blocks get rewritten with the next transaction commit
573 * anyway
574 */
575 spin_lock(&sdev->stat_lock);
576 ++sdev->stat.super_errors;
577 spin_unlock(&sdev->stat_lock);
578 }
579
580 return fail;
581}
582
583static int scrub_submit(struct scrub_dev *sdev)
584{
585 struct scrub_bio *sbio;
586
587 if (sdev->curr == -1)
588 return 0;
589
590 sbio = sdev->bios[sdev->curr];
591
592 sbio->bio->bi_sector = sbio->physical >> 9;
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE;
594 sbio->bio->bi_next = NULL;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
596 sbio->bio->bi_comp_cpu = -1;
597 sbio->bio->bi_bdev = sdev->dev->bdev;
598 sbio->err = 0;
599 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight);
601
602 submit_bio(0, sbio->bio);
603
604 return 0;
605}
606
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
608 u64 physical, u64 flags, u64 gen, u64 mirror_num,
609 u8 *csum, int force)
610{
611 struct scrub_bio *sbio;
612
613again:
614 /*
615 * grab a fresh bio or wait for one to become available
616 */
617 while (sdev->curr == -1) {
618 spin_lock(&sdev->list_lock);
619 sdev->curr = sdev->first_free;
620 if (sdev->curr != -1) {
621 sdev->first_free = sdev->bios[sdev->curr]->next_free;
622 sdev->bios[sdev->curr]->next_free = -1;
623 sdev->bios[sdev->curr]->count = 0;
624 spin_unlock(&sdev->list_lock);
625 } else {
626 spin_unlock(&sdev->list_lock);
627 wait_event(sdev->list_wait, sdev->first_free != -1);
628 }
629 }
630 sbio = sdev->bios[sdev->curr];
631 if (sbio->count == 0) {
632 sbio->physical = physical;
633 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical) {
635 scrub_submit(sdev);
636 goto again;
637 }
638 sbio->spag[sbio->count].flags = flags;
639 sbio->spag[sbio->count].generation = gen;
640 sbio->spag[sbio->count].have_csum = 0;
641 sbio->spag[sbio->count].mirror_num = mirror_num;
642 if (csum) {
643 sbio->spag[sbio->count].have_csum = 1;
644 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
645 }
646 ++sbio->count;
647 if (sbio->count == SCRUB_PAGES_PER_BIO || force)
648 scrub_submit(sdev);
649
650 return 0;
651}
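
scrub_page appends pages to the current bio only while they stay physically contiguous; the first gap forces a submit and starts a fresh bio. A toy model of that batching rule (offsets invented):

#include <stdio.h>
#define PAGE_SIZE 4096ULL

int main(void)
{
        unsigned long long bio_physical = 0, count = 0;
        unsigned long long requests[] =
                { 1 * PAGE_SIZE, 2 * PAGE_SIZE, 3 * PAGE_SIZE, 10 * PAGE_SIZE };

        for (int i = 0; i < 4; i++) {
                unsigned long long physical = requests[i];

                /* same contiguity test as scrub_page() */
                if (count && bio_physical + count * PAGE_SIZE != physical) {
                        printf("submit bio: %llu pages at %llu\n",
                               count, bio_physical);
                        count = 0;
                }
                if (count == 0)
                        bio_physical = physical;
                count++;
        }
        printf("submit bio: %llu pages at %llu\n", count, bio_physical);
        return 0;   /* first three pages batch; the gap forces a submit */
}
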
652
653static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
654 u8 *csum)
655{
656 struct btrfs_ordered_sum *sum = NULL;
657 int ret = 0;
658 unsigned long i;
659 unsigned long num_sectors;
660 u32 sectorsize = sdev->dev->dev_root->sectorsize;
661
662 while (!list_empty(&sdev->csum_list)) {
663 sum = list_first_entry(&sdev->csum_list,
664 struct btrfs_ordered_sum, list);
665 if (sum->bytenr > logical)
666 return 0;
667 if (sum->bytenr + sum->len > logical)
668 break;
669
670 ++sdev->stat.csum_discards;
671 list_del(&sum->list);
672 kfree(sum);
673 sum = NULL;
674 }
675 if (!sum)
676 return 0;
677
678 num_sectors = sum->len / sectorsize;
679 for (i = 0; i < num_sectors; ++i) {
680 if (sum->sums[i].bytenr == logical) {
681 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
682 ret = 1;
683 break;
684 }
685 }
686 if (ret && i == num_sectors - 1) {
687 list_del(&sum->list);
688 kfree(sum);
689 }
690 return ret;
691}
692
 693/* scrub_extent tries to collect up to 64 kB for each bio */
694static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
695 u64 physical, u64 flags, u64 gen, u64 mirror_num)
696{
697 int ret;
698 u8 csum[BTRFS_CSUM_SIZE];
699
700 while (len) {
701 u64 l = min_t(u64, len, PAGE_SIZE);
702 int have_csum = 0;
703
704 if (flags & BTRFS_EXTENT_FLAG_DATA) {
705 /* push csums to sbio */
706 have_csum = scrub_find_csum(sdev, logical, l, csum);
707 if (have_csum == 0)
708 ++sdev->stat.no_csum;
709 }
710 ret = scrub_page(sdev, logical, l, physical, flags, gen,
711 mirror_num, have_csum ? csum : NULL, 0);
712 if (ret)
713 return ret;
714 len -= l;
715 logical += l;
716 physical += l;
717 }
718 return 0;
719}
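
scrub_extent itself is a plain chunker: min_t(u64, len, PAGE_SIZE) walks the extent a page at a time while logical and physical advance in lockstep, and only data extents get a csum attached. Modeled standalone (addresses are arbitrary):

#include <stdio.h>
#define PAGE_SIZE 4096ULL

int main(void)
{
        unsigned long long logical = 1 << 20, physical = 1 << 16;
        unsigned long long len = 10000; /* deliberately not page-aligned */

        while (len) {
                unsigned long long l = len < PAGE_SIZE ? len : PAGE_SIZE;
                printf("scrub %llu bytes: logical %llu -> physical %llu\n",
                       l, logical, physical);
                len -= l;
                logical += l;
                physical += l;      /* 4096, 4096, then the 1808-byte tail */
        }
        return 0;
}
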
720
721static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
722 struct map_lookup *map, int num, u64 base, u64 length)
723{
724 struct btrfs_path *path;
725 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
726 struct btrfs_root *root = fs_info->extent_root;
727 struct btrfs_root *csum_root = fs_info->csum_root;
728 struct btrfs_extent_item *extent;
729 u64 flags;
730 int ret;
731 int slot;
732 int i;
733 u64 nstripes;
734 int start_stripe;
735 struct extent_buffer *l;
736 struct btrfs_key key;
737 u64 physical;
738 u64 logical;
739 u64 generation;
740 u64 mirror_num;
741
742 u64 increment = map->stripe_len;
743 u64 offset;
744
745 nstripes = length;
746 offset = 0;
747 do_div(nstripes, map->stripe_len);
748 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
749 offset = map->stripe_len * num;
750 increment = map->stripe_len * map->num_stripes;
751 mirror_num = 0;
752 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
753 int factor = map->num_stripes / map->sub_stripes;
754 offset = map->stripe_len * (num / map->sub_stripes);
755 increment = map->stripe_len * factor;
756 mirror_num = num % map->sub_stripes;
757 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
758 increment = map->stripe_len;
759 mirror_num = num % map->num_stripes;
760 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
761 increment = map->stripe_len;
762 mirror_num = num % map->num_stripes;
763 } else {
764 increment = map->stripe_len;
765 mirror_num = 0;
766 }
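
A worked example of the offset/increment arithmetic above, under assumed geometry: RAID0, stripe_len = 64KiB, num_stripes = 4, scrubbing device index num = 1. The device owns every fourth stripe, so the scrub starts 64KiB into the chunk and advances 256KiB per stripe it reads:

#include <stdio.h>

int main(void)
{
        unsigned long long stripe_len = 64 * 1024;
        int num_stripes = 4;
        int num = 1; /* index of the device being scrubbed */

        unsigned long long offset    = stripe_len * num;          /* 64 KiB  */
        unsigned long long increment = stripe_len * num_stripes;  /* 256 KiB */

        for (int i = 0; i < 3; i++)
                printf("stripe %d: logical chunk offset %llu KiB\n",
                       i, (offset + i * increment) / 1024); /* 64, 320, 576 */
        return 0;
}
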
767
768 path = btrfs_alloc_path();
769 if (!path)
770 return -ENOMEM;
771
772 path->reada = 2;
773 path->search_commit_root = 1;
774 path->skip_locking = 1;
775
776 /*
777 * find all extents for each stripe and just read them to get
778 * them into the page cache
 779	 * FIXME: we can do better. build more intelligent prefetching
780 */
781 logical = base + offset;
782 physical = map->stripes[num].physical;
783 ret = 0;
784 for (i = 0; i < nstripes; ++i) {
785 key.objectid = logical;
786 key.type = BTRFS_EXTENT_ITEM_KEY;
787 key.offset = (u64)0;
788
789 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
790 if (ret < 0)
791 goto out;
792
793 l = path->nodes[0];
794 slot = path->slots[0];
795 btrfs_item_key_to_cpu(l, &key, slot);
796 if (key.objectid != logical) {
797 ret = btrfs_previous_item(root, path, 0,
798 BTRFS_EXTENT_ITEM_KEY);
799 if (ret < 0)
800 goto out;
801 }
802
803 while (1) {
804 l = path->nodes[0];
805 slot = path->slots[0];
806 if (slot >= btrfs_header_nritems(l)) {
807 ret = btrfs_next_leaf(root, path);
808 if (ret == 0)
809 continue;
810 if (ret < 0)
811 goto out;
812
813 break;
814 }
815 btrfs_item_key_to_cpu(l, &key, slot);
816
817 if (key.objectid >= logical + map->stripe_len)
818 break;
819
820 path->slots[0]++;
821 }
822 btrfs_release_path(path);
823 logical += increment;
824 physical += map->stripe_len;
825 cond_resched();
826 }
827
828 /*
829 * collect all data csums for the stripe to avoid seeking during
 830	 * the scrub. This might currently (crc32) end up being about 1MB
831 */
832 start_stripe = 0;
833again:
834 logical = base + offset + start_stripe * increment;
835 for (i = start_stripe; i < nstripes; ++i) {
836 ret = btrfs_lookup_csums_range(csum_root, logical,
837 logical + map->stripe_len - 1,
838 &sdev->csum_list, 1);
839 if (ret)
840 goto out;
841
842 logical += increment;
843 cond_resched();
844 }
845 /*
846 * now find all extents for each stripe and scrub them
847 */
848 logical = base + offset + start_stripe * increment;
849 physical = map->stripes[num].physical + start_stripe * map->stripe_len;
850 ret = 0;
851 for (i = start_stripe; i < nstripes; ++i) {
852 /*
853 * canceled?
854 */
855 if (atomic_read(&fs_info->scrub_cancel_req) ||
856 atomic_read(&sdev->cancel_req)) {
857 ret = -ECANCELED;
858 goto out;
859 }
860 /*
861 * check to see if we have to pause
862 */
863 if (atomic_read(&fs_info->scrub_pause_req)) {
864 /* push queued extents */
865 scrub_submit(sdev);
866 wait_event(sdev->list_wait,
867 atomic_read(&sdev->in_flight) == 0);
868 atomic_inc(&fs_info->scrubs_paused);
869 wake_up(&fs_info->scrub_pause_wait);
870 mutex_lock(&fs_info->scrub_lock);
871 while (atomic_read(&fs_info->scrub_pause_req)) {
872 mutex_unlock(&fs_info->scrub_lock);
873 wait_event(fs_info->scrub_pause_wait,
874 atomic_read(&fs_info->scrub_pause_req) == 0);
875 mutex_lock(&fs_info->scrub_lock);
876 }
877 atomic_dec(&fs_info->scrubs_paused);
878 mutex_unlock(&fs_info->scrub_lock);
879 wake_up(&fs_info->scrub_pause_wait);
880 scrub_free_csums(sdev);
881 start_stripe = i;
882 goto again;
883 }
884
885 key.objectid = logical;
886 key.type = BTRFS_EXTENT_ITEM_KEY;
887 key.offset = (u64)0;
888
889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
890 if (ret < 0)
891 goto out;
892
893 l = path->nodes[0];
894 slot = path->slots[0];
895 btrfs_item_key_to_cpu(l, &key, slot);
896 if (key.objectid != logical) {
897 ret = btrfs_previous_item(root, path, 0,
898 BTRFS_EXTENT_ITEM_KEY);
899 if (ret < 0)
900 goto out;
901 }
902
903 while (1) {
904 l = path->nodes[0];
905 slot = path->slots[0];
906 if (slot >= btrfs_header_nritems(l)) {
907 ret = btrfs_next_leaf(root, path);
908 if (ret == 0)
909 continue;
910 if (ret < 0)
911 goto out;
912
913 break;
914 }
915 btrfs_item_key_to_cpu(l, &key, slot);
916
917 if (key.objectid + key.offset <= logical)
918 goto next;
919
920 if (key.objectid >= logical + map->stripe_len)
921 break;
922
923 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
924 goto next;
925
926 extent = btrfs_item_ptr(l, slot,
927 struct btrfs_extent_item);
928 flags = btrfs_extent_flags(l, extent);
929 generation = btrfs_extent_generation(l, extent);
930
931 if (key.objectid < logical &&
932 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
933 printk(KERN_ERR
934 "btrfs scrub: tree block %llu spanning "
935 "stripes, ignored. logical=%llu\n",
936 (unsigned long long)key.objectid,
937 (unsigned long long)logical);
938 goto next;
939 }
940
941 /*
942 * trim extent to this stripe
943 */
944 if (key.objectid < logical) {
945 key.offset -= logical - key.objectid;
946 key.objectid = logical;
947 }
948 if (key.objectid + key.offset >
949 logical + map->stripe_len) {
950 key.offset = logical + map->stripe_len -
951 key.objectid;
952 }
953
954 ret = scrub_extent(sdev, key.objectid, key.offset,
955 key.objectid - logical + physical,
956 flags, generation, mirror_num);
957 if (ret)
958 goto out;
959
960next:
961 path->slots[0]++;
962 }
963 btrfs_release_path(path);
964 logical += increment;
965 physical += map->stripe_len;
966 spin_lock(&sdev->stat_lock);
967 sdev->stat.last_physical = physical;
968 spin_unlock(&sdev->stat_lock);
969 }
970 /* push queued extents */
971 scrub_submit(sdev);
972
973out:
974 btrfs_free_path(path);
975 return ret < 0 ? ret : 0;
976}
977
978static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
979 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
980{
981 struct btrfs_mapping_tree *map_tree =
982 &sdev->dev->dev_root->fs_info->mapping_tree;
983 struct map_lookup *map;
984 struct extent_map *em;
985 int i;
986 int ret = -EINVAL;
987
988 read_lock(&map_tree->map_tree.lock);
989 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
990 read_unlock(&map_tree->map_tree.lock);
991
992 if (!em)
993 return -EINVAL;
994
995 map = (struct map_lookup *)em->bdev;
996 if (em->start != chunk_offset)
997 goto out;
998
999 if (em->len < length)
1000 goto out;
1001
1002 for (i = 0; i < map->num_stripes; ++i) {
1003 if (map->stripes[i].dev == sdev->dev) {
1004 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1005 if (ret)
1006 goto out;
1007 }
1008 }
1009out:
1010 free_extent_map(em);
1011
1012 return ret;
1013}
1014
1015static noinline_for_stack
1016int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1017{
1018 struct btrfs_dev_extent *dev_extent = NULL;
1019 struct btrfs_path *path;
1020 struct btrfs_root *root = sdev->dev->dev_root;
1021 struct btrfs_fs_info *fs_info = root->fs_info;
1022 u64 length;
1023 u64 chunk_tree;
1024 u64 chunk_objectid;
1025 u64 chunk_offset;
1026 int ret;
1027 int slot;
1028 struct extent_buffer *l;
1029 struct btrfs_key key;
1030 struct btrfs_key found_key;
1031 struct btrfs_block_group_cache *cache;
1032
1033 path = btrfs_alloc_path();
1034 if (!path)
1035 return -ENOMEM;
1036
1037 path->reada = 2;
1038 path->search_commit_root = 1;
1039 path->skip_locking = 1;
1040
1041 key.objectid = sdev->dev->devid;
1042 key.offset = 0ull;
1043 key.type = BTRFS_DEV_EXTENT_KEY;
1044
1045
1046 while (1) {
1047 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1048 if (ret < 0)
1049 goto out;
1050 ret = 0;
1051
1052 l = path->nodes[0];
1053 slot = path->slots[0];
1054
1055 btrfs_item_key_to_cpu(l, &found_key, slot);
1056
1057 if (found_key.objectid != sdev->dev->devid)
1058 break;
1059
1060 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
1061 break;
1062
1063 if (found_key.offset >= end)
1064 break;
1065
1066 if (found_key.offset < key.offset)
1067 break;
1068
1069 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1070 length = btrfs_dev_extent_length(l, dev_extent);
1071
1072 if (found_key.offset + length <= start) {
1073 key.offset = found_key.offset + length;
1074 btrfs_release_path(path);
1075 continue;
1076 }
1077
1078 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1079 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1080 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1081
1082 /*
1083 * get a reference on the corresponding block group to prevent
1084 * the chunk from going away while we scrub it
1085 */
1086 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1087 if (!cache) {
1088 ret = -ENOENT;
1089 goto out;
1090 }
1091 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1092 chunk_offset, length);
1093 btrfs_put_block_group(cache);
1094 if (ret)
1095 break;
1096
1097 key.offset = found_key.offset + length;
1098 btrfs_release_path(path);
1099 }
1100
1101out:
1102 btrfs_free_path(path);
1103 return ret;
1104}
1105
1106static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1107{
1108 int i;
1109 u64 bytenr;
1110 u64 gen;
1111 int ret;
1112 struct btrfs_device *device = sdev->dev;
1113 struct btrfs_root *root = device->dev_root;
1114
1115 gen = root->fs_info->last_trans_committed;
1116
1117 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1118 bytenr = btrfs_sb_offset(i);
1119 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1120 break;
1121
1122 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1123 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1124 if (ret)
1125 return ret;
1126 }
1127 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1128
1129 return 0;
1130}
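
btrfs_sb_offset() is defined elsewhere; assuming the definitions in place at the time of this commit (primary super at 64KiB, each mirror a further 12-bit shift out), the three candidate locations land at 64KiB, 64MiB and 256GiB, and the loop above skips any that fall past the end of the device. A sketch of that computation:

#include <stdio.h>

/* assumed to match btrfs_sb_offset() at the time of this commit */
static unsigned long long sb_offset(int mirror)
{
        unsigned long long start = 16ULL * 1024;

        if (mirror)
                return start << (12 * mirror);  /* BTRFS_SUPER_MIRROR_SHIFT */
        return 64 * 1024;                       /* BTRFS_SUPER_INFO_OFFSET  */
}

int main(void)
{
        for (int i = 0; i < 3; i++)             /* BTRFS_SUPER_MIRROR_MAX   */
                printf("super mirror %d at %llu KiB\n", i, sb_offset(i) / 1024);
        return 0;                               /* 64, 65536, 268435456 KiB */
}
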
1131
1132/*
1133 * get a reference count on fs_info->scrub_workers. start a worker if necessary
1134 */
1135static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1136{
1137 struct btrfs_fs_info *fs_info = root->fs_info;
1138
1139 mutex_lock(&fs_info->scrub_lock);
1140 if (fs_info->scrub_workers_refcnt == 0)
1141 btrfs_start_workers(&fs_info->scrub_workers, 1);
1142 ++fs_info->scrub_workers_refcnt;
1143 mutex_unlock(&fs_info->scrub_lock);
1144
1145 return 0;
1146}
1147
1148static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1149{
1150 struct btrfs_fs_info *fs_info = root->fs_info;
1151
1152 mutex_lock(&fs_info->scrub_lock);
1153 if (--fs_info->scrub_workers_refcnt == 0)
1154 btrfs_stop_workers(&fs_info->scrub_workers);
1155 WARN_ON(fs_info->scrub_workers_refcnt < 0);
1156 mutex_unlock(&fs_info->scrub_lock);
1157}
1158
1159
1160int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1161 struct btrfs_scrub_progress *progress, int readonly)
1162{
1163 struct scrub_dev *sdev;
1164 struct btrfs_fs_info *fs_info = root->fs_info;
1165 int ret;
1166 struct btrfs_device *dev;
1167
1168 if (root->fs_info->closing)
1169 return -EINVAL;
1170
1171 /*
1172 * check some assumptions
1173 */
1174 if (root->sectorsize != PAGE_SIZE ||
1175 root->sectorsize != root->leafsize ||
1176 root->sectorsize != root->nodesize) {
1177 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1178 return -EINVAL;
1179 }
1180
1181 ret = scrub_workers_get(root);
1182 if (ret)
1183 return ret;
1184
1185 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1186 dev = btrfs_find_device(root, devid, NULL, NULL);
1187 if (!dev || dev->missing) {
1188 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1189 scrub_workers_put(root);
1190 return -ENODEV;
1191 }
1192 mutex_lock(&fs_info->scrub_lock);
1193
1194 if (!dev->in_fs_metadata) {
1195 mutex_unlock(&fs_info->scrub_lock);
1196 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1197 scrub_workers_put(root);
1198 return -ENODEV;
1199 }
1200
1201 if (dev->scrub_device) {
1202 mutex_unlock(&fs_info->scrub_lock);
1203 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1204 scrub_workers_put(root);
1205 return -EINPROGRESS;
1206 }
1207 sdev = scrub_setup_dev(dev);
1208 if (IS_ERR(sdev)) {
1209 mutex_unlock(&fs_info->scrub_lock);
1210 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1211 scrub_workers_put(root);
1212 return PTR_ERR(sdev);
1213 }
1214 sdev->readonly = readonly;
1215 dev->scrub_device = sdev;
1216
1217 atomic_inc(&fs_info->scrubs_running);
1218 mutex_unlock(&fs_info->scrub_lock);
1219 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1220
1221 down_read(&fs_info->scrub_super_lock);
1222 ret = scrub_supers(sdev);
1223 up_read(&fs_info->scrub_super_lock);
1224
1225 if (!ret)
1226 ret = scrub_enumerate_chunks(sdev, start, end);
1227
1228 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1229
1230 atomic_dec(&fs_info->scrubs_running);
1231 wake_up(&fs_info->scrub_pause_wait);
1232
1233 if (progress)
1234 memcpy(progress, &sdev->stat, sizeof(*progress));
1235
1236 mutex_lock(&fs_info->scrub_lock);
1237 dev->scrub_device = NULL;
1238 mutex_unlock(&fs_info->scrub_lock);
1239
1240 scrub_free_dev(sdev);
1241 scrub_workers_put(root);
1242
1243 return ret;
1244}
1245
1246int btrfs_scrub_pause(struct btrfs_root *root)
1247{
1248 struct btrfs_fs_info *fs_info = root->fs_info;
1249
1250 mutex_lock(&fs_info->scrub_lock);
1251 atomic_inc(&fs_info->scrub_pause_req);
1252 while (atomic_read(&fs_info->scrubs_paused) !=
1253 atomic_read(&fs_info->scrubs_running)) {
1254 mutex_unlock(&fs_info->scrub_lock);
1255 wait_event(fs_info->scrub_pause_wait,
1256 atomic_read(&fs_info->scrubs_paused) ==
1257 atomic_read(&fs_info->scrubs_running));
1258 mutex_lock(&fs_info->scrub_lock);
1259 }
1260 mutex_unlock(&fs_info->scrub_lock);
1261
1262 return 0;
1263}
1264
1265int btrfs_scrub_continue(struct btrfs_root *root)
1266{
1267 struct btrfs_fs_info *fs_info = root->fs_info;
1268
1269 atomic_dec(&fs_info->scrub_pause_req);
1270 wake_up(&fs_info->scrub_pause_wait);
1271 return 0;
1272}
1273
1274int btrfs_scrub_pause_super(struct btrfs_root *root)
1275{
1276 down_write(&root->fs_info->scrub_super_lock);
1277 return 0;
1278}
1279
1280int btrfs_scrub_continue_super(struct btrfs_root *root)
1281{
1282 up_write(&root->fs_info->scrub_super_lock);
1283 return 0;
1284}
1285
1286int btrfs_scrub_cancel(struct btrfs_root *root)
1287{
1288 struct btrfs_fs_info *fs_info = root->fs_info;
1289
1290 mutex_lock(&fs_info->scrub_lock);
1291 if (!atomic_read(&fs_info->scrubs_running)) {
1292 mutex_unlock(&fs_info->scrub_lock);
1293 return -ENOTCONN;
1294 }
1295
1296 atomic_inc(&fs_info->scrub_cancel_req);
1297 while (atomic_read(&fs_info->scrubs_running)) {
1298 mutex_unlock(&fs_info->scrub_lock);
1299 wait_event(fs_info->scrub_pause_wait,
1300 atomic_read(&fs_info->scrubs_running) == 0);
1301 mutex_lock(&fs_info->scrub_lock);
1302 }
1303 atomic_dec(&fs_info->scrub_cancel_req);
1304 mutex_unlock(&fs_info->scrub_lock);
1305
1306 return 0;
1307}
1308
1309int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1310{
1311 struct btrfs_fs_info *fs_info = root->fs_info;
1312 struct scrub_dev *sdev;
1313
1314 mutex_lock(&fs_info->scrub_lock);
1315 sdev = dev->scrub_device;
1316 if (!sdev) {
1317 mutex_unlock(&fs_info->scrub_lock);
1318 return -ENOTCONN;
1319 }
1320 atomic_inc(&sdev->cancel_req);
1321 while (dev->scrub_device) {
1322 mutex_unlock(&fs_info->scrub_lock);
1323 wait_event(fs_info->scrub_pause_wait,
1324 dev->scrub_device == NULL);
1325 mutex_lock(&fs_info->scrub_lock);
1326 }
1327 mutex_unlock(&fs_info->scrub_lock);
1328
1329 return 0;
1330}
1331int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1332{
1333 struct btrfs_fs_info *fs_info = root->fs_info;
1334 struct btrfs_device *dev;
1335 int ret;
1336
1337 /*
1338 * we have to hold the device_list_mutex here so the device
1339 * does not go away in cancel_dev. FIXME: find a better solution
1340 */
1341 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1342 dev = btrfs_find_device(root, devid, NULL, NULL);
1343 if (!dev) {
1344 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1345 return -ENODEV;
1346 }
1347 ret = btrfs_scrub_cancel_dev(root, dev);
1348 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1349
1350 return ret;
1351}
1352
1353int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1354 struct btrfs_scrub_progress *progress)
1355{
1356 struct btrfs_device *dev;
1357 struct scrub_dev *sdev = NULL;
1358
1359 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1360 dev = btrfs_find_device(root, devid, NULL, NULL);
1361 if (dev)
1362 sdev = dev->scrub_device;
1363 if (sdev)
1364 memcpy(progress, &sdev->stat, sizeof(*progress));
1365 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1366
1367 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1368}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 46d7eed7e965..cd0c7cd2c8fb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,6 +40,7 @@
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include "compat.h" 42#include "compat.h"
43#include "delayed-inode.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
45#include "transaction.h" 46#include "transaction.h"
@@ -741,7 +742,7 @@ static int btrfs_set_super(struct super_block *s, void *data)
741 * for multiple device setup. Make sure to keep it in sync. 742 * for multiple device setup. Make sure to keep it in sync.
742 */ 743 */
743static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, 744static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
744 const char *dev_name, void *data) 745 const char *device_name, void *data)
745{ 746{
746 struct block_device *bdev = NULL; 747 struct block_device *bdev = NULL;
747 struct super_block *s; 748 struct super_block *s;
@@ -764,7 +765,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
764 if (error) 765 if (error)
765 return ERR_PTR(error); 766 return ERR_PTR(error);
766 767
767 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 768 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
768 if (error) 769 if (error)
769 goto error_free_subvol_name; 770 goto error_free_subvol_name;
770 771
@@ -915,6 +916,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
915 return 0; 916 return 0;
916} 917}
917 918
 919/* Used to sort the devices by max_avail (descending sort) */
920static int btrfs_cmp_device_free_bytes(const void *dev_info1,
921 const void *dev_info2)
922{
923 if (((struct btrfs_device_info *)dev_info1)->max_avail >
924 ((struct btrfs_device_info *)dev_info2)->max_avail)
925 return -1;
926 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
927 ((struct btrfs_device_info *)dev_info2)->max_avail)
928 return 1;
929 else
930 return 0;
931}
932
933/*
 934 * sort the devices by max_avail, which holds the size of each device's
 935 * largest free extent (descending sort)
936 */
937static inline void btrfs_descending_sort_devices(
938 struct btrfs_device_info *devices,
939 size_t nr_devices)
940{
941 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
942 btrfs_cmp_device_free_bytes, NULL);
943}
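
A userspace analogue of the comparator and sort above, using qsort(); the struct and values are made up for illustration:

#include <stdio.h>
#include <stdlib.h>

struct dev_info { unsigned long long max_avail; };

/* descending order, same three-way comparison as the kernel comparator */
static int cmp_desc(const void *a, const void *b)
{
        unsigned long long x = ((const struct dev_info *)a)->max_avail;
        unsigned long long y = ((const struct dev_info *)b)->max_avail;
        return (x > y) ? -1 : (x < y) ? 1 : 0;
}

int main(void)
{
        struct dev_info devs[] = { {10}, {30}, {20} };

        qsort(devs, 3, sizeof(devs[0]), cmp_desc);
        for (int i = 0; i < 3; i++)
                printf("%llu\n", devs[i].max_avail); /* 30 20 10 */
        return 0;
}
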
944
918/* 945/*
919 * The helper to calc the free space on the devices that can be used to store 946 * The helper to calc the free space on the devices that can be used to store
920 * file data. 947 * file data.
@@ -1208,10 +1235,14 @@ static int __init init_btrfs_fs(void)
1208 if (err) 1235 if (err)
1209 goto free_extent_io; 1236 goto free_extent_io;
1210 1237
1211 err = btrfs_interface_init(); 1238 err = btrfs_delayed_inode_init();
1212 if (err) 1239 if (err)
1213 goto free_extent_map; 1240 goto free_extent_map;
1214 1241
1242 err = btrfs_interface_init();
1243 if (err)
1244 goto free_delayed_inode;
1245
1215 err = register_filesystem(&btrfs_fs_type); 1246 err = register_filesystem(&btrfs_fs_type);
1216 if (err) 1247 if (err)
1217 goto unregister_ioctl; 1248 goto unregister_ioctl;
@@ -1221,6 +1252,8 @@ static int __init init_btrfs_fs(void)
1221 1252
1222unregister_ioctl: 1253unregister_ioctl:
1223 btrfs_interface_exit(); 1254 btrfs_interface_exit();
1255free_delayed_inode:
1256 btrfs_delayed_inode_exit();
1224free_extent_map: 1257free_extent_map:
1225 extent_map_exit(); 1258 extent_map_exit();
1226free_extent_io: 1259free_extent_io:
@@ -1237,6 +1270,7 @@ free_sysfs:
1237static void __exit exit_btrfs_fs(void) 1270static void __exit exit_btrfs_fs(void)
1238{ 1271{
1239 btrfs_destroy_cachep(); 1272 btrfs_destroy_cachep();
1273 btrfs_delayed_inode_exit();
1240 extent_map_exit(); 1274 extent_map_exit();
1241 extent_io_exit(); 1275 extent_io_exit();
1242 btrfs_interface_exit(); 1276 btrfs_interface_exit();
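
init_btrfs_fs() slots btrfs_delayed_inode_init() into the existing goto-unwind chain: each step that can fail jumps to a label that tears down exactly the steps that already succeeded, and exit_btrfs_fs() repeats the same teardown unconditionally. A self-contained sketch of the idiom, with invented subsystem names rather than btrfs functions:

#include <stdio.h>

/* Hypothetical subsystems standing in for extent_map, delayed-inode, etc. */
static int init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int init_b(void) { puts("init b"); return 0; }
static void exit_b(void) { puts("exit b"); }
static int init_c(void) { puts("init c"); return -1; } /* pretend this fails */

static int init_all(void)
{
    int err;

    err = init_a();
    if (err)
        goto fail;
    err = init_b();
    if (err)
        goto free_a;            /* undo only what succeeded so far */
    err = init_c();
    if (err)
        goto free_b;
    return 0;

free_b:
    exit_b();
free_a:
    exit_a();
fail:
    return err;
}

int main(void)
{
    return init_all() ? 1 : 0;  /* prints init a/b/c, then exit b, exit a */
}
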
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4ce16ef702a3..c3c223ae6691 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -174,86 +174,9 @@ static const struct sysfs_ops btrfs_root_attr_ops = {
174 .store = btrfs_root_attr_store, 174 .store = btrfs_root_attr_store,
175}; 175};
176 176
177static struct kobj_type btrfs_root_ktype = {
178 .default_attrs = btrfs_root_attrs,
179 .sysfs_ops = &btrfs_root_attr_ops,
180 .release = btrfs_root_release,
181};
182
183static struct kobj_type btrfs_super_ktype = {
184 .default_attrs = btrfs_super_attrs,
185 .sysfs_ops = &btrfs_super_attr_ops,
186 .release = btrfs_super_release,
187};
188
189/* /sys/fs/btrfs/ entry */ 177/* /sys/fs/btrfs/ entry */
190static struct kset *btrfs_kset; 178static struct kset *btrfs_kset;
191 179
192int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
193{
194 int error;
195 char *name;
196 char c;
197 int len = strlen(fs->sb->s_id) + 1;
198 int i;
199
200 name = kmalloc(len, GFP_NOFS);
201 if (!name) {
202 error = -ENOMEM;
203 goto fail;
204 }
205
206 for (i = 0; i < len; i++) {
207 c = fs->sb->s_id[i];
208 if (c == '/' || c == '\\')
209 c = '!';
210 name[i] = c;
211 }
212 name[len] = '\0';
213
214 fs->super_kobj.kset = btrfs_kset;
215 error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
216 NULL, "%s", name);
217 kfree(name);
218 if (error)
219 goto fail;
220
221 return 0;
222
223fail:
224 printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
225 return error;
226}
227
228int btrfs_sysfs_add_root(struct btrfs_root *root)
229{
230 int error;
231
232 error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
233 &root->fs_info->super_kobj,
234 "%s", root->name);
235 if (error)
236 goto fail;
237
238 return 0;
239
240fail:
241 printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
242 return error;
243}
244
245void btrfs_sysfs_del_root(struct btrfs_root *root)
246{
247 kobject_put(&root->root_kobj);
248 wait_for_completion(&root->kobj_unregister);
249}
250
251void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
252{
253 kobject_put(&fs->super_kobj);
254 wait_for_completion(&fs->kobj_unregister);
255}
256
257int btrfs_init_sysfs(void) 180int btrfs_init_sysfs(void)
258{ 181{
259 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); 182 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
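
The per-filesystem and per-root sysfs objects are dropped here, leaving only the bare /sys/fs/btrfs kset. Incidentally, the deleted btrfs_sysfs_add_super() escaped '/' and '\\' in sb->s_id (neither is legal in a sysfs name), but its copy loop already wrote the terminating NUL (len is strlen()+1 and the loop runs to i < len), so the trailing name[len] = '\0' stored one byte past the kmalloc'ed buffer. A corrected userspace sketch of that sanitizer:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Copy id, replacing '/' and '\\' with '!'; caller frees the result. */
static char *sanitize_id(const char *id)
{
    size_t len = strlen(id) + 1;    /* +1 covers the terminating NUL */
    char *name = malloc(len);
    size_t i;

    if (!name)
        return NULL;
    for (i = 0; i < len; i++) {     /* the NUL is copied on the last pass */
        char c = id[i];
        if (c == '/' || c == '\\')
            c = '!';
        name[i] = c;
    }
    return name;                    /* no out-of-bounds name[len] store */
}

int main(void)
{
    char *n = sanitize_id("dm-0/vol");
    if (n) {
        puts(n);                    /* dm-0!vol */
        free(n);
    }
    return 0;
}
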
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c571734d5e5a..dc80f7156923 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -27,6 +27,7 @@
27#include "transaction.h" 27#include "transaction.h"
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h"
30 31
31#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
32 33
@@ -80,8 +81,7 @@ static noinline int join_transaction(struct btrfs_root *root)
80 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
82 extent_io_tree_init(&cur_trans->dirty_pages, 83 extent_io_tree_init(&cur_trans->dirty_pages,
83 root->fs_info->btree_inode->i_mapping, 84 root->fs_info->btree_inode->i_mapping);
84 GFP_NOFS);
85 spin_lock(&root->fs_info->new_trans_lock); 85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
@@ -347,49 +347,6 @@ out_unlock:
347 return ret; 347 return ret;
348} 348}
349 349
350#if 0
351/*
352 * rate limit against the drop_snapshot code. This helps to slow down new
353 * operations if the drop_snapshot code isn't able to keep up.
354 */
355static void throttle_on_drops(struct btrfs_root *root)
356{
357 struct btrfs_fs_info *info = root->fs_info;
358 int harder_count = 0;
359
360harder:
361 if (atomic_read(&info->throttles)) {
362 DEFINE_WAIT(wait);
363 int thr;
364 thr = atomic_read(&info->throttle_gen);
365
366 do {
367 prepare_to_wait(&info->transaction_throttle,
368 &wait, TASK_UNINTERRUPTIBLE);
369 if (!atomic_read(&info->throttles)) {
370 finish_wait(&info->transaction_throttle, &wait);
371 break;
372 }
373 schedule();
374 finish_wait(&info->transaction_throttle, &wait);
375 } while (thr == atomic_read(&info->throttle_gen));
376 harder_count++;
377
378 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
379 harder_count < 2)
380 goto harder;
381
382 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
383 harder_count < 10)
384 goto harder;
385
386 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
387 harder_count < 20)
388 goto harder;
389 }
390}
391#endif
392
393void btrfs_throttle(struct btrfs_root *root) 350void btrfs_throttle(struct btrfs_root *root)
394{ 351{
395 mutex_lock(&root->fs_info->trans_mutex); 352 mutex_lock(&root->fs_info->trans_mutex);
@@ -487,19 +444,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
487int btrfs_end_transaction(struct btrfs_trans_handle *trans, 444int btrfs_end_transaction(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root) 445 struct btrfs_root *root)
489{ 446{
490 return __btrfs_end_transaction(trans, root, 0, 1); 447 int ret;
448
449 ret = __btrfs_end_transaction(trans, root, 0, 1);
450 if (ret)
451 return ret;
452 return 0;
491} 453}
492 454
493int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 455int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
494 struct btrfs_root *root) 456 struct btrfs_root *root)
495{ 457{
496 return __btrfs_end_transaction(trans, root, 1, 1); 458 int ret;
459
460 ret = __btrfs_end_transaction(trans, root, 1, 1);
461 if (ret)
462 return ret;
463 return 0;
497} 464}
498 465
499int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 466int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
500 struct btrfs_root *root) 467 struct btrfs_root *root)
501{ 468{
502 return __btrfs_end_transaction(trans, root, 0, 0); 469 int ret;
470
471 ret = __btrfs_end_transaction(trans, root, 0, 0);
472 if (ret)
473 return ret;
474 return 0;
475}
476
477int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
478 struct btrfs_root *root)
479{
480 return __btrfs_end_transaction(trans, root, 1, 1);
503} 481}
504 482
505/* 483/*
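
The three public end-transaction variants now capture and propagate the return value of __btrfs_end_transaction() (equivalent to returning the call directly, but with an obvious place to hang extra handling), and btrfs_end_transaction_dmeta() is added for the delayed-metadata code with the same throttle/lock flags as the throttled variant. A toy sketch of the one-worker-many-wrappers pattern; the types and names here are invented for illustration:

#include <stdio.h>

struct trans { int id; };

/* One internal worker carries all the option flags... */
static int end_transaction(struct trans *t, int throttle, int lock)
{
    printf("end trans %d throttle=%d lock=%d\n", t->id, throttle, lock);
    return 0;
}

/* ...and each public entry point is a one-line wrapper with fixed flags. */
static int end_transaction_plain(struct trans *t)    { return end_transaction(t, 0, 1); }
static int end_transaction_throttle(struct trans *t) { return end_transaction(t, 1, 1); }
static int end_transaction_nolock(struct trans *t)   { return end_transaction(t, 0, 0); }

int main(void)
{
    struct trans t = { 7 };

    return end_transaction_plain(&t) ||
           end_transaction_throttle(&t) ||
           end_transaction_nolock(&t);
}
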
@@ -760,8 +738,14 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
760 btrfs_update_reloc_root(trans, root); 738 btrfs_update_reloc_root(trans, root);
761 btrfs_orphan_commit_root(trans, root); 739 btrfs_orphan_commit_root(trans, root);
762 740
741 btrfs_save_ino_cache(root, trans);
742
763 if (root->commit_root != root->node) { 743 if (root->commit_root != root->node) {
744 mutex_lock(&root->fs_commit_mutex);
764 switch_commit_root(root); 745 switch_commit_root(root);
746 btrfs_unpin_free_ino(root);
747 mutex_unlock(&root->fs_commit_mutex);
748
765 btrfs_set_root_node(&root->root_item, 749 btrfs_set_root_node(&root->root_item,
766 root->node); 750 root->node);
767 } 751 }
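
commit_fs_roots() now saves each root's inode-number cache with btrfs_save_ino_cache() and performs the commit-root switch plus btrfs_unpin_free_ino() under fs_commit_mutex, so users of the free-ino cache never observe a half-switched root. A hedged sketch of publishing a new commit root under a mutex; the lock name and helpers are stand-ins, not the btrfs internals:

#include <pthread.h>
#include <stdio.h>

struct root {
    pthread_mutex_t commit_mutex;
    int node;                       /* stand-in for the current tree node */
    int commit_root;                /* last committed version readers use */
};

/* Publish the new commit root atomically with respect to cache readers. */
static void switch_commit_root(struct root *r)
{
    pthread_mutex_lock(&r->commit_mutex);
    r->commit_root = r->node;       /* the switch itself */
    /* ...unpin/flush any state that was held back for the old root... */
    pthread_mutex_unlock(&r->commit_mutex);
}

int main(void)
{
    struct root r = { PTHREAD_MUTEX_INITIALIZER, 42, 0 };

    switch_commit_root(&r);
    printf("commit_root=%d\n", r.commit_root);
    return 0;
}
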
@@ -809,97 +793,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
809 return ret; 793 return ret;
810} 794}
811 795
812#if 0
813/*
814 * when dropping snapshots, we generate a ton of delayed refs, and it makes
815 * sense not to join the transaction while it is trying to flush the current
816 * queue of delayed refs out.
817 *
818 * This is used by the drop snapshot code only
819 */
820static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
821{
822 DEFINE_WAIT(wait);
823
824 mutex_lock(&info->trans_mutex);
825 while (info->running_transaction &&
826 info->running_transaction->delayed_refs.flushing) {
827 prepare_to_wait(&info->transaction_wait, &wait,
828 TASK_UNINTERRUPTIBLE);
829 mutex_unlock(&info->trans_mutex);
830
831 schedule();
832
833 mutex_lock(&info->trans_mutex);
834 finish_wait(&info->transaction_wait, &wait);
835 }
836 mutex_unlock(&info->trans_mutex);
837 return 0;
838}
839
840/*
841 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
842 * all of them
843 */
844int btrfs_drop_dead_root(struct btrfs_root *root)
845{
846 struct btrfs_trans_handle *trans;
847 struct btrfs_root *tree_root = root->fs_info->tree_root;
848 unsigned long nr;
849 int ret;
850
851 while (1) {
852 /*
853 * we don't want to jump in and create a bunch of
854 * delayed refs if the transaction is starting to close
855 */
856 wait_transaction_pre_flush(tree_root->fs_info);
857 trans = btrfs_start_transaction(tree_root, 1);
858
859 /*
860 * we've joined a transaction, make sure it isn't
861 * closing right now
862 */
863 if (trans->transaction->delayed_refs.flushing) {
864 btrfs_end_transaction(trans, tree_root);
865 continue;
866 }
867
868 ret = btrfs_drop_snapshot(trans, root);
869 if (ret != -EAGAIN)
870 break;
871
872 ret = btrfs_update_root(trans, tree_root,
873 &root->root_key,
874 &root->root_item);
875 if (ret)
876 break;
877
878 nr = trans->blocks_used;
879 ret = btrfs_end_transaction(trans, tree_root);
880 BUG_ON(ret);
881
882 btrfs_btree_balance_dirty(tree_root, nr);
883 cond_resched();
884 }
885 BUG_ON(ret);
886
887 ret = btrfs_del_root(trans, tree_root, &root->root_key);
888 BUG_ON(ret);
889
890 nr = trans->blocks_used;
891 ret = btrfs_end_transaction(trans, tree_root);
892 BUG_ON(ret);
893
894 free_extent_buffer(root->node);
895 free_extent_buffer(root->commit_root);
896 kfree(root);
897
898 btrfs_btree_balance_dirty(tree_root, nr);
899 return ret;
900}
901#endif
902
903/* 796/*
904 * new snapshots need to be created at a very specific time in the 797 * new snapshots need to be created at a very specific time in the
905 * transaction commit. This does the actual creation 798 * transaction commit. This does the actual creation
@@ -930,7 +823,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
930 goto fail; 823 goto fail;
931 } 824 }
932 825
933 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 826 ret = btrfs_find_free_objectid(tree_root, &objectid);
934 if (ret) { 827 if (ret) {
935 pending->error = ret; 828 pending->error = ret;
936 goto fail; 829 goto fail;
@@ -967,7 +860,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
967 BUG_ON(ret); 860 BUG_ON(ret);
968 ret = btrfs_insert_dir_item(trans, parent_root, 861 ret = btrfs_insert_dir_item(trans, parent_root,
969 dentry->d_name.name, dentry->d_name.len, 862 dentry->d_name.name, dentry->d_name.len,
970 parent_inode->i_ino, &key, 863 parent_inode, &key,
971 BTRFS_FT_DIR, index); 864 BTRFS_FT_DIR, index);
972 BUG_ON(ret); 865 BUG_ON(ret);
973 866
@@ -1009,7 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1009 */ 902 */
1010 ret = btrfs_add_root_ref(trans, tree_root, objectid, 903 ret = btrfs_add_root_ref(trans, tree_root, objectid,
1011 parent_root->root_key.objectid, 904 parent_root->root_key.objectid,
1012 parent_inode->i_ino, index, 905 btrfs_ino(parent_inode), index,
1013 dentry->d_name.name, dentry->d_name.len); 906 dentry->d_name.name, dentry->d_name.len);
1014 BUG_ON(ret); 907 BUG_ON(ret);
1015 dput(parent); 908 dput(parent);
@@ -1037,6 +930,14 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1037 int ret; 930 int ret;
1038 931
1039 list_for_each_entry(pending, head, list) { 932 list_for_each_entry(pending, head, list) {
933 /*
934 * We must deal with the delayed items before creating
 935 * snapshots, or we will create a snapshot with inconsistent
936 * information.
937 */
938 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
939 BUG_ON(ret);
940
1040 ret = create_pending_snapshot(trans, fs_info, pending); 941 ret = create_pending_snapshot(trans, fs_info, pending);
1041 BUG_ON(ret); 942 BUG_ON(ret);
1042 } 943 }
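
A snapshot copies the source root's current tree, so inode updates still queued as delayed items would silently be absent from it; the btrfs_run_delayed_items() call added above flushes the queue first, and the commit path repeats the flush after the snapshots are created. A compact sketch of the ordering, with the queue and snapshot reduced to toy types:

#include <stdio.h>

#define MAX_ITEMS 8

/* Illustrative delayed-item queue: updates not yet applied to the tree. */
struct delayed_queue { int items[MAX_ITEMS]; int n; };
static int tree_state;

static void run_delayed_items(struct delayed_queue *q)
{
    int i;
    for (i = 0; i < q->n; i++)
        tree_state += q->items[i];  /* apply each pending update */
    q->n = 0;
}

static int create_snapshot(void)
{
    return tree_state;              /* a snapshot sees only applied state */
}

int main(void)
{
    struct delayed_queue q = { { 1, 2, 3 }, 3 };

    run_delayed_items(&q);          /* flush first... */
    printf("snapshot=%d\n", create_snapshot()); /* ...then snapshot: 6 */
    return 0;
}
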
@@ -1290,6 +1191,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1290 BUG_ON(ret); 1191 BUG_ON(ret);
1291 } 1192 }
1292 1193
1194 ret = btrfs_run_delayed_items(trans, root);
1195 BUG_ON(ret);
1196
1293 /* 1197 /*
1294 * rename don't use btrfs_join_transaction, so, once we 1198 * rename don't use btrfs_join_transaction, so, once we
1295 * set the transaction to blocked above, we aren't going 1199 * set the transaction to blocked above, we aren't going
@@ -1316,11 +1220,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1316 ret = create_pending_snapshots(trans, root->fs_info); 1220 ret = create_pending_snapshots(trans, root->fs_info);
1317 BUG_ON(ret); 1221 BUG_ON(ret);
1318 1222
1223 ret = btrfs_run_delayed_items(trans, root);
1224 BUG_ON(ret);
1225
1319 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1226 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1320 BUG_ON(ret); 1227 BUG_ON(ret);
1321 1228
1322 WARN_ON(cur_trans != trans->transaction); 1229 WARN_ON(cur_trans != trans->transaction);
1323 1230
1231 btrfs_scrub_pause(root);
1324 /* btrfs_commit_tree_roots is responsible for getting the 1232 /* btrfs_commit_tree_roots is responsible for getting the
1325 * various roots consistent with each other. Every pointer 1233 * various roots consistent with each other. Every pointer
1326 * in the tree of tree roots has to point to the most up to date 1234 * in the tree of tree roots has to point to the most up to date
@@ -1405,6 +1313,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1405 1313
1406 mutex_unlock(&root->fs_info->trans_mutex); 1314 mutex_unlock(&root->fs_info->trans_mutex);
1407 1315
1316 btrfs_scrub_continue(root);
1317
1408 if (current->journal_info == trans) 1318 if (current->journal_info == trans)
1409 current->journal_info = NULL; 1319 current->journal_info = NULL;
1410 1320
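
btrfs_commit_transaction() now brackets the tree-root commit with btrfs_scrub_pause() and btrfs_scrub_continue(), keeping the new background scrubber from walking trees while they are being rewritten. One plausible pause/resume mechanism is a flag plus a condition variable that workers check between units of work; this pthread sketch shows the general shape only and is not what scrub.c actually implements:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool paused;

static void scrub_pause(void)       /* called by the committer */
{
    pthread_mutex_lock(&lock);
    paused = true;
    pthread_mutex_unlock(&lock);
}

static void scrub_continue(void)
{
    pthread_mutex_lock(&lock);
    paused = false;
    pthread_cond_broadcast(&cond);  /* wake any waiting scrub workers */
    pthread_mutex_unlock(&lock);
}

static void scrub_checkpoint(void)  /* called by workers between stripes */
{
    pthread_mutex_lock(&lock);
    while (paused)
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    scrub_pause();
    /* ...commit tree roots, write supers... */
    scrub_continue();
    scrub_checkpoint();             /* a worker now passes straight through */
    puts("done");
    return 0;
}
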
@@ -1432,6 +1342,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1432 root = list_entry(list.next, struct btrfs_root, root_list); 1342 root = list_entry(list.next, struct btrfs_root, root_list);
1433 list_del(&root->root_list); 1343 list_del(&root->root_list);
1434 1344
1345 btrfs_kill_all_delayed_nodes(root);
1346
1435 if (btrfs_header_backref_rev(root->node) < 1347 if (btrfs_header_backref_rev(root->node) <
1436 BTRFS_MIXED_BACKREF_REV) 1348 BTRFS_MIXED_BACKREF_REV)
1437 btrfs_drop_snapshot(root, NULL, 0); 1349 btrfs_drop_snapshot(root, NULL, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e441acc6c584..804c88639e5d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -101,11 +101,8 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 103 struct btrfs_root *root);
104int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
105 struct btrfs_root *root);
106 104
107int btrfs_add_dead_root(struct btrfs_root *root); 105int btrfs_add_dead_root(struct btrfs_root *root);
108int btrfs_drop_dead_root(struct btrfs_root *root);
109int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 106int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
110int btrfs_clean_old_snapshots(struct btrfs_root *root); 107int btrfs_clean_old_snapshots(struct btrfs_root *root);
111int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 108int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -115,6 +112,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
115 int wait_for_unblock); 112 int wait_for_unblock);
116int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 113int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
117 struct btrfs_root *root); 114 struct btrfs_root *root);
115int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
116 struct btrfs_root *root);
118int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root); 118 struct btrfs_root *root);
120void btrfs_throttle(struct btrfs_root *root); 119void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 992ab425599d..3b580ee8ab1d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -97,7 +97,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
97 ret = 0; 97 ret = 0;
98 goto out; 98 goto out;
99 } 99 }
100 btrfs_release_path(root, path); 100 btrfs_release_path(path);
101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1); 101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
102 102
103 if (wret < 0) { 103 if (wret < 0) {
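
This is the first of one mechanical change repeated through the rest of the series: btrfs_release_path() loses its root parameter, which was never used; everything needed to drop references and locks is recorded in the path itself. A reduced sketch of why the path alone suffices (fields trimmed to the essentials, not the full struct btrfs_path):

#include <stddef.h>
#include <stdio.h>

#define MAX_LEVEL 8

struct buffer { int refs; };

/* A path remembers, per tree level, which buffer it holds a reference on. */
struct path {
    struct buffer *nodes[MAX_LEVEL];
};

/* Everything needed to release the path lives in the path itself. */
static void release_path(struct path *p)
{
    int i;
    for (i = 0; i < MAX_LEVEL; i++) {
        if (!p->nodes[i])
            continue;
        p->nodes[i]->refs--;        /* drop our reference */
        p->nodes[i] = NULL;
    }
}

int main(void)
{
    struct buffer leaf = { 1 };
    struct path p = { { &leaf } };

    release_path(&p);               /* no root argument required */
    printf("refs=%d\n", leaf.refs); /* 0 */
    return 0;
}
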
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c2d887566400..592396c6dc47 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -333,13 +333,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
333 goto insert; 333 goto insert;
334 334
335 if (item_size == 0) { 335 if (item_size == 0) {
336 btrfs_release_path(root, path); 336 btrfs_release_path(path);
337 return 0; 337 return 0;
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) { 341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path); 342 btrfs_release_path(path);
343 kfree(dst_copy); 343 kfree(dst_copy);
344 kfree(src_copy); 344 kfree(src_copy);
345 return -ENOMEM; 345 return -ENOMEM;
@@ -361,13 +361,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
361 * sync 361 * sync
362 */ 362 */
363 if (ret == 0) { 363 if (ret == 0) {
364 btrfs_release_path(root, path); 364 btrfs_release_path(path);
365 return 0; 365 return 0;
366 } 366 }
367 367
368 } 368 }
369insert: 369insert:
370 btrfs_release_path(root, path); 370 btrfs_release_path(path);
371 /* try to insert the key into the destination tree */ 371 /* try to insert the key into the destination tree */
372 ret = btrfs_insert_empty_item(trans, root, path, 372 ret = btrfs_insert_empty_item(trans, root, path,
373 key, item_size); 373 key, item_size);
@@ -437,7 +437,7 @@ insert:
437 } 437 }
438no_copy: 438no_copy:
439 btrfs_mark_buffer_dirty(path->nodes[0]); 439 btrfs_mark_buffer_dirty(path->nodes[0]);
440 btrfs_release_path(root, path); 440 btrfs_release_path(path);
441 return 0; 441 return 0;
442} 442}
443 443
@@ -518,7 +518,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
518 * file. This must be done before the btrfs_drop_extents run 518 * file. This must be done before the btrfs_drop_extents run
519 * so we don't try to drop this extent. 519 * so we don't try to drop this extent.
520 */ 520 */
521 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 521 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
522 start, 0); 522 start, 0);
523 523
524 if (ret == 0 && 524 if (ret == 0 &&
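
The other recurring substitution starts here: reads of inode->i_ino become btrfs_ino(inode). With the new free-inode-number cache, the objectid that btrfs trees key on can differ from what the VFS i_ino field holds, so a helper fetches it from the btrfs-specific part of the inode. An illustrative sketch of the accessor shape; the field names are assumptions, not the real definition:

#include <stdio.h>

typedef unsigned long long u64;

struct inode { u64 i_ino; };

/* Illustrative btrfs inode wrapper carrying its own object id. */
struct btrfs_inode {
    struct inode vfs_inode;
    u64 location_objectid;          /* the key objectid btrfs trees use */
};

static struct btrfs_inode *BTRFS_I(struct inode *inode)
{
    /* container_of in miniature: vfs_inode is the first member */
    return (struct btrfs_inode *)inode;
}

static u64 btrfs_ino(struct inode *inode)
{
    return BTRFS_I(inode)->location_objectid;
}

int main(void)
{
    struct btrfs_inode bi = { { 999 }, 257 };

    printf("vfs=%llu btrfs=%llu\n", bi.vfs_inode.i_ino,
           btrfs_ino(&bi.vfs_inode));
    return 0;
}
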
@@ -543,11 +543,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
543 * we don't have to do anything 543 * we don't have to do anything
544 */ 544 */
545 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 545 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
546 btrfs_release_path(root, path); 546 btrfs_release_path(path);
547 goto out; 547 goto out;
548 } 548 }
549 } 549 }
550 btrfs_release_path(root, path); 550 btrfs_release_path(path);
551 551
552 saved_nbytes = inode_get_bytes(inode); 552 saved_nbytes = inode_get_bytes(inode);
553 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
@@ -600,7 +600,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
600 key->objectid, offset, &ins); 600 key->objectid, offset, &ins);
601 BUG_ON(ret); 601 BUG_ON(ret);
602 } 602 }
603 btrfs_release_path(root, path); 603 btrfs_release_path(path);
604 604
605 if (btrfs_file_extent_compression(eb, item)) { 605 if (btrfs_file_extent_compression(eb, item)) {
606 csum_start = ins.objectid; 606 csum_start = ins.objectid;
@@ -614,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
614 614
615 ret = btrfs_lookup_csums_range(root->log_root, 615 ret = btrfs_lookup_csums_range(root->log_root,
616 csum_start, csum_end - 1, 616 csum_start, csum_end - 1,
617 &ordered_sums); 617 &ordered_sums, 0);
618 BUG_ON(ret); 618 BUG_ON(ret);
619 while (!list_empty(&ordered_sums)) { 619 while (!list_empty(&ordered_sums)) {
620 struct btrfs_ordered_sum *sums; 620 struct btrfs_ordered_sum *sums;
@@ -629,7 +629,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
629 kfree(sums); 629 kfree(sums);
630 } 630 }
631 } else { 631 } else {
632 btrfs_release_path(root, path); 632 btrfs_release_path(path);
633 } 633 }
634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
635 /* inline extents are easy, we just overwrite them */ 635 /* inline extents are easy, we just overwrite them */
@@ -675,7 +675,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
675 return -ENOMEM; 675 return -ENOMEM;
676 676
677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
678 btrfs_release_path(root, path); 678 btrfs_release_path(path);
679 679
680 inode = read_one_inode(root, location.objectid); 680 inode = read_one_inode(root, location.objectid);
681 if (!inode) { 681 if (!inode) {
@@ -716,7 +716,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
716 goto out; 716 goto out;
717 } else 717 } else
718 goto out; 718 goto out;
719 btrfs_release_path(root, path); 719 btrfs_release_path(path);
720 720
721 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 721 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
722 if (di && !IS_ERR(di)) { 722 if (di && !IS_ERR(di)) {
@@ -727,7 +727,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
727 goto out; 727 goto out;
728 match = 1; 728 match = 1;
729out: 729out:
730 btrfs_release_path(root, path); 730 btrfs_release_path(path);
731 return match; 731 return match;
732} 732}
733 733
@@ -838,7 +838,7 @@ again:
838 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 838 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
839 839
840 /* if we already have a perfect match, we're done */ 840 /* if we already have a perfect match, we're done */
841 if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, 841 if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
842 btrfs_inode_ref_index(eb, ref), 842 btrfs_inode_ref_index(eb, ref),
843 name, namelen)) { 843 name, namelen)) {
844 goto out; 844 goto out;
@@ -890,7 +890,7 @@ again:
890 if (!backref_in_log(log, key, victim_name, 890 if (!backref_in_log(log, key, victim_name,
891 victim_name_len)) { 891 victim_name_len)) {
892 btrfs_inc_nlink(inode); 892 btrfs_inc_nlink(inode);
893 btrfs_release_path(root, path); 893 btrfs_release_path(path);
894 894
895 ret = btrfs_unlink_inode(trans, root, dir, 895 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 896 inode, victim_name,
@@ -907,7 +907,7 @@ again:
907 */ 907 */
908 search_done = 1; 908 search_done = 1;
909 } 909 }
910 btrfs_release_path(root, path); 910 btrfs_release_path(path);
911 911
912insert: 912insert:
913 /* insert our name */ 913 /* insert our name */
@@ -928,7 +928,7 @@ out:
928 BUG_ON(ret); 928 BUG_ON(ret);
929 929
930out_nowrite: 930out_nowrite:
931 btrfs_release_path(root, path); 931 btrfs_release_path(path);
932 iput(dir); 932 iput(dir);
933 iput(inode); 933 iput(inode);
934 return 0; 934 return 0;
@@ -966,8 +966,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
966 unsigned long ptr; 966 unsigned long ptr;
967 unsigned long ptr_end; 967 unsigned long ptr_end;
968 int name_len; 968 int name_len;
969 u64 ino = btrfs_ino(inode);
969 970
970 key.objectid = inode->i_ino; 971 key.objectid = ino;
971 key.type = BTRFS_INODE_REF_KEY; 972 key.type = BTRFS_INODE_REF_KEY;
972 key.offset = (u64)-1; 973 key.offset = (u64)-1;
973 974
@@ -986,7 +987,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
986 } 987 }
987 btrfs_item_key_to_cpu(path->nodes[0], &key, 988 btrfs_item_key_to_cpu(path->nodes[0], &key,
988 path->slots[0]); 989 path->slots[0]);
989 if (key.objectid != inode->i_ino || 990 if (key.objectid != ino ||
990 key.type != BTRFS_INODE_REF_KEY) 991 key.type != BTRFS_INODE_REF_KEY)
991 break; 992 break;
992 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 993 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
@@ -1005,9 +1006,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1005 if (key.offset == 0) 1006 if (key.offset == 0)
1006 break; 1007 break;
1007 key.offset--; 1008 key.offset--;
1008 btrfs_release_path(root, path); 1009 btrfs_release_path(path);
1009 } 1010 }
1010 btrfs_release_path(root, path); 1011 btrfs_release_path(path);
1011 if (nlink != inode->i_nlink) { 1012 if (nlink != inode->i_nlink) {
1012 inode->i_nlink = nlink; 1013 inode->i_nlink = nlink;
1013 btrfs_update_inode(trans, root, inode); 1014 btrfs_update_inode(trans, root, inode);
@@ -1017,10 +1018,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1017 if (inode->i_nlink == 0) { 1018 if (inode->i_nlink == 0) {
1018 if (S_ISDIR(inode->i_mode)) { 1019 if (S_ISDIR(inode->i_mode)) {
1019 ret = replay_dir_deletes(trans, root, NULL, path, 1020 ret = replay_dir_deletes(trans, root, NULL, path,
1020 inode->i_ino, 1); 1021 ino, 1);
1021 BUG_ON(ret); 1022 BUG_ON(ret);
1022 } 1023 }
1023 ret = insert_orphan_item(trans, root, inode->i_ino); 1024 ret = insert_orphan_item(trans, root, ino);
1024 BUG_ON(ret); 1025 BUG_ON(ret);
1025 } 1026 }
1026 btrfs_free_path(path); 1027 btrfs_free_path(path);
@@ -1059,7 +1060,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1059 if (ret) 1060 if (ret)
1060 goto out; 1061 goto out;
1061 1062
1062 btrfs_release_path(root, path); 1063 btrfs_release_path(path);
1063 inode = read_one_inode(root, key.offset); 1064 inode = read_one_inode(root, key.offset);
1064 if (!inode) 1065 if (!inode)
1065 return -EIO; 1066 return -EIO;
@@ -1078,7 +1079,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1078 } 1079 }
1079 ret = 0; 1080 ret = 0;
1080out: 1081out:
1081 btrfs_release_path(root, path); 1082 btrfs_release_path(path);
1082 return ret; 1083 return ret;
1083} 1084}
1084 1085
@@ -1107,7 +1108,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1107 1108
1108 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1109 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1109 1110
1110 btrfs_release_path(root, path); 1111 btrfs_release_path(path);
1111 if (ret == 0) { 1112 if (ret == 0) {
1112 btrfs_inc_nlink(inode); 1113 btrfs_inc_nlink(inode);
1113 btrfs_update_inode(trans, root, inode); 1114 btrfs_update_inode(trans, root, inode);
@@ -1204,7 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1204 exists = 1; 1205 exists = 1;
1205 else 1206 else
1206 exists = 0; 1207 exists = 0;
1207 btrfs_release_path(root, path); 1208 btrfs_release_path(path);
1208 1209
1209 if (key->type == BTRFS_DIR_ITEM_KEY) { 1210 if (key->type == BTRFS_DIR_ITEM_KEY) {
1210 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1211 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1217,7 +1218,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1217 } else { 1218 } else {
1218 BUG(); 1219 BUG();
1219 } 1220 }
1220 if (!dst_di || IS_ERR(dst_di)) { 1221 if (IS_ERR_OR_NULL(dst_di)) {
1221 /* we need a sequence number to insert, so we only 1222 /* we need a sequence number to insert, so we only
1222 * do inserts for the BTRFS_DIR_INDEX_KEY types 1223 * do inserts for the BTRFS_DIR_INDEX_KEY types
1223 */ 1224 */
@@ -1248,13 +1249,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1248 if (key->type == BTRFS_DIR_INDEX_KEY) 1249 if (key->type == BTRFS_DIR_INDEX_KEY)
1249 goto insert; 1250 goto insert;
1250out: 1251out:
1251 btrfs_release_path(root, path); 1252 btrfs_release_path(path);
1252 kfree(name); 1253 kfree(name);
1253 iput(dir); 1254 iput(dir);
1254 return 0; 1255 return 0;
1255 1256
1256insert: 1257insert:
1257 btrfs_release_path(root, path); 1258 btrfs_release_path(path);
1258 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1259 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1259 name, name_len, log_type, &log_key); 1260 name, name_len, log_type, &log_key);
1260 1261
@@ -1375,7 +1376,7 @@ next:
1375 *end_ret = found_end; 1376 *end_ret = found_end;
1376 ret = 0; 1377 ret = 0;
1377out: 1378out:
1378 btrfs_release_path(root, path); 1379 btrfs_release_path(path);
1379 return ret; 1380 return ret;
1380} 1381}
1381 1382
@@ -1438,10 +1439,10 @@ again:
1438 dir_key->offset, 1439 dir_key->offset,
1439 name, name_len, 0); 1440 name, name_len, 0);
1440 } 1441 }
1441 if (!log_di || IS_ERR(log_di)) { 1442 if (IS_ERR_OR_NULL(log_di)) {
1442 btrfs_dir_item_key_to_cpu(eb, di, &location); 1443 btrfs_dir_item_key_to_cpu(eb, di, &location);
1443 btrfs_release_path(root, path); 1444 btrfs_release_path(path);
1444 btrfs_release_path(log, log_path); 1445 btrfs_release_path(log_path);
1445 inode = read_one_inode(root, location.objectid); 1446 inode = read_one_inode(root, location.objectid);
1446 if (!inode) { 1447 if (!inode) {
1447 kfree(name); 1448 kfree(name);
@@ -1468,7 +1469,7 @@ again:
1468 ret = 0; 1469 ret = 0;
1469 goto out; 1470 goto out;
1470 } 1471 }
1471 btrfs_release_path(log, log_path); 1472 btrfs_release_path(log_path);
1472 kfree(name); 1473 kfree(name);
1473 1474
1474 ptr = (unsigned long)(di + 1); 1475 ptr = (unsigned long)(di + 1);
@@ -1476,8 +1477,8 @@ again:
1476 } 1477 }
1477 ret = 0; 1478 ret = 0;
1478out: 1479out:
1479 btrfs_release_path(root, path); 1480 btrfs_release_path(path);
1480 btrfs_release_path(log, log_path); 1481 btrfs_release_path(log_path);
1481 return ret; 1482 return ret;
1482} 1483}
1483 1484
@@ -1565,7 +1566,7 @@ again:
1565 break; 1566 break;
1566 dir_key.offset = found_key.offset + 1; 1567 dir_key.offset = found_key.offset + 1;
1567 } 1568 }
1568 btrfs_release_path(root, path); 1569 btrfs_release_path(path);
1569 if (range_end == (u64)-1) 1570 if (range_end == (u64)-1)
1570 break; 1571 break;
1571 range_start = range_end + 1; 1572 range_start = range_end + 1;
@@ -1576,11 +1577,11 @@ next_type:
1576 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 1577 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1577 key_type = BTRFS_DIR_LOG_INDEX_KEY; 1578 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1578 dir_key.type = BTRFS_DIR_INDEX_KEY; 1579 dir_key.type = BTRFS_DIR_INDEX_KEY;
1579 btrfs_release_path(root, path); 1580 btrfs_release_path(path);
1580 goto again; 1581 goto again;
1581 } 1582 }
1582out: 1583out:
1583 btrfs_release_path(root, path); 1584 btrfs_release_path(path);
1584 btrfs_free_path(log_path); 1585 btrfs_free_path(log_path);
1585 iput(dir); 1586 iput(dir);
1586 return ret; 1587 return ret;
@@ -2108,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2108 * the running transaction open, so a full commit can't hop 2109 * the running transaction open, so a full commit can't hop
2109 * in and cause problems either. 2110 * in and cause problems either.
2110 */ 2111 */
2112 btrfs_scrub_pause_super(root);
2111 write_ctree_super(trans, root->fs_info->tree_root, 1); 2113 write_ctree_super(trans, root->fs_info->tree_root, 1);
2114 btrfs_scrub_continue_super(root);
2112 ret = 0; 2115 ret = 0;
2113 2116
2114 mutex_lock(&root->log_mutex); 2117 mutex_lock(&root->log_mutex);
@@ -2212,6 +2215,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2212 int ret; 2215 int ret;
2213 int err = 0; 2216 int err = 0;
2214 int bytes_del = 0; 2217 int bytes_del = 0;
2218 u64 dir_ino = btrfs_ino(dir);
2215 2219
2216 if (BTRFS_I(dir)->logged_trans < trans->transid) 2220 if (BTRFS_I(dir)->logged_trans < trans->transid)
2217 return 0; 2221 return 0;
@@ -2229,7 +2233,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2229 goto out_unlock; 2233 goto out_unlock;
2230 } 2234 }
2231 2235
2232 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2236 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
2233 name, name_len, -1); 2237 name, name_len, -1);
2234 if (IS_ERR(di)) { 2238 if (IS_ERR(di)) {
2235 err = PTR_ERR(di); 2239 err = PTR_ERR(di);
@@ -2240,8 +2244,8 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2240 bytes_del += name_len; 2244 bytes_del += name_len;
2241 BUG_ON(ret); 2245 BUG_ON(ret);
2242 } 2246 }
2243 btrfs_release_path(log, path); 2247 btrfs_release_path(path);
2244 di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, 2248 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
2245 index, name, name_len, -1); 2249 index, name, name_len, -1);
2246 if (IS_ERR(di)) { 2250 if (IS_ERR(di)) {
2247 err = PTR_ERR(di); 2251 err = PTR_ERR(di);
@@ -2259,10 +2263,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2259 if (bytes_del) { 2263 if (bytes_del) {
2260 struct btrfs_key key; 2264 struct btrfs_key key;
2261 2265
2262 key.objectid = dir->i_ino; 2266 key.objectid = dir_ino;
2263 key.offset = 0; 2267 key.offset = 0;
2264 key.type = BTRFS_INODE_ITEM_KEY; 2268 key.type = BTRFS_INODE_ITEM_KEY;
2265 btrfs_release_path(log, path); 2269 btrfs_release_path(path);
2266 2270
2267 ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 2271 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
2268 if (ret < 0) { 2272 if (ret < 0) {
@@ -2284,7 +2288,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2284 btrfs_mark_buffer_dirty(path->nodes[0]); 2288 btrfs_mark_buffer_dirty(path->nodes[0]);
2285 } else 2289 } else
2286 ret = 0; 2290 ret = 0;
2287 btrfs_release_path(log, path); 2291 btrfs_release_path(path);
2288 } 2292 }
2289fail: 2293fail:
2290 btrfs_free_path(path); 2294 btrfs_free_path(path);
@@ -2318,7 +2322,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2318 log = root->log_root; 2322 log = root->log_root;
2319 mutex_lock(&BTRFS_I(inode)->log_mutex); 2323 mutex_lock(&BTRFS_I(inode)->log_mutex);
2320 2324
2321 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2325 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
2322 dirid, &index); 2326 dirid, &index);
2323 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2327 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2324 if (ret == -ENOSPC) { 2328 if (ret == -ENOSPC) {
@@ -2359,7 +2363,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2359 struct btrfs_dir_log_item); 2363 struct btrfs_dir_log_item);
2360 btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2364 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2361 btrfs_mark_buffer_dirty(path->nodes[0]); 2365 btrfs_mark_buffer_dirty(path->nodes[0]);
2362 btrfs_release_path(log, path); 2366 btrfs_release_path(path);
2363 return 0; 2367 return 0;
2364} 2368}
2365 2369
@@ -2384,13 +2388,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2384 int nritems; 2388 int nritems;
2385 u64 first_offset = min_offset; 2389 u64 first_offset = min_offset;
2386 u64 last_offset = (u64)-1; 2390 u64 last_offset = (u64)-1;
2391 u64 ino = btrfs_ino(inode);
2387 2392
2388 log = root->log_root; 2393 log = root->log_root;
2389 max_key.objectid = inode->i_ino; 2394 max_key.objectid = ino;
2390 max_key.offset = (u64)-1; 2395 max_key.offset = (u64)-1;
2391 max_key.type = key_type; 2396 max_key.type = key_type;
2392 2397
2393 min_key.objectid = inode->i_ino; 2398 min_key.objectid = ino;
2394 min_key.type = key_type; 2399 min_key.type = key_type;
2395 min_key.offset = min_offset; 2400 min_key.offset = min_offset;
2396 2401
@@ -2403,18 +2408,17 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2403 * we didn't find anything from this transaction, see if there 2408 * we didn't find anything from this transaction, see if there
2404 * is anything at all 2409 * is anything at all
2405 */ 2410 */
2406 if (ret != 0 || min_key.objectid != inode->i_ino || 2411 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2407 min_key.type != key_type) { 2412 min_key.objectid = ino;
2408 min_key.objectid = inode->i_ino;
2409 min_key.type = key_type; 2413 min_key.type = key_type;
2410 min_key.offset = (u64)-1; 2414 min_key.offset = (u64)-1;
2411 btrfs_release_path(root, path); 2415 btrfs_release_path(path);
2412 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2416 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2413 if (ret < 0) { 2417 if (ret < 0) {
2414 btrfs_release_path(root, path); 2418 btrfs_release_path(path);
2415 return ret; 2419 return ret;
2416 } 2420 }
2417 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2421 ret = btrfs_previous_item(root, path, ino, key_type);
2418 2422
2419 /* if ret == 0 there are items for this type, 2423 /* if ret == 0 there are items for this type,
2420 * create a range to tell us the last key of this type. 2424 * create a range to tell us the last key of this type.
@@ -2432,7 +2436,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2432 } 2436 }
2433 2437
2434 /* go backward to find any previous key */ 2438 /* go backward to find any previous key */
2435 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2439 ret = btrfs_previous_item(root, path, ino, key_type);
2436 if (ret == 0) { 2440 if (ret == 0) {
2437 struct btrfs_key tmp; 2441 struct btrfs_key tmp;
2438 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2442 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -2447,7 +2451,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2447 } 2451 }
2448 } 2452 }
2449 } 2453 }
2450 btrfs_release_path(root, path); 2454 btrfs_release_path(path);
2451 2455
2452 /* find the first key from this transaction again */ 2456 /* find the first key from this transaction again */
2453 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2457 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
@@ -2467,8 +2471,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2467 for (i = path->slots[0]; i < nritems; i++) { 2471 for (i = path->slots[0]; i < nritems; i++) {
2468 btrfs_item_key_to_cpu(src, &min_key, i); 2472 btrfs_item_key_to_cpu(src, &min_key, i);
2469 2473
2470 if (min_key.objectid != inode->i_ino || 2474 if (min_key.objectid != ino || min_key.type != key_type)
2471 min_key.type != key_type)
2472 goto done; 2475 goto done;
2473 ret = overwrite_item(trans, log, dst_path, src, i, 2476 ret = overwrite_item(trans, log, dst_path, src, i,
2474 &min_key); 2477 &min_key);
@@ -2489,7 +2492,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2489 goto done; 2492 goto done;
2490 } 2493 }
2491 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2494 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2492 if (tmp.objectid != inode->i_ino || tmp.type != key_type) { 2495 if (tmp.objectid != ino || tmp.type != key_type) {
2493 last_offset = (u64)-1; 2496 last_offset = (u64)-1;
2494 goto done; 2497 goto done;
2495 } 2498 }
@@ -2505,8 +2508,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2505 } 2508 }
2506 } 2509 }
2507done: 2510done:
2508 btrfs_release_path(root, path); 2511 btrfs_release_path(path);
2509 btrfs_release_path(log, dst_path); 2512 btrfs_release_path(dst_path);
2510 2513
2511 if (err == 0) { 2514 if (err == 0) {
2512 *last_offset_ret = last_offset; 2515 *last_offset_ret = last_offset;
@@ -2515,8 +2518,7 @@ done:
2515 * is valid 2518 * is valid
2516 */ 2519 */
2517 ret = insert_dir_log_key(trans, log, path, key_type, 2520 ret = insert_dir_log_key(trans, log, path, key_type,
2518 inode->i_ino, first_offset, 2521 ino, first_offset, last_offset);
2519 last_offset);
2520 if (ret) 2522 if (ret)
2521 err = ret; 2523 err = ret;
2522 } 2524 }
@@ -2604,9 +2606,9 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2604 ret = btrfs_del_item(trans, log, path); 2606 ret = btrfs_del_item(trans, log, path);
2605 if (ret) 2607 if (ret)
2606 break; 2608 break;
2607 btrfs_release_path(log, path); 2609 btrfs_release_path(path);
2608 } 2610 }
2609 btrfs_release_path(log, path); 2611 btrfs_release_path(path);
2610 return ret; 2612 return ret;
2611} 2613}
2612 2614
@@ -2681,6 +2683,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2681 extent = btrfs_item_ptr(src, start_slot + i, 2683 extent = btrfs_item_ptr(src, start_slot + i,
2682 struct btrfs_file_extent_item); 2684 struct btrfs_file_extent_item);
2683 2685
2686 if (btrfs_file_extent_generation(src, extent) < trans->transid)
2687 continue;
2688
2684 found_type = btrfs_file_extent_type(src, extent); 2689 found_type = btrfs_file_extent_type(src, extent);
2685 if (found_type == BTRFS_FILE_EXTENT_REG || 2690 if (found_type == BTRFS_FILE_EXTENT_REG ||
2686 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2691 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -2705,14 +2710,14 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2705 ret = btrfs_lookup_csums_range( 2710 ret = btrfs_lookup_csums_range(
2706 log->fs_info->csum_root, 2711 log->fs_info->csum_root,
2707 ds + cs, ds + cs + cl - 1, 2712 ds + cs, ds + cs + cl - 1,
2708 &ordered_sums); 2713 &ordered_sums, 0);
2709 BUG_ON(ret); 2714 BUG_ON(ret);
2710 } 2715 }
2711 } 2716 }
2712 } 2717 }
2713 2718
2714 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2719 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
2715 btrfs_release_path(log, dst_path); 2720 btrfs_release_path(dst_path);
2716 kfree(ins_data); 2721 kfree(ins_data);
2717 2722
2718 /* 2723 /*
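
copy_items() now skips file extents whose generation is older than the running transaction: such extents were not modified in this transaction, so only the new ones need their checksums looked up and copied into the log. A small sketch of the filter, with invented fields:

#include <stdio.h>

typedef unsigned long long u64;

struct extent_item { u64 generation; u64 bytes; };

/* Sum only the extents touched in the current transaction. */
static u64 bytes_to_log(const struct extent_item *e, int n, u64 transid)
{
    u64 total = 0;
    int i;

    for (i = 0; i < n; i++) {
        if (e[i].generation < transid)
            continue;               /* older than this transaction: skip */
        total += e[i].bytes;
    }
    return total;
}

int main(void)
{
    struct extent_item items[] = { { 5, 100 }, { 9, 40 }, { 9, 60 } };

    printf("%llu\n", bytes_to_log(items, 3, 9)); /* gen 5 skipped: 40+60=100 */
    return 0;
}
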
@@ -2761,6 +2766,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2761 int nritems; 2766 int nritems;
2762 int ins_start_slot = 0; 2767 int ins_start_slot = 0;
2763 int ins_nr; 2768 int ins_nr;
2769 u64 ino = btrfs_ino(inode);
2764 2770
2765 log = root->log_root; 2771 log = root->log_root;
2766 2772
@@ -2773,11 +2779,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2773 return -ENOMEM; 2779 return -ENOMEM;
2774 } 2780 }
2775 2781
2776 min_key.objectid = inode->i_ino; 2782 min_key.objectid = ino;
2777 min_key.type = BTRFS_INODE_ITEM_KEY; 2783 min_key.type = BTRFS_INODE_ITEM_KEY;
2778 min_key.offset = 0; 2784 min_key.offset = 0;
2779 2785
2780 max_key.objectid = inode->i_ino; 2786 max_key.objectid = ino;
2781 2787
2782 /* today the code can only do partial logging of directories */ 2788 /* today the code can only do partial logging of directories */
2783 if (!S_ISDIR(inode->i_mode)) 2789 if (!S_ISDIR(inode->i_mode))
@@ -2789,6 +2795,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2789 max_key.type = (u8)-1; 2795 max_key.type = (u8)-1;
2790 max_key.offset = (u64)-1; 2796 max_key.offset = (u64)-1;
2791 2797
2798 ret = btrfs_commit_inode_delayed_items(trans, inode);
2799 if (ret) {
2800 btrfs_free_path(path);
2801 btrfs_free_path(dst_path);
2802 return ret;
2803 }
2804
2792 mutex_lock(&BTRFS_I(inode)->log_mutex); 2805 mutex_lock(&BTRFS_I(inode)->log_mutex);
2793 2806
2794 /* 2807 /*
@@ -2800,8 +2813,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2800 2813
2801 if (inode_only == LOG_INODE_EXISTS) 2814 if (inode_only == LOG_INODE_EXISTS)
2802 max_key_type = BTRFS_XATTR_ITEM_KEY; 2815 max_key_type = BTRFS_XATTR_ITEM_KEY;
2803 ret = drop_objectid_items(trans, log, path, 2816 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2804 inode->i_ino, max_key_type);
2805 } else { 2817 } else {
2806 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 2818 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
2807 } 2819 }
@@ -2819,7 +2831,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2819 break; 2831 break;
2820again: 2832again:
2821 /* note, ins_nr might be > 0 here, cleanup outside the loop */ 2833 /* note, ins_nr might be > 0 here, cleanup outside the loop */
2822 if (min_key.objectid != inode->i_ino) 2834 if (min_key.objectid != ino)
2823 break; 2835 break;
2824 if (min_key.type > max_key.type) 2836 if (min_key.type > max_key.type)
2825 break; 2837 break;
@@ -2861,7 +2873,7 @@ next_slot:
2861 } 2873 }
2862 ins_nr = 0; 2874 ins_nr = 0;
2863 } 2875 }
2864 btrfs_release_path(root, path); 2876 btrfs_release_path(path);
2865 2877
2866 if (min_key.offset < (u64)-1) 2878 if (min_key.offset < (u64)-1)
2867 min_key.offset++; 2879 min_key.offset++;
@@ -2884,8 +2896,8 @@ next_slot:
2884 } 2896 }
2885 WARN_ON(ins_nr); 2897 WARN_ON(ins_nr);
2886 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 2898 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2887 btrfs_release_path(root, path); 2899 btrfs_release_path(path);
2888 btrfs_release_path(log, dst_path); 2900 btrfs_release_path(dst_path);
2889 ret = log_directory_changes(trans, root, inode, path, dst_path); 2901 ret = log_directory_changes(trans, root, inode, path, dst_path);
2890 if (ret) { 2902 if (ret) {
2891 err = ret; 2903 err = ret;
@@ -3152,7 +3164,7 @@ again:
3152 } 3164 }
3153 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3165 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3154 path->slots[0]); 3166 path->slots[0]);
3155 btrfs_release_path(log_root_tree, path); 3167 btrfs_release_path(path);
3156 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 3168 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
3157 break; 3169 break;
3158 3170
@@ -3187,7 +3199,7 @@ again:
3187 if (found_key.offset == 0) 3199 if (found_key.offset == 0)
3188 break; 3200 break;
3189 } 3201 }
3190 btrfs_release_path(log_root_tree, path); 3202 btrfs_release_path(path);
3191 3203
3192 /* step one is to pin it all, step two is to replay just inodes */ 3204 /* step one is to pin it all, step two is to replay just inodes */
3193 if (wc.pin) { 3205 if (wc.pin) {
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 3dfae84c8cc8..2270ac58d746 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -38,7 +38,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 38 struct btrfs_root *root,
39 const char *name, int name_len, 39 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41int btrfs_join_running_log_trans(struct btrfs_root *root);
42int btrfs_end_log_trans(struct btrfs_root *root); 41int btrfs_end_log_trans(struct btrfs_root *root);
43int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
44int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e7844f8a347a..c48214ef5c09 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,22 +38,9 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device); 38 struct btrfs_device *device);
39static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 39static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
40 40
41#define map_lookup_size(n) (sizeof(struct map_lookup) + \
42 (sizeof(struct btrfs_bio_stripe) * (n)))
43
44static DEFINE_MUTEX(uuid_mutex); 41static DEFINE_MUTEX(uuid_mutex);
45static LIST_HEAD(fs_uuids); 42static LIST_HEAD(fs_uuids);
46 43
47void btrfs_lock_volumes(void)
48{
49 mutex_lock(&uuid_mutex);
50}
51
52void btrfs_unlock_volumes(void)
53{
54 mutex_unlock(&uuid_mutex);
55}
56
57static void lock_chunks(struct btrfs_root *root) 44static void lock_chunks(struct btrfs_root *root)
58{ 45{
59 mutex_lock(&root->fs_info->chunk_mutex); 46 mutex_lock(&root->fs_info->chunk_mutex);
@@ -848,10 +835,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
848 /* we don't want to overwrite the superblock on the drive, 835 /* we don't want to overwrite the superblock on the drive,
849 * so we make sure to start at an offset of at least 1MB 836 * so we make sure to start at an offset of at least 1MB
850 */ 837 */
851 search_start = 1024 * 1024; 838 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
852
853 if (root->fs_info->alloc_start + num_bytes <= search_end)
854 search_start = max(root->fs_info->alloc_start, search_start);
855 839
856 max_hole_start = search_start; 840 max_hole_start = search_start;
857 max_hole_size = 0; 841 max_hole_size = 0;
@@ -1339,6 +1323,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1339 if (ret) 1323 if (ret)
1340 goto error_undo; 1324 goto error_undo;
1341 1325
1326 device->in_fs_metadata = 0;
1327 btrfs_scrub_cancel_dev(root, device);
1328
1342 /* 1329 /*
1343 * the device list mutex makes sure that we don't change 1330 * the device list mutex makes sure that we don't change
1344 * the device list while someone else is writing out all 1331 * the device list while someone else is writing out all
@@ -1521,7 +1508,7 @@ next_slot:
1521 goto error; 1508 goto error;
1522 leaf = path->nodes[0]; 1509 leaf = path->nodes[0];
1523 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1510 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1524 btrfs_release_path(root, path); 1511 btrfs_release_path(path);
1525 continue; 1512 continue;
1526 } 1513 }
1527 1514
@@ -1992,7 +1979,7 @@ again:
1992 chunk = btrfs_item_ptr(leaf, path->slots[0], 1979 chunk = btrfs_item_ptr(leaf, path->slots[0],
1993 struct btrfs_chunk); 1980 struct btrfs_chunk);
1994 chunk_type = btrfs_chunk_type(leaf, chunk); 1981 chunk_type = btrfs_chunk_type(leaf, chunk);
1995 btrfs_release_path(chunk_root, path); 1982 btrfs_release_path(path);
1996 1983
1997 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { 1984 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1998 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 1985 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
@@ -2110,7 +2097,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2110 if (found_key.offset == 0) 2097 if (found_key.offset == 0)
2111 break; 2098 break;
2112 2099
2113 btrfs_release_path(chunk_root, path); 2100 btrfs_release_path(path);
2114 ret = btrfs_relocate_chunk(chunk_root, 2101 ret = btrfs_relocate_chunk(chunk_root,
2115 chunk_root->root_key.objectid, 2102 chunk_root->root_key.objectid,
2116 found_key.objectid, 2103 found_key.objectid,
@@ -2182,7 +2169,7 @@ again:
2182 goto done; 2169 goto done;
2183 if (ret) { 2170 if (ret) {
2184 ret = 0; 2171 ret = 0;
2185 btrfs_release_path(root, path); 2172 btrfs_release_path(path);
2186 break; 2173 break;
2187 } 2174 }
2188 2175
@@ -2191,7 +2178,7 @@ again:
2191 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2178 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2192 2179
2193 if (key.objectid != device->devid) { 2180 if (key.objectid != device->devid) {
2194 btrfs_release_path(root, path); 2181 btrfs_release_path(path);
2195 break; 2182 break;
2196 } 2183 }
2197 2184
@@ -2199,14 +2186,14 @@ again:
2199 length = btrfs_dev_extent_length(l, dev_extent); 2186 length = btrfs_dev_extent_length(l, dev_extent);
2200 2187
2201 if (key.offset + length <= new_size) { 2188 if (key.offset + length <= new_size) {
2202 btrfs_release_path(root, path); 2189 btrfs_release_path(path);
2203 break; 2190 break;
2204 } 2191 }
2205 2192
2206 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2193 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2207 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2194 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2208 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 2195 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2209 btrfs_release_path(root, path); 2196 btrfs_release_path(path);
2210 2197
2211 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 2198 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2212 chunk_offset); 2199 chunk_offset);
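
The rewritten allocator in the next hunk replaces the old calc_size arithmetic with an explicit parameter set per RAID profile (sub_stripes, dev_stripes, devs_max, devs_min, devs_increment, ncopies), chosen by an if/else chain that carries a FIXME to move the values into a global table. A sketch of what that table could look like, with the rows transcribed from the chain below; the table itself is an assumption, not code from this patch:

#include <stdio.h>

/* One row per RAID profile, mirroring the if/else chain in
 * __btrfs_alloc_chunk() below; SINGLE is the fallthrough case. */
struct raid_attr {
    const char *name;
    int sub_stripes;    /* sub_stripes info for the chunk map */
    int dev_stripes;    /* stripes placed on each device */
    int devs_max;       /* max devices to use, 0 = unlimited */
    int devs_min;       /* min devices required */
    int devs_increment; /* ndevs must be a multiple of this */
    int ncopies;        /* copies of the data */
};

static const struct raid_attr raid_table[] = {
    { "SINGLE", 1, 1, 1, 1, 1, 1 },
    { "DUP",    1, 2, 1, 1, 1, 2 },
    { "RAID0",  1, 1, 0, 2, 1, 1 },
    { "RAID1",  1, 1, 2, 2, 2, 2 },
    { "RAID10", 2, 1, 0, 4, 2, 2 },
};

int main(void)
{
    size_t i;

    for (i = 0; i < sizeof(raid_table) / sizeof(raid_table[0]); i++)
        printf("%-7s devs_min=%d ncopies=%d\n", raid_table[i].name,
               raid_table[i].devs_min, raid_table[i].ncopies);
    return 0;
}
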
@@ -2282,275 +2269,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
2282 return 0; 2269 return 0;
2283} 2270}
2284 2271
2285static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, 2272/*
 2286 int num_stripes, int sub_stripes) 2273 * Sort the devices in descending order by max_avail, then by total_avail
2274 */
2275static int btrfs_cmp_device_info(const void *a, const void *b)
2287{ 2276{
2288 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) 2277 const struct btrfs_device_info *di_a = a;
2289 return calc_size; 2278 const struct btrfs_device_info *di_b = b;
2290 else if (type & BTRFS_BLOCK_GROUP_RAID10)
2291 return calc_size * (num_stripes / sub_stripes);
2292 else
2293 return calc_size * num_stripes;
2294}
2295 2279
2296/* Used to sort the devices by max_avail(descending sort) */ 2280 if (di_a->max_avail > di_b->max_avail)
2297int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
2298{
2299 if (((struct btrfs_device_info *)dev_info1)->max_avail >
2300 ((struct btrfs_device_info *)dev_info2)->max_avail)
2301 return -1; 2281 return -1;
2302 else if (((struct btrfs_device_info *)dev_info1)->max_avail < 2282 if (di_a->max_avail < di_b->max_avail)
2303 ((struct btrfs_device_info *)dev_info2)->max_avail)
2304 return 1; 2283 return 1;
2305 else 2284 if (di_a->total_avail > di_b->total_avail)
2306 return 0; 2285 return -1;
2286 if (di_a->total_avail < di_b->total_avail)
2287 return 1;
2288 return 0;
2307} 2289}
2308 2290
2309static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, 2291static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2310 int *num_stripes, int *min_stripes, 2292 struct btrfs_root *extent_root,
2311 int *sub_stripes) 2293 struct map_lookup **map_ret,
2294 u64 *num_bytes_out, u64 *stripe_size_out,
2295 u64 start, u64 type)
2312{ 2296{
2313 *num_stripes = 1; 2297 struct btrfs_fs_info *info = extent_root->fs_info;
2314 *min_stripes = 1; 2298 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2315 *sub_stripes = 0; 2299 struct list_head *cur;
2300 struct map_lookup *map = NULL;
2301 struct extent_map_tree *em_tree;
2302 struct extent_map *em;
2303 struct btrfs_device_info *devices_info = NULL;
2304 u64 total_avail;
2305 int num_stripes; /* total number of stripes to allocate */
2306 int sub_stripes; /* sub_stripes info for map */
2307 int dev_stripes; /* stripes per dev */
2308 int devs_max; /* max devs to use */
2309 int devs_min; /* min devs needed */
2310 int devs_increment; /* ndevs has to be a multiple of this */
 2311 int ncopies; /* how many copies the data has */
2312 int ret;
2313 u64 max_stripe_size;
2314 u64 max_chunk_size;
2315 u64 stripe_size;
2316 u64 num_bytes;
2317 int ndevs;
2318 int i;
2319 int j;
2316 2320
2317 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2321 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
2318 *num_stripes = fs_devices->rw_devices; 2322 (type & BTRFS_BLOCK_GROUP_DUP)) {
2319 *min_stripes = 2; 2323 WARN_ON(1);
2320 } 2324 type &= ~BTRFS_BLOCK_GROUP_DUP;
2321 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2322 *num_stripes = 2;
2323 *min_stripes = 2;
2324 }
2325 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2326 if (fs_devices->rw_devices < 2)
2327 return -ENOSPC;
2328 *num_stripes = 2;
2329 *min_stripes = 2;
2330 }
2331 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2332 *num_stripes = fs_devices->rw_devices;
2333 if (*num_stripes < 4)
2334 return -ENOSPC;
2335 *num_stripes &= ~(u32)1;
2336 *sub_stripes = 2;
2337 *min_stripes = 4;
2338 } 2325 }
2339 2326
2340 return 0; 2327 if (list_empty(&fs_devices->alloc_list))
2341} 2328 return -ENOSPC;
2342 2329
2343static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, 2330 sub_stripes = 1;
2344 u64 proposed_size, u64 type, 2331 dev_stripes = 1;
2345 int num_stripes, int small_stripe) 2332 devs_increment = 1;
2346{ 2333 ncopies = 1;
2347 int min_stripe_size = 1 * 1024 * 1024; 2334 devs_max = 0; /* 0 == as many as possible */
2348 u64 calc_size = proposed_size; 2335 devs_min = 1;
2349 u64 max_chunk_size = calc_size;
2350 int ncopies = 1;
2351 2336
2352 if (type & (BTRFS_BLOCK_GROUP_RAID1 | 2337 /*
2353 BTRFS_BLOCK_GROUP_DUP | 2338 * define the properties of each RAID type.
2354 BTRFS_BLOCK_GROUP_RAID10)) 2339 * FIXME: move this to a global table and use it in all RAID
2340 * calculation code
2341 */
2342 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2343 dev_stripes = 2;
2344 ncopies = 2;
2345 devs_max = 1;
2346 } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2347 devs_min = 2;
2348 } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2349 devs_increment = 2;
2355 ncopies = 2; 2350 ncopies = 2;
2351 devs_max = 2;
2352 devs_min = 2;
2353 } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2354 sub_stripes = 2;
2355 devs_increment = 2;
2356 ncopies = 2;
2357 devs_min = 4;
2358 } else {
2359 devs_max = 1;
2360 }
2356 2361
2357 if (type & BTRFS_BLOCK_GROUP_DATA) { 2362 if (type & BTRFS_BLOCK_GROUP_DATA) {
2358 max_chunk_size = 10 * calc_size; 2363 max_stripe_size = 1024 * 1024 * 1024;
2359 min_stripe_size = 64 * 1024 * 1024; 2364 max_chunk_size = 10 * max_stripe_size;
2360 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2365 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2361 max_chunk_size = 256 * 1024 * 1024; 2366 max_stripe_size = 256 * 1024 * 1024;
2362 min_stripe_size = 32 * 1024 * 1024; 2367 max_chunk_size = max_stripe_size;
2363 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2368 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2364 calc_size = 8 * 1024 * 1024; 2369 max_stripe_size = 8 * 1024 * 1024;
2365 max_chunk_size = calc_size * 2; 2370 max_chunk_size = 2 * max_stripe_size;
2366 min_stripe_size = 1 * 1024 * 1024; 2371 } else {
2372 printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
2373 type);
2374 BUG_ON(1);
2367 } 2375 }
2368 2376
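The FIXME in the added code asks for these per-RAID-type parameters to live in one global table. Purely as an illustration of what that could look like (the struct and array names here are hypothetical, not part of the patch), the branch above sets:

/* illustrative recast of the branch above; names are hypothetical */
struct btrfs_raid_profile {
        int sub_stripes;        /* RAID10 sub-stripe mirroring */
        int dev_stripes;        /* stripes placed on each device */
        int devs_max;           /* 0 == no limit */
        int devs_min;           /* minimum devices required */
        int devs_increment;     /* ndevs rounded down to a multiple of this */
        int ncopies;            /* copies of each block */
};

static const struct btrfs_raid_profile raid_profiles[] = {
        /* sub  dev  max  min  incr  ncopies */
        {   1,   2,   1,   1,   1,    2 },      /* DUP    */
        {   1,   1,   0,   2,   1,    1 },      /* RAID0  */
        {   1,   1,   2,   2,   2,    2 },      /* RAID1  */
        {   2,   1,   0,   4,   2,    2 },      /* RAID10 */
        {   1,   1,   1,   1,   1,    1 },      /* single */
};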
         /* we don't want a chunk larger than 10% of writeable space */
         max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
                              max_chunk_size);
 
-        if (calc_size * num_stripes > max_chunk_size * ncopies) {
-                calc_size = max_chunk_size * ncopies;
-                do_div(calc_size, num_stripes);
-                do_div(calc_size, BTRFS_STRIPE_LEN);
-                calc_size *= BTRFS_STRIPE_LEN;
-        }
+        devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+                               GFP_NOFS);
+        if (!devices_info)
+                return -ENOMEM;
 
-        /* we don't want tiny stripes */
-        if (!small_stripe)
-                calc_size = max_t(u64, min_stripe_size, calc_size);
+        cur = fs_devices->alloc_list.next;
 
         /*
-         * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
-         * we end up with something bigger than a stripe
+         * in the first pass through the devices list, we gather information
+         * about the available holes on each device.
          */
-        calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
-
-        do_div(calc_size, BTRFS_STRIPE_LEN);
-        calc_size *= BTRFS_STRIPE_LEN;
-
-        return calc_size;
-}
-
-static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
-                                                      int num_stripes)
-{
-        struct map_lookup *new;
-        size_t len = map_lookup_size(num_stripes);
-
-        BUG_ON(map->num_stripes < num_stripes);
+        ndevs = 0;
+        while (cur != &fs_devices->alloc_list) {
+                struct btrfs_device *device;
+                u64 max_avail;
+                u64 dev_offset;
 
-        if (map->num_stripes == num_stripes)
-                return map;
-
-        new = kmalloc(len, GFP_NOFS);
-        if (!new) {
-                /* just change map->num_stripes */
-                map->num_stripes = num_stripes;
-                return map;
-        }
-
-        memcpy(new, map, len);
-        new->num_stripes = num_stripes;
-        kfree(map);
-        return new;
-}
-
-/*
- * helper to allocate device space from btrfs_device_info, in which we stored
- * max free space information of every device. It is used when we can not
- * allocate chunks by default size.
- *
- * By this helper, we can allocate a new chunk as larger as possible.
- */
-static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_devices *fs_devices,
-                                    struct btrfs_device_info *devices,
-                                    int nr_device, u64 type,
-                                    struct map_lookup **map_lookup,
-                                    int min_stripes, u64 *stripe_size)
-{
-        int i, index, sort_again = 0;
-        int min_devices = min_stripes;
-        u64 max_avail, min_free;
-        struct map_lookup *map = *map_lookup;
-        int ret;
+                device = list_entry(cur, struct btrfs_device, dev_alloc_list);
 
-        if (nr_device < min_stripes)
-                return -ENOSPC;
+                cur = cur->next;
 
-        btrfs_descending_sort_devices(devices, nr_device);
+                if (!device->writeable) {
+                        printk(KERN_ERR
+                               "btrfs: read-only device in alloc_list\n");
+                        WARN_ON(1);
+                        continue;
+                }
 
-        max_avail = devices[0].max_avail;
-        if (!max_avail)
-                return -ENOSPC;
+                if (!device->in_fs_metadata)
+                        continue;
 
-        for (i = 0; i < nr_device; i++) {
-                /*
-                 * if dev_offset = 0, it means the free space of this device
-                 * is less than what we need, and we didn't search max avail
-                 * extent on this device, so do it now.
+                if (device->total_bytes > device->bytes_used)
+                        total_avail = device->total_bytes - device->bytes_used;
+                else
+                        total_avail = 0;
+                /* avail is off by max(alloc_start, 1MB), but that is the same
+                 * for all devices, so it doesn't hurt the sorting later on
                  */
-                if (!devices[i].dev_offset) {
-                        ret = find_free_dev_extent(trans, devices[i].dev,
-                                                   max_avail,
-                                                   &devices[i].dev_offset,
-                                                   &devices[i].max_avail);
-                        if (ret != 0 && ret != -ENOSPC)
-                                return ret;
-                        sort_again = 1;
-                }
-        }
 
-        /* we update the max avail free extent of each devices, sort again */
-        if (sort_again)
-                btrfs_descending_sort_devices(devices, nr_device);
-
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                min_devices = 1;
+                ret = find_free_dev_extent(trans, device,
+                                           max_stripe_size * dev_stripes,
+                                           &dev_offset, &max_avail);
+                if (ret && ret != -ENOSPC)
+                        goto error;
 
-        if (!devices[min_devices - 1].max_avail)
-                return -ENOSPC;
+                if (ret == 0)
+                        max_avail = max_stripe_size * dev_stripes;
 
-        max_avail = devices[min_devices - 1].max_avail;
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                do_div(max_avail, 2);
+                if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
+                        continue;
 
-        max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
-                                             min_stripes, 1);
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                min_free = max_avail * 2;
-        else
-                min_free = max_avail;
+                devices_info[ndevs].dev_offset = dev_offset;
+                devices_info[ndevs].max_avail = max_avail;
+                devices_info[ndevs].total_avail = total_avail;
+                devices_info[ndevs].dev = device;
+                ++ndevs;
+        }
 
-        if (min_free > devices[min_devices - 1].max_avail)
-                return -ENOSPC;
+        /*
+         * now sort the devices by hole size / available space
+         */
+        sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+             btrfs_cmp_device_info, NULL);
 
-        map = __shrink_map_lookup_stripes(map, min_stripes);
-        *stripe_size = max_avail;
+        /* round down to number of usable stripes */
+        ndevs -= ndevs % devs_increment;
 
-        index = 0;
-        for (i = 0; i < min_stripes; i++) {
-                map->stripes[i].dev = devices[index].dev;
-                map->stripes[i].physical = devices[index].dev_offset;
-                if (type & BTRFS_BLOCK_GROUP_DUP) {
-                        i++;
-                        map->stripes[i].dev = devices[index].dev;
-                        map->stripes[i].physical = devices[index].dev_offset +
-                                                   max_avail;
-                }
-                index++;
+        if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
+                ret = -ENOSPC;
+                goto error;
         }
-        *map_lookup = map;
 
-        return 0;
-}
-
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *extent_root,
-                               struct map_lookup **map_ret,
-                               u64 *num_bytes, u64 *stripe_size,
-                               u64 start, u64 type)
-{
-        struct btrfs_fs_info *info = extent_root->fs_info;
-        struct btrfs_device *device = NULL;
-        struct btrfs_fs_devices *fs_devices = info->fs_devices;
-        struct list_head *cur;
-        struct map_lookup *map;
-        struct extent_map_tree *em_tree;
-        struct extent_map *em;
-        struct btrfs_device_info *devices_info;
-        struct list_head private_devs;
-        u64 calc_size = 1024 * 1024 * 1024;
-        u64 min_free;
-        u64 avail;
-        u64 dev_offset;
-        int num_stripes;
-        int min_stripes;
-        int sub_stripes;
-        int min_devices;        /* the min number of devices we need */
-        int i;
-        int ret;
-        int index;
+        if (devs_max && ndevs > devs_max)
+                ndevs = devs_max;
+        /*
+         * the primary goal is to maximize the number of stripes, so use as many
+         * devices as possible, even if the stripes are not maximum sized.
+         */
+        stripe_size = devices_info[ndevs-1].max_avail;
+        num_stripes = ndevs * dev_stripes;
 
-        if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
-            (type & BTRFS_BLOCK_GROUP_DUP)) {
-                WARN_ON(1);
-                type &= ~BTRFS_BLOCK_GROUP_DUP;
+        if (stripe_size * num_stripes > max_chunk_size * ncopies) {
+                stripe_size = max_chunk_size * ncopies;
+                do_div(stripe_size, num_stripes);
         }
-        if (list_empty(&fs_devices->alloc_list))
-                return -ENOSPC;
 
-        ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
-                                    &min_stripes, &sub_stripes);
-        if (ret)
-                return ret;
-
-        devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
-                               GFP_NOFS);
-        if (!devices_info)
-                return -ENOMEM;
+        do_div(stripe_size, dev_stripes);
+        do_div(stripe_size, BTRFS_STRIPE_LEN);
+        stripe_size *= BTRFS_STRIPE_LEN;
 
         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
         if (!map) {
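To see how the new first pass feeds the sizing logic, trace a hypothetical RAID10 data chunk (dev_stripes = 1, devs_increment = 2, devs_min = 4, ncopies = 2, max_stripe_size = 1 GiB, max_chunk_size = 10 GiB) over four writable devices whose largest free extents are 600, 500, 400 and 200 MiB. find_free_dev_extent() is asked for max_stripe_size * dev_stripes = 1 GiB on each device and returns -ENOSPC with max_avail set to each hole size; every hole is at least BTRFS_STRIPE_LEN, so all four devices are recorded and ndevs = 4, already a multiple of devs_increment and not below devs_min. After the descending sort the smallest accepted hole bounds the allocation: stripe_size = devices_info[ndevs - 1].max_avail = 200 MiB and num_stripes = ndevs * dev_stripes = 4. The cap does not trigger (200 MiB * 4 is far below max_chunk_size * ncopies = 20 GiB), and 200 MiB is already a multiple of BTRFS_STRIPE_LEN, so the chunk puts one 200 MiB stripe on each of the four devices. The numbers are invented for illustration; only the formulas come from the code above.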
@@ -2559,85 +2475,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         }
         map->num_stripes = num_stripes;
 
-        cur = fs_devices->alloc_list.next;
-        index = 0;
-        i = 0;
-
-        calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
-                                             num_stripes, 0);
-
-        if (type & BTRFS_BLOCK_GROUP_DUP) {
-                min_free = calc_size * 2;
-                min_devices = 1;
-        } else {
-                min_free = calc_size;
-                min_devices = min_stripes;
-        }
-
-        INIT_LIST_HEAD(&private_devs);
-        while (index < num_stripes) {
-                device = list_entry(cur, struct btrfs_device, dev_alloc_list);
-                BUG_ON(!device->writeable);
-                if (device->total_bytes > device->bytes_used)
-                        avail = device->total_bytes - device->bytes_used;
-                else
-                        avail = 0;
-                cur = cur->next;
-
-                if (device->in_fs_metadata && avail >= min_free) {
-                        ret = find_free_dev_extent(trans, device, min_free,
-                                                   &devices_info[i].dev_offset,
-                                                   &devices_info[i].max_avail);
-                        if (ret == 0) {
-                                list_move_tail(&device->dev_alloc_list,
-                                               &private_devs);
-                                map->stripes[index].dev = device;
-                                map->stripes[index].physical =
-                                                devices_info[i].dev_offset;
-                                index++;
-                                if (type & BTRFS_BLOCK_GROUP_DUP) {
-                                        map->stripes[index].dev = device;
-                                        map->stripes[index].physical =
-                                                devices_info[i].dev_offset +
-                                                calc_size;
-                                        index++;
-                                }
-                        } else if (ret != -ENOSPC)
-                                goto error;
-
-                        devices_info[i].dev = device;
-                        i++;
-                } else if (device->in_fs_metadata &&
-                           avail >= BTRFS_STRIPE_LEN) {
-                        devices_info[i].dev = device;
-                        devices_info[i].max_avail = avail;
-                        i++;
-                }
-
-                if (cur == &fs_devices->alloc_list)
-                        break;
-        }
-
-        list_splice(&private_devs, &fs_devices->alloc_list);
-        if (index < num_stripes) {
-                if (index >= min_stripes) {
-                        num_stripes = index;
-                        if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-                                num_stripes /= sub_stripes;
-                                num_stripes *= sub_stripes;
-                        }
-
-                        map = __shrink_map_lookup_stripes(map, num_stripes);
-                } else if (i >= min_devices) {
-                        ret = __btrfs_alloc_tiny_space(trans, fs_devices,
-                                                       devices_info, i, type,
-                                                       &map, min_stripes,
-                                                       &calc_size);
-                        if (ret)
-                                goto error;
-                } else {
-                        ret = -ENOSPC;
-                        goto error;
+        for (i = 0; i < ndevs; ++i) {
+                for (j = 0; j < dev_stripes; ++j) {
+                        int s = i * dev_stripes + j;
+                        map->stripes[s].dev = devices_info[i].dev;
+                        map->stripes[s].physical = devices_info[i].dev_offset +
+                                                   j * stripe_size;
                 }
         }
         map->sector_size = extent_root->sectorsize;
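The nested loop above replaces the old index/while walk: stripe s = i * dev_stripes + j lands on the i-th sorted device at its free-extent start plus j * stripe_size, which is how DUP's two stripes end up back to back on a single device. An illustrative helper (not in the patch; types as in volumes.h later in this diff) that computes the same placement:

/* hypothetical helper, equivalent to the loop body above */
static u64 stripe_physical(struct btrfs_device_info *devices_info,
                           int dev_stripes, u64 stripe_size, int s)
{
        int i = s / dev_stripes;        /* which device */
        int j = s % dev_stripes;        /* which stripe on that device */

        return devices_info[i].dev_offset + j * stripe_size;
}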
@@ -2648,20 +2491,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         map->sub_stripes = sub_stripes;
 
         *map_ret = map;
-        *stripe_size = calc_size;
-        *num_bytes = chunk_bytes_by_type(type, calc_size,
-                                         map->num_stripes, sub_stripes);
+        num_bytes = stripe_size * (num_stripes / ncopies);
 
-        trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+        *stripe_size_out = stripe_size;
+        *num_bytes_out = num_bytes;
 
-        em = alloc_extent_map(GFP_NOFS);
+        trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
+
+        em = alloc_extent_map();
         if (!em) {
                 ret = -ENOMEM;
                 goto error;
         }
         em->bdev = (struct block_device *)map;
         em->start = start;
-        em->len = *num_bytes;
+        em->len = num_bytes;
         em->block_start = 0;
         em->block_len = em->len;
 
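num_bytes is the logical size of the chunk: the raw stripe space divided by the number of copies. Continuing the hypothetical RAID10 example earlier, num_bytes = 200 MiB * (4 / 2) = 400 MiB of addressable space backed by 800 MiB of raw device space; for a DUP chunk (num_stripes = 2, ncopies = 2) the chunk exposes exactly one stripe_size. Callers now receive both values through the renamed out parameters stripe_size_out and num_bytes_out.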
@@ -2674,20 +2518,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
         ret = btrfs_make_block_group(trans, extent_root, 0, type,
                                      BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                     start, *num_bytes);
+                                     start, num_bytes);
         BUG_ON(ret);
 
-        index = 0;
-        while (index < map->num_stripes) {
-                device = map->stripes[index].dev;
-                dev_offset = map->stripes[index].physical;
+        for (i = 0; i < map->num_stripes; ++i) {
+                struct btrfs_device *device;
+                u64 dev_offset;
+
+                device = map->stripes[i].dev;
+                dev_offset = map->stripes[i].physical;
 
                 ret = btrfs_alloc_dev_extent(trans, device,
                                 info->chunk_root->root_key.objectid,
                                 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                start, dev_offset, calc_size);
+                                start, dev_offset, stripe_size);
                 BUG_ON(ret);
-                index++;
         }
 
         kfree(devices_info);
@@ -2894,7 +2739,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
 {
-        extent_map_tree_init(&tree->map_tree, GFP_NOFS);
+        extent_map_tree_init(&tree->map_tree);
 }
 
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
@@ -3544,7 +3389,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                 free_extent_map(em);
         }
 
-        em = alloc_extent_map(GFP_NOFS);
+        em = alloc_extent_map();
         if (!em)
                 return -ENOMEM;
         num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
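This hunk, like the btrfs_mapping_init() hunk above, tracks an extent-map API change elsewhere in this pull: alloc_extent_map() and extent_map_tree_init() no longer take a gfp_t argument (presumably because the allocation flags are now fixed internally; that rationale is inferred from the diff, not stated in it). The calling pattern is simply:

        em = alloc_extent_map();        /* was: alloc_extent_map(GFP_NOFS) */
        if (!em)
                return -ENOMEM;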
@@ -3733,15 +3578,6 @@ static int read_one_dev(struct btrfs_root *root,
         return ret;
 }
 
-int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
-{
-        struct btrfs_dev_item *dev_item;
-
-        dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
-                                                     dev_item);
-        return read_one_dev(root, buf, dev_item);
-}
-
 int btrfs_read_sys_array(struct btrfs_root *root)
 {
         struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
@@ -3858,7 +3694,7 @@ again:
         }
         if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
                 key.objectid = 0;
-                btrfs_release_path(root, path);
+                btrfs_release_path(path);
                 goto again;
         }
         ret = 0;
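The btrfs_release_path() conversion in this hunk recurs throughout the pull: the function loses its root argument, which it did not use, so callers change mechanically (the unused-argument rationale is inferred from the uniformity of the change):

        btrfs_release_path(root, path);         /* old */
        btrfs_release_path(path);               /* new */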
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f1b2e4f53fc2..7c12d61ae7ae 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -85,6 +85,9 @@ struct btrfs_device {
         /* physical drive uuid (or lvm uuid) */
         u8 uuid[BTRFS_UUID_SIZE];
 
+        /* per-device scrub information */
+        struct scrub_dev *scrub_device;
+
         struct btrfs_work work;
         struct rcu_head rcu;
         struct work_struct rcu_work;
@@ -146,6 +149,7 @@ struct btrfs_device_info {
         struct btrfs_device *dev;
         u64 dev_offset;
         u64 max_avail;
+        u64 total_avail;
 };
 
 struct map_lookup {
@@ -159,20 +163,8 @@ struct map_lookup {
         struct btrfs_bio_stripe stripes[];
 };
 
-/* Used to sort the devices by max_avail(descending sort) */
-int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
-
-/*
- * sort the devices by max_avail, in which max free extent size of each device
- * is stored.(Descending Sort)
- */
-static inline void btrfs_descending_sort_devices(
-                                        struct btrfs_device_info *devices,
-                                        size_t nr_devices)
-{
-        sort(devices, nr_devices, sizeof(struct btrfs_device_info),
-             btrfs_cmp_device_free_bytes, NULL);
-}
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+                            (sizeof(struct btrfs_bio_stripe) * (n)))
 
 int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
                                    u64 end, u64 *length);
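struct map_lookup ends in a flexible array member, stripes[], so the new map_lookup_size(n) macro yields the allocation size of a lookup holding n stripes: the fixed header plus n copies of struct btrfs_bio_stripe. Its use mirrors the volumes.c hunk earlier in this diff:

        struct map_lookup *map;

        /* one allocation: header plus num_stripes stripe slots */
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map)
                return -ENOMEM;
        map->num_stripes = num_stripes;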
@@ -198,7 +190,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                   int mirror_num, int async_submit);
-int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                        fmode_t flags, void *holder);
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
@@ -211,8 +202,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 int btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
-                      u64 logical, struct page *page);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
                       struct btrfs_device *device, u64 new_size);
 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
@@ -220,8 +209,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_root *dev_root);
-void btrfs_unlock_volumes(void);
-void btrfs_lock_volumes(void);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
                          struct btrfs_device *device, u64 num_bytes,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index cfd660550ded..f3107e4b4d56 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -44,7 +44,7 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
                 return -ENOMEM;
 
         /* lookup the xattr by name */
-        di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
+        di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
                                 strlen(name), 0);
         if (!di) {
                 ret = -ENODATA;
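These xattr hunks swap inode->i_ino for btrfs_ino(inode). With subvolumes, the number btrfs uses in tree keys is the objectid of the inode's location key rather than the VFS i_ino; the helper itself is defined in btrfs_inode.h, outside this excerpt, so the body below is only a rough sketch under that assumption:

/* rough sketch, not the verbatim helper */
static inline u64 btrfs_ino(struct inode *inode)
{
        u64 ino = BTRFS_I(inode)->location.objectid;

        /* special and root inodes fall back to the VFS inode number;
         * the exact condition is not visible in this diff */
        if (!ino)
                ino = inode->i_ino;
        return ino;
}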
@@ -103,7 +103,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                 return -ENOMEM;
 
         /* first lets see if we already have this xattr */
-        di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
+        di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
                                 strlen(name), -1);
         if (IS_ERR(di)) {
                 ret = PTR_ERR(di);
@@ -120,13 +120,13 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 
                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
                 BUG_ON(ret);
-                btrfs_release_path(root, path);
+                btrfs_release_path(path);
 
                 /* if we don't have a value then we are removing the xattr */
                 if (!value)
                         goto out;
         } else {
-                btrfs_release_path(root, path);
+                btrfs_release_path(path);
 
                 if (flags & XATTR_REPLACE) {
                         /* we couldn't find the attr to replace */
@@ -136,7 +136,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
         }
 
         /* ok we have to create a completely new xattr */
-        ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
+        ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
                                       name, name_len, value, size);
         BUG_ON(ret);
 out:
@@ -190,7 +190,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
          * NOTE: we set key.offset = 0; because we want to start with the
          * first xattr that we find and walk forward
          */
-        key.objectid = inode->i_ino;
+        key.objectid = btrfs_ino(inode);
         btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
         key.offset = 0;
 