aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/btrfs_inode.h15
-rw-r--r--fs/btrfs/compression.c47
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c51
-rw-r--r--fs/btrfs/ctree.h244
-rw-r--r--fs/btrfs/delayed-inode.c1695
-rw-r--r--fs/btrfs/delayed-inode.h141
-rw-r--r--fs/btrfs/delayed-ref.c114
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dir-item.c39
-rw-r--r--fs/btrfs/disk-io.c210
-rw-r--r--fs/btrfs/disk-io.h19
-rw-r--r--fs/btrfs/export.c25
-rw-r--r--fs/btrfs/extent-tree.c1788
-rw-r--r--fs/btrfs/extent_io.c324
-rw-r--r--fs/btrfs/extent_io.h40
-rw-r--r--fs/btrfs/extent_map.c8
-rw-r--r--fs/btrfs/extent_map.h4
-rw-r--r--fs/btrfs/file-item.c38
-rw-r--r--fs/btrfs/file.c302
-rw-r--r--fs/btrfs/free-space-cache.c993
-rw-r--r--fs/btrfs/free-space-cache.h48
-rw-r--r--fs/btrfs/inode-item.c2
-rw-r--r--fs/btrfs/inode-map.c444
-rw-r--r--fs/btrfs/inode-map.h13
-rw-r--r--fs/btrfs/inode.c700
-rw-r--r--fs/btrfs/ioctl.c624
-rw-r--r--fs/btrfs/ioctl.h107
-rw-r--r--fs/btrfs/locking.c25
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/ref-cache.c164
-rw-r--r--fs/btrfs/ref-cache.h24
-rw-r--r--fs/btrfs/relocation.c67
-rw-r--r--fs/btrfs/root-tree.c61
-rw-r--r--fs/btrfs/scrub.c1369
-rw-r--r--fs/btrfs/super.c51
-rw-r--r--fs/btrfs/sysfs.c77
-rw-r--r--fs/btrfs/transaction.c196
-rw-r--r--fs/btrfs/transaction.h5
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c208
-rw-r--r--fs/btrfs/tree-log.h1
-rw-r--r--fs/btrfs/version.sh43
-rw-r--r--fs/btrfs/volumes.c657
-rw-r--r--fs/btrfs/volumes.h27
-rw-r--r--fs/btrfs/xattr.c12
-rw-r--r--fs/cifs/Kconfig20
-rw-r--r--fs/cifs/README3
-rw-r--r--fs/cifs/cache.c6
-rw-r--r--fs/cifs/cifs_debug.c26
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h3
-rw-r--r--fs/cifs/cifs_spnego.c2
-rw-r--r--fs/cifs/cifs_spnego.h2
-rw-r--r--fs/cifs/cifsacl.c9
-rw-r--r--fs/cifs/cifsencrypt.c14
-rw-r--r--fs/cifs/cifsfs.c233
-rw-r--r--fs/cifs/cifsglob.h129
-rw-r--r--fs/cifs/cifsproto.h209
-rw-r--r--fs/cifs/cifssmb.c463
-rw-r--r--fs/cifs/connect.c625
-rw-r--r--fs/cifs/dir.c33
-rw-r--r--fs/cifs/file.c376
-rw-r--r--fs/cifs/fscache.c6
-rw-r--r--fs/cifs/fscache.h8
-rw-r--r--fs/cifs/inode.c92
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/link.c46
-rw-r--r--fs/cifs/misc.c32
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c42
-rw-r--r--fs/cifs/transport.c214
-rw-r--r--fs/cifs/xattr.c8
-rw-r--r--fs/dlm/main.c2
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c46
-rw-r--r--fs/exec.c39
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/jffs2/dir.c4
-rw-r--r--fs/jffs2/scan.c19
-rw-r--r--fs/ncpfs/mmap.c2
-rw-r--r--fs/ocfs2/move_extents.c41
-rw-r--r--fs/partitions/check.c8
-rw-r--r--fs/partitions/efi.c9
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c83
-rw-r--r--fs/proc/stat.c6
-rw-r--r--fs/proc/task_mmu.c27
-rw-r--r--fs/proc/vmcore.c52
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/squashfs/cache.c31
-rw-r--r--fs/squashfs/decompressor.c2
-rw-r--r--fs/squashfs/decompressor.h2
-rw-r--r--fs/squashfs/dir.c2
-rw-r--r--fs/squashfs/export.c42
-rw-r--r--fs/squashfs/file.c2
-rw-r--r--fs/squashfs/fragment.c37
-rw-r--r--fs/squashfs/id.c42
-rw-r--r--fs/squashfs/inode.c2
-rw-r--r--fs/squashfs/namei.c2
-rw-r--r--fs/squashfs/squashfs.h10
-rw-r--r--fs/squashfs/squashfs_fs.h2
-rw-r--r--fs/squashfs/squashfs_fs_i.h2
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/super.c112
-rw-r--r--fs/squashfs/symlink.c2
-rw-r--r--fs/squashfs/xattr.c2
-rw-r--r--fs/squashfs/xattr.h3
-rw-r--r--fs/squashfs/xattr_id.c47
-rw-r--r--fs/squashfs/xz_wrapper.c2
-rw-r--r--fs/squashfs/zlib_wrapper.c2
-rw-r--r--fs/ufs/balloc.c9
-rw-r--r--fs/ufs/truncate.c2
116 files changed, 8455 insertions, 5907 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 31610ea73aec..9b72dcf1cd25 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 44ea5b92e1ba..f66fc9959733 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -288,7 +288,7 @@ int btrfs_acl_chmod(struct inode *inode)
288 return 0; 288 return 0;
289 289
290 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 290 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
291 if (IS_ERR(acl) || !acl) 291 if (IS_ERR_OR_NULL(acl))
292 return PTR_ERR(acl); 292 return PTR_ERR(acl);
293 293
294 clone = posix_acl_clone(acl, GFP_KERNEL); 294 clone = posix_acl_clone(acl, GFP_KERNEL);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 57c3bb2884ce..93b1aa932014 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -22,6 +22,7 @@
22#include "extent_map.h" 22#include "extent_map.h"
23#include "extent_io.h" 23#include "extent_io.h"
24#include "ordered-data.h" 24#include "ordered-data.h"
25#include "delayed-inode.h"
25 26
26/* in memory btrfs inode */ 27/* in memory btrfs inode */
27struct btrfs_inode { 28struct btrfs_inode {
@@ -152,20 +153,34 @@ struct btrfs_inode {
152 unsigned ordered_data_close:1; 153 unsigned ordered_data_close:1;
153 unsigned orphan_meta_reserved:1; 154 unsigned orphan_meta_reserved:1;
154 unsigned dummy_inode:1; 155 unsigned dummy_inode:1;
156 unsigned in_defrag:1;
155 157
156 /* 158 /*
157 * always compress this one file 159 * always compress this one file
158 */ 160 */
159 unsigned force_compress:4; 161 unsigned force_compress:4;
160 162
163 struct btrfs_delayed_node *delayed_node;
164
161 struct inode vfs_inode; 165 struct inode vfs_inode;
162}; 166};
163 167
168extern unsigned char btrfs_filetype_table[];
169
164static inline struct btrfs_inode *BTRFS_I(struct inode *inode) 170static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
165{ 171{
166 return container_of(inode, struct btrfs_inode, vfs_inode); 172 return container_of(inode, struct btrfs_inode, vfs_inode);
167} 173}
168 174
175static inline u64 btrfs_ino(struct inode *inode)
176{
177 u64 ino = BTRFS_I(inode)->location.objectid;
178
179 if (ino <= BTRFS_FIRST_FREE_OBJECTID)
180 ino = inode->i_ino;
181 return ino;
182}
183
169static inline void btrfs_i_size_write(struct inode *inode, u64 size) 184static inline void btrfs_i_size_write(struct inode *inode, u64 size)
170{ 185{
171 i_size_write(inode, size); 186 i_size_write(inode, size);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 41d1d7c70e29..bfe42b03eaf9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -125,9 +125,10 @@ static int check_compressed_csum(struct inode *inode,
125 kunmap_atomic(kaddr, KM_USER0); 125 kunmap_atomic(kaddr, KM_USER0);
126 126
127 if (csum != *cb_sum) { 127 if (csum != *cb_sum) {
128 printk(KERN_INFO "btrfs csum failed ino %lu " 128 printk(KERN_INFO "btrfs csum failed ino %llu "
129 "extent %llu csum %u " 129 "extent %llu csum %u "
130 "wanted %u mirror %d\n", inode->i_ino, 130 "wanted %u mirror %d\n",
131 (unsigned long long)btrfs_ino(inode),
131 (unsigned long long)disk_start, 132 (unsigned long long)disk_start,
132 csum, *cb_sum, cb->mirror_num); 133 csum, *cb_sum, cb->mirror_num);
133 ret = -EIO; 134 ret = -EIO;
@@ -332,7 +333,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
332 struct compressed_bio *cb; 333 struct compressed_bio *cb;
333 unsigned long bytes_left; 334 unsigned long bytes_left;
334 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 335 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
335 int page_index = 0; 336 int pg_index = 0;
336 struct page *page; 337 struct page *page;
337 u64 first_byte = disk_start; 338 u64 first_byte = disk_start;
338 struct block_device *bdev; 339 struct block_device *bdev;
@@ -366,8 +367,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
366 367
367 /* create and submit bios for the compressed pages */ 368 /* create and submit bios for the compressed pages */
368 bytes_left = compressed_len; 369 bytes_left = compressed_len;
369 for (page_index = 0; page_index < cb->nr_pages; page_index++) { 370 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
370 page = compressed_pages[page_index]; 371 page = compressed_pages[pg_index];
371 page->mapping = inode->i_mapping; 372 page->mapping = inode->i_mapping;
372 if (bio->bi_size) 373 if (bio->bi_size)
373 ret = io_tree->ops->merge_bio_hook(page, 0, 374 ret = io_tree->ops->merge_bio_hook(page, 0,
@@ -432,7 +433,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
432 struct compressed_bio *cb) 433 struct compressed_bio *cb)
433{ 434{
434 unsigned long end_index; 435 unsigned long end_index;
435 unsigned long page_index; 436 unsigned long pg_index;
436 u64 last_offset; 437 u64 last_offset;
437 u64 isize = i_size_read(inode); 438 u64 isize = i_size_read(inode);
438 int ret; 439 int ret;
@@ -456,13 +457,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
456 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 457 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
457 458
458 while (last_offset < compressed_end) { 459 while (last_offset < compressed_end) {
459 page_index = last_offset >> PAGE_CACHE_SHIFT; 460 pg_index = last_offset >> PAGE_CACHE_SHIFT;
460 461
461 if (page_index > end_index) 462 if (pg_index > end_index)
462 break; 463 break;
463 464
464 rcu_read_lock(); 465 rcu_read_lock();
465 page = radix_tree_lookup(&mapping->page_tree, page_index); 466 page = radix_tree_lookup(&mapping->page_tree, pg_index);
466 rcu_read_unlock(); 467 rcu_read_unlock();
467 if (page) { 468 if (page) {
468 misses++; 469 misses++;
@@ -476,7 +477,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
476 if (!page) 477 if (!page)
477 break; 478 break;
478 479
479 if (add_to_page_cache_lru(page, mapping, page_index, 480 if (add_to_page_cache_lru(page, mapping, pg_index,
480 GFP_NOFS)) { 481 GFP_NOFS)) {
481 page_cache_release(page); 482 page_cache_release(page);
482 goto next; 483 goto next;
@@ -560,7 +561,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
560 unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; 561 unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
561 unsigned long compressed_len; 562 unsigned long compressed_len;
562 unsigned long nr_pages; 563 unsigned long nr_pages;
563 unsigned long page_index; 564 unsigned long pg_index;
564 struct page *page; 565 struct page *page;
565 struct block_device *bdev; 566 struct block_device *bdev;
566 struct bio *comp_bio; 567 struct bio *comp_bio;
@@ -613,10 +614,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
613 614
614 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 615 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
615 616
616 for (page_index = 0; page_index < nr_pages; page_index++) { 617 for (pg_index = 0; pg_index < nr_pages; pg_index++) {
617 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | 618 cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
618 __GFP_HIGHMEM); 619 __GFP_HIGHMEM);
619 if (!cb->compressed_pages[page_index]) 620 if (!cb->compressed_pages[pg_index])
620 goto fail2; 621 goto fail2;
621 } 622 }
622 cb->nr_pages = nr_pages; 623 cb->nr_pages = nr_pages;
@@ -634,8 +635,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
634 comp_bio->bi_end_io = end_compressed_bio_read; 635 comp_bio->bi_end_io = end_compressed_bio_read;
635 atomic_inc(&cb->pending_bios); 636 atomic_inc(&cb->pending_bios);
636 637
637 for (page_index = 0; page_index < nr_pages; page_index++) { 638 for (pg_index = 0; pg_index < nr_pages; pg_index++) {
638 page = cb->compressed_pages[page_index]; 639 page = cb->compressed_pages[pg_index];
639 page->mapping = inode->i_mapping; 640 page->mapping = inode->i_mapping;
640 page->index = em_start >> PAGE_CACHE_SHIFT; 641 page->index = em_start >> PAGE_CACHE_SHIFT;
641 642
@@ -702,8 +703,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
702 return 0; 703 return 0;
703 704
704fail2: 705fail2:
705 for (page_index = 0; page_index < nr_pages; page_index++) 706 for (pg_index = 0; pg_index < nr_pages; pg_index++)
706 free_page((unsigned long)cb->compressed_pages[page_index]); 707 free_page((unsigned long)cb->compressed_pages[pg_index]);
707 708
708 kfree(cb->compressed_pages); 709 kfree(cb->compressed_pages);
709fail1: 710fail1:
@@ -945,7 +946,7 @@ void btrfs_exit_compress(void)
945int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 946int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
946 unsigned long total_out, u64 disk_start, 947 unsigned long total_out, u64 disk_start,
947 struct bio_vec *bvec, int vcnt, 948 struct bio_vec *bvec, int vcnt,
948 unsigned long *page_index, 949 unsigned long *pg_index,
949 unsigned long *pg_offset) 950 unsigned long *pg_offset)
950{ 951{
951 unsigned long buf_offset; 952 unsigned long buf_offset;
@@ -954,7 +955,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
954 unsigned long working_bytes = total_out - buf_start; 955 unsigned long working_bytes = total_out - buf_start;
955 unsigned long bytes; 956 unsigned long bytes;
956 char *kaddr; 957 char *kaddr;
957 struct page *page_out = bvec[*page_index].bv_page; 958 struct page *page_out = bvec[*pg_index].bv_page;
958 959
959 /* 960 /*
960 * start byte is the first byte of the page we're currently 961 * start byte is the first byte of the page we're currently
@@ -995,11 +996,11 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
995 996
996 /* check if we need to pick another page */ 997 /* check if we need to pick another page */
997 if (*pg_offset == PAGE_CACHE_SIZE) { 998 if (*pg_offset == PAGE_CACHE_SIZE) {
998 (*page_index)++; 999 (*pg_index)++;
999 if (*page_index >= vcnt) 1000 if (*pg_index >= vcnt)
1000 return 0; 1001 return 0;
1001 1002
1002 page_out = bvec[*page_index].bv_page; 1003 page_out = bvec[*pg_index].bv_page;
1003 *pg_offset = 0; 1004 *pg_offset = 0;
1004 start_byte = page_offset(page_out) - disk_start; 1005 start_byte = page_offset(page_out) - disk_start;
1005 1006
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 51000174b9d7..a12059f4f0fd 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -37,7 +37,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
37int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 37int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
38 unsigned long total_out, u64 disk_start, 38 unsigned long total_out, u64 disk_start,
39 struct bio_vec *bvec, int vcnt, 39 struct bio_vec *bvec, int vcnt,
40 unsigned long *page_index, 40 unsigned long *pg_index,
41 unsigned long *pg_offset); 41 unsigned long *pg_offset);
42 42
43int btrfs_submit_compressed_write(struct inode *inode, u64 start, 43int btrfs_submit_compressed_write(struct inode *inode, u64 start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 84d7ca1fe0ba..b0e18d986e0a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,11 +38,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
38 struct extent_buffer *src_buf); 38 struct extent_buffer *src_buf);
39static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 39static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
40 struct btrfs_path *path, int level, int slot); 40 struct btrfs_path *path, int level, int slot);
41static int setup_items_for_insert(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, struct btrfs_path *path,
43 struct btrfs_key *cpu_key, u32 *data_size,
44 u32 total_data, u32 total_size, int nr);
45
46 41
47struct btrfs_path *btrfs_alloc_path(void) 42struct btrfs_path *btrfs_alloc_path(void)
48{ 43{
@@ -107,7 +102,7 @@ void btrfs_free_path(struct btrfs_path *p)
107{ 102{
108 if (!p) 103 if (!p)
109 return; 104 return;
110 btrfs_release_path(NULL, p); 105 btrfs_release_path(p);
111 kmem_cache_free(btrfs_path_cachep, p); 106 kmem_cache_free(btrfs_path_cachep, p);
112} 107}
113 108
@@ -117,7 +112,7 @@ void btrfs_free_path(struct btrfs_path *p)
117 * 112 *
118 * It is safe to call this on paths that no locks or extent buffers held. 113 * It is safe to call this on paths that no locks or extent buffers held.
119 */ 114 */
120noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 115noinline void btrfs_release_path(struct btrfs_path *p)
121{ 116{
122 int i; 117 int i;
123 118
@@ -1328,7 +1323,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1328 ret = -EAGAIN; 1323 ret = -EAGAIN;
1329 1324
1330 /* release the whole path */ 1325 /* release the whole path */
1331 btrfs_release_path(root, path); 1326 btrfs_release_path(path);
1332 1327
1333 /* read the blocks */ 1328 /* read the blocks */
1334 if (block1) 1329 if (block1)
@@ -1475,7 +1470,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1475 return 0; 1470 return 0;
1476 } 1471 }
1477 free_extent_buffer(tmp); 1472 free_extent_buffer(tmp);
1478 btrfs_release_path(NULL, p); 1473 btrfs_release_path(p);
1479 return -EIO; 1474 return -EIO;
1480 } 1475 }
1481 } 1476 }
@@ -1494,7 +1489,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1494 if (p->reada) 1489 if (p->reada)
1495 reada_for_search(root, p, level, slot, key->objectid); 1490 reada_for_search(root, p, level, slot, key->objectid);
1496 1491
1497 btrfs_release_path(NULL, p); 1492 btrfs_release_path(p);
1498 1493
1499 ret = -EAGAIN; 1494 ret = -EAGAIN;
1500 tmp = read_tree_block(root, blocknr, blocksize, 0); 1495 tmp = read_tree_block(root, blocknr, blocksize, 0);
@@ -1563,7 +1558,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
1563 } 1558 }
1564 b = p->nodes[level]; 1559 b = p->nodes[level];
1565 if (!b) { 1560 if (!b) {
1566 btrfs_release_path(NULL, p); 1561 btrfs_release_path(p);
1567 goto again; 1562 goto again;
1568 } 1563 }
1569 BUG_ON(btrfs_header_nritems(b) == 1); 1564 BUG_ON(btrfs_header_nritems(b) == 1);
@@ -1753,7 +1748,7 @@ done:
1753 if (!p->leave_spinning) 1748 if (!p->leave_spinning)
1754 btrfs_set_path_blocking(p); 1749 btrfs_set_path_blocking(p);
1755 if (ret < 0) 1750 if (ret < 0)
1756 btrfs_release_path(root, p); 1751 btrfs_release_path(p);
1757 return ret; 1752 return ret;
1758} 1753}
1759 1754
@@ -3026,7 +3021,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3026 struct btrfs_file_extent_item); 3021 struct btrfs_file_extent_item);
3027 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 3022 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3028 } 3023 }
3029 btrfs_release_path(root, path); 3024 btrfs_release_path(path);
3030 3025
3031 path->keep_locks = 1; 3026 path->keep_locks = 1;
3032 path->search_for_split = 1; 3027 path->search_for_split = 1;
@@ -3216,7 +3211,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3216 struct btrfs_path *path, 3211 struct btrfs_path *path,
3217 u32 new_size, int from_end) 3212 u32 new_size, int from_end)
3218{ 3213{
3219 int ret = 0;
3220 int slot; 3214 int slot;
3221 struct extent_buffer *leaf; 3215 struct extent_buffer *leaf;
3222 struct btrfs_item *item; 3216 struct btrfs_item *item;
@@ -3314,12 +3308,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3314 btrfs_set_item_size(leaf, item, new_size); 3308 btrfs_set_item_size(leaf, item, new_size);
3315 btrfs_mark_buffer_dirty(leaf); 3309 btrfs_mark_buffer_dirty(leaf);
3316 3310
3317 ret = 0;
3318 if (btrfs_leaf_free_space(root, leaf) < 0) { 3311 if (btrfs_leaf_free_space(root, leaf) < 0) {
3319 btrfs_print_leaf(root, leaf); 3312 btrfs_print_leaf(root, leaf);
3320 BUG(); 3313 BUG();
3321 } 3314 }
3322 return ret; 3315 return 0;
3323} 3316}
3324 3317
3325/* 3318/*
@@ -3329,7 +3322,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root, struct btrfs_path *path, 3322 struct btrfs_root *root, struct btrfs_path *path,
3330 u32 data_size) 3323 u32 data_size)
3331{ 3324{
3332 int ret = 0;
3333 int slot; 3325 int slot;
3334 struct extent_buffer *leaf; 3326 struct extent_buffer *leaf;
3335 struct btrfs_item *item; 3327 struct btrfs_item *item;
@@ -3394,12 +3386,11 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3394 btrfs_set_item_size(leaf, item, old_size + data_size); 3386 btrfs_set_item_size(leaf, item, old_size + data_size);
3395 btrfs_mark_buffer_dirty(leaf); 3387 btrfs_mark_buffer_dirty(leaf);
3396 3388
3397 ret = 0;
3398 if (btrfs_leaf_free_space(root, leaf) < 0) { 3389 if (btrfs_leaf_free_space(root, leaf) < 0) {
3399 btrfs_print_leaf(root, leaf); 3390 btrfs_print_leaf(root, leaf);
3400 BUG(); 3391 BUG();
3401 } 3392 }
3402 return ret; 3393 return 0;
3403} 3394}
3404 3395
3405/* 3396/*
@@ -3559,11 +3550,10 @@ out:
3559 * to save stack depth by doing the bulk of the work in a function 3550 * to save stack depth by doing the bulk of the work in a function
3560 * that doesn't call btrfs_search_slot 3551 * that doesn't call btrfs_search_slot
3561 */ 3552 */
3562static noinline_for_stack int 3553int setup_items_for_insert(struct btrfs_trans_handle *trans,
3563setup_items_for_insert(struct btrfs_trans_handle *trans, 3554 struct btrfs_root *root, struct btrfs_path *path,
3564 struct btrfs_root *root, struct btrfs_path *path, 3555 struct btrfs_key *cpu_key, u32 *data_size,
3565 struct btrfs_key *cpu_key, u32 *data_size, 3556 u32 total_data, u32 total_size, int nr)
3566 u32 total_data, u32 total_size, int nr)
3567{ 3557{
3568 struct btrfs_item *item; 3558 struct btrfs_item *item;
3569 int i; 3559 int i;
@@ -3647,7 +3637,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
3647 3637
3648 ret = 0; 3638 ret = 0;
3649 if (slot == 0) { 3639 if (slot == 0) {
3650 struct btrfs_disk_key disk_key;
3651 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3640 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3652 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3641 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3653 } 3642 }
@@ -3949,7 +3938,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3949 else 3938 else
3950 return 1; 3939 return 1;
3951 3940
3952 btrfs_release_path(root, path); 3941 btrfs_release_path(path);
3953 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3942 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3954 if (ret < 0) 3943 if (ret < 0)
3955 return ret; 3944 return ret;
@@ -4073,7 +4062,7 @@ find_next_key:
4073 sret = btrfs_find_next_key(root, path, min_key, level, 4062 sret = btrfs_find_next_key(root, path, min_key, level,
4074 cache_only, min_trans); 4063 cache_only, min_trans);
4075 if (sret == 0) { 4064 if (sret == 0) {
4076 btrfs_release_path(root, path); 4065 btrfs_release_path(path);
4077 goto again; 4066 goto again;
4078 } else { 4067 } else {
4079 goto out; 4068 goto out;
@@ -4152,7 +4141,7 @@ next:
4152 btrfs_node_key_to_cpu(c, &cur_key, slot); 4141 btrfs_node_key_to_cpu(c, &cur_key, slot);
4153 4142
4154 orig_lowest = path->lowest_level; 4143 orig_lowest = path->lowest_level;
4155 btrfs_release_path(root, path); 4144 btrfs_release_path(path);
4156 path->lowest_level = level; 4145 path->lowest_level = level;
4157 ret = btrfs_search_slot(NULL, root, &cur_key, path, 4146 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4158 0, 0); 4147 0, 0);
@@ -4229,7 +4218,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4229again: 4218again:
4230 level = 1; 4219 level = 1;
4231 next = NULL; 4220 next = NULL;
4232 btrfs_release_path(root, path); 4221 btrfs_release_path(path);
4233 4222
4234 path->keep_locks = 1; 4223 path->keep_locks = 1;
4235 4224
@@ -4285,7 +4274,7 @@ again:
4285 goto again; 4274 goto again;
4286 4275
4287 if (ret < 0) { 4276 if (ret < 0) {
4288 btrfs_release_path(root, path); 4277 btrfs_release_path(path);
4289 goto done; 4278 goto done;
4290 } 4279 }
4291 4280
@@ -4324,7 +4313,7 @@ again:
4324 goto again; 4313 goto again;
4325 4314
4326 if (ret < 0) { 4315 if (ret < 0) {
4327 btrfs_release_path(root, path); 4316 btrfs_release_path(path);
4328 goto done; 4317 goto done;
4329 } 4318 }
4330 4319
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8f4b81de3ae2..332323e19dd1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/highmem.h> 24#include <linux/highmem.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rwsem.h>
26#include <linux/completion.h> 27#include <linux/completion.h>
27#include <linux/backing-dev.h> 28#include <linux/backing-dev.h>
28#include <linux/wait.h> 29#include <linux/wait.h>
@@ -33,6 +34,7 @@
33#include "extent_io.h" 34#include "extent_io.h"
34#include "extent_map.h" 35#include "extent_map.h"
35#include "async-thread.h" 36#include "async-thread.h"
37#include "ioctl.h"
36 38
37struct btrfs_trans_handle; 39struct btrfs_trans_handle;
38struct btrfs_transaction; 40struct btrfs_transaction;
@@ -105,6 +107,12 @@ struct btrfs_ordered_sum;
105/* For storing free space cache */ 107/* For storing free space cache */
106#define BTRFS_FREE_SPACE_OBJECTID -11ULL 108#define BTRFS_FREE_SPACE_OBJECTID -11ULL
107 109
110/*
111 * The inode number assigned to the special inode for sotring
112 * free ino cache
113 */
114#define BTRFS_FREE_INO_OBJECTID -12ULL
115
108/* dummy objectid represents multiple objectids */ 116/* dummy objectid represents multiple objectids */
109#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 117#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
110 118
@@ -187,7 +195,6 @@ struct btrfs_mapping_tree {
187 struct extent_map_tree map_tree; 195 struct extent_map_tree map_tree;
188}; 196};
189 197
190#define BTRFS_UUID_SIZE 16
191struct btrfs_dev_item { 198struct btrfs_dev_item {
192 /* the internal btrfs device id */ 199 /* the internal btrfs device id */
193 __le64 devid; 200 __le64 devid;
@@ -294,7 +301,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
294 sizeof(struct btrfs_stripe) * (num_stripes - 1); 301 sizeof(struct btrfs_stripe) * (num_stripes - 1);
295} 302}
296 303
297#define BTRFS_FSID_SIZE 16
298#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) 304#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
299#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) 305#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
300 306
@@ -510,6 +516,12 @@ struct btrfs_extent_item_v0 {
510/* use full backrefs for extent pointers in the block */ 516/* use full backrefs for extent pointers in the block */
511#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) 517#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
512 518
519/*
520 * this flag is only used internally by scrub and may be changed at any time
521 * it is only declared here to avoid collisions
522 */
523#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
524
513struct btrfs_tree_block_info { 525struct btrfs_tree_block_info {
514 struct btrfs_disk_key key; 526 struct btrfs_disk_key key;
515 u8 level; 527 u8 level;
@@ -740,12 +752,12 @@ struct btrfs_space_info {
740 */ 752 */
741 unsigned long reservation_progress; 753 unsigned long reservation_progress;
742 754
743 int full:1; /* indicates that we cannot allocate any more 755 unsigned int full:1; /* indicates that we cannot allocate any more
744 chunks for this space */ 756 chunks for this space */
745 int chunk_alloc:1; /* set if we are allocating a chunk */ 757 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
746 758
747 int force_alloc; /* set if we need to force a chunk alloc for 759 unsigned int force_alloc; /* set if we need to force a chunk
748 this space */ 760 alloc for this space */
749 761
750 struct list_head list; 762 struct list_head list;
751 763
@@ -830,9 +842,6 @@ struct btrfs_block_group_cache {
830 u64 bytes_super; 842 u64 bytes_super;
831 u64 flags; 843 u64 flags;
832 u64 sectorsize; 844 u64 sectorsize;
833 int extents_thresh;
834 int free_extents;
835 int total_bitmaps;
836 unsigned int ro:1; 845 unsigned int ro:1;
837 unsigned int dirty:1; 846 unsigned int dirty:1;
838 unsigned int iref:1; 847 unsigned int iref:1;
@@ -847,9 +856,7 @@ struct btrfs_block_group_cache {
847 struct btrfs_space_info *space_info; 856 struct btrfs_space_info *space_info;
848 857
849 /* free space cache stuff */ 858 /* free space cache stuff */
850 spinlock_t tree_lock; 859 struct btrfs_free_space_ctl *free_space_ctl;
851 struct rb_root free_space_offset;
852 u64 free_space;
853 860
854 /* block group cache stuff */ 861 /* block group cache stuff */
855 struct rb_node cache_node; 862 struct rb_node cache_node;
@@ -869,6 +876,7 @@ struct btrfs_block_group_cache {
869struct reloc_control; 876struct reloc_control;
870struct btrfs_device; 877struct btrfs_device;
871struct btrfs_fs_devices; 878struct btrfs_fs_devices;
879struct btrfs_delayed_root;
872struct btrfs_fs_info { 880struct btrfs_fs_info {
873 u8 fsid[BTRFS_FSID_SIZE]; 881 u8 fsid[BTRFS_FSID_SIZE];
874 u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; 882 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -895,7 +903,10 @@ struct btrfs_fs_info {
895 /* logical->physical extent mapping */ 903 /* logical->physical extent mapping */
896 struct btrfs_mapping_tree mapping_tree; 904 struct btrfs_mapping_tree mapping_tree;
897 905
898 /* block reservation for extent, checksum and root tree */ 906 /*
907 * block reservation for extent, checksum, root tree and
908 * delayed dir index item
909 */
899 struct btrfs_block_rsv global_block_rsv; 910 struct btrfs_block_rsv global_block_rsv;
900 /* block reservation for delay allocation */ 911 /* block reservation for delay allocation */
901 struct btrfs_block_rsv delalloc_block_rsv; 912 struct btrfs_block_rsv delalloc_block_rsv;
@@ -1022,6 +1033,7 @@ struct btrfs_fs_info {
1022 * for the sys_munmap function call path 1033 * for the sys_munmap function call path
1023 */ 1034 */
1024 struct btrfs_workers fixup_workers; 1035 struct btrfs_workers fixup_workers;
1036 struct btrfs_workers delayed_workers;
1025 struct task_struct *transaction_kthread; 1037 struct task_struct *transaction_kthread;
1026 struct task_struct *cleaner_kthread; 1038 struct task_struct *cleaner_kthread;
1027 int thread_pool_size; 1039 int thread_pool_size;
@@ -1062,6 +1074,11 @@ struct btrfs_fs_info {
1062 /* all metadata allocations go through this cluster */ 1074 /* all metadata allocations go through this cluster */
1063 struct btrfs_free_cluster meta_alloc_cluster; 1075 struct btrfs_free_cluster meta_alloc_cluster;
1064 1076
1077 /* auto defrag inodes go here */
1078 spinlock_t defrag_inodes_lock;
1079 struct rb_root defrag_inodes;
1080 atomic_t defrag_running;
1081
1065 spinlock_t ref_cache_lock; 1082 spinlock_t ref_cache_lock;
1066 u64 total_ref_cache_size; 1083 u64 total_ref_cache_size;
1067 1084
@@ -1077,8 +1094,21 @@ struct btrfs_fs_info {
1077 1094
1078 void *bdev_holder; 1095 void *bdev_holder;
1079 1096
1097 /* private scrub information */
1098 struct mutex scrub_lock;
1099 atomic_t scrubs_running;
1100 atomic_t scrub_pause_req;
1101 atomic_t scrubs_paused;
1102 atomic_t scrub_cancel_req;
1103 wait_queue_head_t scrub_pause_wait;
1104 struct rw_semaphore scrub_super_lock;
1105 int scrub_workers_refcnt;
1106 struct btrfs_workers scrub_workers;
1107
1080 /* filesystem state */ 1108 /* filesystem state */
1081 u64 fs_state; 1109 u64 fs_state;
1110
1111 struct btrfs_delayed_root *delayed_root;
1082}; 1112};
1083 1113
1084/* 1114/*
@@ -1088,9 +1118,6 @@ struct btrfs_fs_info {
1088struct btrfs_root { 1118struct btrfs_root {
1089 struct extent_buffer *node; 1119 struct extent_buffer *node;
1090 1120
1091 /* the node lock is held while changing the node pointer */
1092 spinlock_t node_lock;
1093
1094 struct extent_buffer *commit_root; 1121 struct extent_buffer *commit_root;
1095 struct btrfs_root *log_root; 1122 struct btrfs_root *log_root;
1096 struct btrfs_root *reloc_root; 1123 struct btrfs_root *reloc_root;
@@ -1107,6 +1134,16 @@ struct btrfs_root {
1107 spinlock_t accounting_lock; 1134 spinlock_t accounting_lock;
1108 struct btrfs_block_rsv *block_rsv; 1135 struct btrfs_block_rsv *block_rsv;
1109 1136
1137 /* free ino cache stuff */
1138 struct mutex fs_commit_mutex;
1139 struct btrfs_free_space_ctl *free_ino_ctl;
1140 enum btrfs_caching_type cached;
1141 spinlock_t cache_lock;
1142 wait_queue_head_t cache_wait;
1143 struct btrfs_free_space_ctl *free_ino_pinned;
1144 u64 cache_progress;
1145 struct inode *cache_inode;
1146
1110 struct mutex log_mutex; 1147 struct mutex log_mutex;
1111 wait_queue_head_t log_writer_wait; 1148 wait_queue_head_t log_writer_wait;
1112 wait_queue_head_t log_commit_wait[2]; 1149 wait_queue_head_t log_commit_wait[2];
@@ -1162,12 +1199,49 @@ struct btrfs_root {
1162 struct rb_root inode_tree; 1199 struct rb_root inode_tree;
1163 1200
1164 /* 1201 /*
1202 * radix tree that keeps track of delayed nodes of every inode,
1203 * protected by inode_lock
1204 */
1205 struct radix_tree_root delayed_nodes_tree;
1206 /*
1165 * right now this just gets used so that a root has its own devid 1207 * right now this just gets used so that a root has its own devid
1166 * for stat. It may be used for more later 1208 * for stat. It may be used for more later
1167 */ 1209 */
1168 struct super_block anon_super; 1210 struct super_block anon_super;
1169}; 1211};
1170 1212
1213struct btrfs_ioctl_defrag_range_args {
1214 /* start of the defrag operation */
1215 __u64 start;
1216
1217 /* number of bytes to defrag, use (u64)-1 to say all */
1218 __u64 len;
1219
1220 /*
1221 * flags for the operation, which can include turning
1222 * on compression for this one defrag
1223 */
1224 __u64 flags;
1225
1226 /*
1227 * any extent bigger than this will be considered
1228 * already defragged. Use 0 to take the kernel default
1229 * Use 1 to say every single extent must be rewritten
1230 */
1231 __u32 extent_thresh;
1232
1233 /*
1234 * which compression method to use if turning on compression
1235 * for this defrag operation. If unspecified, zlib will
1236 * be used
1237 */
1238 __u32 compress_type;
1239
1240 /* spare for later */
1241 __u32 unused[4];
1242};
1243
1244
1171/* 1245/*
1172 * inode items have the data typically returned from stat and store other 1246 * inode items have the data typically returned from stat and store other
1173 * info about object characteristics. There is one for every file and dir in 1247 * info about object characteristics. There is one for every file and dir in
@@ -1265,6 +1339,7 @@ struct btrfs_root {
1265#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1339#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1266#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1267#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1268 1343
1269#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1344#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1270#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1345#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1440,26 +1515,12 @@ static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
1440 return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); 1515 return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
1441} 1516}
1442 1517
1443static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
1444 struct btrfs_chunk *c, int nr,
1445 u64 val)
1446{
1447 btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
1448}
1449
1450static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, 1518static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
1451 struct btrfs_chunk *c, int nr) 1519 struct btrfs_chunk *c, int nr)
1452{ 1520{
1453 return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); 1521 return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
1454} 1522}
1455 1523
1456static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
1457 struct btrfs_chunk *c, int nr,
1458 u64 val)
1459{
1460 btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
1461}
1462
1463/* struct btrfs_block_group_item */ 1524/* struct btrfs_block_group_item */
1464BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, 1525BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
1465 used, 64); 1526 used, 64);
@@ -1517,14 +1578,6 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
1517 return (struct btrfs_timespec *)ptr; 1578 return (struct btrfs_timespec *)ptr;
1518} 1579}
1519 1580
1520static inline struct btrfs_timespec *
1521btrfs_inode_otime(struct btrfs_inode_item *inode_item)
1522{
1523 unsigned long ptr = (unsigned long)inode_item;
1524 ptr += offsetof(struct btrfs_inode_item, otime);
1525 return (struct btrfs_timespec *)ptr;
1526}
1527
1528BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); 1581BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
1529BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); 1582BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
1530 1583
@@ -1875,33 +1928,6 @@ static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
1875 return (u8 *)ptr; 1928 return (u8 *)ptr;
1876} 1929}
1877 1930
1878static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
1879{
1880 unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
1881 return (u8 *)ptr;
1882}
1883
1884static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
1885{
1886 unsigned long ptr = offsetof(struct btrfs_header, csum);
1887 return (u8 *)ptr;
1888}
1889
1890static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
1891{
1892 return NULL;
1893}
1894
1895static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
1896{
1897 return NULL;
1898}
1899
1900static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
1901{
1902 return NULL;
1903}
1904
1905static inline int btrfs_is_leaf(struct extent_buffer *eb) 1931static inline int btrfs_is_leaf(struct extent_buffer *eb)
1906{ 1932{
1907 return btrfs_header_level(eb) == 0; 1933 return btrfs_header_level(eb) == 0;
@@ -2055,22 +2081,6 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
2055 return sb->s_fs_info; 2081 return sb->s_fs_info;
2056} 2082}
2057 2083
2058static inline int btrfs_set_root_name(struct btrfs_root *root,
2059 const char *name, int len)
2060{
2061 /* if we already have a name just free it */
2062 kfree(root->name);
2063
2064 root->name = kmalloc(len+1, GFP_KERNEL);
2065 if (!root->name)
2066 return -ENOMEM;
2067
2068 memcpy(root->name, name, len);
2069 root->name[len] = '\0';
2070
2071 return 0;
2072}
2073
2074static inline u32 btrfs_level_size(struct btrfs_root *root, int level) 2084static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
2075{ 2085{
2076 if (level == 0) 2086 if (level == 0)
@@ -2099,6 +2109,13 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2099} 2109}
2100 2110
2101/* extent-tree.c */ 2111/* extent-tree.c */
2112static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
2113 int num_items)
2114{
2115 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
2116 3 * num_items;
2117}
2118
2102void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2119void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
2103int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2120int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root, unsigned long count); 2121 struct btrfs_root *root, unsigned long count);
@@ -2108,12 +2125,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
2108 u64 num_bytes, u64 *refs, u64 *flags); 2125 u64 num_bytes, u64 *refs, u64 *flags);
2109int btrfs_pin_extent(struct btrfs_root *root, 2126int btrfs_pin_extent(struct btrfs_root *root,
2110 u64 bytenr, u64 num, int reserved); 2127 u64 bytenr, u64 num, int reserved);
2111int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2112 struct btrfs_root *root, struct extent_buffer *leaf);
2113int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2128int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2114 struct btrfs_root *root, 2129 struct btrfs_root *root,
2115 u64 objectid, u64 offset, u64 bytenr); 2130 u64 objectid, u64 offset, u64 bytenr);
2116int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
2117struct btrfs_block_group_cache *btrfs_lookup_block_group( 2131struct btrfs_block_group_cache *btrfs_lookup_block_group(
2118 struct btrfs_fs_info *info, 2132 struct btrfs_fs_info *info,
2119 u64 bytenr); 2133 u64 bytenr);
@@ -2290,10 +2304,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
2290 struct btrfs_root *root, struct extent_buffer *parent, 2304 struct btrfs_root *root, struct extent_buffer *parent,
2291 int start_slot, int cache_only, u64 *last_ret, 2305 int start_slot, int cache_only, u64 *last_ret,
2292 struct btrfs_key *progress); 2306 struct btrfs_key *progress);
2293void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); 2307void btrfs_release_path(struct btrfs_path *p);
2294struct btrfs_path *btrfs_alloc_path(void); 2308struct btrfs_path *btrfs_alloc_path(void);
2295void btrfs_free_path(struct btrfs_path *p); 2309void btrfs_free_path(struct btrfs_path *p);
2296void btrfs_set_path_blocking(struct btrfs_path *p); 2310void btrfs_set_path_blocking(struct btrfs_path *p);
2311void btrfs_clear_path_blocking(struct btrfs_path *p,
2312 struct extent_buffer *held);
2297void btrfs_unlock_up_safe(struct btrfs_path *p, int level); 2313void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
2298 2314
2299int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2315int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2305,13 +2321,12 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
2305 return btrfs_del_items(trans, root, path, path->slots[0], 1); 2321 return btrfs_del_items(trans, root, path, path->slots[0], 1);
2306} 2322}
2307 2323
2324int setup_items_for_insert(struct btrfs_trans_handle *trans,
2325 struct btrfs_root *root, struct btrfs_path *path,
2326 struct btrfs_key *cpu_key, u32 *data_size,
2327 u32 total_data, u32 total_size, int nr);
2308int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 2328int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2309 *root, struct btrfs_key *key, void *data, u32 data_size); 2329 *root, struct btrfs_key *key, void *data, u32 data_size);
2310int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
2311 struct btrfs_root *root,
2312 struct btrfs_path *path,
2313 struct btrfs_key *cpu_key, u32 *data_size,
2314 int nr);
2315int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 2330int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2316 struct btrfs_root *root, 2331 struct btrfs_root *root,
2317 struct btrfs_path *path, 2332 struct btrfs_path *path,
@@ -2357,8 +2372,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
2357 *item); 2372 *item);
2358int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct 2373int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2359 btrfs_root_item *item, struct btrfs_key *key); 2374 btrfs_root_item *item, struct btrfs_key *key);
2360int btrfs_search_root(struct btrfs_root *root, u64 search_start,
2361 u64 *found_objectid);
2362int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2375int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2363int btrfs_find_orphan_roots(struct btrfs_root *tree_root); 2376int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2364int btrfs_set_root_node(struct btrfs_root_item *item, 2377int btrfs_set_root_node(struct btrfs_root_item *item,
@@ -2368,7 +2381,7 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2368/* dir-item.c */ 2381/* dir-item.c */
2369int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2382int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2370 struct btrfs_root *root, const char *name, 2383 struct btrfs_root *root, const char *name,
2371 int name_len, u64 dir, 2384 int name_len, struct inode *dir,
2372 struct btrfs_key *location, u8 type, u64 index); 2385 struct btrfs_key *location, u8 type, u64 index);
2373struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, 2386struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
2374 struct btrfs_root *root, 2387 struct btrfs_root *root,
@@ -2413,12 +2426,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
2413 struct btrfs_root *root, u64 offset); 2426 struct btrfs_root *root, u64 offset);
2414int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); 2427int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
2415 2428
2416/* inode-map.c */
2417int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
2418 struct btrfs_root *fs_root,
2419 u64 dirid, u64 *objectid);
2420int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
2421
2422/* inode-item.c */ 2429/* inode-item.c */
2423int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, 2430int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
2424 struct btrfs_root *root, 2431 struct btrfs_root *root,
@@ -2463,8 +2470,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
2463 struct btrfs_ordered_sum *sums); 2470 struct btrfs_ordered_sum *sums);
2464int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 2471int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
2465 struct bio *bio, u64 file_start, int contig); 2472 struct bio *bio, u64 file_start, int contig);
2466int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
2467 u64 start, unsigned long len);
2468struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 2473struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
2469 struct btrfs_root *root, 2474 struct btrfs_root *root,
2470 struct btrfs_path *path, 2475 struct btrfs_path *path,
@@ -2472,8 +2477,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
2472int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 2477int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
2473 struct btrfs_root *root, struct btrfs_path *path, 2478 struct btrfs_root *root, struct btrfs_path *path,
2474 u64 isize); 2479 u64 isize);
2475int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, 2480int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
2476 u64 end, struct list_head *list); 2481 struct list_head *list, int search_commit);
2477/* inode.c */ 2482/* inode.c */
2478 2483
2479/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ 2484/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
@@ -2502,8 +2507,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2502 u32 min_type); 2507 u32 min_type);
2503 2508
2504int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2509int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2505int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
2506 int sync);
2507int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2510int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2508 struct extent_state **cached_state); 2511 struct extent_state **cached_state);
2509int btrfs_writepages(struct address_space *mapping, 2512int btrfs_writepages(struct address_space *mapping,
@@ -2520,7 +2523,6 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
2520int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2523int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2521int btrfs_readpage(struct file *file, struct page *page); 2524int btrfs_readpage(struct file *file, struct page *page);
2522void btrfs_evict_inode(struct inode *inode); 2525void btrfs_evict_inode(struct inode *inode);
2523void btrfs_put_inode(struct inode *inode);
2524int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2526int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2525void btrfs_dirty_inode(struct inode *inode); 2527void btrfs_dirty_inode(struct inode *inode);
2526struct inode *btrfs_alloc_inode(struct super_block *sb); 2528struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2531,10 +2533,8 @@ void btrfs_destroy_cachep(void);
2531long btrfs_ioctl_trans_end(struct file *file); 2533long btrfs_ioctl_trans_end(struct file *file);
2532struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2534struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2533 struct btrfs_root *root, int *was_new); 2535 struct btrfs_root *root, int *was_new);
2534int btrfs_commit_write(struct file *file, struct page *page,
2535 unsigned from, unsigned to);
2536struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2536struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2537 size_t page_offset, u64 start, u64 end, 2537 size_t pg_offset, u64 start, u64 end,
2538 int create); 2538 int create);
2539int btrfs_update_inode(struct btrfs_trans_handle *trans, 2539int btrfs_update_inode(struct btrfs_trans_handle *trans,
2540 struct btrfs_root *root, 2540 struct btrfs_root *root,
@@ -2566,12 +2566,16 @@ extern const struct dentry_operations btrfs_dentry_operations;
2566long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 2566long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
2567void btrfs_update_iflags(struct inode *inode); 2567void btrfs_update_iflags(struct inode *inode);
2568void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); 2568void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
2569 2569int btrfs_defrag_file(struct inode *inode, struct file *file,
2570 struct btrfs_ioctl_defrag_range_args *range,
2571 u64 newer_than, unsigned long max_pages);
2570/* file.c */ 2572/* file.c */
2573int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
2574 struct inode *inode);
2575int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
2571int btrfs_sync_file(struct file *file, int datasync); 2576int btrfs_sync_file(struct file *file, int datasync);
2572int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 2577int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2573 int skip_pinned); 2578 int skip_pinned);
2574int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2575extern const struct file_operations btrfs_file_operations; 2579extern const struct file_operations btrfs_file_operations;
2576int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, 2580int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2577 u64 start, u64 end, u64 *hint_byte, int drop_cache); 2581 u64 start, u64 end, u64 *hint_byte, int drop_cache);
@@ -2591,10 +2595,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
2591/* sysfs.c */ 2595/* sysfs.c */
2592int btrfs_init_sysfs(void); 2596int btrfs_init_sysfs(void);
2593void btrfs_exit_sysfs(void); 2597void btrfs_exit_sysfs(void);
2594int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
2595int btrfs_sysfs_add_root(struct btrfs_root *root);
2596void btrfs_sysfs_del_root(struct btrfs_root *root);
2597void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
2598 2598
2599/* xattr.c */ 2599/* xattr.c */
2600ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 2600ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -2637,4 +2637,18 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
2637 u64 *bytes_to_reserve); 2637 u64 *bytes_to_reserve);
2638void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 2638void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
2639 struct btrfs_pending_snapshot *pending); 2639 struct btrfs_pending_snapshot *pending);
2640
2641/* scrub.c */
2642int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
2643 struct btrfs_scrub_progress *progress, int readonly);
2644int btrfs_scrub_pause(struct btrfs_root *root);
2645int btrfs_scrub_pause_super(struct btrfs_root *root);
2646int btrfs_scrub_continue(struct btrfs_root *root);
2647int btrfs_scrub_continue_super(struct btrfs_root *root);
2648int btrfs_scrub_cancel(struct btrfs_root *root);
2649int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
2650int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
2651int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
2652 struct btrfs_scrub_progress *progress);
2653
2640#endif 2654#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
new file mode 100644
index 000000000000..01e29503a54b
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@ -0,0 +1,1695 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#include <linux/slab.h>
21#include "delayed-inode.h"
22#include "disk-io.h"
23#include "transaction.h"
24
25#define BTRFS_DELAYED_WRITEBACK 400
26#define BTRFS_DELAYED_BACKGROUND 100
27
28static struct kmem_cache *delayed_node_cache;
29
30int __init btrfs_delayed_inode_init(void)
31{
32 delayed_node_cache = kmem_cache_create("delayed_node",
33 sizeof(struct btrfs_delayed_node),
34 0,
35 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
36 NULL);
37 if (!delayed_node_cache)
38 return -ENOMEM;
39 return 0;
40}
41
/*
 * Destroy the delayed-node slab cache created by btrfs_delayed_inode_init().
 * The NULL check makes this safe to call even if initialization failed.
 */
void btrfs_delayed_inode_exit(void)
{
	if (delayed_node_cache)
		kmem_cache_destroy(delayed_node_cache);
}
47
48static inline void btrfs_init_delayed_node(
49 struct btrfs_delayed_node *delayed_node,
50 struct btrfs_root *root, u64 inode_id)
51{
52 delayed_node->root = root;
53 delayed_node->inode_id = inode_id;
54 atomic_set(&delayed_node->refs, 0);
55 delayed_node->count = 0;
56 delayed_node->in_list = 0;
57 delayed_node->inode_dirty = 0;
58 delayed_node->ins_root = RB_ROOT;
59 delayed_node->del_root = RB_ROOT;
60 mutex_init(&delayed_node->mutex);
61 delayed_node->index_cnt = 0;
62 INIT_LIST_HEAD(&delayed_node->n_list);
63 INIT_LIST_HEAD(&delayed_node->p_list);
64 delayed_node->bytes_reserved = 0;
65}
66
67static inline int btrfs_is_continuous_delayed_item(
68 struct btrfs_delayed_item *item1,
69 struct btrfs_delayed_item *item2)
70{
71 if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
72 item1->key.objectid == item2->key.objectid &&
73 item1->key.type == item2->key.type &&
74 item1->key.offset + 1 == item2->key.offset)
75 return 1;
76 return 0;
77}
78
/* Return the fs-wide delayed root that @root's delayed items are queued on. */
static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
						struct btrfs_root *root)
{
	return root->fs_info->delayed_root;
}
84
/*
 * Find the delayed node for @inode, creating and inserting one if it
 * does not exist yet.
 *
 * The result is cached in btrfs_inode->delayed_node and also indexed by
 * inode number in root->delayed_nodes_tree; the cache pointer and the
 * radix tree are both protected by root->inode_lock.
 *
 * Returns the node with one extra reference held for the caller, or an
 * ERR_PTR() on allocation / radix-tree preload failure.
 */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
							struct inode *inode)
{
	struct btrfs_delayed_node *node;
	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
	struct btrfs_root *root = btrfs_inode->root;
	u64 ino = btrfs_ino(inode);
	int ret;

again:
	/* Fast path: the inode already caches its delayed node. */
	node = ACCESS_ONCE(btrfs_inode->delayed_node);
	if (node) {
		atomic_inc(&node->refs);	/* can be accessed */
		return node;
	}

	spin_lock(&root->inode_lock);
	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
	if (node) {
		if (btrfs_inode->delayed_node) {
			/*
			 * Another task installed the cache pointer while we
			 * took the lock; retry through the fast path.
			 */
			spin_unlock(&root->inode_lock);
			goto again;
		}
		btrfs_inode->delayed_node = node;
		atomic_inc(&node->refs);	/* can be accessed */
		atomic_inc(&node->refs);	/* cached in the inode */
		spin_unlock(&root->inode_lock);
		return node;
	}
	spin_unlock(&root->inode_lock);

	/* Slow path: allocate a new node and try to insert it. */
	node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
	if (!node)
		return ERR_PTR(-ENOMEM);
	btrfs_init_delayed_node(node, root, ino);

	atomic_inc(&node->refs);	/* cached in the btrfs inode */
	atomic_inc(&node->refs);	/* can be accessed */

	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
	if (ret) {
		kmem_cache_free(delayed_node_cache, node);
		return ERR_PTR(ret);
	}

	spin_lock(&root->inode_lock);
	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
	if (ret == -EEXIST) {
		/* Lost the insertion race; drop ours and retry from the top. */
		kmem_cache_free(delayed_node_cache, node);
		spin_unlock(&root->inode_lock);
		radix_tree_preload_end();
		goto again;
	}
	btrfs_inode->delayed_node = node;
	spin_unlock(&root->inode_lock);
	radix_tree_preload_end();

	return node;
}
144
145/*
146 * Call it when holding delayed_node->mutex
147 *
148 * If mod = 1, add this node into the prepared list.
149 */
150static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
151 struct btrfs_delayed_node *node,
152 int mod)
153{
154 spin_lock(&root->lock);
155 if (node->in_list) {
156 if (!list_empty(&node->p_list))
157 list_move_tail(&node->p_list, &root->prepare_list);
158 else if (mod)
159 list_add_tail(&node->p_list, &root->prepare_list);
160 } else {
161 list_add_tail(&node->n_list, &root->node_list);
162 list_add_tail(&node->p_list, &root->prepare_list);
163 atomic_inc(&node->refs); /* inserted into list */
164 root->nodes++;
165 node->in_list = 1;
166 }
167 spin_unlock(&root->lock);
168}
169
170/* Call it when holding delayed_node->mutex */
171static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
172 struct btrfs_delayed_node *node)
173{
174 spin_lock(&root->lock);
175 if (node->in_list) {
176 root->nodes--;
177 atomic_dec(&node->refs); /* not in the list */
178 list_del_init(&node->n_list);
179 if (!list_empty(&node->p_list))
180 list_del_init(&node->p_list);
181 node->in_list = 0;
182 }
183 spin_unlock(&root->lock);
184}
185
186struct btrfs_delayed_node *btrfs_first_delayed_node(
187 struct btrfs_delayed_root *delayed_root)
188{
189 struct list_head *p;
190 struct btrfs_delayed_node *node = NULL;
191
192 spin_lock(&delayed_root->lock);
193 if (list_empty(&delayed_root->node_list))
194 goto out;
195
196 p = delayed_root->node_list.next;
197 node = list_entry(p, struct btrfs_delayed_node, n_list);
198 atomic_inc(&node->refs);
199out:
200 spin_unlock(&delayed_root->lock);
201
202 return node;
203}
204
205struct btrfs_delayed_node *btrfs_next_delayed_node(
206 struct btrfs_delayed_node *node)
207{
208 struct btrfs_delayed_root *delayed_root;
209 struct list_head *p;
210 struct btrfs_delayed_node *next = NULL;
211
212 delayed_root = node->root->fs_info->delayed_root;
213 spin_lock(&delayed_root->lock);
214 if (!node->in_list) { /* not in the list */
215 if (list_empty(&delayed_root->node_list))
216 goto out;
217 p = delayed_root->node_list.next;
218 } else if (list_is_last(&node->n_list, &delayed_root->node_list))
219 goto out;
220 else
221 p = node->n_list.next;
222
223 next = list_entry(p, struct btrfs_delayed_node, n_list);
224 atomic_inc(&next->refs);
225out:
226 spin_unlock(&delayed_root->lock);
227
228 return next;
229}
230
/*
 * Drop one reference on @delayed_node, requeueing or dequeueing it first.
 *
 * If the node still has pending items it stays on the node list (and is
 * put on the prepare list when @mod is nonzero); otherwise it is removed
 * from the lists.  When the last reference is dropped the node is
 * deleted from the radix tree and freed; the refcount is re-checked
 * under root->inode_lock because a concurrent lookup may have revived
 * the node between atomic_dec_and_test() and taking the lock.
 */
static void __btrfs_release_delayed_node(
				struct btrfs_delayed_node *delayed_node,
				int mod)
{
	struct btrfs_delayed_root *delayed_root;

	if (!delayed_node)
		return;

	delayed_root = delayed_node->root->fs_info->delayed_root;

	mutex_lock(&delayed_node->mutex);
	if (delayed_node->count)
		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
	else
		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
	mutex_unlock(&delayed_node->mutex);

	if (atomic_dec_and_test(&delayed_node->refs)) {
		struct btrfs_root *root = delayed_node->root;
		spin_lock(&root->inode_lock);
		/* re-check: a concurrent lookup may hold a fresh reference */
		if (atomic_read(&delayed_node->refs) == 0) {
			radix_tree_delete(&root->delayed_nodes_tree,
					  delayed_node->inode_id);
			kmem_cache_free(delayed_node_cache, delayed_node);
		}
		spin_unlock(&root->inode_lock);
	}
}
260
/* Drop a reference without forcing the node onto the prepare list. */
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 0);
}
265
266struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
267 struct btrfs_delayed_root *delayed_root)
268{
269 struct list_head *p;
270 struct btrfs_delayed_node *node = NULL;
271
272 spin_lock(&delayed_root->lock);
273 if (list_empty(&delayed_root->prepare_list))
274 goto out;
275
276 p = delayed_root->prepare_list.next;
277 list_del_init(p);
278 node = list_entry(p, struct btrfs_delayed_node, p_list);
279 atomic_inc(&node->refs);
280out:
281 spin_unlock(&delayed_root->lock);
282
283 return node;
284}
285
/*
 * Drop the reference taken by btrfs_first_prepared_delayed_node(); the
 * node goes back on the prepare list if it still has pending items.
 */
static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 1);
}
291
292struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
293{
294 struct btrfs_delayed_item *item;
295 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
296 if (item) {
297 item->data_len = data_len;
298 item->ins_or_del = 0;
299 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1);
303 }
304 return item;
305}
306
/*
 * __btrfs_lookup_delayed_item - look up a delayed item by key in an rbtree
 * @root:	root of the rbtree (a delayed node's ins_root or del_root)
 * @key:	the key to look up
 * @prev:	if non-NULL, receives the closest item sorting before @key
 *		when the exact key is not found (NULL if there is none)
 * @next:	if non-NULL, receives the closest item sorting after @key
 *		when the exact key is not found (NULL if there is none)
 *
 * Returns the matching item, or NULL if no item has exactly @key.  On a
 * miss, @prev/@next are derived from the last node visited on the way
 * down, which is adjacent to the searched key in sort order.
 */
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
				struct rb_root *root,
				struct btrfs_key *key,
				struct btrfs_delayed_item **prev,
				struct btrfs_delayed_item **next)
{
	struct rb_node *node, *prev_node = NULL;
	struct btrfs_delayed_item *delayed_item = NULL;
	int ret = 0;

	node = root->rb_node;

	while (node) {
		delayed_item = rb_entry(node, struct btrfs_delayed_item,
					rb_node);
		prev_node = node;
		ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
		if (ret < 0)
			node = node->rb_right;
		else if (ret > 0)
			node = node->rb_left;
		else
			return delayed_item;
	}

	if (prev) {
		if (!prev_node)
			*prev = NULL;
		else if (ret < 0)
			/* last visited item sorts before @key */
			*prev = delayed_item;
		else if ((node = rb_prev(prev_node)) != NULL) {
			*prev = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*prev = NULL;
	}

	if (next) {
		if (!prev_node)
			*next = NULL;
		else if (ret > 0)
			/* last visited item sorts after @key */
			*next = delayed_item;
		else if ((node = rb_next(prev_node)) != NULL) {
			*next = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*next = NULL;
	}
	return NULL;
}
367
368struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
369 struct btrfs_delayed_node *delayed_node,
370 struct btrfs_key *key)
371{
372 struct btrfs_delayed_item *item;
373
374 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
375 NULL, NULL);
376 return item;
377}
378
379struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
380 struct btrfs_delayed_node *delayed_node,
381 struct btrfs_key *key)
382{
383 struct btrfs_delayed_item *item;
384
385 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
386 NULL, NULL);
387 return item;
388}
389
390struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
391 struct btrfs_delayed_node *delayed_node,
392 struct btrfs_key *key)
393{
394 struct btrfs_delayed_item *item, *next;
395
396 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
397 NULL, &next);
398 if (!item)
399 item = next;
400
401 return item;
402}
403
404struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
405 struct btrfs_delayed_node *delayed_node,
406 struct btrfs_key *key)
407{
408 struct btrfs_delayed_item *item, *next;
409
410 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
411 NULL, &next);
412 if (!item)
413 item = next;
414
415 return item;
416}
417
/*
 * Link @ins into the insertion or deletion rb-tree of @delayed_node.
 *
 * @action: BTRFS_DELAYED_INSERTION_ITEM or BTRFS_DELAYED_DELETION_ITEM,
 *	    selecting which tree the item goes into.
 *
 * Returns 0 on success, -EEXIST if an item with the same key is already
 * queued in the chosen tree.  Callers are expected to hold
 * delayed_node->mutex while modifying the trees.
 */
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
				    struct btrfs_delayed_item *ins,
				    int action)
{
	struct rb_node **p, *node;
	struct rb_node *parent_node = NULL;
	struct rb_root *root;
	struct btrfs_delayed_item *item;
	int cmp;

	if (action == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_node->ins_root;
	else if (action == BTRFS_DELAYED_DELETION_ITEM)
		root = &delayed_node->del_root;
	else
		BUG();
	p = &root->rb_node;
	node = &ins->rb_node;

	/* standard rb-tree descent to find the insertion point */
	while (*p) {
		parent_node = *p;
		item = rb_entry(parent_node, struct btrfs_delayed_item,
				rb_node);

		cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
		if (cmp < 0)
			p = &(*p)->rb_right;
		else if (cmp > 0)
			p = &(*p)->rb_left;
		else
			return -EEXIST;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	ins->delayed_node = delayed_node;
	ins->ins_or_del = action;

	/*
	 * Keep index_cnt one past the largest queued dir-index offset so the
	 * next allocated directory index does not collide with delayed ones.
	 */
	if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
	    action == BTRFS_DELAYED_INSERTION_ITEM &&
	    ins->key.offset >= delayed_node->index_cnt)
		delayed_node->index_cnt = ins->key.offset + 1;

	/* account the item both per-node and globally (for balancing) */
	delayed_node->count++;
	atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
	return 0;
}
465
/* Queue @item in @node's insertion tree; -EEXIST if the key is queued. */
static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
					      struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_INSERTION_ITEM);
}
472
/* Queue @item in @node's deletion tree; -EEXIST if the key is queued. */
static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
					     struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_DELETION_ITEM);
}
479
/*
 * Unlink @delayed_item from its owning tree (insertion or deletion,
 * chosen by ins_or_del), undo the per-node and global item accounting,
 * and wake tasks throttled in btrfs_balance_delayed_items() once the
 * global count drops below BTRFS_DELAYED_BACKGROUND.
 *
 * Does not drop the item's reference; callers pair this with a refcount
 * release (see btrfs_release_delayed_item()).
 */
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
	struct rb_root *root;
	struct btrfs_delayed_root *delayed_root;

	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;

	BUG_ON(!delayed_root);
	BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
	       delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);

	if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_item->delayed_node->ins_root;
	else
		root = &delayed_item->delayed_node->del_root;

	rb_erase(&delayed_item->rb_node, root);
	delayed_item->delayed_node->count--;
	atomic_dec(&delayed_root->items);
	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
}
503
504static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
505{
506 if (item) {
507 __btrfs_remove_delayed_item(item);
508 if (atomic_dec_and_test(&item->refs))
509 kfree(item);
510 }
511}
512
513struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
514 struct btrfs_delayed_node *delayed_node)
515{
516 struct rb_node *p;
517 struct btrfs_delayed_item *item = NULL;
518
519 p = rb_first(&delayed_node->ins_root);
520 if (p)
521 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
522
523 return item;
524}
525
526struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
527 struct btrfs_delayed_node *delayed_node)
528{
529 struct rb_node *p;
530 struct btrfs_delayed_item *item = NULL;
531
532 p = rb_first(&delayed_node->del_root);
533 if (p)
534 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
535
536 return item;
537}
538
539struct btrfs_delayed_item *__btrfs_next_delayed_item(
540 struct btrfs_delayed_item *item)
541{
542 struct rb_node *p;
543 struct btrfs_delayed_item *next = NULL;
544
545 p = rb_next(&item->rb_node);
546 if (p)
547 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
548
549 return next;
550}
551
552static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
553 struct inode *inode)
554{
555 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
556 struct btrfs_delayed_node *delayed_node;
557
558 delayed_node = btrfs_inode->delayed_node;
559 if (delayed_node)
560 atomic_inc(&delayed_node->refs);
561
562 return delayed_node;
563}
564
565static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
566 u64 root_id)
567{
568 struct btrfs_key root_key;
569
570 if (root->objectid == root_id)
571 return root;
572
573 root_key.objectid = root_id;
574 root_key.type = BTRFS_ROOT_ITEM_KEY;
575 root_key.offset = (u64)-1;
576 return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
577}
578
/*
 * Reserve metadata space for one delayed item by migrating one unit of
 * transaction metadata from the transaction's block reservation into the
 * global block reservation.  On success the item records what it holds so
 * btrfs_delayed_item_release_metadata() can give it back.
 *
 * If the transaction carries no reservation (trans->bytes_reserved == 0),
 * nothing is reserved and 0 is returned.
 */
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
					       struct btrfs_root *root,
					       struct btrfs_delayed_item *item)
{
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;

	if (!trans->bytes_reserved)
		return 0;

	src_rsv = trans->block_rsv;
	dst_rsv = &root->fs_info->global_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
	if (!ret) {
		item->bytes_reserved = num_bytes;
		item->block_rsv = dst_rsv;
	}

	return ret;
}
603
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item)
606{
607 if (!item->bytes_reserved)
608 return;
609
610 btrfs_block_rsv_release(root, item->block_rsv,
611 item->bytes_reserved);
612}
613
/*
 * Reserve metadata space for the delayed inode update of @node by moving
 * one unit of transaction metadata from the transaction's reservation to
 * the global reservation.  The reserved size is remembered in
 * node->bytes_reserved for btrfs_delayed_inode_release_metadata().
 *
 * If the transaction carries no reservation (trans->bytes_reserved == 0),
 * nothing is reserved and 0 is returned.
 */
static int btrfs_delayed_inode_reserve_metadata(
					struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_delayed_node *node)
{
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;

	if (!trans->bytes_reserved)
		return 0;

	src_rsv = trans->block_rsv;
	dst_rsv = &root->fs_info->global_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
	if (!ret)
		node->bytes_reserved = num_bytes;

	return ret;
}
637
638static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
639 struct btrfs_delayed_node *node)
640{
641 struct btrfs_block_rsv *rsv;
642
643 if (!node->bytes_reserved)
644 return;
645
646 rsv = &root->fs_info->global_block_rsv;
647 btrfs_block_rsv_release(root, rsv,
648 node->bytes_reserved);
649 node->bytes_reserved = 0;
650}
651
652/*
653 * This helper will insert some continuous items into the same leaf according
654 * to the free space of the leaf.
655 */
656static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 struct btrfs_delayed_item *item)
660{
661 struct btrfs_delayed_item *curr, *next;
662 int free_space;
663 int total_data_size = 0, total_size = 0;
664 struct extent_buffer *leaf;
665 char *data_ptr;
666 struct btrfs_key *keys;
667 u32 *data_size;
668 struct list_head head;
669 int slot;
670 int nitems;
671 int i;
672 int ret = 0;
673
674 BUG_ON(!path->nodes[0]);
675
676 leaf = path->nodes[0];
677 free_space = btrfs_leaf_free_space(root, leaf);
678 INIT_LIST_HEAD(&head);
679
680 next = item;
681
682 /*
683 * count the number of the continuous items that we can insert in batch
684 */
685 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
686 free_space) {
687 total_data_size += next->data_len;
688 total_size += next->data_len + sizeof(struct btrfs_item);
689 list_add_tail(&next->tree_list, &head);
690 nitems++;
691
692 curr = next;
693 next = __btrfs_next_delayed_item(curr);
694 if (!next)
695 break;
696
697 if (!btrfs_is_continuous_delayed_item(curr, next))
698 break;
699 }
700
701 if (!nitems) {
702 ret = 0;
703 goto out;
704 }
705
706 /*
707 * we need allocate some memory space, but it might cause the task
708 * to sleep, so we set all locked nodes in the path to blocking locks
709 * first.
710 */
711 btrfs_set_path_blocking(path);
712
713 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
714 if (!keys) {
715 ret = -ENOMEM;
716 goto out;
717 }
718
719 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
720 if (!data_size) {
721 ret = -ENOMEM;
722 goto error;
723 }
724
725 /* get keys of all the delayed items */
726 i = 0;
727 list_for_each_entry(next, &head, tree_list) {
728 keys[i] = next->key;
729 data_size[i] = next->data_len;
730 i++;
731 }
732
733 /* reset all the locked nodes in the patch to spinning locks. */
734 btrfs_clear_path_blocking(path, NULL);
735
736 /* insert the keys of the items */
737 ret = setup_items_for_insert(trans, root, path, keys, data_size,
738 total_data_size, total_size, nitems);
739 if (ret)
740 goto error;
741
742 /* insert the dir index items */
743 slot = path->slots[0];
744 list_for_each_entry_safe(curr, next, &head, tree_list) {
745 data_ptr = btrfs_item_ptr(leaf, slot, char);
746 write_extent_buffer(leaf, &curr->data,
747 (unsigned long)data_ptr,
748 curr->data_len);
749 slot++;
750
751 btrfs_delayed_item_release_metadata(root, curr);
752
753 list_del(&curr->tree_list);
754 btrfs_release_delayed_item(curr);
755 }
756
757error:
758 kfree(data_size);
759 kfree(keys);
760out:
761 return ret;
762}
763
764/*
765 * This helper can just do simple insertion that needn't extend item for new
766 * data, such as directory name index insertion, inode insertion.
767 */
768static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
769 struct btrfs_root *root,
770 struct btrfs_path *path,
771 struct btrfs_delayed_item *delayed_item)
772{
773 struct extent_buffer *leaf;
774 struct btrfs_item *item;
775 char *ptr;
776 int ret;
777
778 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
779 delayed_item->data_len);
780 if (ret < 0 && ret != -EEXIST)
781 return ret;
782
783 leaf = path->nodes[0];
784
785 item = btrfs_item_nr(leaf, path->slots[0]);
786 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
787
788 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
789 delayed_item->data_len);
790 btrfs_mark_buffer_dirty(leaf);
791
792 btrfs_delayed_item_release_metadata(root, delayed_item);
793 return 0;
794}
795
796/*
797 * we insert an item first, then if there are some continuous items, we try
798 * to insert those items into the same leaf.
799 */
800static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
801 struct btrfs_path *path,
802 struct btrfs_root *root,
803 struct btrfs_delayed_node *node)
804{
805 struct btrfs_delayed_item *curr, *prev;
806 int ret = 0;
807
808do_again:
809 mutex_lock(&node->mutex);
810 curr = __btrfs_first_delayed_insertion_item(node);
811 if (!curr)
812 goto insert_end;
813
814 ret = btrfs_insert_delayed_item(trans, root, path, curr);
815 if (ret < 0) {
816 btrfs_release_path(path);
817 goto insert_end;
818 }
819
820 prev = curr;
821 curr = __btrfs_next_delayed_item(prev);
822 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
823 /* insert the continuous items into the same leaf */
824 path->slots[0]++;
825 btrfs_batch_insert_items(trans, root, path, curr);
826 }
827 btrfs_release_delayed_item(prev);
828 btrfs_mark_buffer_dirty(path->nodes[0]);
829
830 btrfs_release_path(path);
831 mutex_unlock(&node->mutex);
832 goto do_again;
833
834insert_end:
835 mutex_unlock(&node->mutex);
836 return ret;
837}
838
/*
 * Delete a run of continuous delayed deletion items, starting with @item,
 * whose keys match consecutive slots of the leaf @path points at.  Items
 * removed from the leaf also get their metadata reservation released and
 * are dropped from the delayed node.
 *
 * Caller holds the delayed node's mutex and has positioned @path on the
 * leaf slot matching @item's key.
 */
static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_delayed_item *curr, *next;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct list_head head;
	int nitems, i, last_item;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];

	i = path->slots[0];
	last_item = btrfs_header_nritems(leaf) - 1;
	if (i > last_item)
		return -ENOENT;	/* FIXME: Is errno suitable? */

	next = item;
	INIT_LIST_HEAD(&head);
	btrfs_item_key_to_cpu(leaf, &key, i);
	nitems = 0;
	/*
	 * count the number of the dir index items that we can delete in batch
	 */
	while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;

		i++;
		if (i > last_item)
			break;
		btrfs_item_key_to_cpu(leaf, &key, i);
	}

	if (!nitems)
		return 0;

	/* remove all matched slots from the leaf in one call */
	ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
	if (ret)
		goto out;

	list_for_each_entry_safe(curr, next, &head, tree_list) {
		btrfs_delayed_item_release_metadata(root, curr);
		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

out:
	return ret;
}
901
/*
 * Flush all delayed deletion items of @node: for each queued key, search
 * the tree and batch-delete the matching run of leaf items.  Delayed
 * items whose on-disk counterpart no longer exists are simply dropped.
 *
 * Returns 0 when the deletion tree is drained, or a negative error from
 * the tree search.
 */
static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_deletion_item(node);
	if (!curr)
		goto delete_fail;

	ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
	if (ret < 0)
		goto delete_fail;
	else if (ret > 0) {
		/*
		 * can't find the item which the node points to, so this node
		 * is invalid, just drop it.
		 */
		prev = curr;
		curr = __btrfs_next_delayed_item(prev);
		btrfs_release_delayed_item(prev);
		ret = 0;
		btrfs_release_path(path);
		if (curr)
			goto do_again;
		else
			goto delete_fail;
	}

	btrfs_batch_delete_items(trans, root, path, curr);
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

delete_fail:
	/* shared exit for both the drained and the error case */
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	return ret;
}
945
/*
 * Clear the "inode item pending" state of @delayed_node after its inode
 * item has been written out, undoing the count it contributed and waking
 * throttled tasks once the global item count drops below the background
 * threshold.
 */
static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_delayed_root *delayed_root;

	if (delayed_node && delayed_node->inode_dirty) {
		BUG_ON(!delayed_node->root);
		delayed_node->inode_dirty = 0;
		delayed_node->count--;

		delayed_root = delayed_node->root->fs_info->delayed_root;
		atomic_dec(&delayed_root->items);
		if (atomic_read(&delayed_root->items) <
		    BTRFS_DELAYED_BACKGROUND &&
		    waitqueue_active(&delayed_root->wait))
			wake_up(&delayed_root->wait);
	}
}
963
/*
 * Write the delayed copy of the inode item held in @node into the
 * on-disk inode item, then release the node's metadata reservation and
 * clear its inode_dirty state.
 *
 * Returns 0 if the node had no dirty inode or the update succeeded,
 * -ENOENT if the inode item can't be found, or a search error.
 */
static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	mutex_lock(&node->mutex);
	if (!node->inode_dirty) {
		mutex_unlock(&node->mutex);
		return 0;
	}

	key.objectid = node->inode_id;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;
	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
	if (ret > 0) {
		btrfs_release_path(path);
		mutex_unlock(&node->mutex);
		return -ENOENT;
	} else if (ret < 0) {
		mutex_unlock(&node->mutex);
		return ret;
	}

	/* only the leaf stays locked while we copy the inode item in */
	btrfs_unlock_up_safe(path, 1);
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
			    sizeof(struct btrfs_inode_item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	btrfs_delayed_inode_release_metadata(root, node);
	btrfs_release_delayed_inode(node);
	mutex_unlock(&node->mutex);

	return 0;
}
1008
/*
 * Called when committing the transaction.
 *
 * Walk every delayed node on the delayed root's list and flush its
 * insertions, deletions and inode update, switching to each node's own
 * root as we go.  Stops at the first error, dropping the reference on
 * the node being processed.
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_delayed_node *curr_node, *prev_node;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	delayed_root = btrfs_get_delayed_root(root);

	curr_node = btrfs_first_delayed_node(delayed_root);
	while (curr_node) {
		/* each delayed node may belong to a different fs root */
		root = curr_node->root;
		ret = btrfs_insert_delayed_items(trans, path, root,
						 curr_node);
		if (!ret)
			ret = btrfs_delete_delayed_items(trans, path, root,
							 curr_node);
		if (!ret)
			ret = btrfs_update_delayed_inode(trans, root, path,
							 curr_node);
		if (ret) {
			btrfs_release_delayed_node(curr_node);
			break;
		}

		/* grab the next node before dropping our ref on this one */
		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}

	btrfs_free_path(path);
	return ret;
}
1049
1050static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1051 struct btrfs_delayed_node *node)
1052{
1053 struct btrfs_path *path;
1054 int ret;
1055
1056 path = btrfs_alloc_path();
1057 if (!path)
1058 return -ENOMEM;
1059 path->leave_spinning = 1;
1060
1061 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1062 if (!ret)
1063 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1064 if (!ret)
1065 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1066 btrfs_free_path(path);
1067
1068 return ret;
1069}
1070
/*
 * Flush all delayed work pending on @inode.  Returns 0 immediately if the
 * inode has no delayed node or the node is empty; otherwise commits the
 * node's items and inode update within @trans.
 */
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				     struct inode *inode)
{
	/* takes a reference which we must drop on every return path */
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
	int ret;

	if (!delayed_node)
		return 0;

	mutex_lock(&delayed_node->mutex);
	if (!delayed_node->count) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
	}
	mutex_unlock(&delayed_node->mutex);

	ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
	btrfs_release_delayed_node(delayed_node);
	return ret;
}
1092
/*
 * Detach the delayed node from @inode and drop the inode's reference to
 * it.  NOTE(review): no lock is taken around the pointer swap, so this
 * presumably runs when no one else can race on the inode (e.g. inode
 * teardown) — confirm against callers.
 */
void btrfs_remove_delayed_node(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
	if (!delayed_node)
		return;

	BTRFS_I(inode)->delayed_node = NULL;
	btrfs_release_delayed_node(delayed_node);
}
1104
/* Work item handed to the delayed-workers queue to flush one node. */
struct btrfs_async_delayed_node {
	struct btrfs_root *root;			/* root used to join the transaction */
	struct btrfs_delayed_node *delayed_node;	/* prepared node to flush */
	struct btrfs_work work;				/* worker-queue hook */
};
1110
/*
 * Worker callback that flushes one prepared delayed node in the
 * background: joins a transaction, runs insertions, deletions and the
 * inode update, then either requeues itself (new items arrived) or
 * dequeues the node and frees the work item.
 */
static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
{
	struct btrfs_async_delayed_node *async_node;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_delayed_node *delayed_node = NULL;
	struct btrfs_root *root;
	unsigned long nr = 0;
	int need_requeue = 0;
	int ret;

	async_node = container_of(work, struct btrfs_async_delayed_node, work);

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	path->leave_spinning = 1;

	delayed_node = async_node->delayed_node;
	root = delayed_node->root;

	trans = btrfs_join_transaction(root, 0);
	if (IS_ERR(trans))
		goto free_path;

	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
	if (!ret)
		ret = btrfs_delete_delayed_items(trans, path, root,
						 delayed_node);

	if (!ret)
		btrfs_update_delayed_inode(trans, root, path, delayed_node);

	/*
	 * Maybe new delayed items have been inserted, so we need requeue
	 * the work. Besides that, we must dequeue the empty delayed nodes
	 * to avoid the race between delayed items balance and the worker.
	 * The race like this:
	 * 	Task1				Worker thread
	 * 					count == 0, needn't requeue
	 * 					  also needn't insert the
	 * 					  delayed node into prepare
	 * 					  list again.
	 * 	add lots of delayed items
	 * 	queue the delayed node
	 * 	  already in the list,
	 * 	  and not in the prepare
	 * 	  list, it means the delayed
	 * 	  node is being dealt with
	 * 	  by the worker.
	 * 	do delayed items balance
	 * 	  the delayed node is being
	 * 	  dealt with by the worker
	 * 	  now, just wait.
	 * 	the worker goto idle.
	 * Task1 will sleep until the transaction is commited.
	 */
	mutex_lock(&delayed_node->mutex);
	if (delayed_node->count)
		need_requeue = 1;
	else
		btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
					   delayed_node);
	mutex_unlock(&delayed_node->mutex);

	nr = trans->blocks_used;

	btrfs_end_transaction_dmeta(trans, root);
	__btrfs_btree_balance_dirty(root, nr);
free_path:
	btrfs_free_path(path);
out:
	if (need_requeue)
		btrfs_requeue_work(&async_node->work);
	else {
		/* done with this node: drop the prepared-list reference */
		btrfs_release_prepared_delayed_node(delayed_node);
		kfree(async_node);
	}
}
1190
/*
 * Pull prepared delayed nodes off @delayed_root and queue an async work
 * item to flush each of them.  When @all is zero, at most 4 nodes are
 * queued per call; otherwise the prepare list is drained completely.
 *
 * Returns 0 when the list is exhausted (or the quota is reached),
 * -ENOMEM if a work item could not be allocated.
 */
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
				     struct btrfs_root *root, int all)
{
	struct btrfs_async_delayed_node *async_node;
	struct btrfs_delayed_node *curr;
	int count = 0;

again:
	curr = btrfs_first_prepared_delayed_node(delayed_root);
	if (!curr)
		return 0;

	async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
	if (!async_node) {
		btrfs_release_prepared_delayed_node(curr);
		return -ENOMEM;
	}

	async_node->root = root;
	async_node->delayed_node = curr;

	async_node->work.func = btrfs_async_run_delayed_node_done;
	async_node->work.flags = 0;

	btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
	count++;

	if (all || count < 4)
		goto again;

	return 0;
}
1223
/*
 * Throttle producers of delayed items.  Below BTRFS_DELAYED_BACKGROUND
 * nothing happens; above it background flushing is kicked, and above
 * BTRFS_DELAYED_WRITEBACK the caller also waits (up to one second) for
 * the backlog to shrink below the background threshold.
 */
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;

	delayed_root = btrfs_get_delayed_root(root);

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return;

	if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
		int ret;
		/* flush everything that is prepared, then wait */
		ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
		if (ret)
			return;

		wait_event_interruptible_timeout(
				delayed_root->wait,
				(atomic_read(&delayed_root->items) <
				 BTRFS_DELAYED_BACKGROUND),
				HZ);
		return;
	}

	/* moderate backlog: flush a few nodes without waiting */
	btrfs_wq_run_delayed_node(delayed_root, root, 0);
}
1249
/*
 * Queue a delayed insertion of a directory index item for @dir.  The
 * btrfs_dir_item plus the name are built directly in the delayed item's
 * payload and inserted into the directory's delayed node.
 *
 * @disk_key: location key of the inode the new entry points to
 * @type:     directory entry type (BTRFS_FT_*)
 * @index:    directory index (key offset) for the new entry
 */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, const char *name,
				   int name_len, struct inode *dir,
				   struct btrfs_disk_key *disk_key, u8 type,
				   u64 index)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *delayed_item;
	struct btrfs_dir_item *dir_item;
	int ret;

	delayed_node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
	if (!delayed_item) {
		ret = -ENOMEM;
		goto release_node;
	}

	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
	/*
	 * we have reserved enough space when we start a new transaction,
	 * so reserving metadata failure is impossible
	 */
	BUG_ON(ret);

	delayed_item->key.objectid = btrfs_ino(dir);
	btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
	delayed_item->key.offset = index;

	/* build the dir item in the payload; values in on-disk byte order */
	dir_item = (struct btrfs_dir_item *)delayed_item->data;
	dir_item->location = *disk_key;
	dir_item->transid = cpu_to_le64(trans->transid);
	dir_item->data_len = 0;
	dir_item->name_len = cpu_to_le16(name_len);
	dir_item->type = type;
	memcpy((char *)(dir_item + 1), name, name_len);

	mutex_lock(&delayed_node->mutex);
	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		printk(KERN_ERR "err add delayed dir index item(name: %s) into "
				"the insertion tree of the delayed node"
				"(root id: %llu, inode id: %llu, errno: %d)\n",
				name,
				(unsigned long long)delayed_node->root->objectid,
				(unsigned long long)delayed_node->inode_id,
				ret);
		BUG();
	}
	mutex_unlock(&delayed_node->mutex);

release_node:
	btrfs_release_delayed_node(delayed_node);
	return ret;
}
1308
1309static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1310 struct btrfs_delayed_node *node,
1311 struct btrfs_key *key)
1312{
1313 struct btrfs_delayed_item *item;
1314
1315 mutex_lock(&node->mutex);
1316 item = __btrfs_lookup_delayed_insertion_item(node, key);
1317 if (!item) {
1318 mutex_unlock(&node->mutex);
1319 return 1;
1320 }
1321
1322 btrfs_delayed_item_release_metadata(root, item);
1323 btrfs_release_delayed_item(item);
1324 mutex_unlock(&node->mutex);
1325 return 0;
1326}
1327
/*
 * Queue a delayed deletion of the directory index @index in @dir.  If a
 * delayed insertion of the same index is still pending, the two cancel
 * out and nothing else is queued.
 */
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, struct inode *dir,
				   u64 index)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;
	struct btrfs_key item_key;
	int ret;

	node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(node))
		return PTR_ERR(node);

	item_key.objectid = btrfs_ino(dir);
	btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
	item_key.offset = index;

	/* a still-pending insertion with this key just gets cancelled */
	ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
	if (!ret)
		goto end;

	/* deletion items carry no payload, only the key */
	item = btrfs_alloc_delayed_item(0);
	if (!item) {
		ret = -ENOMEM;
		goto end;
	}

	item->key = item_key;

	ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
	/*
	 * we have reserved enough space when we start a new transaction,
	 * so reserving metadata failure is impossible.
	 */
	BUG_ON(ret);

	mutex_lock(&node->mutex);
	ret = __btrfs_add_delayed_deletion_item(node, item);
	if (unlikely(ret)) {
		printk(KERN_ERR "err add delayed dir index item(index: %llu) "
				"into the deletion tree of the delayed node"
				"(root id: %llu, inode id: %llu, errno: %d)\n",
				(unsigned long long)index,
				(unsigned long long)node->root->objectid,
				(unsigned long long)node->inode_id,
				ret);
		BUG();
	}
	mutex_unlock(&node->mutex);
end:
	btrfs_release_delayed_node(node);
	return ret;
}
1381
/*
 * Copy the delayed node's cached directory index counter into the btrfs
 * inode.  Returns -ENOENT when the inode has no delayed node, -EINVAL
 * when the node never recorded an index count, 0 on success.
 */
int btrfs_inode_delayed_dir_index_count(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
	int ret = 0;

	if (!delayed_node)
		return -ENOENT;

	/*
	 * Since we have held i_mutex of this directory, it is impossible that
	 * a new directory index is added into the delayed node and index_cnt
	 * is updated now. So we needn't lock the delayed node.
	 */
	if (!delayed_node->index_cnt)
		return -EINVAL;

	BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
	return ret;
}
1401
1402void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1403 struct list_head *del_list)
1404{
1405 struct btrfs_delayed_node *delayed_node;
1406 struct btrfs_delayed_item *item;
1407
1408 delayed_node = btrfs_get_delayed_node(inode);
1409 if (!delayed_node)
1410 return;
1411
1412 mutex_lock(&delayed_node->mutex);
1413 item = __btrfs_first_delayed_insertion_item(delayed_node);
1414 while (item) {
1415 atomic_inc(&item->refs);
1416 list_add_tail(&item->readdir_list, ins_list);
1417 item = __btrfs_next_delayed_item(item);
1418 }
1419
1420 item = __btrfs_first_delayed_deletion_item(delayed_node);
1421 while (item) {
1422 atomic_inc(&item->refs);
1423 list_add_tail(&item->readdir_list, del_list);
1424 item = __btrfs_next_delayed_item(item);
1425 }
1426 mutex_unlock(&delayed_node->mutex);
1427 /*
1428 * This delayed node is still cached in the btrfs inode, so refs
1429 * must be > 1 now, and we needn't check it is going to be freed
1430 * or not.
1431 *
1432 * Besides that, this function is used to read dir, we do not
1433 * insert/delete delayed items in this period. So we also needn't
1434 * requeue or dequeue this delayed node.
1435 */
1436 atomic_dec(&delayed_node->refs);
1437}
1438
1439void btrfs_put_delayed_items(struct list_head *ins_list,
1440 struct list_head *del_list)
1441{
1442 struct btrfs_delayed_item *curr, *next;
1443
1444 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1445 list_del(&curr->readdir_list);
1446 if (atomic_dec_and_test(&curr->refs))
1447 kfree(curr);
1448 }
1449
1450 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1451 list_del(&curr->readdir_list);
1452 if (atomic_dec_and_test(&curr->refs))
1453 kfree(curr);
1454 }
1455}
1456
1457int btrfs_should_delete_dir_index(struct list_head *del_list,
1458 u64 index)
1459{
1460 struct btrfs_delayed_item *curr, *next;
1461 int ret;
1462
1463 if (list_empty(del_list))
1464 return 0;
1465
1466 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1467 if (curr->key.offset > index)
1468 break;
1469
1470 list_del(&curr->readdir_list);
1471 ret = (curr->key.offset == index);
1472
1473 if (atomic_dec_and_test(&curr->refs))
1474 kfree(curr);
1475
1476 if (ret)
1477 return 1;
1478 else
1479 continue;
1480 }
1481 return 0;
1482}
1483
1484/*
1485 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1486 *
1487 */
1488int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1489 filldir_t filldir,
1490 struct list_head *ins_list)
1491{
1492 struct btrfs_dir_item *di;
1493 struct btrfs_delayed_item *curr, *next;
1494 struct btrfs_key location;
1495 char *name;
1496 int name_len;
1497 int over = 0;
1498 unsigned char d_type;
1499
1500 if (list_empty(ins_list))
1501 return 0;
1502
1503 /*
1504 * Changing the data of the delayed item is impossible. So
1505 * we needn't lock them. And we have held i_mutex of the
1506 * directory, nobody can delete any directory indexes now.
1507 */
1508 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1509 list_del(&curr->readdir_list);
1510
1511 if (curr->key.offset < filp->f_pos) {
1512 if (atomic_dec_and_test(&curr->refs))
1513 kfree(curr);
1514 continue;
1515 }
1516
1517 filp->f_pos = curr->key.offset;
1518
1519 di = (struct btrfs_dir_item *)curr->data;
1520 name = (char *)(di + 1);
1521 name_len = le16_to_cpu(di->name_len);
1522
1523 d_type = btrfs_filetype_table[di->type];
1524 btrfs_disk_key_to_cpu(&location, &di->location);
1525
1526 over = filldir(dirent, name, name_len, curr->key.offset,
1527 location.objectid, d_type);
1528
1529 if (atomic_dec_and_test(&curr->refs))
1530 kfree(curr);
1531
1532 if (over)
1533 return 1;
1534 }
1535 return 0;
1536}
1537
/*
 * Generated get/set helpers for btrfs_inode_item and btrfs_timespec fields
 * held in ordinary memory ("stack" variants, as opposed to the accessors
 * that operate on extent buffers).  Used below to fill the inode item that
 * is cached in the delayed node.
 */
1538BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1539 generation, 64);
1540BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1541 sequence, 64);
1542BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1543 transid, 64);
1544BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1545BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1546 nbytes, 64);
1547BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1548 block_group, 64);
1549BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1550BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1551BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1552BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1553BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1554BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1555
1556BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1557BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1558
/*
 * fill_stack_inode_item - copy the VFS inode state into an in-memory
 * btrfs_inode_item.
 * @trans:      current transaction; its transid is recorded in the item
 * @inode_item: destination item (typically delayed_node->inode_item)
 * @inode:      source VFS inode
 *
 * Snapshots ownership, size, mode, link count, byte count, generation,
 * sequence, rdev, flags, block group and the three timestamps so the item
 * can later be written to the btree without the inode.
 */
1559static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1560 struct btrfs_inode_item *inode_item,
1561 struct inode *inode)
1562{
1563 btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1564 btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1565 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1566 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1567 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1568 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1569 btrfs_set_stack_inode_generation(inode_item,
1570 BTRFS_I(inode)->generation);
1571 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item,
1576 BTRFS_I(inode)->block_group);
1577
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec);
1580 btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1581 inode->i_atime.tv_nsec);
1582
1583 btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1584 inode->i_mtime.tv_sec);
1585 btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1586 inode->i_mtime.tv_nsec);
1587
1588 btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1589 inode->i_ctime.tv_sec);
1590 btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1591 inode->i_ctime.tv_nsec);
1592}
1593
/*
 * btrfs_delayed_update_inode - record an inode update in the delayed node
 * instead of writing it to the btree immediately.
 * @trans: running transaction
 * @root:  root the inode belongs to
 * @inode: inode whose on-disk item should be updated
 *
 * If the delayed node already carries a dirty inode item, only the cached
 * item is refreshed.  Otherwise metadata space is reserved (failure is
 * treated as impossible because the transaction reserved enough up front,
 * hence the BUG_ON) and the node is marked dirty and accounted.
 *
 * Returns 0 on success or the PTR_ERR from delayed-node creation.
 */
1594int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode)
1596{
1597 struct btrfs_delayed_node *delayed_node;
	/*
	 * Initialize to 0: the inode_dirty fast path jumps straight to
	 * release_node and would otherwise return an uninitialized value.
	 */
1598 int ret = 0;
1599
1600 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node))
1602 return PTR_ERR(delayed_node);
1603
1604 mutex_lock(&delayed_node->mutex);
1605 if (delayed_node->inode_dirty) {
1606 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1607 goto release_node;
1608 }
1609
1610 ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1611 /*
1612 * we must reserve enough space when we start a new transaction,
1613 * so reserving metadata failure is impossible
1614 */
1615 BUG_ON(ret);
1616
1617 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1618 delayed_node->inode_dirty = 1;
1619 delayed_node->count++;
1620 atomic_inc(&root->fs_info->delayed_root->items);
1621release_node:
1622 mutex_unlock(&delayed_node->mutex);
1623 btrfs_release_delayed_node(delayed_node);
1624 return ret;
1625}
1626
/*
 * __btrfs_kill_delayed_node - discard all pending work on a delayed node.
 * @delayed_node: node whose insertion items, deletion items and dirty
 *                inode item are thrown away
 *
 * For every queued item the reserved metadata space is returned and the
 * item released; a dirty inode item is handled the same way.  Used when
 * the inode/root is being destroyed, so none of this work will ever be
 * flushed to the btree.  Runs under the node's mutex.
 */
1627static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1628{
1629 struct btrfs_root *root = delayed_node->root;
1630 struct btrfs_delayed_item *curr_item, *prev_item;
1631
1632 mutex_lock(&delayed_node->mutex);
	/* drop every pending insertion item */
1633 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1634 while (curr_item) {
1635 btrfs_delayed_item_release_metadata(root, curr_item);
1636 prev_item = curr_item;
1637 curr_item = __btrfs_next_delayed_item(prev_item);
1638 btrfs_release_delayed_item(prev_item);
1639 }
1640
	/* drop every pending deletion item */
1641 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1642 while (curr_item) {
1643 btrfs_delayed_item_release_metadata(root, curr_item);
1644 prev_item = curr_item;
1645 curr_item = __btrfs_next_delayed_item(prev_item);
1646 btrfs_release_delayed_item(prev_item);
1647 }
1648
	/* and the cached dirty inode item, if any */
1649 if (delayed_node->inode_dirty) {
1650 btrfs_delayed_inode_release_metadata(root, delayed_node);
1651 btrfs_release_delayed_inode(delayed_node);
1652 }
1653 mutex_unlock(&delayed_node->mutex);
1654}
1655
/*
 * btrfs_kill_delayed_inode_items - throw away all delayed work for @inode.
 *
 * Looks up the inode's delayed node (no-op if it has none), kills all its
 * pending items via __btrfs_kill_delayed_node() and drops the reference
 * returned by the lookup.  Used when evicting the inode.
 */
1656void btrfs_kill_delayed_inode_items(struct inode *inode)
1657{
1658 struct btrfs_delayed_node *delayed_node;
1659
1660 delayed_node = btrfs_get_delayed_node(inode);
1661 if (!delayed_node)
1662 return;
1663
1664 __btrfs_kill_delayed_node(delayed_node);
1665 btrfs_release_delayed_node(delayed_node);
1666}
1667
/*
 * btrfs_kill_all_delayed_nodes - discard the delayed work of every inode in
 * @root.  Used when dropping a dead root.
 *
 * Scans root->delayed_nodes_tree in batches of 8 via gang lookup, pinning
 * each batch with an extra reference while still under inode_lock so the
 * nodes cannot vanish, then kills and releases them outside the lock.
 * The scan cursor (inode_id) restarts just past the last node seen.
 */
1668void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1669{
1670 u64 inode_id = 0;
1671 struct btrfs_delayed_node *delayed_nodes[8];
1672 int i, n;
1673
1674 while (1) {
1675 spin_lock(&root->inode_lock);
1676 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1677 (void **)delayed_nodes, inode_id,
1678 ARRAY_SIZE(delayed_nodes));
1679 if (!n) {
1680 spin_unlock(&root->inode_lock);
1681 break;
1682 }
1683
	/* resume the next lookup after the last node in this batch */
1684 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1685
	/* pin the batch before dropping the lock */
1686 for (i = 0; i < n; i++)
1687 atomic_inc(&delayed_nodes[i]->refs);
1688 spin_unlock(&root->inode_lock);
1689
1690 for (i = 0; i < n; i++) {
1691 __btrfs_kill_delayed_node(delayed_nodes[i]);
1692 btrfs_release_delayed_node(delayed_nodes[i]);
1693 }
1694 }
1695}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
new file mode 100644
index 000000000000..eb7d240aa648
--- /dev/null
+++ b/fs/btrfs/delayed-inode.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#ifndef __DELAYED_TREE_OPERATION_H
21#define __DELAYED_TREE_OPERATION_H
22
23#include <linux/rbtree.h>
24#include <linux/spinlock.h>
25#include <linux/mutex.h>
26#include <linux/list.h>
27#include <linux/wait.h>
28#include <asm/atomic.h>
29
30#include "ctree.h"
31
32/* types of the delayed item */
33#define BTRFS_DELAYED_INSERTION_ITEM 1
34#define BTRFS_DELAYED_DELETION_ITEM 2
35
36struct btrfs_delayed_root { /* per-fs state for the delayed-item machinery */
37 spinlock_t lock; /* protects node_list/prepare_list/nodes */
38 struct list_head node_list; /* all delayed nodes with pending work */
39 /*
 * Used for delayed nodes which are waiting to be dealt with by the
 * worker. If the delayed node is inserted into the work queue, we
 * drop it from this list.
43 */
44 struct list_head prepare_list;
45 atomic_t items; /* for delayed items */
46 int nodes; /* for delayed nodes */
47 wait_queue_head_t wait; /* waiters for the item count to drop */
48};
49
50struct btrfs_delayed_node { /* per-inode container of delayed btree work */
51 u64 inode_id; /* also the key into root->delayed_nodes_tree */
52 u64 bytes_reserved; /* metadata space reserved for the inode item */
53 struct btrfs_root *root;
54 /* Used to add the node into the delayed root's node list. */
55 struct list_head n_list;
56 /*
 * Used to add the node into the prepare list; the nodes in this list
 * are waiting to be dealt with by the async worker.
59 */
60 struct list_head p_list;
61 struct rb_root ins_root; /* delayed insertion items, keyed by btrfs_key */
62 struct rb_root del_root; /* delayed deletion items, keyed by btrfs_key */
63 struct mutex mutex; /* serializes item/inode updates on this node */
64 struct btrfs_inode_item inode_item; /* cached copy written out later */
65 atomic_t refs;
66 u64 index_cnt; /* next directory index to hand out */
67 bool in_list; /* on delayed_root->node_list? */
68 bool inode_dirty; /* inode_item differs from the btree copy */
69 int count; /* pending items (incl. dirty inode) on this node */
70};
71
72struct btrfs_delayed_item { /* one deferred btree insertion or deletion */
73 struct rb_node rb_node; /* links into ins_root or del_root */
74 struct btrfs_key key; /* btree key the item will be inserted/deleted at */
75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved; /* metadata space reserved for this item */
78 struct btrfs_block_rsv *block_rsv; /* reservation the space came from */
79 struct btrfs_delayed_node *delayed_node; /* owning node */
80 atomic_t refs;
81 int ins_or_del; /* BTRFS_DELAYED_INSERTION_ITEM or ..._DELETION_ITEM */
82 u32 data_len; /* length of the inline payload below */
83 char data[0]; /* item body (e.g. dir item + name), allocated inline */
84};
85
/* Initialize a freshly allocated btrfs_delayed_root to its empty state. */
86static inline void btrfs_init_delayed_root(
87 struct btrfs_delayed_root *delayed_root)
88{
89 atomic_set(&delayed_root->items, 0);
90 delayed_root->nodes = 0;
91 spin_lock_init(&delayed_root->lock);
92 init_waitqueue_head(&delayed_root->wait);
93 INIT_LIST_HEAD(&delayed_root->node_list);
94 INIT_LIST_HEAD(&delayed_root->prepare_list);
95}
96
97int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
98 struct btrfs_root *root, const char *name,
99 int name_len, struct inode *dir,
100 struct btrfs_disk_key *disk_key, u8 type,
101 u64 index);
102
103int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
104 struct btrfs_root *root, struct inode *dir,
105 u64 index);
106
107int btrfs_inode_delayed_dir_index_count(struct inode *inode);
108
109int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
110 struct btrfs_root *root);
111
112void btrfs_balance_delayed_items(struct btrfs_root *root);
113
114int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
115 struct inode *inode);
116/* Used for evicting the inode. */
117void btrfs_remove_delayed_node(struct inode *inode);
118void btrfs_kill_delayed_inode_items(struct inode *inode);
119
120
121int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
122 struct btrfs_root *root, struct inode *inode);
123
124/* Used for drop dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
126
127/* Used for readdir() */
128void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
129 struct list_head *del_list);
130void btrfs_put_delayed_items(struct list_head *ins_list,
131 struct list_head *del_list);
132int btrfs_should_delete_dir_index(struct list_head *del_list,
133 u64 index);
134int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
135 filldir_t filldir,
136 struct list_head *ins_list);
137
138/* for init */
139int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void);
141#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index bce28f653899..125cf76fcd08 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -281,44 +281,6 @@ again:
281} 281}
282 282
283/* 283/*
284 * This checks to see if there are any delayed refs in the
285 * btree for a given bytenr. It returns one if it finds any
286 * and zero otherwise.
287 *
288 * If it only finds a head node, it returns 0.
289 *
290 * The idea is to use this when deciding if you can safely delete an
291 * extent from the extent allocation tree. There may be a pending
292 * ref in the rbtree that adds or removes references, so as long as this
293 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
294 * allocation tree.
295 */
296int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
297{
298 struct btrfs_delayed_ref_node *ref;
299 struct btrfs_delayed_ref_root *delayed_refs;
300 struct rb_node *prev_node;
301 int ret = 0;
302
303 delayed_refs = &trans->transaction->delayed_refs;
304 spin_lock(&delayed_refs->lock);
305
306 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
307 if (ref) {
308 prev_node = rb_prev(&ref->rb_node);
309 if (!prev_node)
310 goto out;
311 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
312 rb_node);
313 if (ref->bytenr == bytenr)
314 ret = 1;
315 }
316out:
317 spin_unlock(&delayed_refs->lock);
318 return ret;
319}
320
321/*
322 * helper function to update an extent delayed ref in the 284 * helper function to update an extent delayed ref in the
323 * rbtree. existing and update must both have the same 285 * rbtree. existing and update must both have the same
324 * bytenr and parent 286 * bytenr and parent
@@ -747,79 +709,3 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
747 return btrfs_delayed_node_to_head(ref); 709 return btrfs_delayed_node_to_head(ref);
748 return NULL; 710 return NULL;
749} 711}
750
751/*
752 * add a delayed ref to the tree. This does all of the accounting required
753 * to make sure the delayed ref is eventually processed before this
754 * transaction commits.
755 *
756 * The main point of this call is to add and remove a backreference in a single
757 * shot, taking the lock only once, and only searching for the head node once.
758 *
759 * It is the same as doing a ref add and delete in two separate calls.
760 */
761#if 0
762int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
763 u64 bytenr, u64 num_bytes, u64 orig_parent,
764 u64 parent, u64 orig_ref_root, u64 ref_root,
765 u64 orig_ref_generation, u64 ref_generation,
766 u64 owner_objectid, int pin)
767{
768 struct btrfs_delayed_ref *ref;
769 struct btrfs_delayed_ref *old_ref;
770 struct btrfs_delayed_ref_head *head_ref;
771 struct btrfs_delayed_ref_root *delayed_refs;
772 int ret;
773
774 ref = kmalloc(sizeof(*ref), GFP_NOFS);
775 if (!ref)
776 return -ENOMEM;
777
778 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
779 if (!old_ref) {
780 kfree(ref);
781 return -ENOMEM;
782 }
783
784 /*
785 * the parent = 0 case comes from cases where we don't actually
786 * know the parent yet. It will get updated later via a add/drop
787 * pair.
788 */
789 if (parent == 0)
790 parent = bytenr;
791 if (orig_parent == 0)
792 orig_parent = bytenr;
793
794 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
795 if (!head_ref) {
796 kfree(ref);
797 kfree(old_ref);
798 return -ENOMEM;
799 }
800 delayed_refs = &trans->transaction->delayed_refs;
801 spin_lock(&delayed_refs->lock);
802
803 /*
804 * insert both the head node and the new ref without dropping
805 * the spin lock
806 */
807 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
808 (u64)-1, 0, 0, 0,
809 BTRFS_UPDATE_DELAYED_HEAD, 0);
810 BUG_ON(ret);
811
812 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
813 parent, ref_root, ref_generation,
814 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
815 BUG_ON(ret);
816
817 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
818 orig_parent, orig_ref_root,
819 orig_ref_generation, owner_objectid,
820 BTRFS_DROP_DELAYED_REF, pin);
821 BUG_ON(ret);
822 spin_unlock(&delayed_refs->lock);
823 return 0;
824}
825#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50e3cf92fbda..e287e3b0eab0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -166,12 +166,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
166 166
167struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
170int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
171 u64 bytenr, u64 num_bytes, u64 orig_parent,
172 u64 parent, u64 orig_ref_root, u64 ref_root,
173 u64 orig_ref_generation, u64 ref_generation,
174 u64 owner_objectid, int pin);
175int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, 169int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
176 struct btrfs_delayed_ref_head *head); 170 struct btrfs_delayed_ref_head *head);
177int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 171int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c62f02f6ae69..685f2593c4f0 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -50,7 +50,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
50 if (di) 50 if (di)
51 return ERR_PTR(-EEXIST); 51 return ERR_PTR(-EEXIST);
52 ret = btrfs_extend_item(trans, root, path, data_size); 52 ret = btrfs_extend_item(trans, root, path, data_size);
53 WARN_ON(ret > 0);
54 } 53 }
55 if (ret < 0) 54 if (ret < 0)
56 return ERR_PTR(ret); 55 return ERR_PTR(ret);
@@ -124,8 +123,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
124 * to use for the second index (if one is created). 123 * to use for the second index (if one is created).
125 */ 124 */
126int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 125int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
127 *root, const char *name, int name_len, u64 dir, 126 *root, const char *name, int name_len,
128 struct btrfs_key *location, u8 type, u64 index) 127 struct inode *dir, struct btrfs_key *location,
128 u8 type, u64 index)
129{ 129{
130 int ret = 0; 130 int ret = 0;
131 int ret2 = 0; 131 int ret2 = 0;
@@ -137,13 +137,17 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
137 struct btrfs_disk_key disk_key; 137 struct btrfs_disk_key disk_key;
138 u32 data_size; 138 u32 data_size;
139 139
140 key.objectid = dir; 140 key.objectid = btrfs_ino(dir);
141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
142 key.offset = btrfs_name_hash(name, name_len); 142 key.offset = btrfs_name_hash(name, name_len);
143 143
144 path = btrfs_alloc_path(); 144 path = btrfs_alloc_path();
145 if (!path)
146 return -ENOMEM;
145 path->leave_spinning = 1; 147 path->leave_spinning = 1;
146 148
149 btrfs_cpu_key_to_disk(&disk_key, location);
150
147 data_size = sizeof(*dir_item) + name_len; 151 data_size = sizeof(*dir_item) + name_len;
148 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 152 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
149 name, name_len); 153 name, name_len);
@@ -155,7 +159,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
155 } 159 }
156 160
157 leaf = path->nodes[0]; 161 leaf = path->nodes[0];
158 btrfs_cpu_key_to_disk(&disk_key, location);
159 btrfs_set_dir_item_key(leaf, dir_item, &disk_key); 162 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
160 btrfs_set_dir_type(leaf, dir_item, type); 163 btrfs_set_dir_type(leaf, dir_item, type);
161 btrfs_set_dir_data_len(leaf, dir_item, 0); 164 btrfs_set_dir_data_len(leaf, dir_item, 0);
@@ -172,29 +175,11 @@ second_insert:
172 ret = 0; 175 ret = 0;
173 goto out_free; 176 goto out_free;
174 } 177 }
175 btrfs_release_path(root, path); 178 btrfs_release_path(path);
176
177 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
178 key.offset = index;
179 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
180 name, name_len);
181 if (IS_ERR(dir_item)) {
182 ret2 = PTR_ERR(dir_item);
183 goto out_free;
184 }
185 leaf = path->nodes[0];
186 btrfs_cpu_key_to_disk(&disk_key, location);
187 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
188 btrfs_set_dir_type(leaf, dir_item, type);
189 btrfs_set_dir_data_len(leaf, dir_item, 0);
190 btrfs_set_dir_name_len(leaf, dir_item, name_len);
191 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
192 name_ptr = (unsigned long)(dir_item + 1);
193 write_extent_buffer(leaf, name, name_ptr, name_len);
194 btrfs_mark_buffer_dirty(leaf);
195 179
180 ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
181 &disk_key, type, index);
196out_free: 182out_free:
197
198 btrfs_free_path(path); 183 btrfs_free_path(path);
199 if (ret) 184 if (ret)
200 return ret; 185 return ret;
@@ -452,7 +437,7 @@ int verify_dir_item(struct btrfs_root *root,
452 namelen = XATTR_NAME_MAX; 437 namelen = XATTR_NAME_MAX;
453 438
454 if (btrfs_dir_name_len(leaf, dir_item) > namelen) { 439 if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
455 printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n", 440 printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
456 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 441 (unsigned)btrfs_dir_data_len(leaf, dir_item));
457 return 1; 442 return 1;
458 } 443 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 228cf36ece83..98b6a71decba 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h>
32#include <asm/unaligned.h> 33#include <asm/unaligned.h>
33#include "compat.h" 34#include "compat.h"
34#include "ctree.h" 35#include "ctree.h"
@@ -41,6 +42,7 @@
41#include "locking.h" 42#include "locking.h"
42#include "tree-log.h" 43#include "tree-log.h"
43#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h"
44 46
45static struct extent_io_ops btree_extent_io_ops; 47static struct extent_io_ops btree_extent_io_ops;
46static void end_workqueue_fn(struct btrfs_work *work); 48static void end_workqueue_fn(struct btrfs_work *work);
@@ -137,7 +139,7 @@ static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
137 * that covers the entire device 139 * that covers the entire device
138 */ 140 */
139static struct extent_map *btree_get_extent(struct inode *inode, 141static struct extent_map *btree_get_extent(struct inode *inode,
140 struct page *page, size_t page_offset, u64 start, u64 len, 142 struct page *page, size_t pg_offset, u64 start, u64 len,
141 int create) 143 int create)
142{ 144{
143 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 145 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -154,7 +156,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
154 } 156 }
155 read_unlock(&em_tree->lock); 157 read_unlock(&em_tree->lock);
156 158
157 em = alloc_extent_map(GFP_NOFS); 159 em = alloc_extent_map();
158 if (!em) { 160 if (!em) {
159 em = ERR_PTR(-ENOMEM); 161 em = ERR_PTR(-ENOMEM);
160 goto out; 162 goto out;
@@ -254,14 +256,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
254 memcpy(&found, result, csum_size); 256 memcpy(&found, result, csum_size);
255 257
256 read_extent_buffer(buf, &val, 0, csum_size); 258 read_extent_buffer(buf, &val, 0, csum_size);
257 if (printk_ratelimit()) { 259 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
258 printk(KERN_INFO "btrfs: %s checksum verify "
259 "failed on %llu wanted %X found %X " 260 "failed on %llu wanted %X found %X "
260 "level %d\n", 261 "level %d\n",
261 root->fs_info->sb->s_id, 262 root->fs_info->sb->s_id,
262 (unsigned long long)buf->start, val, found, 263 (unsigned long long)buf->start, val, found,
263 btrfs_header_level(buf)); 264 btrfs_header_level(buf));
264 }
265 if (result != (char *)&inline_result) 265 if (result != (char *)&inline_result)
266 kfree(result); 266 kfree(result);
267 return 1; 267 return 1;
@@ -296,13 +296,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
296 ret = 0; 296 ret = 0;
297 goto out; 297 goto out;
298 } 298 }
299 if (printk_ratelimit()) { 299 printk_ratelimited("parent transid verify failed on %llu wanted %llu "
300 printk("parent transid verify failed on %llu wanted %llu "
301 "found %llu\n", 300 "found %llu\n",
302 (unsigned long long)eb->start, 301 (unsigned long long)eb->start,
303 (unsigned long long)parent_transid, 302 (unsigned long long)parent_transid,
304 (unsigned long long)btrfs_header_generation(eb)); 303 (unsigned long long)btrfs_header_generation(eb));
305 }
306 ret = 1; 304 ret = 1;
307 clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 305 clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
308out: 306out:
@@ -380,7 +378,7 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
380 len = page->private >> 2; 378 len = page->private >> 2;
381 WARN_ON(len == 0); 379 WARN_ON(len == 0);
382 380
383 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 381 eb = alloc_extent_buffer(tree, start, len, page);
384 if (eb == NULL) { 382 if (eb == NULL) {
385 WARN_ON(1); 383 WARN_ON(1);
386 goto out; 384 goto out;
@@ -525,7 +523,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
525 len = page->private >> 2; 523 len = page->private >> 2;
526 WARN_ON(len == 0); 524 WARN_ON(len == 0);
527 525
528 eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); 526 eb = alloc_extent_buffer(tree, start, len, page);
529 if (eb == NULL) { 527 if (eb == NULL) {
530 ret = -EIO; 528 ret = -EIO;
531 goto out; 529 goto out;
@@ -533,12 +531,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
533 531
534 found_start = btrfs_header_bytenr(eb); 532 found_start = btrfs_header_bytenr(eb);
535 if (found_start != start) { 533 if (found_start != start) {
536 if (printk_ratelimit()) { 534 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
537 printk(KERN_INFO "btrfs bad tree block start "
538 "%llu %llu\n", 535 "%llu %llu\n",
539 (unsigned long long)found_start, 536 (unsigned long long)found_start,
540 (unsigned long long)eb->start); 537 (unsigned long long)eb->start);
541 }
542 ret = -EIO; 538 ret = -EIO;
543 goto err; 539 goto err;
544 } 540 }
@@ -550,10 +546,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
550 goto err; 546 goto err;
551 } 547 }
552 if (check_tree_block_fsid(root, eb)) { 548 if (check_tree_block_fsid(root, eb)) {
553 if (printk_ratelimit()) { 549 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
554 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
555 (unsigned long long)eb->start); 550 (unsigned long long)eb->start);
556 }
557 ret = -EIO; 551 ret = -EIO;
558 goto err; 552 goto err;
559 } 553 }
@@ -650,12 +644,6 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
650 return 256 * limit; 644 return 256 * limit;
651} 645}
652 646
653int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
654{
655 return atomic_read(&info->nr_async_bios) >
656 btrfs_async_submit_limit(info);
657}
658
659static void run_one_async_start(struct btrfs_work *work) 647static void run_one_async_start(struct btrfs_work *work)
660{ 648{
661 struct async_submit_bio *async; 649 struct async_submit_bio *async;
@@ -963,7 +951,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
963 struct inode *btree_inode = root->fs_info->btree_inode; 951 struct inode *btree_inode = root->fs_info->btree_inode;
964 struct extent_buffer *eb; 952 struct extent_buffer *eb;
965 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 953 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
966 bytenr, blocksize, GFP_NOFS); 954 bytenr, blocksize);
967 return eb; 955 return eb;
968} 956}
969 957
@@ -974,7 +962,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
974 struct extent_buffer *eb; 962 struct extent_buffer *eb;
975 963
976 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 964 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
977 bytenr, blocksize, NULL, GFP_NOFS); 965 bytenr, blocksize, NULL);
978 return eb; 966 return eb;
979} 967}
980 968
@@ -1058,13 +1046,13 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1058 root->name = NULL; 1046 root->name = NULL;
1059 root->in_sysfs = 0; 1047 root->in_sysfs = 0;
1060 root->inode_tree = RB_ROOT; 1048 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1061 root->block_rsv = NULL; 1050 root->block_rsv = NULL;
1062 root->orphan_block_rsv = NULL; 1051 root->orphan_block_rsv = NULL;
1063 1052
1064 INIT_LIST_HEAD(&root->dirty_list); 1053 INIT_LIST_HEAD(&root->dirty_list);
1065 INIT_LIST_HEAD(&root->orphan_list); 1054 INIT_LIST_HEAD(&root->orphan_list);
1066 INIT_LIST_HEAD(&root->root_list); 1055 INIT_LIST_HEAD(&root->root_list);
1067 spin_lock_init(&root->node_lock);
1068 spin_lock_init(&root->orphan_lock); 1056 spin_lock_init(&root->orphan_lock);
1069 spin_lock_init(&root->inode_lock); 1057 spin_lock_init(&root->inode_lock);
1070 spin_lock_init(&root->accounting_lock); 1058 spin_lock_init(&root->accounting_lock);
@@ -1080,7 +1068,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1080 root->log_transid = 0; 1068 root->log_transid = 0;
1081 root->last_log_commit = 0; 1069 root->last_log_commit = 0;
1082 extent_io_tree_init(&root->dirty_log_pages, 1070 extent_io_tree_init(&root->dirty_log_pages,
1083 fs_info->btree_inode->i_mapping, GFP_NOFS); 1071 fs_info->btree_inode->i_mapping);
1084 1072
1085 memset(&root->root_key, 0, sizeof(root->root_key)); 1073 memset(&root->root_key, 0, sizeof(root->root_key));
1086 memset(&root->root_item, 0, sizeof(root->root_item)); 1074 memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1283,21 +1271,6 @@ out:
1283 return root; 1271 return root;
1284} 1272}
1285 1273
1286struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1287 u64 root_objectid)
1288{
1289 struct btrfs_root *root;
1290
1291 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
1292 return fs_info->tree_root;
1293 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
1294 return fs_info->extent_root;
1295
1296 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1297 (unsigned long)root_objectid);
1298 return root;
1299}
1300
1301struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1274struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1302 struct btrfs_key *location) 1275 struct btrfs_key *location)
1303{ 1276{
@@ -1326,6 +1299,19 @@ again:
1326 if (IS_ERR(root)) 1299 if (IS_ERR(root))
1327 return root; 1300 return root;
1328 1301
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS);
1307 if (!root->free_ino_pinned)
1308 goto fail;
1309
1310 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait);
1314
1329 set_anon_super(&root->anon_super, NULL); 1315 set_anon_super(&root->anon_super, NULL);
1330 1316
1331 if (btrfs_root_refs(&root->root_item) == 0) { 1317 if (btrfs_root_refs(&root->root_item) == 0) {
@@ -1369,41 +1355,6 @@ fail:
1369 return ERR_PTR(ret); 1355 return ERR_PTR(ret);
1370} 1356}
1371 1357
1372struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
1373 struct btrfs_key *location,
1374 const char *name, int namelen)
1375{
1376 return btrfs_read_fs_root_no_name(fs_info, location);
1377#if 0
1378 struct btrfs_root *root;
1379 int ret;
1380
1381 root = btrfs_read_fs_root_no_name(fs_info, location);
1382 if (!root)
1383 return NULL;
1384
1385 if (root->in_sysfs)
1386 return root;
1387
1388 ret = btrfs_set_root_name(root, name, namelen);
1389 if (ret) {
1390 free_extent_buffer(root->node);
1391 kfree(root);
1392 return ERR_PTR(ret);
1393 }
1394
1395 ret = btrfs_sysfs_add_root(root);
1396 if (ret) {
1397 free_extent_buffer(root->node);
1398 kfree(root->name);
1399 kfree(root);
1400 return ERR_PTR(ret);
1401 }
1402 root->in_sysfs = 1;
1403 return root;
1404#endif
1405}
1406
1407static int btrfs_congested_fn(void *congested_data, int bdi_bits) 1358static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1408{ 1359{
1409 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1360 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
@@ -1411,7 +1362,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1411 struct btrfs_device *device; 1362 struct btrfs_device *device;
1412 struct backing_dev_info *bdi; 1363 struct backing_dev_info *bdi;
1413 1364
1414 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1365 rcu_read_lock();
1366 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
1415 if (!device->bdev) 1367 if (!device->bdev)
1416 continue; 1368 continue;
1417 bdi = blk_get_backing_dev_info(device->bdev); 1369 bdi = blk_get_backing_dev_info(device->bdev);
@@ -1420,6 +1372,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1420 break; 1372 break;
1421 } 1373 }
1422 } 1374 }
1375 rcu_read_unlock();
1423 return ret; 1376 return ret;
1424} 1377}
1425 1378
@@ -1522,6 +1475,7 @@ static int cleaner_kthread(void *arg)
1522 btrfs_run_delayed_iputs(root); 1475 btrfs_run_delayed_iputs(root);
1523 btrfs_clean_old_snapshots(root); 1476 btrfs_clean_old_snapshots(root);
1524 mutex_unlock(&root->fs_info->cleaner_mutex); 1477 mutex_unlock(&root->fs_info->cleaner_mutex);
1478 btrfs_run_defrag_inodes(root->fs_info);
1525 } 1479 }
1526 1480
1527 if (freezing(current)) { 1481 if (freezing(current)) {
@@ -1611,7 +1565,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1611 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1565 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1612 GFP_NOFS); 1566 GFP_NOFS);
1613 struct btrfs_root *tree_root = btrfs_sb(sb); 1567 struct btrfs_root *tree_root = btrfs_sb(sb);
1614 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1568 struct btrfs_fs_info *fs_info = NULL;
1615 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1569 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1616 GFP_NOFS); 1570 GFP_NOFS);
1617 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1571 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1623,11 +1577,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1623 1577
1624 struct btrfs_super_block *disk_super; 1578 struct btrfs_super_block *disk_super;
1625 1579
1626 if (!extent_root || !tree_root || !fs_info || 1580 if (!extent_root || !tree_root || !tree_root->fs_info ||
1627 !chunk_root || !dev_root || !csum_root) { 1581 !chunk_root || !dev_root || !csum_root) {
1628 err = -ENOMEM; 1582 err = -ENOMEM;
1629 goto fail; 1583 goto fail;
1630 } 1584 }
1585 fs_info = tree_root->fs_info;
1631 1586
1632 ret = init_srcu_struct(&fs_info->subvol_srcu); 1587 ret = init_srcu_struct(&fs_info->subvol_srcu);
1633 if (ret) { 1588 if (ret) {
@@ -1662,6 +1617,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1662 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1663 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1664 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock);
1665 1621
1666 init_completion(&fs_info->kobj_unregister); 1622 init_completion(&fs_info->kobj_unregister);
1667 fs_info->tree_root = tree_root; 1623 fs_info->tree_root = tree_root;
@@ -1684,15 +1640,35 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1684 atomic_set(&fs_info->async_delalloc_pages, 0); 1640 atomic_set(&fs_info->async_delalloc_pages, 0);
1685 atomic_set(&fs_info->async_submit_draining, 0); 1641 atomic_set(&fs_info->async_submit_draining, 0);
1686 atomic_set(&fs_info->nr_async_bios, 0); 1642 atomic_set(&fs_info->nr_async_bios, 0);
1643 atomic_set(&fs_info->defrag_running, 0);
1687 fs_info->sb = sb; 1644 fs_info->sb = sb;
1688 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1689 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT;
1690 1648
1691 fs_info->thread_pool_size = min_t(unsigned long, 1649 fs_info->thread_pool_size = min_t(unsigned long,
1692 num_online_cpus() + 2, 8); 1650 num_online_cpus() + 2, 8);
1693 1651
1694 INIT_LIST_HEAD(&fs_info->ordered_extents); 1652 INIT_LIST_HEAD(&fs_info->ordered_extents);
1695 spin_lock_init(&fs_info->ordered_extent_lock); 1653 spin_lock_init(&fs_info->ordered_extent_lock);
1654 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
1655 GFP_NOFS);
1656 if (!fs_info->delayed_root) {
1657 err = -ENOMEM;
1658 goto fail_iput;
1659 }
1660 btrfs_init_delayed_root(fs_info->delayed_root);
1661
1662 mutex_init(&fs_info->scrub_lock);
1663 atomic_set(&fs_info->scrubs_running, 0);
1664 atomic_set(&fs_info->scrub_pause_req, 0);
1665 atomic_set(&fs_info->scrubs_paused, 0);
1666 atomic_set(&fs_info->scrub_cancel_req, 0);
1667 init_waitqueue_head(&fs_info->scrub_pause_wait);
1668 init_rwsem(&fs_info->scrub_super_lock);
1669 fs_info->scrub_workers_refcnt = 0;
1670 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1671 fs_info->thread_pool_size, &fs_info->generic_worker);
1696 1672
1697 sb->s_blocksize = 4096; 1673 sb->s_blocksize = 4096;
1698 sb->s_blocksize_bits = blksize_bits(4096); 1674 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1711,10 +1687,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1711 1687
1712 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1688 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1713 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1689 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1714 fs_info->btree_inode->i_mapping, 1690 fs_info->btree_inode->i_mapping);
1715 GFP_NOFS); 1691 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
1716 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
1717 GFP_NOFS);
1718 1692
1719 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1693 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1720 1694
@@ -1728,9 +1702,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1728 fs_info->block_group_cache_tree = RB_ROOT; 1702 fs_info->block_group_cache_tree = RB_ROOT;
1729 1703
1730 extent_io_tree_init(&fs_info->freed_extents[0], 1704 extent_io_tree_init(&fs_info->freed_extents[0],
1731 fs_info->btree_inode->i_mapping, GFP_NOFS); 1705 fs_info->btree_inode->i_mapping);
1732 extent_io_tree_init(&fs_info->freed_extents[1], 1706 extent_io_tree_init(&fs_info->freed_extents[1],
1733 fs_info->btree_inode->i_mapping, GFP_NOFS); 1707 fs_info->btree_inode->i_mapping);
1734 fs_info->pinned_extents = &fs_info->freed_extents[0]; 1708 fs_info->pinned_extents = &fs_info->freed_extents[0];
1735 fs_info->do_barriers = 1; 1709 fs_info->do_barriers = 1;
1736 1710
@@ -1760,7 +1734,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1760 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1734 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1761 if (!bh) { 1735 if (!bh) {
1762 err = -EINVAL; 1736 err = -EINVAL;
1763 goto fail_iput; 1737 goto fail_alloc;
1764 } 1738 }
1765 1739
1766 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1740 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
@@ -1772,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1772 1746
1773 disk_super = &fs_info->super_copy; 1747 disk_super = &fs_info->super_copy;
1774 if (!btrfs_super_root(disk_super)) 1748 if (!btrfs_super_root(disk_super))
1775 goto fail_iput; 1749 goto fail_alloc;
1776 1750
1777 /* check FS state, whether FS is broken. */ 1751 /* check FS state, whether FS is broken. */
1778 fs_info->fs_state |= btrfs_super_flags(disk_super); 1752 fs_info->fs_state |= btrfs_super_flags(disk_super);
@@ -1788,7 +1762,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1788 ret = btrfs_parse_options(tree_root, options); 1762 ret = btrfs_parse_options(tree_root, options);
1789 if (ret) { 1763 if (ret) {
1790 err = ret; 1764 err = ret;
1791 goto fail_iput; 1765 goto fail_alloc;
1792 } 1766 }
1793 1767
1794 features = btrfs_super_incompat_flags(disk_super) & 1768 features = btrfs_super_incompat_flags(disk_super) &
@@ -1798,7 +1772,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1798 "unsupported optional features (%Lx).\n", 1772 "unsupported optional features (%Lx).\n",
1799 (unsigned long long)features); 1773 (unsigned long long)features);
1800 err = -EINVAL; 1774 err = -EINVAL;
1801 goto fail_iput; 1775 goto fail_alloc;
1802 } 1776 }
1803 1777
1804 features = btrfs_super_incompat_flags(disk_super); 1778 features = btrfs_super_incompat_flags(disk_super);
@@ -1814,7 +1788,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1814 "unsupported option features (%Lx).\n", 1788 "unsupported option features (%Lx).\n",
1815 (unsigned long long)features); 1789 (unsigned long long)features);
1816 err = -EINVAL; 1790 err = -EINVAL;
1817 goto fail_iput; 1791 goto fail_alloc;
1818 } 1792 }
1819 1793
1820 btrfs_init_workers(&fs_info->generic_worker, 1794 btrfs_init_workers(&fs_info->generic_worker,
@@ -1861,6 +1835,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1861 &fs_info->generic_worker); 1835 &fs_info->generic_worker);
1862 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 1836 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1863 1, &fs_info->generic_worker); 1837 1, &fs_info->generic_worker);
1838 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1839 fs_info->thread_pool_size,
1840 &fs_info->generic_worker);
1864 1841
1865 /* 1842 /*
1866 * endios are largely parallel and should have a very 1843 * endios are largely parallel and should have a very
@@ -1882,6 +1859,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1882 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1859 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1883 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1860 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1884 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1861 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1862 btrfs_start_workers(&fs_info->delayed_workers, 1);
1885 1863
1886 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1864 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1887 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1865 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2138,6 +2116,9 @@ fail_sb_buffer:
2138 btrfs_stop_workers(&fs_info->endio_write_workers); 2116 btrfs_stop_workers(&fs_info->endio_write_workers);
2139 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2117 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2140 btrfs_stop_workers(&fs_info->submit_workers); 2118 btrfs_stop_workers(&fs_info->submit_workers);
2119 btrfs_stop_workers(&fs_info->delayed_workers);
2120fail_alloc:
2121 kfree(fs_info->delayed_root);
2141fail_iput: 2122fail_iput:
2142 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2123 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2143 iput(fs_info->btree_inode); 2124 iput(fs_info->btree_inode);
@@ -2165,11 +2146,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2165 if (uptodate) { 2146 if (uptodate) {
2166 set_buffer_uptodate(bh); 2147 set_buffer_uptodate(bh);
2167 } else { 2148 } else {
2168 if (printk_ratelimit()) { 2149 printk_ratelimited(KERN_WARNING "lost page write due to "
2169 printk(KERN_WARNING "lost page write due to "
2170 "I/O error on %s\n", 2150 "I/O error on %s\n",
2171 bdevname(bh->b_bdev, b)); 2151 bdevname(bh->b_bdev, b));
2172 }
2173 /* note, we dont' set_buffer_write_io_error because we have 2152 /* note, we dont' set_buffer_write_io_error because we have
2174 * our own ways of dealing with the IO errors 2153 * our own ways of dealing with the IO errors
2175 */ 2154 */
@@ -2333,7 +2312,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2333 2312
2334 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2313 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2335 head = &root->fs_info->fs_devices->devices; 2314 head = &root->fs_info->fs_devices->devices;
2336 list_for_each_entry(dev, head, dev_list) { 2315 list_for_each_entry_rcu(dev, head, dev_list) {
2337 if (!dev->bdev) { 2316 if (!dev->bdev) {
2338 total_errors++; 2317 total_errors++;
2339 continue; 2318 continue;
@@ -2366,7 +2345,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2366 } 2345 }
2367 2346
2368 total_errors = 0; 2347 total_errors = 0;
2369 list_for_each_entry(dev, head, dev_list) { 2348 list_for_each_entry_rcu(dev, head, dev_list) {
2370 if (!dev->bdev) 2349 if (!dev->bdev)
2371 continue; 2350 continue;
2372 if (!dev->in_fs_metadata || !dev->writeable) 2351 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2404,12 +2383,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2404 if (btrfs_root_refs(&root->root_item) == 0) 2383 if (btrfs_root_refs(&root->root_item) == 0)
2405 synchronize_srcu(&fs_info->subvol_srcu); 2384 synchronize_srcu(&fs_info->subvol_srcu);
2406 2385
2386 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2387 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2407 free_fs_root(root); 2388 free_fs_root(root);
2408 return 0; 2389 return 0;
2409} 2390}
2410 2391
2411static void free_fs_root(struct btrfs_root *root) 2392static void free_fs_root(struct btrfs_root *root)
2412{ 2393{
2394 iput(root->cache_inode);
2413 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2395 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2414 if (root->anon_super.s_dev) { 2396 if (root->anon_super.s_dev) {
2415 down_write(&root->anon_super.s_umount); 2397 down_write(&root->anon_super.s_umount);
@@ -2417,6 +2399,8 @@ static void free_fs_root(struct btrfs_root *root)
2417 } 2399 }
2418 free_extent_buffer(root->node); 2400 free_extent_buffer(root->node);
2419 free_extent_buffer(root->commit_root); 2401 free_extent_buffer(root->commit_root);
2402 kfree(root->free_ino_ctl);
2403 kfree(root->free_ino_pinned);
2420 kfree(root->name); 2404 kfree(root->name);
2421 kfree(root); 2405 kfree(root);
2422} 2406}
@@ -2520,6 +2504,15 @@ int close_ctree(struct btrfs_root *root)
2520 fs_info->closing = 1; 2504 fs_info->closing = 1;
2521 smp_mb(); 2505 smp_mb();
2522 2506
2507 btrfs_scrub_cancel(root);
2508
2509 /* wait for any defraggers to finish */
2510 wait_event(fs_info->transaction_wait,
2511 (atomic_read(&fs_info->defrag_running) == 0));
2512
2513 /* clear out the rbtree of defraggable inodes */
2514 btrfs_run_defrag_inodes(root->fs_info);
2515
2523 btrfs_put_block_group_cache(fs_info); 2516 btrfs_put_block_group_cache(fs_info);
2524 2517
2525 /* 2518 /*
@@ -2578,6 +2571,7 @@ int close_ctree(struct btrfs_root *root)
2578 del_fs_roots(fs_info); 2571 del_fs_roots(fs_info);
2579 2572
2580 iput(fs_info->btree_inode); 2573 iput(fs_info->btree_inode);
2574 kfree(fs_info->delayed_root);
2581 2575
2582 btrfs_stop_workers(&fs_info->generic_worker); 2576 btrfs_stop_workers(&fs_info->generic_worker);
2583 btrfs_stop_workers(&fs_info->fixup_workers); 2577 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2589,6 +2583,7 @@ int close_ctree(struct btrfs_root *root)
2589 btrfs_stop_workers(&fs_info->endio_write_workers); 2583 btrfs_stop_workers(&fs_info->endio_write_workers);
2590 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2584 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2591 btrfs_stop_workers(&fs_info->submit_workers); 2585 btrfs_stop_workers(&fs_info->submit_workers);
2586 btrfs_stop_workers(&fs_info->delayed_workers);
2592 2587
2593 btrfs_close_devices(fs_info->fs_devices); 2588 btrfs_close_devices(fs_info->fs_devices);
2594 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2589 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2665,6 +2660,29 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2665 if (current->flags & PF_MEMALLOC) 2660 if (current->flags & PF_MEMALLOC)
2666 return; 2661 return;
2667 2662
2663 btrfs_balance_delayed_items(root);
2664
2665 num_dirty = root->fs_info->dirty_metadata_bytes;
2666
2667 if (num_dirty > thresh) {
2668 balance_dirty_pages_ratelimited_nr(
2669 root->fs_info->btree_inode->i_mapping, 1);
2670 }
2671 return;
2672}
2673
2674void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2675{
2676 /*
2677 * looks as though older kernels can get into trouble with
2678 * this code, they end up stuck in balance_dirty_pages forever
2679 */
2680 u64 num_dirty;
2681 unsigned long thresh = 32 * 1024 * 1024;
2682
2683 if (current->flags & PF_MEMALLOC)
2684 return;
2685
2668 num_dirty = root->fs_info->dirty_metadata_bytes; 2686 num_dirty = root->fs_info->dirty_metadata_bytes;
2669 2687
2670 if (num_dirty > thresh) { 2688 if (num_dirty > thresh) {
@@ -2697,7 +2715,7 @@ int btree_lock_page_hook(struct page *page)
2697 goto out; 2715 goto out;
2698 2716
2699 len = page->private >> 2; 2717 len = page->private >> 2;
2700 eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); 2718 eb = find_extent_buffer(io_tree, bytenr, len);
2701 if (!eb) 2719 if (!eb)
2702 goto out; 2720 goto out;
2703 2721
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 07b20dc2fd95..a0b610a67aae 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -55,35 +55,20 @@ int btrfs_commit_super(struct btrfs_root *root);
55int btrfs_error_commit_super(struct btrfs_root *root); 55int btrfs_error_commit_super(struct btrfs_root *root);
56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 56struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
57 u64 bytenr, u32 blocksize); 57 u64 bytenr, u32 blocksize);
58struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
59 u64 root_objectid);
60struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
61 struct btrfs_key *location,
62 const char *name, int namelen);
63struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 58struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
64 struct btrfs_key *location); 59 struct btrfs_key *location);
65struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 60struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
66 struct btrfs_key *location); 61 struct btrfs_key *location);
67int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 62int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
68int btrfs_insert_dev_radix(struct btrfs_root *root,
69 struct block_device *bdev,
70 u64 device_id,
71 u64 block_start,
72 u64 num_blocks);
73void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 63void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
64void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
74int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 65int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
75void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 66void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
76void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
77int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 67int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
78int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 68int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
79int wait_on_tree_block_writeback(struct btrfs_root *root,
80 struct extent_buffer *buf);
81int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 69int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
82u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 70u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
83void btrfs_csum_final(u32 crc, char *result); 71void btrfs_csum_final(u32 crc, char *result);
84int btrfs_open_device(struct btrfs_device *dev);
85int btrfs_verify_block_csum(struct btrfs_root *root,
86 struct extent_buffer *buf);
87int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 72int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
88 int metadata); 73 int metadata);
89int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 74int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
@@ -91,8 +76,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
91 unsigned long bio_flags, u64 bio_offset, 76 unsigned long bio_flags, u64 bio_offset,
92 extent_submit_bio_hook_t *submit_bio_start, 77 extent_submit_bio_hook_t *submit_bio_start,
93 extent_submit_bio_hook_t *submit_bio_done); 78 extent_submit_bio_hook_t *submit_bio_done);
94
95int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
96unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); 79unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
97int btrfs_write_tree_block(struct extent_buffer *buf); 80int btrfs_write_tree_block(struct extent_buffer *buf);
98int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 81int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index b4ffad859adb..1b8dc33778f9 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -32,7 +32,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
32 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
33 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
34 34
35 fid->objectid = inode->i_ino; 35 fid->objectid = btrfs_ino(inode);
36 fid->root_objectid = BTRFS_I(inode)->root->objectid; 36 fid->root_objectid = BTRFS_I(inode)->root->objectid;
37 fid->gen = inode->i_generation; 37 fid->gen = inode->i_generation;
38 38
@@ -178,13 +178,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
178 if (!path) 178 if (!path)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 180
181 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 181 if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
182 key.objectid = root->root_key.objectid; 182 key.objectid = root->root_key.objectid;
183 key.type = BTRFS_ROOT_BACKREF_KEY; 183 key.type = BTRFS_ROOT_BACKREF_KEY;
184 key.offset = (u64)-1; 184 key.offset = (u64)-1;
185 root = root->fs_info->tree_root; 185 root = root->fs_info->tree_root;
186 } else { 186 } else {
187 key.objectid = dir->i_ino; 187 key.objectid = btrfs_ino(dir);
188 key.type = BTRFS_INODE_REF_KEY; 188 key.type = BTRFS_INODE_REF_KEY;
189 key.offset = (u64)-1; 189 key.offset = (u64)-1;
190 } 190 }
@@ -244,6 +244,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
244 struct btrfs_key key; 244 struct btrfs_key key;
245 int name_len; 245 int name_len;
246 int ret; 246 int ret;
247 u64 ino;
247 248
248 if (!dir || !inode) 249 if (!dir || !inode)
249 return -EINVAL; 250 return -EINVAL;
@@ -251,19 +252,21 @@ static int btrfs_get_name(struct dentry *parent, char *name,
251 if (!S_ISDIR(dir->i_mode)) 252 if (!S_ISDIR(dir->i_mode))
252 return -EINVAL; 253 return -EINVAL;
253 254
255 ino = btrfs_ino(inode);
256
254 path = btrfs_alloc_path(); 257 path = btrfs_alloc_path();
255 if (!path) 258 if (!path)
256 return -ENOMEM; 259 return -ENOMEM;
257 path->leave_spinning = 1; 260 path->leave_spinning = 1;
258 261
259 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 262 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
260 key.objectid = BTRFS_I(inode)->root->root_key.objectid; 263 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
261 key.type = BTRFS_ROOT_BACKREF_KEY; 264 key.type = BTRFS_ROOT_BACKREF_KEY;
262 key.offset = (u64)-1; 265 key.offset = (u64)-1;
263 root = root->fs_info->tree_root; 266 root = root->fs_info->tree_root;
264 } else { 267 } else {
265 key.objectid = inode->i_ino; 268 key.objectid = ino;
266 key.offset = dir->i_ino; 269 key.offset = btrfs_ino(dir);
267 key.type = BTRFS_INODE_REF_KEY; 270 key.type = BTRFS_INODE_REF_KEY;
268 } 271 }
269 272
@@ -272,7 +275,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
272 btrfs_free_path(path); 275 btrfs_free_path(path);
273 return ret; 276 return ret;
274 } else if (ret > 0) { 277 } else if (ret > 0) {
275 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 278 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
276 path->slots[0]--; 279 path->slots[0]--;
277 } else { 280 } else {
278 btrfs_free_path(path); 281 btrfs_free_path(path);
@@ -281,11 +284,11 @@ static int btrfs_get_name(struct dentry *parent, char *name,
281 } 284 }
282 leaf = path->nodes[0]; 285 leaf = path->nodes[0];
283 286
284 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 287 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
285 rref = btrfs_item_ptr(leaf, path->slots[0], 288 rref = btrfs_item_ptr(leaf, path->slots[0],
286 struct btrfs_root_ref); 289 struct btrfs_root_ref);
287 name_ptr = (unsigned long)(rref + 1); 290 name_ptr = (unsigned long)(rref + 1);
288 name_len = btrfs_root_ref_name_len(leaf, rref); 291 name_len = btrfs_root_ref_name_len(leaf, rref);
289 } else { 292 } else {
290 iref = btrfs_item_ptr(leaf, path->slots[0], 293 iref = btrfs_item_ptr(leaf, path->slots[0],
291 struct btrfs_inode_ref); 294 struct btrfs_inode_ref);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ee6bd55e16c..169bd62ce776 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -94,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
94 return (cache->flags & bits) == bits; 94 return (cache->flags & bits) == bits;
95} 95}
96 96
97void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 97static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
98{ 98{
99 atomic_inc(&cache->count); 99 atomic_inc(&cache->count);
100} 100}
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
105 WARN_ON(cache->pinned > 0); 105 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 106 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0); 107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl);
108 kfree(cache); 109 kfree(cache);
109 } 110 }
110} 111}
@@ -379,7 +380,7 @@ again:
379 break; 380 break;
380 381
381 caching_ctl->progress = last; 382 caching_ctl->progress = last;
382 btrfs_release_path(extent_root, path); 383 btrfs_release_path(path);
383 up_read(&fs_info->extent_commit_sem); 384 up_read(&fs_info->extent_commit_sem);
384 mutex_unlock(&caching_ctl->mutex); 385 mutex_unlock(&caching_ctl->mutex);
385 if (btrfs_transaction_in_commit(fs_info)) 386 if (btrfs_transaction_in_commit(fs_info))
@@ -754,8 +755,12 @@ again:
754 atomic_inc(&head->node.refs); 755 atomic_inc(&head->node.refs);
755 spin_unlock(&delayed_refs->lock); 756 spin_unlock(&delayed_refs->lock);
756 757
757 btrfs_release_path(root->fs_info->extent_root, path); 758 btrfs_release_path(path);
758 759
760 /*
761 * Mutex was contended, block until it's released and try
762 * again
763 */
759 mutex_lock(&head->mutex); 764 mutex_lock(&head->mutex);
760 mutex_unlock(&head->mutex); 765 mutex_unlock(&head->mutex);
761 btrfs_put_delayed_ref(&head->node); 766 btrfs_put_delayed_ref(&head->node);
@@ -934,7 +939,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
934 break; 939 break;
935 } 940 }
936 } 941 }
937 btrfs_release_path(root, path); 942 btrfs_release_path(path);
938 943
939 if (owner < BTRFS_FIRST_FREE_OBJECTID) 944 if (owner < BTRFS_FIRST_FREE_OBJECTID)
940 new_size += sizeof(*bi); 945 new_size += sizeof(*bi);
@@ -947,7 +952,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
947 BUG_ON(ret); 952 BUG_ON(ret);
948 953
949 ret = btrfs_extend_item(trans, root, path, new_size); 954 ret = btrfs_extend_item(trans, root, path, new_size);
950 BUG_ON(ret);
951 955
952 leaf = path->nodes[0]; 956 leaf = path->nodes[0];
953 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 957 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1042,7 +1046,7 @@ again:
1042 return 0; 1046 return 0;
1043#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1047#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1044 key.type = BTRFS_EXTENT_REF_V0_KEY; 1048 key.type = BTRFS_EXTENT_REF_V0_KEY;
1045 btrfs_release_path(root, path); 1049 btrfs_release_path(path);
1046 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1050 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1047 if (ret < 0) { 1051 if (ret < 0) {
1048 err = ret; 1052 err = ret;
@@ -1080,7 +1084,7 @@ again:
1080 if (match_extent_data_ref(leaf, ref, root_objectid, 1084 if (match_extent_data_ref(leaf, ref, root_objectid,
1081 owner, offset)) { 1085 owner, offset)) {
1082 if (recow) { 1086 if (recow) {
1083 btrfs_release_path(root, path); 1087 btrfs_release_path(path);
1084 goto again; 1088 goto again;
1085 } 1089 }
1086 err = 0; 1090 err = 0;
@@ -1141,7 +1145,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1141 if (match_extent_data_ref(leaf, ref, root_objectid, 1145 if (match_extent_data_ref(leaf, ref, root_objectid,
1142 owner, offset)) 1146 owner, offset))
1143 break; 1147 break;
1144 btrfs_release_path(root, path); 1148 btrfs_release_path(path);
1145 key.offset++; 1149 key.offset++;
1146 ret = btrfs_insert_empty_item(trans, root, path, &key, 1150 ret = btrfs_insert_empty_item(trans, root, path, &key,
1147 size); 1151 size);
@@ -1167,7 +1171,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1167 btrfs_mark_buffer_dirty(leaf); 1171 btrfs_mark_buffer_dirty(leaf);
1168 ret = 0; 1172 ret = 0;
1169fail: 1173fail:
1170 btrfs_release_path(root, path); 1174 btrfs_release_path(path);
1171 return ret; 1175 return ret;
1172} 1176}
1173 1177
@@ -1293,7 +1297,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1293 ret = -ENOENT; 1297 ret = -ENOENT;
1294#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1298#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1295 if (ret == -ENOENT && parent) { 1299 if (ret == -ENOENT && parent) {
1296 btrfs_release_path(root, path); 1300 btrfs_release_path(path);
1297 key.type = BTRFS_EXTENT_REF_V0_KEY; 1301 key.type = BTRFS_EXTENT_REF_V0_KEY;
1298 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1302 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1299 if (ret > 0) 1303 if (ret > 0)
@@ -1322,7 +1326,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1322 } 1326 }
1323 1327
1324 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1328 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1325 btrfs_release_path(root, path); 1329 btrfs_release_path(path);
1326 return ret; 1330 return ret;
1327} 1331}
1328 1332
@@ -1555,7 +1559,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1555 size = btrfs_extent_inline_ref_size(type); 1559 size = btrfs_extent_inline_ref_size(type);
1556 1560
1557 ret = btrfs_extend_item(trans, root, path, size); 1561 ret = btrfs_extend_item(trans, root, path, size);
1558 BUG_ON(ret);
1559 1562
1560 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1563 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1561 refs = btrfs_extent_refs(leaf, ei); 1564 refs = btrfs_extent_refs(leaf, ei);
@@ -1608,7 +1611,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1608 if (ret != -ENOENT) 1611 if (ret != -ENOENT)
1609 return ret; 1612 return ret;
1610 1613
1611 btrfs_release_path(root, path); 1614 btrfs_release_path(path);
1612 *ref_ret = NULL; 1615 *ref_ret = NULL;
1613 1616
1614 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1617 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1684,7 +1687,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1684 end - ptr - size); 1687 end - ptr - size);
1685 item_size -= size; 1688 item_size -= size;
1686 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1689 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1687 BUG_ON(ret);
1688 } 1690 }
1689 btrfs_mark_buffer_dirty(leaf); 1691 btrfs_mark_buffer_dirty(leaf);
1690 return 0; 1692 return 0;
@@ -1862,7 +1864,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1862 __run_delayed_extent_op(extent_op, leaf, item); 1864 __run_delayed_extent_op(extent_op, leaf, item);
1863 1865
1864 btrfs_mark_buffer_dirty(leaf); 1866 btrfs_mark_buffer_dirty(leaf);
1865 btrfs_release_path(root->fs_info->extent_root, path); 1867 btrfs_release_path(path);
1866 1868
1867 path->reada = 1; 1869 path->reada = 1;
1868 path->leave_spinning = 1; 1870 path->leave_spinning = 1;
@@ -2297,6 +2299,10 @@ again:
2297 atomic_inc(&ref->refs); 2299 atomic_inc(&ref->refs);
2298 2300
2299 spin_unlock(&delayed_refs->lock); 2301 spin_unlock(&delayed_refs->lock);
2302 /*
2303 * Mutex was contended, block until it's
2304 * released and try again
2305 */
2300 mutex_lock(&head->mutex); 2306 mutex_lock(&head->mutex);
2301 mutex_unlock(&head->mutex); 2307 mutex_unlock(&head->mutex);
2302 2308
@@ -2361,8 +2367,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2361 atomic_inc(&head->node.refs); 2367 atomic_inc(&head->node.refs);
2362 spin_unlock(&delayed_refs->lock); 2368 spin_unlock(&delayed_refs->lock);
2363 2369
2364 btrfs_release_path(root->fs_info->extent_root, path); 2370 btrfs_release_path(path);
2365 2371
2372 /*
2373 * Mutex was contended, block until it's released and let
2374 * caller try again
2375 */
2366 mutex_lock(&head->mutex); 2376 mutex_lock(&head->mutex);
2367 mutex_unlock(&head->mutex); 2377 mutex_unlock(&head->mutex);
2368 btrfs_put_delayed_ref(&head->node); 2378 btrfs_put_delayed_ref(&head->node);
@@ -2510,126 +2520,6 @@ out:
2510 return ret; 2520 return ret;
2511} 2521}
2512 2522
2513#if 0
2514int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2515 struct extent_buffer *buf, u32 nr_extents)
2516{
2517 struct btrfs_key key;
2518 struct btrfs_file_extent_item *fi;
2519 u64 root_gen;
2520 u32 nritems;
2521 int i;
2522 int level;
2523 int ret = 0;
2524 int shared = 0;
2525
2526 if (!root->ref_cows)
2527 return 0;
2528
2529 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2530 shared = 0;
2531 root_gen = root->root_key.offset;
2532 } else {
2533 shared = 1;
2534 root_gen = trans->transid - 1;
2535 }
2536
2537 level = btrfs_header_level(buf);
2538 nritems = btrfs_header_nritems(buf);
2539
2540 if (level == 0) {
2541 struct btrfs_leaf_ref *ref;
2542 struct btrfs_extent_info *info;
2543
2544 ref = btrfs_alloc_leaf_ref(root, nr_extents);
2545 if (!ref) {
2546 ret = -ENOMEM;
2547 goto out;
2548 }
2549
2550 ref->root_gen = root_gen;
2551 ref->bytenr = buf->start;
2552 ref->owner = btrfs_header_owner(buf);
2553 ref->generation = btrfs_header_generation(buf);
2554 ref->nritems = nr_extents;
2555 info = ref->extents;
2556
2557 for (i = 0; nr_extents > 0 && i < nritems; i++) {
2558 u64 disk_bytenr;
2559 btrfs_item_key_to_cpu(buf, &key, i);
2560 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2561 continue;
2562 fi = btrfs_item_ptr(buf, i,
2563 struct btrfs_file_extent_item);
2564 if (btrfs_file_extent_type(buf, fi) ==
2565 BTRFS_FILE_EXTENT_INLINE)
2566 continue;
2567 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2568 if (disk_bytenr == 0)
2569 continue;
2570
2571 info->bytenr = disk_bytenr;
2572 info->num_bytes =
2573 btrfs_file_extent_disk_num_bytes(buf, fi);
2574 info->objectid = key.objectid;
2575 info->offset = key.offset;
2576 info++;
2577 }
2578
2579 ret = btrfs_add_leaf_ref(root, ref, shared);
2580 if (ret == -EEXIST && shared) {
2581 struct btrfs_leaf_ref *old;
2582 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2583 BUG_ON(!old);
2584 btrfs_remove_leaf_ref(root, old);
2585 btrfs_free_leaf_ref(root, old);
2586 ret = btrfs_add_leaf_ref(root, ref, shared);
2587 }
2588 WARN_ON(ret);
2589 btrfs_free_leaf_ref(root, ref);
2590 }
2591out:
2592 return ret;
2593}
2594
2595/* when a block goes through cow, we update the reference counts of
2596 * everything that block points to. The internal pointers of the block
2597 * can be in just about any order, and it is likely to have clusters of
2598 * things that are close together and clusters of things that are not.
2599 *
2600 * To help reduce the seeks that come with updating all of these reference
2601 * counts, sort them by byte number before actual updates are done.
2602 *
2603 * struct refsort is used to match byte number to slot in the btree block.
2604 * we sort based on the byte number and then use the slot to actually
2605 * find the item.
2606 *
2607 * struct refsort is smaller than strcut btrfs_item and smaller than
2608 * struct btrfs_key_ptr. Since we're currently limited to the page size
2609 * for a btree block, there's no way for a kmalloc of refsorts for a
2610 * single node to be bigger than a page.
2611 */
2612struct refsort {
2613 u64 bytenr;
2614 u32 slot;
2615};
2616
2617/*
2618 * for passing into sort()
2619 */
2620static int refsort_cmp(const void *a_void, const void *b_void)
2621{
2622 const struct refsort *a = a_void;
2623 const struct refsort *b = b_void;
2624
2625 if (a->bytenr < b->bytenr)
2626 return -1;
2627 if (a->bytenr > b->bytenr)
2628 return 1;
2629 return 0;
2630}
2631#endif
2632
2633static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2523static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2634 struct btrfs_root *root, 2524 struct btrfs_root *root,
2635 struct extent_buffer *buf, 2525 struct extent_buffer *buf,
@@ -2732,7 +2622,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2732 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2622 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2733 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2623 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2734 btrfs_mark_buffer_dirty(leaf); 2624 btrfs_mark_buffer_dirty(leaf);
2735 btrfs_release_path(extent_root, path); 2625 btrfs_release_path(path);
2736fail: 2626fail:
2737 if (ret) 2627 if (ret)
2738 return ret; 2628 return ret;
@@ -2785,7 +2675,7 @@ again:
2785 inode = lookup_free_space_inode(root, block_group, path); 2675 inode = lookup_free_space_inode(root, block_group, path);
2786 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2676 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2787 ret = PTR_ERR(inode); 2677 ret = PTR_ERR(inode);
2788 btrfs_release_path(root, path); 2678 btrfs_release_path(path);
2789 goto out; 2679 goto out;
2790 } 2680 }
2791 2681
@@ -2854,7 +2744,7 @@ again:
2854out_put: 2744out_put:
2855 iput(inode); 2745 iput(inode);
2856out_free: 2746out_free:
2857 btrfs_release_path(root, path); 2747 btrfs_release_path(path);
2858out: 2748out:
2859 spin_lock(&block_group->lock); 2749 spin_lock(&block_group->lock);
2860 block_group->disk_cache_state = dcs; 2750 block_group->disk_cache_state = dcs;
@@ -3144,7 +3034,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3144 /* make sure bytes are sectorsize aligned */ 3034 /* make sure bytes are sectorsize aligned */
3145 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3035 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3146 3036
3147 if (root == root->fs_info->tree_root) { 3037 if (root == root->fs_info->tree_root ||
3038 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3148 alloc_chunk = 0; 3039 alloc_chunk = 0;
3149 committed = 1; 3040 committed = 1;
3150 } 3041 }
@@ -3211,18 +3102,6 @@ commit_trans:
3211 goto again; 3102 goto again;
3212 } 3103 }
3213 3104
3214#if 0 /* I hope we never need this code again, just in case */
3215 printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
3216 "%llu bytes_reserved, " "%llu bytes_pinned, "
3217 "%llu bytes_readonly, %llu may use %llu total\n",
3218 (unsigned long long)bytes,
3219 (unsigned long long)data_sinfo->bytes_used,
3220 (unsigned long long)data_sinfo->bytes_reserved,
3221 (unsigned long long)data_sinfo->bytes_pinned,
3222 (unsigned long long)data_sinfo->bytes_readonly,
3223 (unsigned long long)data_sinfo->bytes_may_use,
3224 (unsigned long long)data_sinfo->total_bytes);
3225#endif
3226 return -ENOSPC; 3105 return -ENOSPC;
3227 } 3106 }
3228 data_sinfo->bytes_may_use += bytes; 3107 data_sinfo->bytes_may_use += bytes;
@@ -3425,6 +3304,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3425 if (reserved == 0) 3304 if (reserved == 0)
3426 return 0; 3305 return 0;
3427 3306
3307 /* nothing to shrink - nothing to reclaim */
3308 if (root->fs_info->delalloc_bytes == 0)
3309 return 0;
3310
3428 max_reclaim = min(reserved, to_reclaim); 3311 max_reclaim = min(reserved, to_reclaim);
3429 3312
3430 while (loops < 1024) { 3313 while (loops < 1024) {
@@ -3651,8 +3534,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3651 spin_unlock(&block_rsv->lock); 3534 spin_unlock(&block_rsv->lock);
3652} 3535}
3653 3536
3654void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3537static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3655 struct btrfs_block_rsv *dest, u64 num_bytes) 3538 struct btrfs_block_rsv *dest, u64 num_bytes)
3656{ 3539{
3657 struct btrfs_space_info *space_info = block_rsv->space_info; 3540 struct btrfs_space_info *space_info = block_rsv->space_info;
3658 3541
@@ -3855,23 +3738,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3855 u64 meta_used; 3738 u64 meta_used;
3856 u64 data_used; 3739 u64 data_used;
3857 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3740 int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3858#if 0
3859 /*
3860 * per tree used space accounting can be inaccuracy, so we
3861 * can't rely on it.
3862 */
3863 spin_lock(&fs_info->extent_root->accounting_lock);
3864 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3865 spin_unlock(&fs_info->extent_root->accounting_lock);
3866
3867 spin_lock(&fs_info->csum_root->accounting_lock);
3868 num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3869 spin_unlock(&fs_info->csum_root->accounting_lock);
3870 3741
3871 spin_lock(&fs_info->tree_root->accounting_lock);
3872 num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3873 spin_unlock(&fs_info->tree_root->accounting_lock);
3874#endif
3875 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3742 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3876 spin_lock(&sinfo->lock); 3743 spin_lock(&sinfo->lock);
3877 data_used = sinfo->bytes_used; 3744 data_used = sinfo->bytes_used;
@@ -3924,10 +3791,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3924 block_rsv->reserved = block_rsv->size; 3791 block_rsv->reserved = block_rsv->size;
3925 block_rsv->full = 1; 3792 block_rsv->full = 1;
3926 } 3793 }
3927#if 0 3794
3928 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3929 block_rsv->size, block_rsv->reserved);
3930#endif
3931 spin_unlock(&sinfo->lock); 3795 spin_unlock(&sinfo->lock);
3932 spin_unlock(&block_rsv->lock); 3796 spin_unlock(&block_rsv->lock);
3933} 3797}
@@ -3973,12 +3837,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3973 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3974} 3838}
3975 3839
3976static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3977{
3978 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3979 3 * num_items;
3980}
3981
3982int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3983 struct btrfs_root *root, 3841 struct btrfs_root *root,
3984 int num_items) 3842 int num_items)
@@ -3989,7 +3847,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3989 if (num_items == 0 || root->fs_info->chunk_root == root) 3847 if (num_items == 0 || root->fs_info->chunk_root == root)
3990 return 0; 3848 return 0;
3991 3849
3992 num_bytes = calc_trans_metadata_size(root, num_items); 3850 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
3993 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3851 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3994 num_bytes); 3852 num_bytes);
3995 if (!ret) { 3853 if (!ret) {
@@ -4028,14 +3886,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4028 * If all of the metadata space is used, we can commit 3886 * If all of the metadata space is used, we can commit
4029 * transaction and use space it freed. 3887 * transaction and use space it freed.
4030 */ 3888 */
4031 u64 num_bytes = calc_trans_metadata_size(root, 4); 3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
4032 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4033} 3891}
4034 3892
4035void btrfs_orphan_release_metadata(struct inode *inode) 3893void btrfs_orphan_release_metadata(struct inode *inode)
4036{ 3894{
4037 struct btrfs_root *root = BTRFS_I(inode)->root; 3895 struct btrfs_root *root = BTRFS_I(inode)->root;
4038 u64 num_bytes = calc_trans_metadata_size(root, 4); 3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
4039 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4040} 3898}
4041 3899
@@ -4049,7 +3907,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4049 * two for root back/forward refs, two for directory entries 3907 * two for root back/forward refs, two for directory entries
4050 * and one for root of the snapshot. 3908 * and one for root of the snapshot.
4051 */ 3909 */
4052 u64 num_bytes = calc_trans_metadata_size(root, 5); 3910 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
4053 dst_rsv->space_info = src_rsv->space_info; 3911 dst_rsv->space_info = src_rsv->space_info;
4054 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3912 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4055} 3913}
@@ -4078,7 +3936,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4078 3936
4079 if (nr_extents > reserved_extents) { 3937 if (nr_extents > reserved_extents) {
4080 nr_extents -= reserved_extents; 3938 nr_extents -= reserved_extents;
4081 to_reserve = calc_trans_metadata_size(root, nr_extents); 3939 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4082 } else { 3940 } else {
4083 nr_extents = 0; 3941 nr_extents = 0;
4084 to_reserve = 0; 3942 to_reserve = 0;
@@ -4132,7 +3990,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4132 3990
4133 to_free = calc_csum_metadata_size(inode, num_bytes); 3991 to_free = calc_csum_metadata_size(inode, num_bytes);
4134 if (nr_extents > 0) 3992 if (nr_extents > 0)
4135 to_free += calc_trans_metadata_size(root, nr_extents); 3993 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
4136 3994
4137 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 3995 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4138 to_free); 3996 to_free);
@@ -4541,7 +4399,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4541 NULL, refs_to_drop, 4399 NULL, refs_to_drop,
4542 is_data); 4400 is_data);
4543 BUG_ON(ret); 4401 BUG_ON(ret);
4544 btrfs_release_path(extent_root, path); 4402 btrfs_release_path(path);
4545 path->leave_spinning = 1; 4403 path->leave_spinning = 1;
4546 4404
4547 key.objectid = bytenr; 4405 key.objectid = bytenr;
@@ -4580,7 +4438,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4580 owner_objectid, 0); 4438 owner_objectid, 0);
4581 BUG_ON(ret < 0); 4439 BUG_ON(ret < 0);
4582 4440
4583 btrfs_release_path(extent_root, path); 4441 btrfs_release_path(path);
4584 path->leave_spinning = 1; 4442 path->leave_spinning = 1;
4585 4443
4586 key.objectid = bytenr; 4444 key.objectid = bytenr;
@@ -4650,7 +4508,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4650 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4508 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4651 num_to_del); 4509 num_to_del);
4652 BUG_ON(ret); 4510 BUG_ON(ret);
4653 btrfs_release_path(extent_root, path); 4511 btrfs_release_path(path);
4654 4512
4655 if (is_data) { 4513 if (is_data) {
4656 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4514 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4893,7 +4751,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4893 return 0; 4751 return 0;
4894 4752
4895 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4753 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4896 (cache->free_space >= num_bytes)); 4754 (cache->free_space_ctl->free_space >= num_bytes));
4897 4755
4898 put_caching_control(caching_ctl); 4756 put_caching_control(caching_ctl);
4899 return 0; 4757 return 0;
@@ -6480,7 +6338,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6480 trans->block_rsv = block_rsv; 6338 trans->block_rsv = block_rsv;
6481 } 6339 }
6482 } 6340 }
6483 btrfs_release_path(root, path); 6341 btrfs_release_path(path);
6484 BUG_ON(err); 6342 BUG_ON(err);
6485 6343
6486 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6344 ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6584,1514 +6442,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6584 return ret; 6442 return ret;
6585} 6443}
6586 6444
6587#if 0
6588static unsigned long calc_ra(unsigned long start, unsigned long last,
6589 unsigned long nr)
6590{
6591 return min(last, start + nr - 1);
6592}
6593
6594static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6595 u64 len)
6596{
6597 u64 page_start;
6598 u64 page_end;
6599 unsigned long first_index;
6600 unsigned long last_index;
6601 unsigned long i;
6602 struct page *page;
6603 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6604 struct file_ra_state *ra;
6605 struct btrfs_ordered_extent *ordered;
6606 unsigned int total_read = 0;
6607 unsigned int total_dirty = 0;
6608 int ret = 0;
6609
6610 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6611 if (!ra)
6612 return -ENOMEM;
6613
6614 mutex_lock(&inode->i_mutex);
6615 first_index = start >> PAGE_CACHE_SHIFT;
6616 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6617
6618 /* make sure the dirty trick played by the caller work */
6619 ret = invalidate_inode_pages2_range(inode->i_mapping,
6620 first_index, last_index);
6621 if (ret)
6622 goto out_unlock;
6623
6624 file_ra_state_init(ra, inode->i_mapping);
6625
6626 for (i = first_index ; i <= last_index; i++) {
6627 if (total_read % ra->ra_pages == 0) {
6628 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6629 calc_ra(i, last_index, ra->ra_pages));
6630 }
6631 total_read++;
6632again:
6633 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6634 BUG_ON(1);
6635 page = grab_cache_page(inode->i_mapping, i);
6636 if (!page) {
6637 ret = -ENOMEM;
6638 goto out_unlock;
6639 }
6640 if (!PageUptodate(page)) {
6641 btrfs_readpage(NULL, page);
6642 lock_page(page);
6643 if (!PageUptodate(page)) {
6644 unlock_page(page);
6645 page_cache_release(page);
6646 ret = -EIO;
6647 goto out_unlock;
6648 }
6649 }
6650 wait_on_page_writeback(page);
6651
6652 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6653 page_end = page_start + PAGE_CACHE_SIZE - 1;
6654 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6655
6656 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6657 if (ordered) {
6658 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6659 unlock_page(page);
6660 page_cache_release(page);
6661 btrfs_start_ordered_extent(inode, ordered, 1);
6662 btrfs_put_ordered_extent(ordered);
6663 goto again;
6664 }
6665 set_page_extent_mapped(page);
6666
6667 if (i == first_index)
6668 set_extent_bits(io_tree, page_start, page_end,
6669 EXTENT_BOUNDARY, GFP_NOFS);
6670 btrfs_set_extent_delalloc(inode, page_start, page_end);
6671
6672 set_page_dirty(page);
6673 total_dirty++;
6674
6675 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6676 unlock_page(page);
6677 page_cache_release(page);
6678 }
6679
6680out_unlock:
6681 kfree(ra);
6682 mutex_unlock(&inode->i_mutex);
6683 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6684 return ret;
6685}
6686
6687static noinline int relocate_data_extent(struct inode *reloc_inode,
6688 struct btrfs_key *extent_key,
6689 u64 offset)
6690{
6691 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6692 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6693 struct extent_map *em;
6694 u64 start = extent_key->objectid - offset;
6695 u64 end = start + extent_key->offset - 1;
6696
6697 em = alloc_extent_map(GFP_NOFS);
6698 BUG_ON(!em);
6699
6700 em->start = start;
6701 em->len = extent_key->offset;
6702 em->block_len = extent_key->offset;
6703 em->block_start = extent_key->objectid;
6704 em->bdev = root->fs_info->fs_devices->latest_bdev;
6705 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6706
6707 /* setup extent map to cheat btrfs_readpage */
6708 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6709 while (1) {
6710 int ret;
6711 write_lock(&em_tree->lock);
6712 ret = add_extent_mapping(em_tree, em);
6713 write_unlock(&em_tree->lock);
6714 if (ret != -EEXIST) {
6715 free_extent_map(em);
6716 break;
6717 }
6718 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6719 }
6720 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6721
6722 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6723}
6724
6725struct btrfs_ref_path {
6726 u64 extent_start;
6727 u64 nodes[BTRFS_MAX_LEVEL];
6728 u64 root_objectid;
6729 u64 root_generation;
6730 u64 owner_objectid;
6731 u32 num_refs;
6732 int lowest_level;
6733 int current_level;
6734 int shared_level;
6735
6736 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6737 u64 new_nodes[BTRFS_MAX_LEVEL];
6738};
6739
6740struct disk_extent {
6741 u64 ram_bytes;
6742 u64 disk_bytenr;
6743 u64 disk_num_bytes;
6744 u64 offset;
6745 u64 num_bytes;
6746 u8 compression;
6747 u8 encryption;
6748 u16 other_encoding;
6749};
6750
6751static int is_cowonly_root(u64 root_objectid)
6752{
6753 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6754 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6755 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6756 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6757 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6758 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6759 return 1;
6760 return 0;
6761}
6762
6763static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6764 struct btrfs_root *extent_root,
6765 struct btrfs_ref_path *ref_path,
6766 int first_time)
6767{
6768 struct extent_buffer *leaf;
6769 struct btrfs_path *path;
6770 struct btrfs_extent_ref *ref;
6771 struct btrfs_key key;
6772 struct btrfs_key found_key;
6773 u64 bytenr;
6774 u32 nritems;
6775 int level;
6776 int ret = 1;
6777
6778 path = btrfs_alloc_path();
6779 if (!path)
6780 return -ENOMEM;
6781
6782 if (first_time) {
6783 ref_path->lowest_level = -1;
6784 ref_path->current_level = -1;
6785 ref_path->shared_level = -1;
6786 goto walk_up;
6787 }
6788walk_down:
6789 level = ref_path->current_level - 1;
6790 while (level >= -1) {
6791 u64 parent;
6792 if (level < ref_path->lowest_level)
6793 break;
6794
6795 if (level >= 0)
6796 bytenr = ref_path->nodes[level];
6797 else
6798 bytenr = ref_path->extent_start;
6799 BUG_ON(bytenr == 0);
6800
6801 parent = ref_path->nodes[level + 1];
6802 ref_path->nodes[level + 1] = 0;
6803 ref_path->current_level = level;
6804 BUG_ON(parent == 0);
6805
6806 key.objectid = bytenr;
6807 key.offset = parent + 1;
6808 key.type = BTRFS_EXTENT_REF_KEY;
6809
6810 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6811 if (ret < 0)
6812 goto out;
6813 BUG_ON(ret == 0);
6814
6815 leaf = path->nodes[0];
6816 nritems = btrfs_header_nritems(leaf);
6817 if (path->slots[0] >= nritems) {
6818 ret = btrfs_next_leaf(extent_root, path);
6819 if (ret < 0)
6820 goto out;
6821 if (ret > 0)
6822 goto next;
6823 leaf = path->nodes[0];
6824 }
6825
6826 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6827 if (found_key.objectid == bytenr &&
6828 found_key.type == BTRFS_EXTENT_REF_KEY) {
6829 if (level < ref_path->shared_level)
6830 ref_path->shared_level = level;
6831 goto found;
6832 }
6833next:
6834 level--;
6835 btrfs_release_path(extent_root, path);
6836 cond_resched();
6837 }
6838 /* reached lowest level */
6839 ret = 1;
6840 goto out;
6841walk_up:
6842 level = ref_path->current_level;
6843 while (level < BTRFS_MAX_LEVEL - 1) {
6844 u64 ref_objectid;
6845
6846 if (level >= 0)
6847 bytenr = ref_path->nodes[level];
6848 else
6849 bytenr = ref_path->extent_start;
6850
6851 BUG_ON(bytenr == 0);
6852
6853 key.objectid = bytenr;
6854 key.offset = 0;
6855 key.type = BTRFS_EXTENT_REF_KEY;
6856
6857 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6858 if (ret < 0)
6859 goto out;
6860
6861 leaf = path->nodes[0];
6862 nritems = btrfs_header_nritems(leaf);
6863 if (path->slots[0] >= nritems) {
6864 ret = btrfs_next_leaf(extent_root, path);
6865 if (ret < 0)
6866 goto out;
6867 if (ret > 0) {
6868 /* the extent was freed by someone */
6869 if (ref_path->lowest_level == level)
6870 goto out;
6871 btrfs_release_path(extent_root, path);
6872 goto walk_down;
6873 }
6874 leaf = path->nodes[0];
6875 }
6876
6877 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6878 if (found_key.objectid != bytenr ||
6879 found_key.type != BTRFS_EXTENT_REF_KEY) {
6880 /* the extent was freed by someone */
6881 if (ref_path->lowest_level == level) {
6882 ret = 1;
6883 goto out;
6884 }
6885 btrfs_release_path(extent_root, path);
6886 goto walk_down;
6887 }
6888found:
6889 ref = btrfs_item_ptr(leaf, path->slots[0],
6890 struct btrfs_extent_ref);
6891 ref_objectid = btrfs_ref_objectid(leaf, ref);
6892 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6893 if (first_time) {
6894 level = (int)ref_objectid;
6895 BUG_ON(level >= BTRFS_MAX_LEVEL);
6896 ref_path->lowest_level = level;
6897 ref_path->current_level = level;
6898 ref_path->nodes[level] = bytenr;
6899 } else {
6900 WARN_ON(ref_objectid != level);
6901 }
6902 } else {
6903 WARN_ON(level != -1);
6904 }
6905 first_time = 0;
6906
6907 if (ref_path->lowest_level == level) {
6908 ref_path->owner_objectid = ref_objectid;
6909 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6910 }
6911
6912 /*
6913 * the block is tree root or the block isn't in reference
6914 * counted tree.
6915 */
6916 if (found_key.objectid == found_key.offset ||
6917 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6918 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6919 ref_path->root_generation =
6920 btrfs_ref_generation(leaf, ref);
6921 if (level < 0) {
6922 /* special reference from the tree log */
6923 ref_path->nodes[0] = found_key.offset;
6924 ref_path->current_level = 0;
6925 }
6926 ret = 0;
6927 goto out;
6928 }
6929
6930 level++;
6931 BUG_ON(ref_path->nodes[level] != 0);
6932 ref_path->nodes[level] = found_key.offset;
6933 ref_path->current_level = level;
6934
6935 /*
6936 * the reference was created in the running transaction,
6937 * no need to continue walking up.
6938 */
6939 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6940 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6941 ref_path->root_generation =
6942 btrfs_ref_generation(leaf, ref);
6943 ret = 0;
6944 goto out;
6945 }
6946
6947 btrfs_release_path(extent_root, path);
6948 cond_resched();
6949 }
6950 /* reached max tree level, but no tree root found. */
6951 BUG();
6952out:
6953 btrfs_free_path(path);
6954 return ret;
6955}
6956
6957static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6958 struct btrfs_root *extent_root,
6959 struct btrfs_ref_path *ref_path,
6960 u64 extent_start)
6961{
6962 memset(ref_path, 0, sizeof(*ref_path));
6963 ref_path->extent_start = extent_start;
6964
6965 return __next_ref_path(trans, extent_root, ref_path, 1);
6966}
6967
6968static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
6969 struct btrfs_root *extent_root,
6970 struct btrfs_ref_path *ref_path)
6971{
6972 return __next_ref_path(trans, extent_root, ref_path, 0);
6973}
6974
6975static noinline int get_new_locations(struct inode *reloc_inode,
6976 struct btrfs_key *extent_key,
6977 u64 offset, int no_fragment,
6978 struct disk_extent **extents,
6979 int *nr_extents)
6980{
6981 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6982 struct btrfs_path *path;
6983 struct btrfs_file_extent_item *fi;
6984 struct extent_buffer *leaf;
6985 struct disk_extent *exts = *extents;
6986 struct btrfs_key found_key;
6987 u64 cur_pos;
6988 u64 last_byte;
6989 u32 nritems;
6990 int nr = 0;
6991 int max = *nr_extents;
6992 int ret;
6993
6994 WARN_ON(!no_fragment && *extents);
6995 if (!exts) {
6996 max = 1;
6997 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6998 if (!exts)
6999 return -ENOMEM;
7000 }
7001
7002 path = btrfs_alloc_path();
7003 if (!path) {
7004 if (exts != *extents)
7005 kfree(exts);
7006 return -ENOMEM;
7007 }
7008
7009 cur_pos = extent_key->objectid - offset;
7010 last_byte = extent_key->objectid + extent_key->offset;
7011 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
7012 cur_pos, 0);
7013 if (ret < 0)
7014 goto out;
7015 if (ret > 0) {
7016 ret = -ENOENT;
7017 goto out;
7018 }
7019
7020 while (1) {
7021 leaf = path->nodes[0];
7022 nritems = btrfs_header_nritems(leaf);
7023 if (path->slots[0] >= nritems) {
7024 ret = btrfs_next_leaf(root, path);
7025 if (ret < 0)
7026 goto out;
7027 if (ret > 0)
7028 break;
7029 leaf = path->nodes[0];
7030 }
7031
7032 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7033 if (found_key.offset != cur_pos ||
7034 found_key.type != BTRFS_EXTENT_DATA_KEY ||
7035 found_key.objectid != reloc_inode->i_ino)
7036 break;
7037
7038 fi = btrfs_item_ptr(leaf, path->slots[0],
7039 struct btrfs_file_extent_item);
7040 if (btrfs_file_extent_type(leaf, fi) !=
7041 BTRFS_FILE_EXTENT_REG ||
7042 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7043 break;
7044
7045 if (nr == max) {
7046 struct disk_extent *old = exts;
7047 max *= 2;
7048 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7049 if (!exts) {
7050 ret = -ENOMEM;
7051 goto out;
7052 }
7053 memcpy(exts, old, sizeof(*exts) * nr);
7054 if (old != *extents)
7055 kfree(old);
7056 }
7057
7058 exts[nr].disk_bytenr =
7059 btrfs_file_extent_disk_bytenr(leaf, fi);
7060 exts[nr].disk_num_bytes =
7061 btrfs_file_extent_disk_num_bytes(leaf, fi);
7062 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
7063 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7064 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7065 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
7066 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
7067 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
7068 fi);
7069 BUG_ON(exts[nr].offset > 0);
7070 BUG_ON(exts[nr].compression || exts[nr].encryption);
7071 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
7072
7073 cur_pos += exts[nr].num_bytes;
7074 nr++;
7075
7076 if (cur_pos + offset >= last_byte)
7077 break;
7078
7079 if (no_fragment) {
7080 ret = 1;
7081 goto out;
7082 }
7083 path->slots[0]++;
7084 }
7085
7086 BUG_ON(cur_pos + offset > last_byte);
7087 if (cur_pos + offset < last_byte) {
7088 ret = -ENOENT;
7089 goto out;
7090 }
7091 ret = 0;
7092out:
7093 btrfs_free_path(path);
7094 if (ret) {
7095 if (exts != *extents)
7096 kfree(exts);
7097 } else {
7098 *extents = exts;
7099 *nr_extents = nr;
7100 }
7101 return ret;
7102}
7103
7104static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
7105 struct btrfs_root *root,
7106 struct btrfs_path *path,
7107 struct btrfs_key *extent_key,
7108 struct btrfs_key *leaf_key,
7109 struct btrfs_ref_path *ref_path,
7110 struct disk_extent *new_extents,
7111 int nr_extents)
7112{
7113 struct extent_buffer *leaf;
7114 struct btrfs_file_extent_item *fi;
7115 struct inode *inode = NULL;
7116 struct btrfs_key key;
7117 u64 lock_start = 0;
7118 u64 lock_end = 0;
7119 u64 num_bytes;
7120 u64 ext_offset;
7121 u64 search_end = (u64)-1;
7122 u32 nritems;
7123 int nr_scaned = 0;
7124 int extent_locked = 0;
7125 int extent_type;
7126 int ret;
7127
7128 memcpy(&key, leaf_key, sizeof(key));
7129 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7130 if (key.objectid < ref_path->owner_objectid ||
7131 (key.objectid == ref_path->owner_objectid &&
7132 key.type < BTRFS_EXTENT_DATA_KEY)) {
7133 key.objectid = ref_path->owner_objectid;
7134 key.type = BTRFS_EXTENT_DATA_KEY;
7135 key.offset = 0;
7136 }
7137 }
7138
7139 while (1) {
7140 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7141 if (ret < 0)
7142 goto out;
7143
7144 leaf = path->nodes[0];
7145 nritems = btrfs_header_nritems(leaf);
7146next:
7147 if (extent_locked && ret > 0) {
7148 /*
7149 * the file extent item was modified by someone
7150 * before the extent got locked.
7151 */
7152 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7153 lock_end, GFP_NOFS);
7154 extent_locked = 0;
7155 }
7156
7157 if (path->slots[0] >= nritems) {
7158 if (++nr_scaned > 2)
7159 break;
7160
7161 BUG_ON(extent_locked);
7162 ret = btrfs_next_leaf(root, path);
7163 if (ret < 0)
7164 goto out;
7165 if (ret > 0)
7166 break;
7167 leaf = path->nodes[0];
7168 nritems = btrfs_header_nritems(leaf);
7169 }
7170
7171 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7172
7173 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7174 if ((key.objectid > ref_path->owner_objectid) ||
7175 (key.objectid == ref_path->owner_objectid &&
7176 key.type > BTRFS_EXTENT_DATA_KEY) ||
7177 key.offset >= search_end)
7178 break;
7179 }
7180
7181 if (inode && key.objectid != inode->i_ino) {
7182 BUG_ON(extent_locked);
7183 btrfs_release_path(root, path);
7184 mutex_unlock(&inode->i_mutex);
7185 iput(inode);
7186 inode = NULL;
7187 continue;
7188 }
7189
7190 if (key.type != BTRFS_EXTENT_DATA_KEY) {
7191 path->slots[0]++;
7192 ret = 1;
7193 goto next;
7194 }
7195 fi = btrfs_item_ptr(leaf, path->slots[0],
7196 struct btrfs_file_extent_item);
7197 extent_type = btrfs_file_extent_type(leaf, fi);
7198 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
7199 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
7200 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
7201 extent_key->objectid)) {
7202 path->slots[0]++;
7203 ret = 1;
7204 goto next;
7205 }
7206
7207 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7208 ext_offset = btrfs_file_extent_offset(leaf, fi);
7209
7210 if (search_end == (u64)-1) {
7211 search_end = key.offset - ext_offset +
7212 btrfs_file_extent_ram_bytes(leaf, fi);
7213 }
7214
7215 if (!extent_locked) {
7216 lock_start = key.offset;
7217 lock_end = lock_start + num_bytes - 1;
7218 } else {
7219 if (lock_start > key.offset ||
7220 lock_end + 1 < key.offset + num_bytes) {
7221 unlock_extent(&BTRFS_I(inode)->io_tree,
7222 lock_start, lock_end, GFP_NOFS);
7223 extent_locked = 0;
7224 }
7225 }
7226
7227 if (!inode) {
7228 btrfs_release_path(root, path);
7229
7230 inode = btrfs_iget_locked(root->fs_info->sb,
7231 key.objectid, root);
7232 if (inode->i_state & I_NEW) {
7233 BTRFS_I(inode)->root = root;
7234 BTRFS_I(inode)->location.objectid =
7235 key.objectid;
7236 BTRFS_I(inode)->location.type =
7237 BTRFS_INODE_ITEM_KEY;
7238 BTRFS_I(inode)->location.offset = 0;
7239 btrfs_read_locked_inode(inode);
7240 unlock_new_inode(inode);
7241 }
7242 /*
7243 * some code call btrfs_commit_transaction while
7244 * holding the i_mutex, so we can't use mutex_lock
7245 * here.
7246 */
7247 if (is_bad_inode(inode) ||
7248 !mutex_trylock(&inode->i_mutex)) {
7249 iput(inode);
7250 inode = NULL;
7251 key.offset = (u64)-1;
7252 goto skip;
7253 }
7254 }
7255
7256 if (!extent_locked) {
7257 struct btrfs_ordered_extent *ordered;
7258
7259 btrfs_release_path(root, path);
7260
7261 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7262 lock_end, GFP_NOFS);
7263 ordered = btrfs_lookup_first_ordered_extent(inode,
7264 lock_end);
7265 if (ordered &&
7266 ordered->file_offset <= lock_end &&
7267 ordered->file_offset + ordered->len > lock_start) {
7268 unlock_extent(&BTRFS_I(inode)->io_tree,
7269 lock_start, lock_end, GFP_NOFS);
7270 btrfs_start_ordered_extent(inode, ordered, 1);
7271 btrfs_put_ordered_extent(ordered);
7272 key.offset += num_bytes;
7273 goto skip;
7274 }
7275 if (ordered)
7276 btrfs_put_ordered_extent(ordered);
7277
7278 extent_locked = 1;
7279 continue;
7280 }
7281
7282 if (nr_extents == 1) {
7283 /* update extent pointer in place */
7284 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7285 new_extents[0].disk_bytenr);
7286 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7287 new_extents[0].disk_num_bytes);
7288 btrfs_mark_buffer_dirty(leaf);
7289
7290 btrfs_drop_extent_cache(inode, key.offset,
7291 key.offset + num_bytes - 1, 0);
7292
7293 ret = btrfs_inc_extent_ref(trans, root,
7294 new_extents[0].disk_bytenr,
7295 new_extents[0].disk_num_bytes,
7296 leaf->start,
7297 root->root_key.objectid,
7298 trans->transid,
7299 key.objectid);
7300 BUG_ON(ret);
7301
7302 ret = btrfs_free_extent(trans, root,
7303 extent_key->objectid,
7304 extent_key->offset,
7305 leaf->start,
7306 btrfs_header_owner(leaf),
7307 btrfs_header_generation(leaf),
7308 key.objectid, 0);
7309 BUG_ON(ret);
7310
7311 btrfs_release_path(root, path);
7312 key.offset += num_bytes;
7313 } else {
7314 BUG_ON(1);
7315#if 0
7316 u64 alloc_hint;
7317 u64 extent_len;
7318 int i;
7319 /*
7320 * drop old extent pointer at first, then insert the
7321 * new pointers one bye one
7322 */
7323 btrfs_release_path(root, path);
7324 ret = btrfs_drop_extents(trans, root, inode, key.offset,
7325 key.offset + num_bytes,
7326 key.offset, &alloc_hint);
7327 BUG_ON(ret);
7328
7329 for (i = 0; i < nr_extents; i++) {
7330 if (ext_offset >= new_extents[i].num_bytes) {
7331 ext_offset -= new_extents[i].num_bytes;
7332 continue;
7333 }
7334 extent_len = min(new_extents[i].num_bytes -
7335 ext_offset, num_bytes);
7336
7337 ret = btrfs_insert_empty_item(trans, root,
7338 path, &key,
7339 sizeof(*fi));
7340 BUG_ON(ret);
7341
7342 leaf = path->nodes[0];
7343 fi = btrfs_item_ptr(leaf, path->slots[0],
7344 struct btrfs_file_extent_item);
7345 btrfs_set_file_extent_generation(leaf, fi,
7346 trans->transid);
7347 btrfs_set_file_extent_type(leaf, fi,
7348 BTRFS_FILE_EXTENT_REG);
7349 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7350 new_extents[i].disk_bytenr);
7351 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7352 new_extents[i].disk_num_bytes);
7353 btrfs_set_file_extent_ram_bytes(leaf, fi,
7354 new_extents[i].ram_bytes);
7355
7356 btrfs_set_file_extent_compression(leaf, fi,
7357 new_extents[i].compression);
7358 btrfs_set_file_extent_encryption(leaf, fi,
7359 new_extents[i].encryption);
7360 btrfs_set_file_extent_other_encoding(leaf, fi,
7361 new_extents[i].other_encoding);
7362
7363 btrfs_set_file_extent_num_bytes(leaf, fi,
7364 extent_len);
7365 ext_offset += new_extents[i].offset;
7366 btrfs_set_file_extent_offset(leaf, fi,
7367 ext_offset);
7368 btrfs_mark_buffer_dirty(leaf);
7369
7370 btrfs_drop_extent_cache(inode, key.offset,
7371 key.offset + extent_len - 1, 0);
7372
7373 ret = btrfs_inc_extent_ref(trans, root,
7374 new_extents[i].disk_bytenr,
7375 new_extents[i].disk_num_bytes,
7376 leaf->start,
7377 root->root_key.objectid,
7378 trans->transid, key.objectid);
7379 BUG_ON(ret);
7380 btrfs_release_path(root, path);
7381
7382 inode_add_bytes(inode, extent_len);
7383
7384 ext_offset = 0;
7385 num_bytes -= extent_len;
7386 key.offset += extent_len;
7387
7388 if (num_bytes == 0)
7389 break;
7390 }
7391 BUG_ON(i >= nr_extents);
7392#endif
7393 }
7394
7395 if (extent_locked) {
7396 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7397 lock_end, GFP_NOFS);
7398 extent_locked = 0;
7399 }
7400skip:
7401 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
7402 key.offset >= search_end)
7403 break;
7404
7405 cond_resched();
7406 }
7407 ret = 0;
7408out:
7409 btrfs_release_path(root, path);
7410 if (inode) {
7411 mutex_unlock(&inode->i_mutex);
7412 if (extent_locked) {
7413 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7414 lock_end, GFP_NOFS);
7415 }
7416 iput(inode);
7417 }
7418 return ret;
7419}
7420
7421int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7422 struct btrfs_root *root,
7423 struct extent_buffer *buf, u64 orig_start)
7424{
7425 int level;
7426 int ret;
7427
7428 BUG_ON(btrfs_header_generation(buf) != trans->transid);
7429 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7430
7431 level = btrfs_header_level(buf);
7432 if (level == 0) {
7433 struct btrfs_leaf_ref *ref;
7434 struct btrfs_leaf_ref *orig_ref;
7435
7436 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7437 if (!orig_ref)
7438 return -ENOENT;
7439
7440 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7441 if (!ref) {
7442 btrfs_free_leaf_ref(root, orig_ref);
7443 return -ENOMEM;
7444 }
7445
7446 ref->nritems = orig_ref->nritems;
7447 memcpy(ref->extents, orig_ref->extents,
7448 sizeof(ref->extents[0]) * ref->nritems);
7449
7450 btrfs_free_leaf_ref(root, orig_ref);
7451
7452 ref->root_gen = trans->transid;
7453 ref->bytenr = buf->start;
7454 ref->owner = btrfs_header_owner(buf);
7455 ref->generation = btrfs_header_generation(buf);
7456
7457 ret = btrfs_add_leaf_ref(root, ref, 0);
7458 WARN_ON(ret);
7459 btrfs_free_leaf_ref(root, ref);
7460 }
7461 return 0;
7462}
7463
/*
 * Drop cached extent mappings for every data extent referenced by @leaf.
 *
 * For each BTRFS_EXTENT_DATA_KEY item in @leaf that points at a real
 * (non-inline, non-hole) extent, look up the owning inode in @target_root
 * and drop its cached extent mapping for that file range, with the range
 * locked in the inode's extent io_tree.  Inodes that cannot be found are
 * remembered in skip_objectid so later items of the same inode are
 * skipped cheaply.
 *
 * Note: @group is accepted but not referenced in this function.
 * Always returns 0.
 */
static noinline int invalidate_extent_cache(struct btrfs_root *root,
			struct extent_buffer *leaf,
			struct btrfs_block_group_cache *group,
			struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	struct extent_state *cached_state = NULL;
	u64 num_bytes;
	u64 skip_objectid = 0;
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* disk_bytenr == 0 is a hole; nothing cached to drop */
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		/* reuse the inode across consecutive items of one file;
		 * iput(NULL) is a no-op on the first iteration */
		if (!inode || inode->i_ino != key.objectid) {
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
				 key.offset + num_bytes - 1, 0, &cached_state,
				 GFP_NOFS);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
				     key.offset + num_bytes - 1, &cached_state,
				     GFP_NOFS);
		cond_resched();
	}
	/* release the last cached inode, if any */
	iput(inode);
	return 0;
}
7514
/*
 * Rewrite every data extent pointer in @leaf that falls inside block
 * group @group so it points at the extent's new location.
 *
 * For each regular file extent in @leaf overlapping the group's range,
 * the new location is obtained from @reloc_inode via get_new_locations()
 * (a single replacement extent is required; fragmented results make
 * get_new_locations() return > 0 and the item is left untouched).  The
 * file extent item and the cached leaf ref entry are updated in place, a
 * reference is added to the new extent and the reference to the old
 * extent is dropped.
 *
 * Returns 0 on success, -ENOMEM if the scratch disk_extent cannot be
 * allocated; hard failures trip BUG_ON.
 */
static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	if (!new_extent)
		return -ENOMEM;

	/* the leaf must have a cached ref recording its data extents */
	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	/* ext_index tracks our position in ref->extents[] */
	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* bytenr == 0 is a hole */
		if (bytenr == 0)
			continue;

		ext_index++;
		/* skip extents that do not overlap the block group */
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		/* ret > 0: no single-extent replacement; leave item as is */
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		/* the cached ref must agree with the on-leaf item */
		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid, key.objectid);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	/* every data extent in the leaf must have been visited */
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
7604
7605int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7606 struct btrfs_root *root)
7607{
7608 struct btrfs_root *reloc_root;
7609 int ret;
7610
7611 if (root->reloc_root) {
7612 reloc_root = root->reloc_root;
7613 root->reloc_root = NULL;
7614 list_add(&reloc_root->dead_list,
7615 &root->fs_info->dead_reloc_roots);
7616
7617 btrfs_set_root_bytenr(&reloc_root->root_item,
7618 reloc_root->node->start);
7619 btrfs_set_root_level(&root->root_item,
7620 btrfs_header_level(reloc_root->node));
7621 memset(&reloc_root->root_item.drop_progress, 0,
7622 sizeof(struct btrfs_disk_key));
7623 reloc_root->root_item.drop_level = 0;
7624
7625 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7626 &reloc_root->root_key,
7627 &reloc_root->root_item);
7628 BUG_ON(ret);
7629 }
7630 return 0;
7631}
7632
/*
 * Delete all reloc roots queued on fs_info->dead_reloc_roots.
 *
 * For each dead reloc root, repeatedly join a transaction and call
 * btrfs_drop_snapshot() until it stops returning -EAGAIN (the drop is
 * done in chunks, with dirty btree pages balanced between chunks), then
 * delete the root item from the tree root.
 *
 * Locking note: the inner loop exits with drop_mutex still held and the
 * final transaction still open; both are released after btrfs_del_root().
 *
 * Freeing of each reloc root structure is deferred by one iteration via
 * prev_root; the last one additionally has its leaf refs removed before
 * the final kfree.  Always returns 0.
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	INIT_LIST_HEAD(&dead_roots);
	/* steal the whole list; new entries may accumulate meanwhile */
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		/* take from the tail: oldest entry first */
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(IS_ERR(trans));

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
7688
/*
 * Queue @root on the fs-wide list of dead reloc roots; the list is
 * drained by btrfs_drop_dead_reloc_roots().  Always returns 0.
 */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}
7694
/*
 * Clean up leftover relocation state.
 *
 * Scan the tree root for dead reloc roots and, if any are found, start
 * and commit a transaction.  Then run orphan cleanup on the data
 * relocation tree.  Hard failures trip BUG_ON; returns 0 otherwise.
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		/*
		 * NOTE(review): the commit is presumably what drops the
		 * dead reloc roots found above (cf.
		 * btrfs_drop_dead_reloc_roots) — confirm against the
		 * commit path.
		 */
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(IS_ERR(trans));
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	/* offset (u64)-1 selects the latest data reloc tree root */
	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	ret = btrfs_orphan_cleanup(reloc_root);
	BUG_ON(ret);
	return 0;
}
7726
7727static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7728 struct btrfs_root *root)
7729{
7730 struct btrfs_root *reloc_root;
7731 struct extent_buffer *eb;
7732 struct btrfs_root_item *root_item;
7733 struct btrfs_key root_key;
7734 int ret;
7735
7736 BUG_ON(!root->ref_cows);
7737 if (root->reloc_root)
7738 return 0;
7739
7740 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7741 if (!root_item)
7742 return -ENOMEM;
7743
7744 ret = btrfs_copy_root(trans, root, root->commit_root,
7745 &eb, BTRFS_TREE_RELOC_OBJECTID);
7746 BUG_ON(ret);
7747
7748 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7749 root_key.offset = root->root_key.objectid;
7750 root_key.type = BTRFS_ROOT_ITEM_KEY;
7751
7752 memcpy(root_item, &root->root_item, sizeof(root_item));
7753 btrfs_set_root_refs(root_item, 0);
7754 btrfs_set_root_bytenr(root_item, eb->start);
7755 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7756 btrfs_set_root_generation(root_item, trans->transid);
7757
7758 btrfs_tree_unlock(eb);
7759 free_extent_buffer(eb);
7760
7761 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7762 &root_key, root_item);
7763 BUG_ON(ret);
7764 kfree(root_item);
7765
7766 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7767 &root_key);
7768 BUG_ON(IS_ERR(reloc_root));
7769 reloc_root->last_trans = trans->transid;
7770 reloc_root->commit_root = NULL;
7771 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7772
7773 root->reloc_root = reloc_root;
7774 return 0;
7775}
7776
/*
 * Core function of space balance.
 *
 * The idea is to use reloc trees to relocate tree blocks in reference
 * counted roots.  There is one reloc tree for each subvol, and all
 * reloc trees share the same root key objectid.  Reloc trees are
 * snapshots of the latest committed roots of subvols
 * (root->commit_root).
 *
 * To relocate a tree block referenced by a subvol, there are two steps:
 * COW the block through the subvol's reloc tree, then update the block
 * pointer in the subvol to point to the new block.  Since all reloc
 * trees share the same root key objectid, doing special handling for
 * tree blocks owned by them is easy.  Once a tree block has been COWed
 * in one reloc tree, we can use the resulting new block directly when
 * the same block is required to COW again through other reloc trees.
 * In this way, relocated tree blocks are shared between reloc trees, so
 * they are also shared between subvols.
 */
/*
 * Relocate the tree path described by @ref_path/@first_key in @root,
 * using the subvol's reloc tree (see the comment above for the overall
 * scheme).  @group and @reloc_inode supply new data extent locations
 * when leaf data extents are involved; they may be NULL for metadata-
 * only relocation (see relocate_tree_block()).
 */
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *first_key,
				struct btrfs_ref_path *ref_path,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* for metadata references the owner objectid encodes the level */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/* non reference-counted root: a COWing search suffices */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	/* forget cached keys/new blocks above the deepest shared level */
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/* COW the path in the reloc tree, recording the new blocks
		 * and their first keys per level */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* data extents in the COWed leaf get new locations */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		/* path was already COWed; merge the recorded blocks */
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* drop stale cached extent mappings for the merged leaf */
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}
7891
7892static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7893 struct btrfs_root *root,
7894 struct btrfs_path *path,
7895 struct btrfs_key *first_key,
7896 struct btrfs_ref_path *ref_path)
7897{
7898 int ret;
7899
7900 ret = relocate_one_path(trans, root, path, first_key,
7901 ref_path, NULL, NULL);
7902 BUG_ON(ret);
7903
7904 return 0;
7905}
7906
7907static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7908 struct btrfs_root *extent_root,
7909 struct btrfs_path *path,
7910 struct btrfs_key *extent_key)
7911{
7912 int ret;
7913
7914 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7915 if (ret)
7916 goto out;
7917 ret = btrfs_del_item(trans, extent_root, path);
7918out:
7919 btrfs_release_path(extent_root, path);
7920 return ret;
7921}
7922
7923static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7924 struct btrfs_ref_path *ref_path)
7925{
7926 struct btrfs_key root_key;
7927
7928 root_key.objectid = ref_path->root_objectid;
7929 root_key.type = BTRFS_ROOT_ITEM_KEY;
7930 if (is_cowonly_root(ref_path->root_objectid))
7931 root_key.offset = 0;
7932 else
7933 root_key.offset = (u64)-1;
7934
7935 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7936}
7937
/*
 * Relocate all references to the extent described by @extent_key out of
 * block group @group.
 *
 * Walks every reference path to the extent (btrfs_first_ref_path /
 * btrfs_next_ref_path) and, per path:
 *   - data extent reference, pass 0: copy the data into @reloc_inode
 *     (relocate_data_extent) and stop;
 *   - data extent reference, pass 1: relocate the whole path via
 *     relocate_one_path(), keeping metadata shared between snapshots;
 *   - data extent reference, later passes: fall back to rewriting each
 *     file extent pointer (replace_one_extent) with locations from
 *     get_new_locations();
 *   - tree block reference: relocate_tree_block().
 *
 * extent_key->objectid == 0 items are simply deleted (del_extent_zero).
 * Returns 0 on success or a negative errno.
 */
static noinline int relocate_one_extent(struct btrfs_root *extent_root,
				struct btrfs_path *path,
				struct btrfs_key *extent_key,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode, int pass)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *found_root;
	struct btrfs_ref_path *ref_path = NULL;
	struct disk_extent *new_extents = NULL;
	int nr_extents = 0;
	int loops;
	int ret;
	int level;
	struct btrfs_key first_key;
	u64 prev_block = 0;


	trans = btrfs_start_transaction(extent_root, 1);
	BUG_ON(IS_ERR(trans));

	if (extent_key->objectid == 0) {
		ret = del_extent_zero(trans, extent_root, path, extent_key);
		goto out;
	}

	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
	if (!ref_path) {
		ret = -ENOMEM;
		goto out;
	}

	for (loops = 0; ; loops++) {
		if (loops == 0) {
			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
						   extent_key->objectid);
		} else {
			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
		}
		if (ret < 0)
			goto out;
		/* ret > 0: no more reference paths */
		if (ret > 0)
			break;

		/* skip references from the log and reloc trees */
		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
			continue;

		found_root = read_ref_root(extent_root->fs_info, ref_path);
		BUG_ON(!found_root);
		/*
		 * for reference counted tree, only process reference paths
		 * rooted at the latest committed root.
		 */
		if (found_root->ref_cows &&
		    ref_path->root_generation != found_root->root_key.offset)
			continue;

		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			if (pass == 0) {
				/*
				 * copy data extents to new locations
				 */
				u64 group_start = group->key.objectid;
				ret = relocate_data_extent(reloc_inode,
							   extent_key,
							   group_start);
				if (ret < 0)
					goto out;
				break;
			}
			level = 0;
		} else {
			/* metadata ref: owner objectid encodes the level */
			level = ref_path->owner_objectid;
		}

		/* read the referencing block's first key, once per block */
		if (prev_block != ref_path->nodes[level]) {
			struct extent_buffer *eb;
			u64 block_start = ref_path->nodes[level];
			u64 block_size = btrfs_level_size(found_root, level);

			eb = read_tree_block(found_root, block_start,
					     block_size, 0);
			if (!eb) {
				ret = -EIO;
				goto out;
			}
			btrfs_tree_lock(eb);
			BUG_ON(level != btrfs_header_level(eb));

			if (level == 0)
				btrfs_item_key_to_cpu(eb, &first_key, 0);
			else
				btrfs_node_key_to_cpu(eb, &first_key, 0);

			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			prev_block = block_start;
		}

		mutex_lock(&extent_root->fs_info->trans_mutex);
		btrfs_record_root_in_trans(found_root);
		mutex_unlock(&extent_root->fs_info->trans_mutex);
		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			if (pass == 1) {
				ret = relocate_one_path(trans, found_root,
						path, &first_key, ref_path,
						group, reloc_inode);
				if (ret < 0)
					goto out;
				continue;
			}
			/*
			 * use fallback method to process the remaining
			 * references.
			 */
			if (!new_extents) {
				/* fetch replacement locations only once */
				u64 group_start = group->key.objectid;
				new_extents = kmalloc(sizeof(*new_extents),
						      GFP_NOFS);
				if (!new_extents) {
					ret = -ENOMEM;
					goto out;
				}
				nr_extents = 1;
				ret = get_new_locations(reloc_inode,
							extent_key,
							group_start, 1,
							&new_extents,
							&nr_extents);
				if (ret)
					goto out;
			}
			ret = replace_one_extent(trans, found_root,
						path, extent_key,
						&first_key, ref_path,
						new_extents, nr_extents);
		} else {
			ret = relocate_tree_block(trans, found_root, path,
						  &first_key, ref_path);
		}
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	btrfs_end_transaction(trans, extent_root);
	kfree(new_extents);
	kfree(ref_path);
	return ret;
}
8093#endif
8094
8095static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6445static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8096{ 6446{
8097 u64 num_devices; 6447 u64 num_devices;
@@ -8555,10 +6905,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8555 ret = -ENOMEM; 6905 ret = -ENOMEM;
8556 goto error; 6906 goto error;
8557 } 6907 }
6908 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6909 GFP_NOFS);
6910 if (!cache->free_space_ctl) {
6911 kfree(cache);
6912 ret = -ENOMEM;
6913 goto error;
6914 }
8558 6915
8559 atomic_set(&cache->count, 1); 6916 atomic_set(&cache->count, 1);
8560 spin_lock_init(&cache->lock); 6917 spin_lock_init(&cache->lock);
8561 spin_lock_init(&cache->tree_lock);
8562 cache->fs_info = info; 6918 cache->fs_info = info;
8563 INIT_LIST_HEAD(&cache->list); 6919 INIT_LIST_HEAD(&cache->list);
8564 INIT_LIST_HEAD(&cache->cluster_list); 6920 INIT_LIST_HEAD(&cache->cluster_list);
@@ -8566,24 +6922,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8566 if (need_clear) 6922 if (need_clear)
8567 cache->disk_cache_state = BTRFS_DC_CLEAR; 6923 cache->disk_cache_state = BTRFS_DC_CLEAR;
8568 6924
8569 /*
8570 * we only want to have 32k of ram per block group for keeping
8571 * track of free space, and if we pass 1/2 of that we want to
8572 * start converting things over to using bitmaps
8573 */
8574 cache->extents_thresh = ((1024 * 32) / 2) /
8575 sizeof(struct btrfs_free_space);
8576
8577 read_extent_buffer(leaf, &cache->item, 6925 read_extent_buffer(leaf, &cache->item,
8578 btrfs_item_ptr_offset(leaf, path->slots[0]), 6926 btrfs_item_ptr_offset(leaf, path->slots[0]),
8579 sizeof(cache->item)); 6927 sizeof(cache->item));
8580 memcpy(&cache->key, &found_key, sizeof(found_key)); 6928 memcpy(&cache->key, &found_key, sizeof(found_key));
8581 6929
8582 key.objectid = found_key.objectid + found_key.offset; 6930 key.objectid = found_key.objectid + found_key.offset;
8583 btrfs_release_path(root, path); 6931 btrfs_release_path(path);
8584 cache->flags = btrfs_block_group_flags(&cache->item); 6932 cache->flags = btrfs_block_group_flags(&cache->item);
8585 cache->sectorsize = root->sectorsize; 6933 cache->sectorsize = root->sectorsize;
8586 6934
6935 btrfs_init_free_space_ctl(cache);
6936
8587 /* 6937 /*
8588 * We need to exclude the super stripes now so that the space 6938 * We need to exclude the super stripes now so that the space
8589 * info has super bytes accounted for, otherwise we'll think 6939 * info has super bytes accounted for, otherwise we'll think
@@ -8670,6 +7020,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8670 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7020 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8671 if (!cache) 7021 if (!cache)
8672 return -ENOMEM; 7022 return -ENOMEM;
7023 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7024 GFP_NOFS);
7025 if (!cache->free_space_ctl) {
7026 kfree(cache);
7027 return -ENOMEM;
7028 }
8673 7029
8674 cache->key.objectid = chunk_offset; 7030 cache->key.objectid = chunk_offset;
8675 cache->key.offset = size; 7031 cache->key.offset = size;
@@ -8677,19 +7033,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8677 cache->sectorsize = root->sectorsize; 7033 cache->sectorsize = root->sectorsize;
8678 cache->fs_info = root->fs_info; 7034 cache->fs_info = root->fs_info;
8679 7035
8680 /*
8681 * we only want to have 32k of ram per block group for keeping track
8682 * of free space, and if we pass 1/2 of that we want to start
8683 * converting things over to using bitmaps
8684 */
8685 cache->extents_thresh = ((1024 * 32) / 2) /
8686 sizeof(struct btrfs_free_space);
8687 atomic_set(&cache->count, 1); 7036 atomic_set(&cache->count, 1);
8688 spin_lock_init(&cache->lock); 7037 spin_lock_init(&cache->lock);
8689 spin_lock_init(&cache->tree_lock);
8690 INIT_LIST_HEAD(&cache->list); 7038 INIT_LIST_HEAD(&cache->list);
8691 INIT_LIST_HEAD(&cache->cluster_list); 7039 INIT_LIST_HEAD(&cache->cluster_list);
8692 7040
7041 btrfs_init_free_space_ctl(cache);
7042
8693 btrfs_set_block_group_used(&cache->item, bytes_used); 7043 btrfs_set_block_group_used(&cache->item, bytes_used);
8694 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7044 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8695 cache->flags = type; 7045 cache->flags = type;
@@ -8802,12 +7152,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8802 if (ret < 0) 7152 if (ret < 0)
8803 goto out; 7153 goto out;
8804 if (ret > 0) 7154 if (ret > 0)
8805 btrfs_release_path(tree_root, path); 7155 btrfs_release_path(path);
8806 if (ret == 0) { 7156 if (ret == 0) {
8807 ret = btrfs_del_item(trans, tree_root, path); 7157 ret = btrfs_del_item(trans, tree_root, path);
8808 if (ret) 7158 if (ret)
8809 goto out; 7159 goto out;
8810 btrfs_release_path(tree_root, path); 7160 btrfs_release_path(path);
8811 } 7161 }
8812 7162
8813 spin_lock(&root->fs_info->block_group_cache_lock); 7163 spin_lock(&root->fs_info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4f9893243dae..c5d9fbb92bc3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -103,7 +103,7 @@ void extent_io_exit(void)
103} 103}
104 104
105void extent_io_tree_init(struct extent_io_tree *tree, 105void extent_io_tree_init(struct extent_io_tree *tree,
106 struct address_space *mapping, gfp_t mask) 106 struct address_space *mapping)
107{ 107{
108 tree->state = RB_ROOT; 108 tree->state = RB_ROOT;
109 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); 109 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
@@ -441,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
441 return ret; 441 return ret;
442} 442}
443 443
444static struct extent_state *
445alloc_extent_state_atomic(struct extent_state *prealloc)
446{
447 if (!prealloc)
448 prealloc = alloc_extent_state(GFP_ATOMIC);
449
450 return prealloc;
451}
452
444/* 453/*
445 * clear some bits on a range in the tree. This may require splitting 454 * clear some bits on a range in the tree. This may require splitting
446 * or inserting elements in the tree, so the gfp mask is used to 455 * or inserting elements in the tree, so the gfp mask is used to
@@ -531,8 +540,8 @@ hit_next:
531 */ 540 */
532 541
533 if (state->start < start) { 542 if (state->start < start) {
534 if (!prealloc) 543 prealloc = alloc_extent_state_atomic(prealloc);
535 prealloc = alloc_extent_state(GFP_ATOMIC); 544 BUG_ON(!prealloc);
536 err = split_state(tree, state, prealloc, start); 545 err = split_state(tree, state, prealloc, start);
537 BUG_ON(err == -EEXIST); 546 BUG_ON(err == -EEXIST);
538 prealloc = NULL; 547 prealloc = NULL;
@@ -553,8 +562,8 @@ hit_next:
553 * on the first half 562 * on the first half
554 */ 563 */
555 if (state->start <= end && state->end > end) { 564 if (state->start <= end && state->end > end) {
556 if (!prealloc) 565 prealloc = alloc_extent_state_atomic(prealloc);
557 prealloc = alloc_extent_state(GFP_ATOMIC); 566 BUG_ON(!prealloc);
558 err = split_state(tree, state, prealloc, end + 1); 567 err = split_state(tree, state, prealloc, end + 1);
559 BUG_ON(err == -EEXIST); 568 BUG_ON(err == -EEXIST);
560 if (wake) 569 if (wake)
@@ -727,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
727again: 736again:
728 if (!prealloc && (mask & __GFP_WAIT)) { 737 if (!prealloc && (mask & __GFP_WAIT)) {
729 prealloc = alloc_extent_state(mask); 738 prealloc = alloc_extent_state(mask);
730 if (!prealloc) 739 BUG_ON(!prealloc);
731 return -ENOMEM;
732 } 740 }
733 741
734 spin_lock(&tree->lock); 742 spin_lock(&tree->lock);
@@ -745,6 +753,8 @@ again:
745 */ 753 */
746 node = tree_search(tree, start); 754 node = tree_search(tree, start);
747 if (!node) { 755 if (!node) {
756 prealloc = alloc_extent_state_atomic(prealloc);
757 BUG_ON(!prealloc);
748 err = insert_state(tree, prealloc, start, end, &bits); 758 err = insert_state(tree, prealloc, start, end, &bits);
749 prealloc = NULL; 759 prealloc = NULL;
750 BUG_ON(err == -EEXIST); 760 BUG_ON(err == -EEXIST);
@@ -773,20 +783,18 @@ hit_next:
773 if (err) 783 if (err)
774 goto out; 784 goto out;
775 785
786 next_node = rb_next(node);
776 cache_state(state, cached_state); 787 cache_state(state, cached_state);
777 merge_state(tree, state); 788 merge_state(tree, state);
778 if (last_end == (u64)-1) 789 if (last_end == (u64)-1)
779 goto out; 790 goto out;
780 791
781 start = last_end + 1; 792 start = last_end + 1;
782 if (start < end && prealloc && !need_resched()) { 793 if (next_node && start < end && prealloc && !need_resched()) {
783 next_node = rb_next(node); 794 state = rb_entry(next_node, struct extent_state,
784 if (next_node) { 795 rb_node);
785 state = rb_entry(next_node, struct extent_state, 796 if (state->start == start)
786 rb_node); 797 goto hit_next;
787 if (state->start == start)
788 goto hit_next;
789 }
790 } 798 }
791 goto search_again; 799 goto search_again;
792 } 800 }
@@ -813,6 +821,9 @@ hit_next:
813 err = -EEXIST; 821 err = -EEXIST;
814 goto out; 822 goto out;
815 } 823 }
824
825 prealloc = alloc_extent_state_atomic(prealloc);
826 BUG_ON(!prealloc);
816 err = split_state(tree, state, prealloc, start); 827 err = split_state(tree, state, prealloc, start);
817 BUG_ON(err == -EEXIST); 828 BUG_ON(err == -EEXIST);
818 prealloc = NULL; 829 prealloc = NULL;
@@ -843,14 +854,25 @@ hit_next:
843 this_end = end; 854 this_end = end;
844 else 855 else
845 this_end = last_start - 1; 856 this_end = last_start - 1;
857
858 prealloc = alloc_extent_state_atomic(prealloc);
859 BUG_ON(!prealloc);
860
861 /*
862 * Avoid to free 'prealloc' if it can be merged with
863 * the later extent.
864 */
865 atomic_inc(&prealloc->refs);
846 err = insert_state(tree, prealloc, start, this_end, 866 err = insert_state(tree, prealloc, start, this_end,
847 &bits); 867 &bits);
848 BUG_ON(err == -EEXIST); 868 BUG_ON(err == -EEXIST);
849 if (err) { 869 if (err) {
870 free_extent_state(prealloc);
850 prealloc = NULL; 871 prealloc = NULL;
851 goto out; 872 goto out;
852 } 873 }
853 cache_state(prealloc, cached_state); 874 cache_state(prealloc, cached_state);
875 free_extent_state(prealloc);
854 prealloc = NULL; 876 prealloc = NULL;
855 start = this_end + 1; 877 start = this_end + 1;
856 goto search_again; 878 goto search_again;
@@ -867,6 +889,9 @@ hit_next:
867 err = -EEXIST; 889 err = -EEXIST;
868 goto out; 890 goto out;
869 } 891 }
892
893 prealloc = alloc_extent_state_atomic(prealloc);
894 BUG_ON(!prealloc);
870 err = split_state(tree, state, prealloc, end + 1); 895 err = split_state(tree, state, prealloc, end + 1);
871 BUG_ON(err == -EEXIST); 896 BUG_ON(err == -EEXIST);
872 897
@@ -943,13 +968,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
943 NULL, mask); 968 NULL, mask);
944} 969}
945 970
946static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
947 gfp_t mask)
948{
949 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
950 NULL, mask);
951}
952
953int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 971int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
954 struct extent_state **cached_state, gfp_t mask) 972 struct extent_state **cached_state, gfp_t mask)
955{ 973{
@@ -965,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
965 cached_state, mask); 983 cached_state, mask);
966} 984}
967 985
968int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
969{
970 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
971}
972
973/* 986/*
974 * either insert or lock state struct between start and end use mask to tell 987 * either insert or lock state struct between start and end use mask to tell
975 * us if waiting is desired. 988 * us if waiting is desired.
@@ -1030,25 +1043,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1030} 1043}
1031 1044
1032/* 1045/*
1033 * helper function to set pages and extents in the tree dirty
1034 */
1035int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
1036{
1037 unsigned long index = start >> PAGE_CACHE_SHIFT;
1038 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1039 struct page *page;
1040
1041 while (index <= end_index) {
1042 page = find_get_page(tree->mapping, index);
1043 BUG_ON(!page);
1044 __set_page_dirty_nobuffers(page);
1045 page_cache_release(page);
1046 index++;
1047 }
1048 return 0;
1049}
1050
1051/*
1052 * helper function to set both pages and extents in the tree writeback 1046 * helper function to set both pages and extents in the tree writeback
1053 */ 1047 */
1054static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1048static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1821,46 +1815,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1821 bio_put(bio); 1815 bio_put(bio);
1822} 1816}
1823 1817
1824/*
1825 * IO done from prepare_write is pretty simple, we just unlock
1826 * the structs in the extent tree when done, and set the uptodate bits
1827 * as appropriate.
1828 */
1829static void end_bio_extent_preparewrite(struct bio *bio, int err)
1830{
1831 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1832 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1833 struct extent_io_tree *tree;
1834 u64 start;
1835 u64 end;
1836
1837 do {
1838 struct page *page = bvec->bv_page;
1839 struct extent_state *cached = NULL;
1840 tree = &BTRFS_I(page->mapping->host)->io_tree;
1841
1842 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1843 bvec->bv_offset;
1844 end = start + bvec->bv_len - 1;
1845
1846 if (--bvec >= bio->bi_io_vec)
1847 prefetchw(&bvec->bv_page->flags);
1848
1849 if (uptodate) {
1850 set_extent_uptodate(tree, start, end, &cached,
1851 GFP_ATOMIC);
1852 } else {
1853 ClearPageUptodate(page);
1854 SetPageError(page);
1855 }
1856
1857 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1858
1859 } while (bvec >= bio->bi_io_vec);
1860
1861 bio_put(bio);
1862}
1863
1864struct bio * 1818struct bio *
1865btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1819btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1866 gfp_t gfp_flags) 1820 gfp_t gfp_flags)
@@ -2009,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2009 struct btrfs_ordered_extent *ordered; 1963 struct btrfs_ordered_extent *ordered;
2010 int ret; 1964 int ret;
2011 int nr = 0; 1965 int nr = 0;
2012 size_t page_offset = 0; 1966 size_t pg_offset = 0;
2013 size_t iosize; 1967 size_t iosize;
2014 size_t disk_io_size; 1968 size_t disk_io_size;
2015 size_t blocksize = inode->i_sb->s_blocksize; 1969 size_t blocksize = inode->i_sb->s_blocksize;
@@ -2052,9 +2006,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2052 char *userpage; 2006 char *userpage;
2053 struct extent_state *cached = NULL; 2007 struct extent_state *cached = NULL;
2054 2008
2055 iosize = PAGE_CACHE_SIZE - page_offset; 2009 iosize = PAGE_CACHE_SIZE - pg_offset;
2056 userpage = kmap_atomic(page, KM_USER0); 2010 userpage = kmap_atomic(page, KM_USER0);
2057 memset(userpage + page_offset, 0, iosize); 2011 memset(userpage + pg_offset, 0, iosize);
2058 flush_dcache_page(page); 2012 flush_dcache_page(page);
2059 kunmap_atomic(userpage, KM_USER0); 2013 kunmap_atomic(userpage, KM_USER0);
2060 set_extent_uptodate(tree, cur, cur + iosize - 1, 2014 set_extent_uptodate(tree, cur, cur + iosize - 1,
@@ -2063,9 +2017,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 &cached, GFP_NOFS); 2017 &cached, GFP_NOFS);
2064 break; 2018 break;
2065 } 2019 }
2066 em = get_extent(inode, page, page_offset, cur, 2020 em = get_extent(inode, page, pg_offset, cur,
2067 end - cur + 1, 0); 2021 end - cur + 1, 0);
2068 if (IS_ERR(em) || !em) { 2022 if (IS_ERR_OR_NULL(em)) {
2069 SetPageError(page); 2023 SetPageError(page);
2070 unlock_extent(tree, cur, end, GFP_NOFS); 2024 unlock_extent(tree, cur, end, GFP_NOFS);
2071 break; 2025 break;
@@ -2103,7 +2057,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2103 struct extent_state *cached = NULL; 2057 struct extent_state *cached = NULL;
2104 2058
2105 userpage = kmap_atomic(page, KM_USER0); 2059 userpage = kmap_atomic(page, KM_USER0);
2106 memset(userpage + page_offset, 0, iosize); 2060 memset(userpage + pg_offset, 0, iosize);
2107 flush_dcache_page(page); 2061 flush_dcache_page(page);
2108 kunmap_atomic(userpage, KM_USER0); 2062 kunmap_atomic(userpage, KM_USER0);
2109 2063
@@ -2112,7 +2066,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2112 unlock_extent_cached(tree, cur, cur + iosize - 1, 2066 unlock_extent_cached(tree, cur, cur + iosize - 1,
2113 &cached, GFP_NOFS); 2067 &cached, GFP_NOFS);
2114 cur = cur + iosize; 2068 cur = cur + iosize;
2115 page_offset += iosize; 2069 pg_offset += iosize;
2116 continue; 2070 continue;
2117 } 2071 }
2118 /* the get_extent function already copied into the page */ 2072 /* the get_extent function already copied into the page */
@@ -2121,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2121 check_page_uptodate(tree, page); 2075 check_page_uptodate(tree, page);
2122 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2076 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2123 cur = cur + iosize; 2077 cur = cur + iosize;
2124 page_offset += iosize; 2078 pg_offset += iosize;
2125 continue; 2079 continue;
2126 } 2080 }
2127 /* we have an inline extent but it didn't get marked up 2081 /* we have an inline extent but it didn't get marked up
@@ -2131,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2131 SetPageError(page); 2085 SetPageError(page);
2132 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2086 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2133 cur = cur + iosize; 2087 cur = cur + iosize;
2134 page_offset += iosize; 2088 pg_offset += iosize;
2135 continue; 2089 continue;
2136 } 2090 }
2137 2091
@@ -2144,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2144 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2098 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2145 pnr -= page->index; 2099 pnr -= page->index;
2146 ret = submit_extent_page(READ, tree, page, 2100 ret = submit_extent_page(READ, tree, page,
2147 sector, disk_io_size, page_offset, 2101 sector, disk_io_size, pg_offset,
2148 bdev, bio, pnr, 2102 bdev, bio, pnr,
2149 end_bio_extent_readpage, mirror_num, 2103 end_bio_extent_readpage, mirror_num,
2150 *bio_flags, 2104 *bio_flags,
@@ -2155,7 +2109,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2155 if (ret) 2109 if (ret)
2156 SetPageError(page); 2110 SetPageError(page);
2157 cur = cur + iosize; 2111 cur = cur + iosize;
2158 page_offset += iosize; 2112 pg_offset += iosize;
2159 } 2113 }
2160out: 2114out:
2161 if (!nr) { 2115 if (!nr) {
@@ -2351,7 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2351 } 2305 }
2352 em = epd->get_extent(inode, page, pg_offset, cur, 2306 em = epd->get_extent(inode, page, pg_offset, cur,
2353 end - cur + 1, 1); 2307 end - cur + 1, 1);
2354 if (IS_ERR(em) || !em) { 2308 if (IS_ERR_OR_NULL(em)) {
2355 SetPageError(page); 2309 SetPageError(page);
2356 break; 2310 break;
2357 } 2311 }
@@ -2730,128 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2730} 2684}
2731 2685
2732/* 2686/*
2733 * simple commit_write call, set_range_dirty is used to mark both
2734 * the pages and the extent records as dirty
2735 */
2736int extent_commit_write(struct extent_io_tree *tree,
2737 struct inode *inode, struct page *page,
2738 unsigned from, unsigned to)
2739{
2740 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2741
2742 set_page_extent_mapped(page);
2743 set_page_dirty(page);
2744
2745 if (pos > inode->i_size) {
2746 i_size_write(inode, pos);
2747 mark_inode_dirty(inode);
2748 }
2749 return 0;
2750}
2751
2752int extent_prepare_write(struct extent_io_tree *tree,
2753 struct inode *inode, struct page *page,
2754 unsigned from, unsigned to, get_extent_t *get_extent)
2755{
2756 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2757 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2758 u64 block_start;
2759 u64 orig_block_start;
2760 u64 block_end;
2761 u64 cur_end;
2762 struct extent_map *em;
2763 unsigned blocksize = 1 << inode->i_blkbits;
2764 size_t page_offset = 0;
2765 size_t block_off_start;
2766 size_t block_off_end;
2767 int err = 0;
2768 int iocount = 0;
2769 int ret = 0;
2770 int isnew;
2771
2772 set_page_extent_mapped(page);
2773
2774 block_start = (page_start + from) & ~((u64)blocksize - 1);
2775 block_end = (page_start + to - 1) | (blocksize - 1);
2776 orig_block_start = block_start;
2777
2778 lock_extent(tree, page_start, page_end, GFP_NOFS);
2779 while (block_start <= block_end) {
2780 em = get_extent(inode, page, page_offset, block_start,
2781 block_end - block_start + 1, 1);
2782 if (IS_ERR(em) || !em)
2783 goto err;
2784
2785 cur_end = min(block_end, extent_map_end(em) - 1);
2786 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2787 block_off_end = block_off_start + blocksize;
2788 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2789
2790 if (!PageUptodate(page) && isnew &&
2791 (block_off_end > to || block_off_start < from)) {
2792 void *kaddr;
2793
2794 kaddr = kmap_atomic(page, KM_USER0);
2795 if (block_off_end > to)
2796 memset(kaddr + to, 0, block_off_end - to);
2797 if (block_off_start < from)
2798 memset(kaddr + block_off_start, 0,
2799 from - block_off_start);
2800 flush_dcache_page(page);
2801 kunmap_atomic(kaddr, KM_USER0);
2802 }
2803 if ((em->block_start != EXTENT_MAP_HOLE &&
2804 em->block_start != EXTENT_MAP_INLINE) &&
2805 !isnew && !PageUptodate(page) &&
2806 (block_off_end > to || block_off_start < from) &&
2807 !test_range_bit(tree, block_start, cur_end,
2808 EXTENT_UPTODATE, 1, NULL)) {
2809 u64 sector;
2810 u64 extent_offset = block_start - em->start;
2811 size_t iosize;
2812 sector = (em->block_start + extent_offset) >> 9;
2813 iosize = (cur_end - block_start + blocksize) &
2814 ~((u64)blocksize - 1);
2815 /*
2816 * we've already got the extent locked, but we
2817 * need to split the state such that our end_bio
2818 * handler can clear the lock.
2819 */
2820 set_extent_bit(tree, block_start,
2821 block_start + iosize - 1,
2822 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2823 ret = submit_extent_page(READ, tree, page,
2824 sector, iosize, page_offset, em->bdev,
2825 NULL, 1,
2826 end_bio_extent_preparewrite, 0,
2827 0, 0);
2828 if (ret && !err)
2829 err = ret;
2830 iocount++;
2831 block_start = block_start + iosize;
2832 } else {
2833 struct extent_state *cached = NULL;
2834
2835 set_extent_uptodate(tree, block_start, cur_end, &cached,
2836 GFP_NOFS);
2837 unlock_extent_cached(tree, block_start, cur_end,
2838 &cached, GFP_NOFS);
2839 block_start = cur_end + 1;
2840 }
2841 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2842 free_extent_map(em);
2843 }
2844 if (iocount) {
2845 wait_extent_bit(tree, orig_block_start,
2846 block_end, EXTENT_LOCKED);
2847 }
2848 check_page_uptodate(tree, page);
2849err:
2850 /* FIXME, zero out newly allocated blocks on error */
2851 return err;
2852}
2853
2854/*
2855 * a helper for releasepage, this tests for areas of the page that 2687 * a helper for releasepage, this tests for areas of the page that
2856 * are locked or under IO and drops the related state bits if it is safe 2688 * are locked or under IO and drops the related state bits if it is safe
2857 * to drop the page. 2689 * to drop the page.
@@ -2909,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2909 len = end - start + 1; 2741 len = end - start + 1;
2910 write_lock(&map->lock); 2742 write_lock(&map->lock);
2911 em = lookup_extent_mapping(map, start, len); 2743 em = lookup_extent_mapping(map, start, len);
2912 if (!em || IS_ERR(em)) { 2744 if (IS_ERR_OR_NULL(em)) {
2913 write_unlock(&map->lock); 2745 write_unlock(&map->lock);
2914 break; 2746 break;
2915 } 2747 }
@@ -2937,33 +2769,6 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2937 return try_release_extent_state(map, tree, page, mask); 2769 return try_release_extent_state(map, tree, page, mask);
2938} 2770}
2939 2771
2940sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2941 get_extent_t *get_extent)
2942{
2943 struct inode *inode = mapping->host;
2944 struct extent_state *cached_state = NULL;
2945 u64 start = iblock << inode->i_blkbits;
2946 sector_t sector = 0;
2947 size_t blksize = (1 << inode->i_blkbits);
2948 struct extent_map *em;
2949
2950 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2951 0, &cached_state, GFP_NOFS);
2952 em = get_extent(inode, NULL, 0, start, blksize, 0);
2953 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2954 start + blksize - 1, &cached_state, GFP_NOFS);
2955 if (!em || IS_ERR(em))
2956 return 0;
2957
2958 if (em->block_start > EXTENT_MAP_LAST_BYTE)
2959 goto out;
2960
2961 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2962out:
2963 free_extent_map(em);
2964 return sector;
2965}
2966
2967/* 2772/*
2968 * helper function for fiemap, which doesn't want to see any holes. 2773 * helper function for fiemap, which doesn't want to see any holes.
2969 * This maps until we find something past 'last' 2774 * This maps until we find something past 'last'
@@ -2986,7 +2791,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
2986 break; 2791 break;
2987 len = (len + sectorsize - 1) & ~(sectorsize - 1); 2792 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2988 em = get_extent(inode, NULL, 0, offset, len, 0); 2793 em = get_extent(inode, NULL, 0, offset, len, 0);
2989 if (!em || IS_ERR(em)) 2794 if (IS_ERR_OR_NULL(em))
2990 return em; 2795 return em;
2991 2796
2992 /* if this isn't a hole return it */ 2797 /* if this isn't a hole return it */
@@ -3040,7 +2845,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3040 * because there might be preallocation past i_size 2845 * because there might be preallocation past i_size
3041 */ 2846 */
3042 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2847 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
3043 path, inode->i_ino, -1, 0); 2848 path, btrfs_ino(inode), -1, 0);
3044 if (ret < 0) { 2849 if (ret < 0) {
3045 btrfs_free_path(path); 2850 btrfs_free_path(path);
3046 return ret; 2851 return ret;
@@ -3053,7 +2858,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3053 found_type = btrfs_key_type(&found_key); 2858 found_type = btrfs_key_type(&found_key);
3054 2859
3055 /* No extents, but there might be delalloc bits */ 2860 /* No extents, but there might be delalloc bits */
3056 if (found_key.objectid != inode->i_ino || 2861 if (found_key.objectid != btrfs_ino(inode) ||
3057 found_type != BTRFS_EXTENT_DATA_KEY) { 2862 found_type != BTRFS_EXTENT_DATA_KEY) {
3058 /* have to trust i_size as the end */ 2863 /* have to trust i_size as the end */
3059 last = (u64)-1; 2864 last = (u64)-1;
@@ -3276,8 +3081,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3276 3081
3277struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3082struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3278 u64 start, unsigned long len, 3083 u64 start, unsigned long len,
3279 struct page *page0, 3084 struct page *page0)
3280 gfp_t mask)
3281{ 3085{
3282 unsigned long num_pages = num_extent_pages(start, len); 3086 unsigned long num_pages = num_extent_pages(start, len);
3283 unsigned long i; 3087 unsigned long i;
@@ -3298,7 +3102,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3298 } 3102 }
3299 rcu_read_unlock(); 3103 rcu_read_unlock();
3300 3104
3301 eb = __alloc_extent_buffer(tree, start, len, mask); 3105 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
3302 if (!eb) 3106 if (!eb)
3303 return NULL; 3107 return NULL;
3304 3108
@@ -3315,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3315 i = 0; 3119 i = 0;
3316 } 3120 }
3317 for (; i < num_pages; i++, index++) { 3121 for (; i < num_pages; i++, index++) {
3318 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3122 p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
3319 if (!p) { 3123 if (!p) {
3320 WARN_ON(1); 3124 WARN_ON(1);
3321 goto free_eb; 3125 goto free_eb;
@@ -3387,8 +3191,7 @@ free_eb:
3387} 3191}
3388 3192
3389struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3193struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3390 u64 start, unsigned long len, 3194 u64 start, unsigned long len)
3391 gfp_t mask)
3392{ 3195{
3393 struct extent_buffer *eb; 3196 struct extent_buffer *eb;
3394 3197
@@ -3449,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3449 return 0; 3252 return 0;
3450} 3253}
3451 3254
3452int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3453 struct extent_buffer *eb)
3454{
3455 return wait_on_extent_writeback(tree, eb->start,
3456 eb->start + eb->len - 1);
3457}
3458
3459int set_extent_buffer_dirty(struct extent_io_tree *tree, 3255int set_extent_buffer_dirty(struct extent_io_tree *tree,
3460 struct extent_buffer *eb) 3256 struct extent_buffer *eb)
3461{ 3257{
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index af2d7179c372..4e8445a4757c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -153,23 +153,14 @@ static inline int extent_compress_type(unsigned long bio_flags)
153 153
154struct extent_map_tree; 154struct extent_map_tree;
155 155
156static inline struct extent_state *extent_state_next(struct extent_state *state)
157{
158 struct rb_node *node;
159 node = rb_next(&state->rb_node);
160 if (!node)
161 return NULL;
162 return rb_entry(node, struct extent_state, rb_node);
163}
164
165typedef struct extent_map *(get_extent_t)(struct inode *inode, 156typedef struct extent_map *(get_extent_t)(struct inode *inode,
166 struct page *page, 157 struct page *page,
167 size_t page_offset, 158 size_t pg_offset,
168 u64 start, u64 len, 159 u64 start, u64 len,
169 int create); 160 int create);
170 161
171void extent_io_tree_init(struct extent_io_tree *tree, 162void extent_io_tree_init(struct extent_io_tree *tree,
172 struct address_space *mapping, gfp_t mask); 163 struct address_space *mapping);
173int try_release_extent_mapping(struct extent_map_tree *map, 164int try_release_extent_mapping(struct extent_map_tree *map,
174 struct extent_io_tree *tree, struct page *page, 165 struct extent_io_tree *tree, struct page *page,
175 gfp_t mask); 166 gfp_t mask);
@@ -215,14 +206,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
215 gfp_t mask); 206 gfp_t mask);
216int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 207int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
217 gfp_t mask); 208 gfp_t mask);
218int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
219 gfp_t mask);
220int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
221 u64 end, gfp_t mask);
222int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 209int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
223 struct extent_state **cached_state, gfp_t mask); 210 struct extent_state **cached_state, gfp_t mask);
224int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
225 gfp_t mask);
226int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 211int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
227 u64 *start_ret, u64 *end_ret, int bits); 212 u64 *start_ret, u64 *end_ret, int bits);
228struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 213struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -243,28 +228,17 @@ int extent_readpages(struct extent_io_tree *tree,
243 struct address_space *mapping, 228 struct address_space *mapping,
244 struct list_head *pages, unsigned nr_pages, 229 struct list_head *pages, unsigned nr_pages,
245 get_extent_t get_extent); 230 get_extent_t get_extent);
246int extent_prepare_write(struct extent_io_tree *tree,
247 struct inode *inode, struct page *page,
248 unsigned from, unsigned to, get_extent_t *get_extent);
249int extent_commit_write(struct extent_io_tree *tree,
250 struct inode *inode, struct page *page,
251 unsigned from, unsigned to);
252sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
253 get_extent_t *get_extent);
254int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 231int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
255 __u64 start, __u64 len, get_extent_t *get_extent); 232 __u64 start, __u64 len, get_extent_t *get_extent);
256int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
257int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 233int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
258int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 234int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
259void set_page_extent_mapped(struct page *page); 235void set_page_extent_mapped(struct page *page);
260 236
261struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 237struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
262 u64 start, unsigned long len, 238 u64 start, unsigned long len,
263 struct page *page0, 239 struct page *page0);
264 gfp_t mask);
265struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 240struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
266 u64 start, unsigned long len, 241 u64 start, unsigned long len);
267 gfp_t mask);
268void free_extent_buffer(struct extent_buffer *eb); 242void free_extent_buffer(struct extent_buffer *eb);
269int read_extent_buffer_pages(struct extent_io_tree *tree, 243int read_extent_buffer_pages(struct extent_io_tree *tree,
270 struct extent_buffer *eb, u64 start, int wait, 244 struct extent_buffer *eb, u64 start, int wait,
@@ -292,16 +266,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
292 unsigned long src_offset, unsigned long len); 266 unsigned long src_offset, unsigned long len);
293void memset_extent_buffer(struct extent_buffer *eb, char c, 267void memset_extent_buffer(struct extent_buffer *eb, char c,
294 unsigned long start, unsigned long len); 268 unsigned long start, unsigned long len);
295int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
296 struct extent_buffer *eb);
297int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
298int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); 269int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
299int clear_extent_buffer_dirty(struct extent_io_tree *tree, 270int clear_extent_buffer_dirty(struct extent_io_tree *tree,
300 struct extent_buffer *eb); 271 struct extent_buffer *eb);
301int set_extent_buffer_dirty(struct extent_io_tree *tree, 272int set_extent_buffer_dirty(struct extent_io_tree *tree,
302 struct extent_buffer *eb); 273 struct extent_buffer *eb);
303int test_extent_buffer_dirty(struct extent_io_tree *tree,
304 struct extent_buffer *eb);
305int set_extent_buffer_uptodate(struct extent_io_tree *tree, 274int set_extent_buffer_uptodate(struct extent_io_tree *tree,
306 struct extent_buffer *eb); 275 struct extent_buffer *eb);
307int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 276int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -319,7 +288,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
319 unsigned long *map_start, 288 unsigned long *map_start,
320 unsigned long *map_len, int km); 289 unsigned long *map_len, int km);
321void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); 290void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
322int release_extent_buffer_tail_pages(struct extent_buffer *eb);
323int extent_range_uptodate(struct extent_io_tree *tree, 291int extent_range_uptodate(struct extent_io_tree *tree,
324 u64 start, u64 end); 292 u64 start, u64 end);
325int extent_clear_unlock_delalloc(struct inode *inode, 293int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a24a3f2fa13e..2d0410344ea3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -28,12 +28,11 @@ void extent_map_exit(void)
28/** 28/**
29 * extent_map_tree_init - initialize extent map tree 29 * extent_map_tree_init - initialize extent map tree
30 * @tree: tree to initialize 30 * @tree: tree to initialize
31 * @mask: flags for memory allocations during tree operations
32 * 31 *
33 * Initialize the extent tree @tree. Should be called for each new inode 32 * Initialize the extent tree @tree. Should be called for each new inode
34 * or other user of the extent_map interface. 33 * or other user of the extent_map interface.
35 */ 34 */
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 35void extent_map_tree_init(struct extent_map_tree *tree)
37{ 36{
38 tree->map = RB_ROOT; 37 tree->map = RB_ROOT;
39 rwlock_init(&tree->lock); 38 rwlock_init(&tree->lock);
@@ -41,16 +40,15 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
41 40
42/** 41/**
43 * alloc_extent_map - allocate new extent map structure 42 * alloc_extent_map - allocate new extent map structure
44 * @mask: memory allocation flags
45 * 43 *
46 * Allocate a new extent_map structure. The new structure is 44 * Allocate a new extent_map structure. The new structure is
47 * returned with a reference count of one and needs to be 45 * returned with a reference count of one and needs to be
48 * freed using free_extent_map() 46 * freed using free_extent_map()
49 */ 47 */
50struct extent_map *alloc_extent_map(gfp_t mask) 48struct extent_map *alloc_extent_map(void)
51{ 49{
52 struct extent_map *em; 50 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 51 em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
54 if (!em) 52 if (!em)
55 return NULL; 53 return NULL;
56 em->in_tree = 0; 54 em->in_tree = 0;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 28b44dbd1e35..33a7890b1f40 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -49,14 +49,14 @@ static inline u64 extent_map_block_end(struct extent_map *em)
49 return em->block_start + em->block_len; 49 return em->block_start + em->block_len;
50} 50}
51 51
52void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); 52void extent_map_tree_init(struct extent_map_tree *tree);
53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 53struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
54 u64 start, u64 len); 54 u64 start, u64 len);
55int add_extent_mapping(struct extent_map_tree *tree, 55int add_extent_mapping(struct extent_map_tree *tree,
56 struct extent_map *em); 56 struct extent_map *em);
57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 57int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
58 58
59struct extent_map *alloc_extent_map(gfp_t mask); 59struct extent_map *alloc_extent_map(void);
60void free_extent_map(struct extent_map *em); 60void free_extent_map(struct extent_map *em);
61int __init extent_map_init(void); 61int __init extent_map_init(void);
62void extent_map_exit(void); 62void extent_map_exit(void);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a6a9d4e8b491..90d4ee52cd45 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -193,7 +193,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
193 u32 item_size; 193 u32 item_size;
194 194
195 if (item) 195 if (item)
196 btrfs_release_path(root, path); 196 btrfs_release_path(path);
197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, 197 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
198 path, disk_bytenr, 0); 198 path, disk_bytenr, 0);
199 if (IS_ERR(item)) { 199 if (IS_ERR(item)) {
@@ -208,12 +208,13 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
208 EXTENT_NODATASUM, GFP_NOFS); 208 EXTENT_NODATASUM, GFP_NOFS);
209 } else { 209 } else {
210 printk(KERN_INFO "btrfs no csum found " 210 printk(KERN_INFO "btrfs no csum found "
211 "for inode %lu start %llu\n", 211 "for inode %llu start %llu\n",
212 inode->i_ino, 212 (unsigned long long)
213 btrfs_ino(inode),
213 (unsigned long long)offset); 214 (unsigned long long)offset);
214 } 215 }
215 item = NULL; 216 item = NULL;
216 btrfs_release_path(root, path); 217 btrfs_release_path(path);
217 goto found; 218 goto found;
218 } 219 }
219 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 220 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
@@ -266,7 +267,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
266} 267}
267 268
268int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 269int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
269 struct list_head *list) 270 struct list_head *list, int search_commit)
270{ 271{
271 struct btrfs_key key; 272 struct btrfs_key key;
272 struct btrfs_path *path; 273 struct btrfs_path *path;
@@ -283,6 +284,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
283 path = btrfs_alloc_path(); 284 path = btrfs_alloc_path();
284 BUG_ON(!path); 285 BUG_ON(!path);
285 286
287 if (search_commit) {
288 path->skip_locking = 1;
289 path->reada = 2;
290 path->search_commit_root = 1;
291 }
292
286 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 293 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
287 key.offset = start; 294 key.offset = start;
288 key.type = BTRFS_EXTENT_CSUM_KEY; 295 key.type = BTRFS_EXTENT_CSUM_KEY;
@@ -495,7 +502,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
495 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 502 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
496 new_size *= csum_size; 503 new_size *= csum_size;
497 ret = btrfs_truncate_item(trans, root, path, new_size, 1); 504 ret = btrfs_truncate_item(trans, root, path, new_size, 1);
498 BUG_ON(ret);
499 } else if (key->offset >= bytenr && csum_end > end_byte && 505 } else if (key->offset >= bytenr && csum_end > end_byte &&
500 end_byte > key->offset) { 506 end_byte > key->offset) {
501 /* 507 /*
@@ -508,7 +514,6 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
508 new_size *= csum_size; 514 new_size *= csum_size;
509 515
510 ret = btrfs_truncate_item(trans, root, path, new_size, 0); 516 ret = btrfs_truncate_item(trans, root, path, new_size, 0);
511 BUG_ON(ret);
512 517
513 key->offset = end_byte; 518 key->offset = end_byte;
514 ret = btrfs_set_item_key_safe(trans, root, path, key); 519 ret = btrfs_set_item_key_safe(trans, root, path, key);
@@ -551,10 +556,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
551 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 556 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
552 if (ret > 0) { 557 if (ret > 0) {
553 if (path->slots[0] == 0) 558 if (path->slots[0] == 0)
554 goto out; 559 break;
555 path->slots[0]--; 560 path->slots[0]--;
556 } else if (ret < 0) { 561 } else if (ret < 0) {
557 goto out; 562 break;
558 } 563 }
559 564
560 leaf = path->nodes[0]; 565 leaf = path->nodes[0];
@@ -579,7 +584,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
579 /* delete the entire item, it is inside our range */ 584 /* delete the entire item, it is inside our range */
580 if (key.offset >= bytenr && csum_end <= end_byte) { 585 if (key.offset >= bytenr && csum_end <= end_byte) {
581 ret = btrfs_del_item(trans, root, path); 586 ret = btrfs_del_item(trans, root, path);
582 BUG_ON(ret); 587 if (ret)
588 goto out;
583 if (key.offset == bytenr) 589 if (key.offset == bytenr)
584 break; 590 break;
585 } else if (key.offset < bytenr && csum_end > end_byte) { 591 } else if (key.offset < bytenr && csum_end > end_byte) {
@@ -631,11 +637,12 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
631 if (key.offset < bytenr) 637 if (key.offset < bytenr)
632 break; 638 break;
633 } 639 }
634 btrfs_release_path(root, path); 640 btrfs_release_path(path);
635 } 641 }
642 ret = 0;
636out: 643out:
637 btrfs_free_path(path); 644 btrfs_free_path(path);
638 return 0; 645 return ret;
639} 646}
640 647
641int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 648int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
@@ -722,7 +729,7 @@ again:
722 * at this point, we know the tree has an item, but it isn't big 729 * at this point, we know the tree has an item, but it isn't big
723 * enough yet to put our csum in. Grow it 730 * enough yet to put our csum in. Grow it
724 */ 731 */
725 btrfs_release_path(root, path); 732 btrfs_release_path(path);
726 ret = btrfs_search_slot(trans, root, &file_key, path, 733 ret = btrfs_search_slot(trans, root, &file_key, path,
727 csum_size, 1); 734 csum_size, 1);
728 if (ret < 0) 735 if (ret < 0)
@@ -761,12 +768,11 @@ again:
761 goto insert; 768 goto insert;
762 769
763 ret = btrfs_extend_item(trans, root, path, diff); 770 ret = btrfs_extend_item(trans, root, path, diff);
764 BUG_ON(ret);
765 goto csum; 771 goto csum;
766 } 772 }
767 773
768insert: 774insert:
769 btrfs_release_path(root, path); 775 btrfs_release_path(path);
770 csum_offset = 0; 776 csum_offset = 0;
771 if (found_next) { 777 if (found_next) {
772 u64 tmp = total_bytes + root->sectorsize; 778 u64 tmp = total_bytes + root->sectorsize;
@@ -850,7 +856,7 @@ next_sector:
850 } 856 }
851 btrfs_mark_buffer_dirty(path->nodes[0]); 857 btrfs_mark_buffer_dirty(path->nodes[0]);
852 if (total_bytes < sums->len) { 858 if (total_bytes < sums->len) {
853 btrfs_release_path(root, path); 859 btrfs_release_path(path);
854 cond_resched(); 860 cond_resched();
855 goto again; 861 goto again;
856 } 862 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75899a01dded..c6a22d783c35 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,263 @@
40#include "locking.h" 40#include "locking.h"
41#include "compat.h" 41#include "compat.h"
42 42
43/*
44 * when auto defrag is enabled we
45 * queue up these defrag structs to remember which
46 * inodes need defragging passes
47 */
48struct inode_defrag {
49 struct rb_node rb_node;
50 /* objectid */
51 u64 ino;
52 /*
53 * transid where the defrag was added, we search for
54 * extents newer than this
55 */
56 u64 transid;
57
58 /* root objectid */
59 u64 root;
60
61 /* last offset we were able to defrag */
62 u64 last_offset;
63
64 /* if we've wrapped around back to zero once already */
65 int cycled;
66};
67
68/* pop a record for an inode into the defrag tree. The lock
69 * must be held already
70 *
71 * If you're inserting a record for an older transid than an
72 * existing record, the transid already in the tree is lowered
73 *
74 * If an existing record is found the defrag item you
75 * pass in is freed
76 */
77static int __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag)
79{
80 struct btrfs_root *root = BTRFS_I(inode)->root;
81 struct inode_defrag *entry;
82 struct rb_node **p;
83 struct rb_node *parent = NULL;
84
85 p = &root->fs_info->defrag_inodes.rb_node;
86 while (*p) {
87 parent = *p;
88 entry = rb_entry(parent, struct inode_defrag, rb_node);
89
90 if (defrag->ino < entry->ino)
91 p = &parent->rb_left;
92 else if (defrag->ino > entry->ino)
93 p = &parent->rb_right;
94 else {
95 /* if we're reinserting an entry for
96 * an old defrag run, make sure to
97 * lower the transid of our existing record
98 */
99 if (defrag->transid < entry->transid)
100 entry->transid = defrag->transid;
101 if (defrag->last_offset > entry->last_offset)
102 entry->last_offset = defrag->last_offset;
103 goto exists;
104 }
105 }
106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0;
110
111exists:
112 kfree(defrag);
113 return 0;
114
115}
116
117/*
118 * insert a defrag record for this inode if auto defrag is
119 * enabled
120 */
121int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
122 struct inode *inode)
123{
124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid;
128
129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0;
131
132 if (root->fs_info->closing)
133 return 0;
134
135 if (BTRFS_I(inode)->in_defrag)
136 return 0;
137
138 if (trans)
139 transid = trans->transid;
140 else
141 transid = BTRFS_I(inode)->root->last_trans;
142
143 defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
144 if (!defrag)
145 return -ENOMEM;
146
147 defrag->ino = inode->i_ino;
148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid;
150
151 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret;
156}
157
158/*
159 * must be called with the defrag_inodes lock held
160 */
161struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
162 struct rb_node **next)
163{
164 struct inode_defrag *entry = NULL;
165 struct rb_node *p;
166 struct rb_node *parent = NULL;
167
168 p = info->defrag_inodes.rb_node;
169 while (p) {
170 parent = p;
171 entry = rb_entry(parent, struct inode_defrag, rb_node);
172
173 if (ino < entry->ino)
174 p = parent->rb_left;
175 else if (ino > entry->ino)
176 p = parent->rb_right;
177 else
178 return entry;
179 }
180
181 if (next) {
182 while (parent && ino > entry->ino) {
183 parent = rb_next(parent);
184 entry = rb_entry(parent, struct inode_defrag, rb_node);
185 }
186 *next = parent;
187 }
188 return NULL;
189}
190
191/*
192 * run through the list of inodes in the FS that need
193 * defragging
194 */
195int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
196{
197 struct inode_defrag *defrag;
198 struct btrfs_root *inode_root;
199 struct inode *inode;
200 struct rb_node *n;
201 struct btrfs_key key;
202 struct btrfs_ioctl_defrag_range_args range;
203 u64 first_ino = 0;
204 int num_defrag;
205 int defrag_batch = 1024;
206
207 memset(&range, 0, sizeof(range));
208 range.len = (u64)-1;
209
210 atomic_inc(&fs_info->defrag_running);
211 spin_lock(&fs_info->defrag_inodes_lock);
212 while(1) {
213 n = NULL;
214
215 /* find an inode to defrag */
216 defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
217 if (!defrag) {
218 if (n)
219 defrag = rb_entry(n, struct inode_defrag, rb_node);
220 else if (first_ino) {
221 first_ino = 0;
222 continue;
223 } else {
224 break;
225 }
226 }
227
228 /* remove it from the rbtree */
229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231
232 if (fs_info->closing)
233 goto next_free;
234
235 spin_unlock(&fs_info->defrag_inodes_lock);
236
237 /* get the inode */
238 key.objectid = defrag->root;
239 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
240 key.offset = (u64)-1;
241 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
242 if (IS_ERR(inode_root))
243 goto next;
244
245 key.objectid = defrag->ino;
246 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
247 key.offset = 0;
248
249 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
250 if (IS_ERR(inode))
251 goto next;
252
253 /* do a chunk of defrag */
254 BTRFS_I(inode)->in_defrag = 0;
255 range.start = defrag->last_offset;
256 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
257 defrag_batch);
258 /*
259 * if we filled the whole defrag batch, there
260 * must be more work to do. Queue this defrag
261 * again
262 */
263 if (num_defrag == defrag_batch) {
264 defrag->last_offset = range.start;
265 __btrfs_add_inode_defrag(inode, defrag);
266 /*
267 * we don't want to kfree defrag, we added it back to
268 * the rbtree
269 */
270 defrag = NULL;
271 } else if (defrag->last_offset && !defrag->cycled) {
272 /*
273 * we didn't fill our defrag batch, but
274 * we didn't start at zero. Make sure we loop
275 * around to the start of the file.
276 */
277 defrag->last_offset = 0;
278 defrag->cycled = 1;
279 __btrfs_add_inode_defrag(inode, defrag);
280 defrag = NULL;
281 }
282
283 iput(inode);
284next:
285 spin_lock(&fs_info->defrag_inodes_lock);
286next_free:
287 kfree(defrag);
288 }
289 spin_unlock(&fs_info->defrag_inodes_lock);
290
291 atomic_dec(&fs_info->defrag_running);
292
293 /*
294 * during unmount, we use the transaction_wait queue to
295 * wait for the defragger to stop
296 */
297 wake_up(&fs_info->transaction_wait);
298 return 0;
299}
43 300
44/* simple helper to fault in pages and copy. This should go away 301/* simple helper to fault in pages and copy. This should go away
45 * and be replaced with calls into generic code. 302 * and be replaced with calls into generic code.
@@ -191,9 +448,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
191 } 448 }
192 while (1) { 449 while (1) {
193 if (!split) 450 if (!split)
194 split = alloc_extent_map(GFP_NOFS); 451 split = alloc_extent_map();
195 if (!split2) 452 if (!split2)
196 split2 = alloc_extent_map(GFP_NOFS); 453 split2 = alloc_extent_map();
197 BUG_ON(!split || !split2); 454 BUG_ON(!split || !split2);
198 455
199 write_lock(&em_tree->lock); 456 write_lock(&em_tree->lock);
@@ -298,6 +555,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
298 struct btrfs_path *path; 555 struct btrfs_path *path;
299 struct btrfs_key key; 556 struct btrfs_key key;
300 struct btrfs_key new_key; 557 struct btrfs_key new_key;
558 u64 ino = btrfs_ino(inode);
301 u64 search_start = start; 559 u64 search_start = start;
302 u64 disk_bytenr = 0; 560 u64 disk_bytenr = 0;
303 u64 num_bytes = 0; 561 u64 num_bytes = 0;
@@ -318,14 +576,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
318 576
319 while (1) { 577 while (1) {
320 recow = 0; 578 recow = 0;
321 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 579 ret = btrfs_lookup_file_extent(trans, root, path, ino,
322 search_start, -1); 580 search_start, -1);
323 if (ret < 0) 581 if (ret < 0)
324 break; 582 break;
325 if (ret > 0 && path->slots[0] > 0 && search_start == start) { 583 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
326 leaf = path->nodes[0]; 584 leaf = path->nodes[0];
327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 585 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
328 if (key.objectid == inode->i_ino && 586 if (key.objectid == ino &&
329 key.type == BTRFS_EXTENT_DATA_KEY) 587 key.type == BTRFS_EXTENT_DATA_KEY)
330 path->slots[0]--; 588 path->slots[0]--;
331 } 589 }
@@ -346,7 +604,7 @@ next_slot:
346 } 604 }
347 605
348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 606 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
349 if (key.objectid > inode->i_ino || 607 if (key.objectid > ino ||
350 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 608 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
351 break; 609 break;
352 610
@@ -376,7 +634,7 @@ next_slot:
376 634
377 search_start = max(key.offset, start); 635 search_start = max(key.offset, start);
378 if (recow) { 636 if (recow) {
379 btrfs_release_path(root, path); 637 btrfs_release_path(path);
380 continue; 638 continue;
381 } 639 }
382 640
@@ -393,7 +651,7 @@ next_slot:
393 ret = btrfs_duplicate_item(trans, root, path, 651 ret = btrfs_duplicate_item(trans, root, path,
394 &new_key); 652 &new_key);
395 if (ret == -EAGAIN) { 653 if (ret == -EAGAIN) {
396 btrfs_release_path(root, path); 654 btrfs_release_path(path);
397 continue; 655 continue;
398 } 656 }
399 if (ret < 0) 657 if (ret < 0)
@@ -516,7 +774,7 @@ next_slot:
516 del_nr = 0; 774 del_nr = 0;
517 del_slot = 0; 775 del_slot = 0;
518 776
519 btrfs_release_path(root, path); 777 btrfs_release_path(path);
520 continue; 778 continue;
521 } 779 }
522 780
@@ -592,6 +850,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
592 int del_slot = 0; 850 int del_slot = 0;
593 int recow; 851 int recow;
594 int ret; 852 int ret;
853 u64 ino = btrfs_ino(inode);
595 854
596 btrfs_drop_extent_cache(inode, start, end - 1, 0); 855 btrfs_drop_extent_cache(inode, start, end - 1, 0);
597 856
@@ -600,7 +859,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
600again: 859again:
601 recow = 0; 860 recow = 0;
602 split = start; 861 split = start;
603 key.objectid = inode->i_ino; 862 key.objectid = ino;
604 key.type = BTRFS_EXTENT_DATA_KEY; 863 key.type = BTRFS_EXTENT_DATA_KEY;
605 key.offset = split; 864 key.offset = split;
606 865
@@ -612,8 +871,7 @@ again:
612 871
613 leaf = path->nodes[0]; 872 leaf = path->nodes[0];
614 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 873 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
615 BUG_ON(key.objectid != inode->i_ino || 874 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
616 key.type != BTRFS_EXTENT_DATA_KEY);
617 fi = btrfs_item_ptr(leaf, path->slots[0], 875 fi = btrfs_item_ptr(leaf, path->slots[0],
618 struct btrfs_file_extent_item); 876 struct btrfs_file_extent_item);
619 BUG_ON(btrfs_file_extent_type(leaf, fi) != 877 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +888,7 @@ again:
630 other_start = 0; 888 other_start = 0;
631 other_end = start; 889 other_end = start;
632 if (extent_mergeable(leaf, path->slots[0] - 1, 890 if (extent_mergeable(leaf, path->slots[0] - 1,
633 inode->i_ino, bytenr, orig_offset, 891 ino, bytenr, orig_offset,
634 &other_start, &other_end)) { 892 &other_start, &other_end)) {
635 new_key.offset = end; 893 new_key.offset = end;
636 btrfs_set_item_key_safe(trans, root, path, &new_key); 894 btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +911,7 @@ again:
653 other_start = end; 911 other_start = end;
654 other_end = 0; 912 other_end = 0;
655 if (extent_mergeable(leaf, path->slots[0] + 1, 913 if (extent_mergeable(leaf, path->slots[0] + 1,
656 inode->i_ino, bytenr, orig_offset, 914 ino, bytenr, orig_offset,
657 &other_start, &other_end)) { 915 &other_start, &other_end)) {
658 fi = btrfs_item_ptr(leaf, path->slots[0], 916 fi = btrfs_item_ptr(leaf, path->slots[0],
659 struct btrfs_file_extent_item); 917 struct btrfs_file_extent_item);
@@ -681,7 +939,7 @@ again:
681 new_key.offset = split; 939 new_key.offset = split;
682 ret = btrfs_duplicate_item(trans, root, path, &new_key); 940 ret = btrfs_duplicate_item(trans, root, path, &new_key);
683 if (ret == -EAGAIN) { 941 if (ret == -EAGAIN) {
684 btrfs_release_path(root, path); 942 btrfs_release_path(path);
685 goto again; 943 goto again;
686 } 944 }
687 BUG_ON(ret < 0); 945 BUG_ON(ret < 0);
@@ -702,7 +960,7 @@ again:
702 960
703 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 961 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
704 root->root_key.objectid, 962 root->root_key.objectid,
705 inode->i_ino, orig_offset); 963 ino, orig_offset);
706 BUG_ON(ret); 964 BUG_ON(ret);
707 965
708 if (split == start) { 966 if (split == start) {
@@ -718,10 +976,10 @@ again:
718 other_start = end; 976 other_start = end;
719 other_end = 0; 977 other_end = 0;
720 if (extent_mergeable(leaf, path->slots[0] + 1, 978 if (extent_mergeable(leaf, path->slots[0] + 1,
721 inode->i_ino, bytenr, orig_offset, 979 ino, bytenr, orig_offset,
722 &other_start, &other_end)) { 980 &other_start, &other_end)) {
723 if (recow) { 981 if (recow) {
724 btrfs_release_path(root, path); 982 btrfs_release_path(path);
725 goto again; 983 goto again;
726 } 984 }
727 extent_end = other_end; 985 extent_end = other_end;
@@ -729,16 +987,16 @@ again:
729 del_nr++; 987 del_nr++;
730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 988 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
731 0, root->root_key.objectid, 989 0, root->root_key.objectid,
732 inode->i_ino, orig_offset); 990 ino, orig_offset);
733 BUG_ON(ret); 991 BUG_ON(ret);
734 } 992 }
735 other_start = 0; 993 other_start = 0;
736 other_end = start; 994 other_end = start;
737 if (extent_mergeable(leaf, path->slots[0] - 1, 995 if (extent_mergeable(leaf, path->slots[0] - 1,
738 inode->i_ino, bytenr, orig_offset, 996 ino, bytenr, orig_offset,
739 &other_start, &other_end)) { 997 &other_start, &other_end)) {
740 if (recow) { 998 if (recow) {
741 btrfs_release_path(root, path); 999 btrfs_release_path(path);
742 goto again; 1000 goto again;
743 } 1001 }
744 key.offset = other_start; 1002 key.offset = other_start;
@@ -746,7 +1004,7 @@ again:
746 del_nr++; 1004 del_nr++;
747 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1005 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
748 0, root->root_key.objectid, 1006 0, root->root_key.objectid,
749 inode->i_ino, orig_offset); 1007 ino, orig_offset);
750 BUG_ON(ret); 1008 BUG_ON(ret);
751 } 1009 }
752 if (del_nr == 0) { 1010 if (del_nr == 0) {
@@ -1375,7 +1633,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1375 while (1) { 1633 while (1) {
1376 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1634 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1377 alloc_end - cur_offset, 0); 1635 alloc_end - cur_offset, 0);
1378 BUG_ON(IS_ERR(em) || !em); 1636 BUG_ON(IS_ERR_OR_NULL(em));
1379 last_byte = min(extent_map_end(em), alloc_end); 1637 last_byte = min(extent_map_end(em), alloc_end);
1380 last_byte = (last_byte + mask) & ~mask; 1638 last_byte = (last_byte + mask) & ~mask;
1381 if (em->block_start == EXTENT_MAP_HOLE || 1639 if (em->block_start == EXTENT_MAP_HOLE ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 63731a1fb0a1..70d45795d758 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,18 +25,17 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "inode-map.h"
28 29
29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 30#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 31#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
31 32
32static void recalculate_thresholds(struct btrfs_block_group_cache 33static int link_free_space(struct btrfs_free_space_ctl *ctl,
33 *block_group);
34static int link_free_space(struct btrfs_block_group_cache *block_group,
35 struct btrfs_free_space *info); 34 struct btrfs_free_space *info);
36 35
37struct inode *lookup_free_space_inode(struct btrfs_root *root, 36static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_block_group_cache 37 struct btrfs_path *path,
39 *block_group, struct btrfs_path *path) 38 u64 offset)
40{ 39{
41 struct btrfs_key key; 40 struct btrfs_key key;
42 struct btrfs_key location; 41 struct btrfs_key location;
@@ -46,22 +45,15 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
46 struct inode *inode = NULL; 45 struct inode *inode = NULL;
47 int ret; 46 int ret;
48 47
49 spin_lock(&block_group->lock);
50 if (block_group->inode)
51 inode = igrab(block_group->inode);
52 spin_unlock(&block_group->lock);
53 if (inode)
54 return inode;
55
56 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 48 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
57 key.offset = block_group->key.objectid; 49 key.offset = offset;
58 key.type = 0; 50 key.type = 0;
59 51
60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 52 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
61 if (ret < 0) 53 if (ret < 0)
62 return ERR_PTR(ret); 54 return ERR_PTR(ret);
63 if (ret > 0) { 55 if (ret > 0) {
64 btrfs_release_path(root, path); 56 btrfs_release_path(path);
65 return ERR_PTR(-ENOENT); 57 return ERR_PTR(-ENOENT);
66 } 58 }
67 59
@@ -70,7 +62,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
70 struct btrfs_free_space_header); 62 struct btrfs_free_space_header);
71 btrfs_free_space_key(leaf, header, &disk_key); 63 btrfs_free_space_key(leaf, header, &disk_key);
72 btrfs_disk_key_to_cpu(&location, &disk_key); 64 btrfs_disk_key_to_cpu(&location, &disk_key);
73 btrfs_release_path(root, path); 65 btrfs_release_path(path);
74 66
75 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 67 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
76 if (!inode) 68 if (!inode)
@@ -84,6 +76,27 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 76
85 inode->i_mapping->flags &= ~__GFP_FS; 77 inode->i_mapping->flags &= ~__GFP_FS;
86 78
79 return inode;
80}
81
82struct inode *lookup_free_space_inode(struct btrfs_root *root,
83 struct btrfs_block_group_cache
84 *block_group, struct btrfs_path *path)
85{
86 struct inode *inode = NULL;
87
88 spin_lock(&block_group->lock);
89 if (block_group->inode)
90 inode = igrab(block_group->inode);
91 spin_unlock(&block_group->lock);
92 if (inode)
93 return inode;
94
95 inode = __lookup_free_space_inode(root, path,
96 block_group->key.objectid);
97 if (IS_ERR(inode))
98 return inode;
99
87 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
88 if (!root->fs_info->closing) { 101 if (!root->fs_info->closing) {
89 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
@@ -94,24 +107,18 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
94 return inode; 107 return inode;
95} 108}
96 109
97int create_free_space_inode(struct btrfs_root *root, 110int __create_free_space_inode(struct btrfs_root *root,
98 struct btrfs_trans_handle *trans, 111 struct btrfs_trans_handle *trans,
99 struct btrfs_block_group_cache *block_group, 112 struct btrfs_path *path, u64 ino, u64 offset)
100 struct btrfs_path *path)
101{ 113{
102 struct btrfs_key key; 114 struct btrfs_key key;
103 struct btrfs_disk_key disk_key; 115 struct btrfs_disk_key disk_key;
104 struct btrfs_free_space_header *header; 116 struct btrfs_free_space_header *header;
105 struct btrfs_inode_item *inode_item; 117 struct btrfs_inode_item *inode_item;
106 struct extent_buffer *leaf; 118 struct extent_buffer *leaf;
107 u64 objectid;
108 int ret; 119 int ret;
109 120
110 ret = btrfs_find_free_objectid(trans, root, 0, &objectid); 121 ret = btrfs_insert_empty_inode(trans, root, path, ino);
111 if (ret < 0)
112 return ret;
113
114 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
115 if (ret) 122 if (ret)
116 return ret; 123 return ret;
117 124
@@ -131,19 +138,18 @@ int create_free_space_inode(struct btrfs_root *root,
131 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 138 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
132 btrfs_set_inode_nlink(leaf, inode_item, 1); 139 btrfs_set_inode_nlink(leaf, inode_item, 1);
133 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 140 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
134 btrfs_set_inode_block_group(leaf, inode_item, 141 btrfs_set_inode_block_group(leaf, inode_item, offset);
135 block_group->key.objectid);
136 btrfs_mark_buffer_dirty(leaf); 142 btrfs_mark_buffer_dirty(leaf);
137 btrfs_release_path(root, path); 143 btrfs_release_path(path);
138 144
139 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 145 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
140 key.offset = block_group->key.objectid; 146 key.offset = offset;
141 key.type = 0; 147 key.type = 0;
142 148
143 ret = btrfs_insert_empty_item(trans, root, path, &key, 149 ret = btrfs_insert_empty_item(trans, root, path, &key,
144 sizeof(struct btrfs_free_space_header)); 150 sizeof(struct btrfs_free_space_header));
145 if (ret < 0) { 151 if (ret < 0) {
146 btrfs_release_path(root, path); 152 btrfs_release_path(path);
147 return ret; 153 return ret;
148 } 154 }
149 leaf = path->nodes[0]; 155 leaf = path->nodes[0];
@@ -152,11 +158,27 @@ int create_free_space_inode(struct btrfs_root *root,
152 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 158 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
153 btrfs_set_free_space_key(leaf, header, &disk_key); 159 btrfs_set_free_space_key(leaf, header, &disk_key);
154 btrfs_mark_buffer_dirty(leaf); 160 btrfs_mark_buffer_dirty(leaf);
155 btrfs_release_path(root, path); 161 btrfs_release_path(path);
156 162
157 return 0; 163 return 0;
158} 164}
159 165
166int create_free_space_inode(struct btrfs_root *root,
167 struct btrfs_trans_handle *trans,
168 struct btrfs_block_group_cache *block_group,
169 struct btrfs_path *path)
170{
171 int ret;
172 u64 ino;
173
174 ret = btrfs_find_free_objectid(root, &ino);
175 if (ret < 0)
176 return ret;
177
178 return __create_free_space_inode(root, trans, path, ino,
179 block_group->key.objectid);
180}
181
160int btrfs_truncate_free_space_cache(struct btrfs_root *root, 182int btrfs_truncate_free_space_cache(struct btrfs_root *root,
161 struct btrfs_trans_handle *trans, 183 struct btrfs_trans_handle *trans,
162 struct btrfs_path *path, 184 struct btrfs_path *path,
@@ -187,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
187 return ret; 209 return ret;
188 } 210 }
189 211
190 return btrfs_update_inode(trans, root, inode); 212 ret = btrfs_update_inode(trans, root, inode);
213 return ret;
191} 214}
192 215
193static int readahead_cache(struct inode *inode) 216static int readahead_cache(struct inode *inode)
@@ -209,15 +232,13 @@ static int readahead_cache(struct inode *inode)
209 return 0; 232 return 0;
210} 233}
211 234
212int load_free_space_cache(struct btrfs_fs_info *fs_info, 235int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
213 struct btrfs_block_group_cache *block_group) 236 struct btrfs_free_space_ctl *ctl,
237 struct btrfs_path *path, u64 offset)
214{ 238{
215 struct btrfs_root *root = fs_info->tree_root;
216 struct inode *inode;
217 struct btrfs_free_space_header *header; 239 struct btrfs_free_space_header *header;
218 struct extent_buffer *leaf; 240 struct extent_buffer *leaf;
219 struct page *page; 241 struct page *page;
220 struct btrfs_path *path;
221 u32 *checksums = NULL, *crc; 242 u32 *checksums = NULL, *crc;
222 char *disk_crcs = NULL; 243 char *disk_crcs = NULL;
223 struct btrfs_key key; 244 struct btrfs_key key;
@@ -225,76 +246,47 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
225 u64 num_entries; 246 u64 num_entries;
226 u64 num_bitmaps; 247 u64 num_bitmaps;
227 u64 generation; 248 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
229 u32 cur_crc = ~(u32)0; 249 u32 cur_crc = ~(u32)0;
230 pgoff_t index = 0; 250 pgoff_t index = 0;
231 unsigned long first_page_offset; 251 unsigned long first_page_offset;
232 int num_checksums; 252 int num_checksums;
233 int ret = 0; 253 int ret = 0, ret2;
234
235 /*
236 * If we're unmounting then just return, since this does a search on the
237 * normal root and not the commit root and we could deadlock.
238 */
239 smp_mb();
240 if (fs_info->closing)
241 return 0;
242
243 /*
244 * If this block group has been marked to be cleared for one reason or
245 * another then we can't trust the on disk cache, so just return.
246 */
247 spin_lock(&block_group->lock);
248 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
249 spin_unlock(&block_group->lock);
250 return 0;
251 }
252 spin_unlock(&block_group->lock);
253 254
254 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
255 256
256 path = btrfs_alloc_path();
257 if (!path)
258 return 0;
259
260 inode = lookup_free_space_inode(root, block_group, path);
261 if (IS_ERR(inode)) {
262 btrfs_free_path(path);
263 return 0;
264 }
265
266 /* Nothing in the space cache, goodbye */ 257 /* Nothing in the space cache, goodbye */
267 if (!i_size_read(inode)) { 258 if (!i_size_read(inode))
268 btrfs_free_path(path);
269 goto out; 259 goto out;
270 }
271 260
272 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 261 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
273 key.offset = block_group->key.objectid; 262 key.offset = offset;
274 key.type = 0; 263 key.type = 0;
275 264
276 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 265 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
277 if (ret) { 266 if (ret < 0)
278 btrfs_free_path(path); 267 goto out;
268 else if (ret > 0) {
269 btrfs_release_path(path);
270 ret = 0;
279 goto out; 271 goto out;
280 } 272 }
281 273
274 ret = -1;
275
282 leaf = path->nodes[0]; 276 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0], 277 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header); 278 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header); 279 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 280 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header); 281 generation = btrfs_free_space_generation(leaf, header);
288 btrfs_free_path(path); 282 btrfs_release_path(path);
289 283
290 if (BTRFS_I(inode)->generation != generation) { 284 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 285 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292 " not match free space cache generation (%llu) for " 286 " not match free space cache generation (%llu)\n",
293 "block group %llu\n",
294 (unsigned long long)BTRFS_I(inode)->generation, 287 (unsigned long long)BTRFS_I(inode)->generation,
295 (unsigned long long)generation, 288 (unsigned long long)generation);
296 (unsigned long long)block_group->key.objectid); 289 goto out;
297 goto free_cache;
298 } 290 }
299 291
300 if (!num_entries) 292 if (!num_entries)
@@ -311,10 +303,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
311 goto out; 303 goto out;
312 304
313 ret = readahead_cache(inode); 305 ret = readahead_cache(inode);
314 if (ret) { 306 if (ret)
315 ret = 0;
316 goto out; 307 goto out;
317 }
318 308
319 while (1) { 309 while (1) {
320 struct btrfs_free_space_entry *entry; 310 struct btrfs_free_space_entry *entry;
@@ -333,10 +323,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
333 } 323 }
334 324
335 page = grab_cache_page(inode->i_mapping, index); 325 page = grab_cache_page(inode->i_mapping, index);
336 if (!page) { 326 if (!page)
337 ret = 0;
338 goto free_cache; 327 goto free_cache;
339 }
340 328
341 if (!PageUptodate(page)) { 329 if (!PageUptodate(page)) {
342 btrfs_readpage(NULL, page); 330 btrfs_readpage(NULL, page);
@@ -345,9 +333,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
345 unlock_page(page); 333 unlock_page(page);
346 page_cache_release(page); 334 page_cache_release(page);
347 printk(KERN_ERR "btrfs: error reading free " 335 printk(KERN_ERR "btrfs: error reading free "
348 "space cache: %llu\n", 336 "space cache\n");
349 (unsigned long long)
350 block_group->key.objectid);
351 goto free_cache; 337 goto free_cache;
352 } 338 }
353 } 339 }
@@ -360,13 +346,10 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
360 gen = addr + (sizeof(u32) * num_checksums); 346 gen = addr + (sizeof(u32) * num_checksums);
361 if (*gen != BTRFS_I(inode)->generation) { 347 if (*gen != BTRFS_I(inode)->generation) {
362 printk(KERN_ERR "btrfs: space cache generation" 348 printk(KERN_ERR "btrfs: space cache generation"
363 " (%llu) does not match inode (%llu) " 349 " (%llu) does not match inode (%llu)\n",
364 "for block group %llu\n",
365 (unsigned long long)*gen, 350 (unsigned long long)*gen,
366 (unsigned long long) 351 (unsigned long long)
367 BTRFS_I(inode)->generation, 352 BTRFS_I(inode)->generation);
368 (unsigned long long)
369 block_group->key.objectid);
370 kunmap(page); 353 kunmap(page);
371 unlock_page(page); 354 unlock_page(page);
372 page_cache_release(page); 355 page_cache_release(page);
@@ -382,9 +365,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
382 PAGE_CACHE_SIZE - start_offset); 365 PAGE_CACHE_SIZE - start_offset);
383 btrfs_csum_final(cur_crc, (char *)&cur_crc); 366 btrfs_csum_final(cur_crc, (char *)&cur_crc);
384 if (cur_crc != *crc) { 367 if (cur_crc != *crc) {
385 printk(KERN_ERR "btrfs: crc mismatch for page %lu in " 368 printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
386 "block group %llu\n", index, 369 index);
387 (unsigned long long)block_group->key.objectid);
388 kunmap(page); 370 kunmap(page);
389 unlock_page(page); 371 unlock_page(page);
390 page_cache_release(page); 372 page_cache_release(page);
@@ -417,9 +399,9 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
417 } 399 }
418 400
419 if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 401 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
420 spin_lock(&block_group->tree_lock); 402 spin_lock(&ctl->tree_lock);
421 ret = link_free_space(block_group, e); 403 ret = link_free_space(ctl, e);
422 spin_unlock(&block_group->tree_lock); 404 spin_unlock(&ctl->tree_lock);
423 BUG_ON(ret); 405 BUG_ON(ret);
424 } else { 406 } else {
425 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
@@ -431,11 +413,11 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
431 page_cache_release(page); 413 page_cache_release(page);
432 goto free_cache; 414 goto free_cache;
433 } 415 }
434 spin_lock(&block_group->tree_lock); 416 spin_lock(&ctl->tree_lock);
435 ret = link_free_space(block_group, e); 417 ret2 = link_free_space(ctl, e);
436 block_group->total_bitmaps++; 418 ctl->total_bitmaps++;
437 recalculate_thresholds(block_group); 419 ctl->op->recalc_thresholds(ctl);
438 spin_unlock(&block_group->tree_lock); 420 spin_unlock(&ctl->tree_lock);
439 list_add_tail(&e->list, &bitmaps); 421 list_add_tail(&e->list, &bitmaps);
440 } 422 }
441 423
@@ -471,41 +453,97 @@ next:
471 index++; 453 index++;
472 } 454 }
473 455
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
485 ret = 1; 456 ret = 1;
486out: 457out:
487 kfree(checksums); 458 kfree(checksums);
488 kfree(disk_crcs); 459 kfree(disk_crcs);
489 iput(inode);
490 return ret; 460 return ret;
491
492free_cache: 461free_cache:
493 /* This cache is bogus, make sure it gets cleared */ 462 __btrfs_remove_free_space_cache(ctl);
463 goto out;
464}
465
466int load_free_space_cache(struct btrfs_fs_info *fs_info,
467 struct btrfs_block_group_cache *block_group)
468{
469 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
470 struct btrfs_root *root = fs_info->tree_root;
471 struct inode *inode;
472 struct btrfs_path *path;
473 int ret;
474 bool matched;
475 u64 used = btrfs_block_group_used(&block_group->item);
476
477 /*
478 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock.
480 */
481 smp_mb();
482 if (fs_info->closing)
483 return 0;
484
485 /*
486 * If this block group has been marked to be cleared for one reason or
487 * another then we can't trust the on disk cache, so just return.
488 */
494 spin_lock(&block_group->lock); 489 spin_lock(&block_group->lock);
495 block_group->disk_cache_state = BTRFS_DC_CLEAR; 490 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
491 spin_unlock(&block_group->lock);
492 return 0;
493 }
496 spin_unlock(&block_group->lock); 494 spin_unlock(&block_group->lock);
497 btrfs_remove_free_space_cache(block_group); 495
498 goto out; 496 path = btrfs_alloc_path();
497 if (!path)
498 return 0;
499
500 inode = lookup_free_space_inode(root, block_group, path);
501 if (IS_ERR(inode)) {
502 btrfs_free_path(path);
503 return 0;
504 }
505
506 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
507 path, block_group->key.objectid);
508 btrfs_free_path(path);
509 if (ret <= 0)
510 goto out;
511
512 spin_lock(&ctl->tree_lock);
513 matched = (ctl->free_space == (block_group->key.offset - used -
514 block_group->bytes_super));
515 spin_unlock(&ctl->tree_lock);
516
517 if (!matched) {
518 __btrfs_remove_free_space_cache(ctl);
519 printk(KERN_ERR "block group %llu has an wrong amount of free "
520 "space\n", block_group->key.objectid);
521 ret = -1;
522 }
523out:
524 if (ret < 0) {
525 /* This cache is bogus, make sure it gets cleared */
526 spin_lock(&block_group->lock);
527 block_group->disk_cache_state = BTRFS_DC_CLEAR;
528 spin_unlock(&block_group->lock);
529 ret = 0;
530
531 printk(KERN_ERR "btrfs: failed to load free space cache "
532 "for block group %llu\n", block_group->key.objectid);
533 }
534
535 iput(inode);
536 return ret;
499} 537}
500 538
501int btrfs_write_out_cache(struct btrfs_root *root, 539int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
502 struct btrfs_trans_handle *trans, 540 struct btrfs_free_space_ctl *ctl,
503 struct btrfs_block_group_cache *block_group, 541 struct btrfs_block_group_cache *block_group,
504 struct btrfs_path *path) 542 struct btrfs_trans_handle *trans,
543 struct btrfs_path *path, u64 offset)
505{ 544{
506 struct btrfs_free_space_header *header; 545 struct btrfs_free_space_header *header;
507 struct extent_buffer *leaf; 546 struct extent_buffer *leaf;
508 struct inode *inode;
509 struct rb_node *node; 547 struct rb_node *node;
510 struct list_head *pos, *n; 548 struct list_head *pos, *n;
511 struct page **pages; 549 struct page **pages;
@@ -522,35 +560,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
522 int index = 0, num_pages = 0; 560 int index = 0, num_pages = 0;
523 int entries = 0; 561 int entries = 0;
524 int bitmaps = 0; 562 int bitmaps = 0;
525 int ret = 0; 563 int ret = -1;
526 bool next_page = false; 564 bool next_page = false;
527 bool out_of_space = false; 565 bool out_of_space = false;
528 566
529 root = root->fs_info->tree_root;
530
531 INIT_LIST_HEAD(&bitmap_list); 567 INIT_LIST_HEAD(&bitmap_list);
532 568
533 spin_lock(&block_group->lock); 569 node = rb_first(&ctl->free_space_offset);
534 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 570 if (!node)
535 spin_unlock(&block_group->lock);
536 return 0;
537 }
538 spin_unlock(&block_group->lock);
539
540 inode = lookup_free_space_inode(root, block_group, path);
541 if (IS_ERR(inode))
542 return 0;
543
544 if (!i_size_read(inode)) {
545 iput(inode);
546 return 0; 571 return 0;
547 }
548 572
549 node = rb_first(&block_group->free_space_offset); 573 if (!i_size_read(inode))
550 if (!node) { 574 return -1;
551 iput(inode);
552 return 0;
553 }
554 575
555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT; 577 PAGE_CACHE_SHIFT;
@@ -560,16 +581,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
560 581
561 /* We need a checksum per page. */ 582 /* We need a checksum per page. */
562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
563 if (!crc) { 584 if (!crc)
564 iput(inode); 585 return -1;
565 return 0;
566 }
567 586
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 587 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) { 588 if (!pages) {
570 kfree(crc); 589 kfree(crc);
571 iput(inode); 590 return -1;
572 return 0;
573 } 591 }
574 592
575 /* Since the first page has all of our checksums and our generation we 593 /* Since the first page has all of our checksums and our generation we
@@ -579,7 +597,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); 597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580 598
581 /* Get the cluster for this block_group if it exists */ 599 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list)) 600 if (block_group && !list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next, 601 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster, 602 struct btrfs_free_cluster,
585 block_group_list); 603 block_group_list);
@@ -621,7 +639,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
621 * When searching for pinned extents, we need to start at our start 639 * When searching for pinned extents, we need to start at our start
622 * offset. 640 * offset.
623 */ 641 */
624 start = block_group->key.objectid; 642 if (block_group)
643 start = block_group->key.objectid;
625 644
626 /* Write out the extent entries */ 645 /* Write out the extent entries */
627 do { 646 do {
@@ -679,8 +698,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
679 * We want to add any pinned extents to our free space cache 698 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space 699 * so we don't leak the space
681 */ 700 */
682 while (!next_page && (start < block_group->key.objectid + 701 while (block_group && !next_page &&
683 block_group->key.offset)) { 702 (start < block_group->key.objectid +
703 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end, 704 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY); 705 EXTENT_DIRTY);
686 if (ret) { 706 if (ret) {
@@ -798,12 +818,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
798 filemap_write_and_wait(inode->i_mapping); 818 filemap_write_and_wait(inode->i_mapping);
799 819
800 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 820 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
801 key.offset = block_group->key.objectid; 821 key.offset = offset;
802 key.type = 0; 822 key.type = 0;
803 823
804 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 824 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
805 if (ret < 0) { 825 if (ret < 0) {
806 ret = 0; 826 ret = -1;
807 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 827 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
808 EXTENT_DIRTY | EXTENT_DELALLOC | 828 EXTENT_DIRTY | EXTENT_DELALLOC |
809 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 829 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
@@ -816,13 +836,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
816 path->slots[0]--; 836 path->slots[0]--;
817 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 837 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
818 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 838 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
819 found_key.offset != block_group->key.objectid) { 839 found_key.offset != offset) {
820 ret = 0; 840 ret = -1;
821 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 841 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
822 EXTENT_DIRTY | EXTENT_DELALLOC | 842 EXTENT_DIRTY | EXTENT_DELALLOC |
823 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 843 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
824 GFP_NOFS); 844 GFP_NOFS);
825 btrfs_release_path(root, path); 845 btrfs_release_path(path);
826 goto out_free; 846 goto out_free;
827 } 847 }
828 } 848 }
@@ -832,49 +852,83 @@ int btrfs_write_out_cache(struct btrfs_root *root,
832 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 852 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
833 btrfs_set_free_space_generation(leaf, header, trans->transid); 853 btrfs_set_free_space_generation(leaf, header, trans->transid);
834 btrfs_mark_buffer_dirty(leaf); 854 btrfs_mark_buffer_dirty(leaf);
835 btrfs_release_path(root, path); 855 btrfs_release_path(path);
836 856
837 ret = 1; 857 ret = 1;
838 858
839out_free: 859out_free:
840 if (ret == 0) { 860 if (ret != 1) {
841 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 861 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
842 spin_lock(&block_group->lock);
843 block_group->disk_cache_state = BTRFS_DC_ERROR;
844 spin_unlock(&block_group->lock);
845 BTRFS_I(inode)->generation = 0; 862 BTRFS_I(inode)->generation = 0;
846 } 863 }
847 kfree(checksums); 864 kfree(checksums);
848 kfree(pages); 865 kfree(pages);
849 btrfs_update_inode(trans, root, inode); 866 btrfs_update_inode(trans, root, inode);
867 return ret;
868}
869
870int btrfs_write_out_cache(struct btrfs_root *root,
871 struct btrfs_trans_handle *trans,
872 struct btrfs_block_group_cache *block_group,
873 struct btrfs_path *path)
874{
875 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
876 struct inode *inode;
877 int ret = 0;
878
879 root = root->fs_info->tree_root;
880
881 spin_lock(&block_group->lock);
882 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
883 spin_unlock(&block_group->lock);
884 return 0;
885 }
886 spin_unlock(&block_group->lock);
887
888 inode = lookup_free_space_inode(root, block_group, path);
889 if (IS_ERR(inode))
890 return 0;
891
892 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
893 path, block_group->key.objectid);
894 if (ret < 0) {
895 spin_lock(&block_group->lock);
896 block_group->disk_cache_state = BTRFS_DC_ERROR;
897 spin_unlock(&block_group->lock);
898 ret = 0;
899
900 printk(KERN_ERR "btrfs: failed to write free space cace "
901 "for block group %llu\n", block_group->key.objectid);
902 }
903
850 iput(inode); 904 iput(inode);
851 return ret; 905 return ret;
852} 906}
853 907
854static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 908static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
855 u64 offset) 909 u64 offset)
856{ 910{
857 BUG_ON(offset < bitmap_start); 911 BUG_ON(offset < bitmap_start);
858 offset -= bitmap_start; 912 offset -= bitmap_start;
859 return (unsigned long)(div64_u64(offset, sectorsize)); 913 return (unsigned long)(div_u64(offset, unit));
860} 914}
861 915
862static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 916static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
863{ 917{
864 return (unsigned long)(div64_u64(bytes, sectorsize)); 918 return (unsigned long)(div_u64(bytes, unit));
865} 919}
866 920
867static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 921static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
868 u64 offset) 922 u64 offset)
869{ 923{
870 u64 bitmap_start; 924 u64 bitmap_start;
871 u64 bytes_per_bitmap; 925 u64 bytes_per_bitmap;
872 926
873 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 927 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
874 bitmap_start = offset - block_group->key.objectid; 928 bitmap_start = offset - ctl->start;
875 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 929 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
876 bitmap_start *= bytes_per_bitmap; 930 bitmap_start *= bytes_per_bitmap;
877 bitmap_start += block_group->key.objectid; 931 bitmap_start += ctl->start;
878 932
879 return bitmap_start; 933 return bitmap_start;
880} 934}
@@ -932,10 +986,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
932 * offset. 986 * offset.
933 */ 987 */
934static struct btrfs_free_space * 988static struct btrfs_free_space *
935tree_search_offset(struct btrfs_block_group_cache *block_group, 989tree_search_offset(struct btrfs_free_space_ctl *ctl,
936 u64 offset, int bitmap_only, int fuzzy) 990 u64 offset, int bitmap_only, int fuzzy)
937{ 991{
938 struct rb_node *n = block_group->free_space_offset.rb_node; 992 struct rb_node *n = ctl->free_space_offset.rb_node;
939 struct btrfs_free_space *entry, *prev = NULL; 993 struct btrfs_free_space *entry, *prev = NULL;
940 994
941 /* find entry that is closest to the 'offset' */ 995 /* find entry that is closest to the 'offset' */
@@ -1031,8 +1085,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1031 break; 1085 break;
1032 } 1086 }
1033 } 1087 }
1034 if (entry->offset + BITS_PER_BITMAP * 1088 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1035 block_group->sectorsize > offset)
1036 return entry; 1089 return entry;
1037 } else if (entry->offset + entry->bytes > offset) 1090 } else if (entry->offset + entry->bytes > offset)
1038 return entry; 1091 return entry;
@@ -1043,7 +1096,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1043 while (1) { 1096 while (1) {
1044 if (entry->bitmap) { 1097 if (entry->bitmap) {
1045 if (entry->offset + BITS_PER_BITMAP * 1098 if (entry->offset + BITS_PER_BITMAP *
1046 block_group->sectorsize > offset) 1099 ctl->unit > offset)
1047 break; 1100 break;
1048 } else { 1101 } else {
1049 if (entry->offset + entry->bytes > offset) 1102 if (entry->offset + entry->bytes > offset)
@@ -1059,42 +1112,47 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1059} 1112}
1060 1113
1061static inline void 1114static inline void
1062__unlink_free_space(struct btrfs_block_group_cache *block_group, 1115__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1063 struct btrfs_free_space *info) 1116 struct btrfs_free_space *info)
1064{ 1117{
1065 rb_erase(&info->offset_index, &block_group->free_space_offset); 1118 rb_erase(&info->offset_index, &ctl->free_space_offset);
1066 block_group->free_extents--; 1119 ctl->free_extents--;
1067} 1120}
1068 1121
1069static void unlink_free_space(struct btrfs_block_group_cache *block_group, 1122static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1070 struct btrfs_free_space *info) 1123 struct btrfs_free_space *info)
1071{ 1124{
1072 __unlink_free_space(block_group, info); 1125 __unlink_free_space(ctl, info);
1073 block_group->free_space -= info->bytes; 1126 ctl->free_space -= info->bytes;
1074} 1127}
1075 1128
1076static int link_free_space(struct btrfs_block_group_cache *block_group, 1129static int link_free_space(struct btrfs_free_space_ctl *ctl,
1077 struct btrfs_free_space *info) 1130 struct btrfs_free_space *info)
1078{ 1131{
1079 int ret = 0; 1132 int ret = 0;
1080 1133
1081 BUG_ON(!info->bitmap && !info->bytes); 1134 BUG_ON(!info->bitmap && !info->bytes);
1082 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 1135 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1083 &info->offset_index, (info->bitmap != NULL)); 1136 &info->offset_index, (info->bitmap != NULL));
1084 if (ret) 1137 if (ret)
1085 return ret; 1138 return ret;
1086 1139
1087 block_group->free_space += info->bytes; 1140 ctl->free_space += info->bytes;
1088 block_group->free_extents++; 1141 ctl->free_extents++;
1089 return ret; 1142 return ret;
1090} 1143}
1091 1144
1092static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 1145static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1093{ 1146{
1147 struct btrfs_block_group_cache *block_group = ctl->private;
1094 u64 max_bytes; 1148 u64 max_bytes;
1095 u64 bitmap_bytes; 1149 u64 bitmap_bytes;
1096 u64 extent_bytes; 1150 u64 extent_bytes;
1097 u64 size = block_group->key.offset; 1151 u64 size = block_group->key.offset;
1152 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1153 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1154
1155 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1098 1156
1099 /* 1157 /*
1100 * The goal is to keep the total amount of memory used per 1gb of space 1158 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1112,10 +1170,10 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1112 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1170 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1113 * we add more bitmaps. 1171 * we add more bitmaps.
1114 */ 1172 */
1115 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1173 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1116 1174
1117 if (bitmap_bytes >= max_bytes) { 1175 if (bitmap_bytes >= max_bytes) {
1118 block_group->extents_thresh = 0; 1176 ctl->extents_thresh = 0;
1119 return; 1177 return;
1120 } 1178 }
1121 1179
@@ -1126,47 +1184,43 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1126 extent_bytes = max_bytes - bitmap_bytes; 1184 extent_bytes = max_bytes - bitmap_bytes;
1127 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1185 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1128 1186
1129 block_group->extents_thresh = 1187 ctl->extents_thresh =
1130 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1188 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1131} 1189}
1132 1190
1133static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 1191static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1134 struct btrfs_free_space *info, u64 offset, 1192 struct btrfs_free_space *info, u64 offset,
1135 u64 bytes) 1193 u64 bytes)
1136{ 1194{
1137 unsigned long start, end; 1195 unsigned long start, count;
1138 unsigned long i;
1139 1196
1140 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1197 start = offset_to_bit(info->offset, ctl->unit, offset);
1141 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1198 count = bytes_to_bits(bytes, ctl->unit);
1142 BUG_ON(end > BITS_PER_BITMAP); 1199 BUG_ON(start + count > BITS_PER_BITMAP);
1143 1200
1144 for (i = start; i < end; i++) 1201 bitmap_clear(info->bitmap, start, count);
1145 clear_bit(i, info->bitmap);
1146 1202
1147 info->bytes -= bytes; 1203 info->bytes -= bytes;
1148 block_group->free_space -= bytes; 1204 ctl->free_space -= bytes;
1149} 1205}
1150 1206
1151static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 1207static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1152 struct btrfs_free_space *info, u64 offset, 1208 struct btrfs_free_space *info, u64 offset,
1153 u64 bytes) 1209 u64 bytes)
1154{ 1210{
1155 unsigned long start, end; 1211 unsigned long start, count;
1156 unsigned long i;
1157 1212
1158 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1213 start = offset_to_bit(info->offset, ctl->unit, offset);
1159 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1214 count = bytes_to_bits(bytes, ctl->unit);
1160 BUG_ON(end > BITS_PER_BITMAP); 1215 BUG_ON(start + count > BITS_PER_BITMAP);
1161 1216
1162 for (i = start; i < end; i++) 1217 bitmap_set(info->bitmap, start, count);
1163 set_bit(i, info->bitmap);
1164 1218
1165 info->bytes += bytes; 1219 info->bytes += bytes;
1166 block_group->free_space += bytes; 1220 ctl->free_space += bytes;
1167} 1221}
1168 1222
1169static int search_bitmap(struct btrfs_block_group_cache *block_group, 1223static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1170 struct btrfs_free_space *bitmap_info, u64 *offset, 1224 struct btrfs_free_space *bitmap_info, u64 *offset,
1171 u64 *bytes) 1225 u64 *bytes)
1172{ 1226{
@@ -1174,9 +1228,9 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1174 unsigned long bits, i; 1228 unsigned long bits, i;
1175 unsigned long next_zero; 1229 unsigned long next_zero;
1176 1230
1177 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 1231 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1178 max_t(u64, *offset, bitmap_info->offset)); 1232 max_t(u64, *offset, bitmap_info->offset));
1179 bits = bytes_to_bits(*bytes, block_group->sectorsize); 1233 bits = bytes_to_bits(*bytes, ctl->unit);
1180 1234
1181 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 1235 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1182 i < BITS_PER_BITMAP; 1236 i < BITS_PER_BITMAP;
@@ -1191,29 +1245,25 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1191 } 1245 }
1192 1246
1193 if (found_bits) { 1247 if (found_bits) {
1194 *offset = (u64)(i * block_group->sectorsize) + 1248 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1195 bitmap_info->offset; 1249 *bytes = (u64)(found_bits) * ctl->unit;
1196 *bytes = (u64)(found_bits) * block_group->sectorsize;
1197 return 0; 1250 return 0;
1198 } 1251 }
1199 1252
1200 return -1; 1253 return -1;
1201} 1254}
1202 1255
1203static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 1256static struct btrfs_free_space *
1204 *block_group, u64 *offset, 1257find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1205 u64 *bytes, int debug)
1206{ 1258{
1207 struct btrfs_free_space *entry; 1259 struct btrfs_free_space *entry;
1208 struct rb_node *node; 1260 struct rb_node *node;
1209 int ret; 1261 int ret;
1210 1262
1211 if (!block_group->free_space_offset.rb_node) 1263 if (!ctl->free_space_offset.rb_node)
1212 return NULL; 1264 return NULL;
1213 1265
1214 entry = tree_search_offset(block_group, 1266 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1215 offset_to_bitmap(block_group, *offset),
1216 0, 1);
1217 if (!entry) 1267 if (!entry)
1218 return NULL; 1268 return NULL;
1219 1269
@@ -1223,7 +1273,7 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1223 continue; 1273 continue;
1224 1274
1225 if (entry->bitmap) { 1275 if (entry->bitmap) {
1226 ret = search_bitmap(block_group, entry, offset, bytes); 1276 ret = search_bitmap(ctl, entry, offset, bytes);
1227 if (!ret) 1277 if (!ret)
1228 return entry; 1278 return entry;
1229 continue; 1279 continue;
@@ -1237,33 +1287,28 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1237 return NULL; 1287 return NULL;
1238} 1288}
1239 1289
1240static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 1290static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1241 struct btrfs_free_space *info, u64 offset) 1291 struct btrfs_free_space *info, u64 offset)
1242{ 1292{
1243 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1293 info->offset = offset_to_bitmap(ctl, offset);
1244 int max_bitmaps = (int)div64_u64(block_group->key.offset +
1245 bytes_per_bg - 1, bytes_per_bg);
1246 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1247
1248 info->offset = offset_to_bitmap(block_group, offset);
1249 info->bytes = 0; 1294 info->bytes = 0;
1250 link_free_space(block_group, info); 1295 link_free_space(ctl, info);
1251 block_group->total_bitmaps++; 1296 ctl->total_bitmaps++;
1252 1297
1253 recalculate_thresholds(block_group); 1298 ctl->op->recalc_thresholds(ctl);
1254} 1299}
1255 1300
1256static void free_bitmap(struct btrfs_block_group_cache *block_group, 1301static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1257 struct btrfs_free_space *bitmap_info) 1302 struct btrfs_free_space *bitmap_info)
1258{ 1303{
1259 unlink_free_space(block_group, bitmap_info); 1304 unlink_free_space(ctl, bitmap_info);
1260 kfree(bitmap_info->bitmap); 1305 kfree(bitmap_info->bitmap);
1261 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1306 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1262 block_group->total_bitmaps--; 1307 ctl->total_bitmaps--;
1263 recalculate_thresholds(block_group); 1308 ctl->op->recalc_thresholds(ctl);
1264} 1309}
1265 1310
1266static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1311static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1267 struct btrfs_free_space *bitmap_info, 1312 struct btrfs_free_space *bitmap_info,
1268 u64 *offset, u64 *bytes) 1313 u64 *offset, u64 *bytes)
1269{ 1314{
@@ -1272,8 +1317,7 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
1272 int ret; 1317 int ret;
1273 1318
1274again: 1319again:
1275 end = bitmap_info->offset + 1320 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1276 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
1277 1321
1278 /* 1322 /*
1279 * XXX - this can go away after a few releases. 1323 * XXX - this can go away after a few releases.
@@ -1288,24 +1332,22 @@ again:
1288 search_start = *offset; 1332 search_start = *offset;
1289 search_bytes = *bytes; 1333 search_bytes = *bytes;
1290 search_bytes = min(search_bytes, end - search_start + 1); 1334 search_bytes = min(search_bytes, end - search_start + 1);
1291 ret = search_bitmap(block_group, bitmap_info, &search_start, 1335 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1292 &search_bytes);
1293 BUG_ON(ret < 0 || search_start != *offset); 1336 BUG_ON(ret < 0 || search_start != *offset);
1294 1337
1295 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1338 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1296 bitmap_clear_bits(block_group, bitmap_info, *offset, 1339 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1297 end - *offset + 1);
1298 *bytes -= end - *offset + 1; 1340 *bytes -= end - *offset + 1;
1299 *offset = end + 1; 1341 *offset = end + 1;
1300 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1342 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1301 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 1343 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1302 *bytes = 0; 1344 *bytes = 0;
1303 } 1345 }
1304 1346
1305 if (*bytes) { 1347 if (*bytes) {
1306 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1348 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1307 if (!bitmap_info->bytes) 1349 if (!bitmap_info->bytes)
1308 free_bitmap(block_group, bitmap_info); 1350 free_bitmap(ctl, bitmap_info);
1309 1351
1310 /* 1352 /*
1311 * no entry after this bitmap, but we still have bytes to 1353 * no entry after this bitmap, but we still have bytes to
@@ -1332,31 +1374,28 @@ again:
1332 */ 1374 */
1333 search_start = *offset; 1375 search_start = *offset;
1334 search_bytes = *bytes; 1376 search_bytes = *bytes;
1335 ret = search_bitmap(block_group, bitmap_info, &search_start, 1377 ret = search_bitmap(ctl, bitmap_info, &search_start,
1336 &search_bytes); 1378 &search_bytes);
1337 if (ret < 0 || search_start != *offset) 1379 if (ret < 0 || search_start != *offset)
1338 return -EAGAIN; 1380 return -EAGAIN;
1339 1381
1340 goto again; 1382 goto again;
1341 } else if (!bitmap_info->bytes) 1383 } else if (!bitmap_info->bytes)
1342 free_bitmap(block_group, bitmap_info); 1384 free_bitmap(ctl, bitmap_info);
1343 1385
1344 return 0; 1386 return 0;
1345} 1387}
1346 1388
1347static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1348 struct btrfs_free_space *info) 1390 struct btrfs_free_space *info)
1349{ 1391{
1350 struct btrfs_free_space *bitmap_info; 1392 struct btrfs_block_group_cache *block_group = ctl->private;
1351 int added = 0;
1352 u64 bytes, offset, end;
1353 int ret;
1354 1393
1355 /* 1394 /*
1356 * If we are below the extents threshold then we can add this as an 1395 * If we are below the extents threshold then we can add this as an
1357 * extent, and don't have to deal with the bitmap 1396 * extent, and don't have to deal with the bitmap
1358 */ 1397 */
1359 if (block_group->free_extents < block_group->extents_thresh) { 1398 if (ctl->free_extents < ctl->extents_thresh) {
1360 /* 1399 /*
1361 * If this block group has some small extents we don't want to 1400 * If this block group has some small extents we don't want to
1362 * use up all of our free slots in the cache with them, we want 1401 * use up all of our free slots in the cache with them, we want
@@ -1365,11 +1404,10 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1365 * the overhead of a bitmap if we don't have to. 1404 * the overhead of a bitmap if we don't have to.
1366 */ 1405 */
1367 if (info->bytes <= block_group->sectorsize * 4) { 1406 if (info->bytes <= block_group->sectorsize * 4) {
1368 if (block_group->free_extents * 2 <= 1407 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1369 block_group->extents_thresh) 1408 return false;
1370 return 0;
1371 } else { 1409 } else {
1372 return 0; 1410 return false;
1373 } 1411 }
1374 } 1412 }
1375 1413
@@ -1379,31 +1417,42 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1379 */ 1417 */
1380 if (BITS_PER_BITMAP * block_group->sectorsize > 1418 if (BITS_PER_BITMAP * block_group->sectorsize >
1381 block_group->key.offset) 1419 block_group->key.offset)
1382 return 0; 1420 return false;
1421
1422 return true;
1423}
1424
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info)
1427{
1428 struct btrfs_free_space *bitmap_info;
1429 int added = 0;
1430 u64 bytes, offset, end;
1431 int ret;
1383 1432
1384 bytes = info->bytes; 1433 bytes = info->bytes;
1385 offset = info->offset; 1434 offset = info->offset;
1386 1435
1436 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0;
1438
1387again: 1439again:
1388 bitmap_info = tree_search_offset(block_group, 1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1389 offset_to_bitmap(block_group, offset),
1390 1, 0); 1441 1, 0);
1391 if (!bitmap_info) { 1442 if (!bitmap_info) {
1392 BUG_ON(added); 1443 BUG_ON(added);
1393 goto new_bitmap; 1444 goto new_bitmap;
1394 } 1445 }
1395 1446
1396 end = bitmap_info->offset + 1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1397 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
1398 1448
1399 if (offset >= bitmap_info->offset && offset + bytes > end) { 1449 if (offset >= bitmap_info->offset && offset + bytes > end) {
1400 bitmap_set_bits(block_group, bitmap_info, offset, 1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
1401 end - offset);
1402 bytes -= end - offset; 1451 bytes -= end - offset;
1403 offset = end; 1452 offset = end;
1404 added = 0; 1453 added = 0;
1405 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1406 bitmap_set_bits(block_group, bitmap_info, offset, bytes); 1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1407 bytes = 0; 1456 bytes = 0;
1408 } else { 1457 } else {
1409 BUG(); 1458 BUG();
@@ -1417,19 +1466,19 @@ again:
1417 1466
1418new_bitmap: 1467new_bitmap:
1419 if (info && info->bitmap) { 1468 if (info && info->bitmap) {
1420 add_new_bitmap(block_group, info, offset); 1469 add_new_bitmap(ctl, info, offset);
1421 added = 1; 1470 added = 1;
1422 info = NULL; 1471 info = NULL;
1423 goto again; 1472 goto again;
1424 } else { 1473 } else {
1425 spin_unlock(&block_group->tree_lock); 1474 spin_unlock(&ctl->tree_lock);
1426 1475
1427 /* no pre-allocated info, allocate a new one */ 1476 /* no pre-allocated info, allocate a new one */
1428 if (!info) { 1477 if (!info) {
1429 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1478 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1430 GFP_NOFS); 1479 GFP_NOFS);
1431 if (!info) { 1480 if (!info) {
1432 spin_lock(&block_group->tree_lock); 1481 spin_lock(&ctl->tree_lock);
1433 ret = -ENOMEM; 1482 ret = -ENOMEM;
1434 goto out; 1483 goto out;
1435 } 1484 }
@@ -1437,7 +1486,7 @@ new_bitmap:
1437 1486
1438 /* allocate the bitmap */ 1487 /* allocate the bitmap */
1439 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1488 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1440 spin_lock(&block_group->tree_lock); 1489 spin_lock(&ctl->tree_lock);
1441 if (!info->bitmap) { 1490 if (!info->bitmap) {
1442 ret = -ENOMEM; 1491 ret = -ENOMEM;
1443 goto out; 1492 goto out;
@@ -1455,7 +1504,7 @@ out:
1455 return ret; 1504 return ret;
1456} 1505}
1457 1506
1458bool try_merge_free_space(struct btrfs_block_group_cache *block_group, 1507static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1459 struct btrfs_free_space *info, bool update_stat) 1508 struct btrfs_free_space *info, bool update_stat)
1460{ 1509{
1461 struct btrfs_free_space *left_info; 1510 struct btrfs_free_space *left_info;
@@ -1469,18 +1518,18 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1469 * are adding, if there is remove that struct and add a new one to 1518 * are adding, if there is remove that struct and add a new one to
1470 * cover the entire range 1519 * cover the entire range
1471 */ 1520 */
1472 right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 1521 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1473 if (right_info && rb_prev(&right_info->offset_index)) 1522 if (right_info && rb_prev(&right_info->offset_index))
1474 left_info = rb_entry(rb_prev(&right_info->offset_index), 1523 left_info = rb_entry(rb_prev(&right_info->offset_index),
1475 struct btrfs_free_space, offset_index); 1524 struct btrfs_free_space, offset_index);
1476 else 1525 else
1477 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1526 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1478 1527
1479 if (right_info && !right_info->bitmap) { 1528 if (right_info && !right_info->bitmap) {
1480 if (update_stat) 1529 if (update_stat)
1481 unlink_free_space(block_group, right_info); 1530 unlink_free_space(ctl, right_info);
1482 else 1531 else
1483 __unlink_free_space(block_group, right_info); 1532 __unlink_free_space(ctl, right_info);
1484 info->bytes += right_info->bytes; 1533 info->bytes += right_info->bytes;
1485 kmem_cache_free(btrfs_free_space_cachep, right_info); 1534 kmem_cache_free(btrfs_free_space_cachep, right_info);
1486 merged = true; 1535 merged = true;
@@ -1489,9 +1538,9 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1489 if (left_info && !left_info->bitmap && 1538 if (left_info && !left_info->bitmap &&
1490 left_info->offset + left_info->bytes == offset) { 1539 left_info->offset + left_info->bytes == offset) {
1491 if (update_stat) 1540 if (update_stat)
1492 unlink_free_space(block_group, left_info); 1541 unlink_free_space(ctl, left_info);
1493 else 1542 else
1494 __unlink_free_space(block_group, left_info); 1543 __unlink_free_space(ctl, left_info);
1495 info->offset = left_info->offset; 1544 info->offset = left_info->offset;
1496 info->bytes += left_info->bytes; 1545 info->bytes += left_info->bytes;
1497 kmem_cache_free(btrfs_free_space_cachep, left_info); 1546 kmem_cache_free(btrfs_free_space_cachep, left_info);
@@ -1501,8 +1550,8 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1501 return merged; 1550 return merged;
1502} 1551}
1503 1552
1504int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1553int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1505 u64 offset, u64 bytes) 1554 u64 offset, u64 bytes)
1506{ 1555{
1507 struct btrfs_free_space *info; 1556 struct btrfs_free_space *info;
1508 int ret = 0; 1557 int ret = 0;
@@ -1514,9 +1563,9 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1514 info->offset = offset; 1563 info->offset = offset;
1515 info->bytes = bytes; 1564 info->bytes = bytes;
1516 1565
1517 spin_lock(&block_group->tree_lock); 1566 spin_lock(&ctl->tree_lock);
1518 1567
1519 if (try_merge_free_space(block_group, info, true)) 1568 if (try_merge_free_space(ctl, info, true))
1520 goto link; 1569 goto link;
1521 1570
1522 /* 1571 /*
@@ -1524,7 +1573,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1524 * extent then we know we're going to have to allocate a new extent, so 1573 * extent then we know we're going to have to allocate a new extent, so
1525 * before we do that see if we need to drop this into a bitmap 1574 * before we do that see if we need to drop this into a bitmap
1526 */ 1575 */
1527 ret = insert_into_bitmap(block_group, info); 1576 ret = insert_into_bitmap(ctl, info);
1528 if (ret < 0) { 1577 if (ret < 0) {
1529 goto out; 1578 goto out;
1530 } else if (ret) { 1579 } else if (ret) {
@@ -1532,11 +1581,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1532 goto out; 1581 goto out;
1533 } 1582 }
1534link: 1583link:
1535 ret = link_free_space(block_group, info); 1584 ret = link_free_space(ctl, info);
1536 if (ret) 1585 if (ret)
1537 kmem_cache_free(btrfs_free_space_cachep, info); 1586 kmem_cache_free(btrfs_free_space_cachep, info);
1538out: 1587out:
1539 spin_unlock(&block_group->tree_lock); 1588 spin_unlock(&ctl->tree_lock);
1540 1589
1541 if (ret) { 1590 if (ret) {
1542 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1591 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
@@ -1549,21 +1598,21 @@ out:
1549int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1598int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1550 u64 offset, u64 bytes) 1599 u64 offset, u64 bytes)
1551{ 1600{
1601 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1552 struct btrfs_free_space *info; 1602 struct btrfs_free_space *info;
1553 struct btrfs_free_space *next_info = NULL; 1603 struct btrfs_free_space *next_info = NULL;
1554 int ret = 0; 1604 int ret = 0;
1555 1605
1556 spin_lock(&block_group->tree_lock); 1606 spin_lock(&ctl->tree_lock);
1557 1607
1558again: 1608again:
1559 info = tree_search_offset(block_group, offset, 0, 0); 1609 info = tree_search_offset(ctl, offset, 0, 0);
1560 if (!info) { 1610 if (!info) {
1561 /* 1611 /*
1562 * oops didn't find an extent that matched the space we wanted 1612 * oops didn't find an extent that matched the space we wanted
1563 * to remove, look for a bitmap instead 1613 * to remove, look for a bitmap instead
1564 */ 1614 */
1565 info = tree_search_offset(block_group, 1615 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1566 offset_to_bitmap(block_group, offset),
1567 1, 0); 1616 1, 0);
1568 if (!info) { 1617 if (!info) {
1569 WARN_ON(1); 1618 WARN_ON(1);
@@ -1578,8 +1627,8 @@ again:
1578 offset_index); 1627 offset_index);
1579 1628
1580 if (next_info->bitmap) 1629 if (next_info->bitmap)
1581 end = next_info->offset + BITS_PER_BITMAP * 1630 end = next_info->offset +
1582 block_group->sectorsize - 1; 1631 BITS_PER_BITMAP * ctl->unit - 1;
1583 else 1632 else
1584 end = next_info->offset + next_info->bytes; 1633 end = next_info->offset + next_info->bytes;
1585 1634
@@ -1599,20 +1648,20 @@ again:
1599 } 1648 }
1600 1649
1601 if (info->bytes == bytes) { 1650 if (info->bytes == bytes) {
1602 unlink_free_space(block_group, info); 1651 unlink_free_space(ctl, info);
1603 if (info->bitmap) { 1652 if (info->bitmap) {
1604 kfree(info->bitmap); 1653 kfree(info->bitmap);
1605 block_group->total_bitmaps--; 1654 ctl->total_bitmaps--;
1606 } 1655 }
1607 kmem_cache_free(btrfs_free_space_cachep, info); 1656 kmem_cache_free(btrfs_free_space_cachep, info);
1608 goto out_lock; 1657 goto out_lock;
1609 } 1658 }
1610 1659
1611 if (!info->bitmap && info->offset == offset) { 1660 if (!info->bitmap && info->offset == offset) {
1612 unlink_free_space(block_group, info); 1661 unlink_free_space(ctl, info);
1613 info->offset += bytes; 1662 info->offset += bytes;
1614 info->bytes -= bytes; 1663 info->bytes -= bytes;
1615 link_free_space(block_group, info); 1664 link_free_space(ctl, info);
1616 goto out_lock; 1665 goto out_lock;
1617 } 1666 }
1618 1667
@@ -1626,13 +1675,13 @@ again:
1626 * first unlink the old info and then 1675 * first unlink the old info and then
1627 * insert it again after the hole we're creating 1676 * insert it again after the hole we're creating
1628 */ 1677 */
1629 unlink_free_space(block_group, info); 1678 unlink_free_space(ctl, info);
1630 if (offset + bytes < info->offset + info->bytes) { 1679 if (offset + bytes < info->offset + info->bytes) {
1631 u64 old_end = info->offset + info->bytes; 1680 u64 old_end = info->offset + info->bytes;
1632 1681
1633 info->offset = offset + bytes; 1682 info->offset = offset + bytes;
1634 info->bytes = old_end - info->offset; 1683 info->bytes = old_end - info->offset;
1635 ret = link_free_space(block_group, info); 1684 ret = link_free_space(ctl, info);
1636 WARN_ON(ret); 1685 WARN_ON(ret);
1637 if (ret) 1686 if (ret)
1638 goto out_lock; 1687 goto out_lock;
@@ -1642,7 +1691,7 @@ again:
1642 */ 1691 */
1643 kmem_cache_free(btrfs_free_space_cachep, info); 1692 kmem_cache_free(btrfs_free_space_cachep, info);
1644 } 1693 }
1645 spin_unlock(&block_group->tree_lock); 1694 spin_unlock(&ctl->tree_lock);
1646 1695
1647 /* step two, insert a new info struct to cover 1696 /* step two, insert a new info struct to cover
1648 * anything before the hole 1697 * anything before the hole
@@ -1653,12 +1702,12 @@ again:
1653 goto out; 1702 goto out;
1654 } 1703 }
1655 1704
1656 ret = remove_from_bitmap(block_group, info, &offset, &bytes); 1705 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1657 if (ret == -EAGAIN) 1706 if (ret == -EAGAIN)
1658 goto again; 1707 goto again;
1659 BUG_ON(ret); 1708 BUG_ON(ret);
1660out_lock: 1709out_lock:
1661 spin_unlock(&block_group->tree_lock); 1710 spin_unlock(&ctl->tree_lock);
1662out: 1711out:
1663 return ret; 1712 return ret;
1664} 1713}
@@ -1666,11 +1715,12 @@ out:
1666void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1715void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1667 u64 bytes) 1716 u64 bytes)
1668{ 1717{
1718 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1669 struct btrfs_free_space *info; 1719 struct btrfs_free_space *info;
1670 struct rb_node *n; 1720 struct rb_node *n;
1671 int count = 0; 1721 int count = 0;
1672 1722
1673 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { 1723 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1674 info = rb_entry(n, struct btrfs_free_space, offset_index); 1724 info = rb_entry(n, struct btrfs_free_space, offset_index);
1675 if (info->bytes >= bytes) 1725 if (info->bytes >= bytes)
1676 count++; 1726 count++;
@@ -1685,19 +1735,28 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1685 "\n", count); 1735 "\n", count);
1686} 1736}
1687 1737
1688u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) 1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1689{ 1744{
1690 struct btrfs_free_space *info; 1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1691 struct rb_node *n;
1692 u64 ret = 0;
1693 1746
1694 for (n = rb_first(&block_group->free_space_offset); n; 1747 spin_lock_init(&ctl->tree_lock);
1695 n = rb_next(n)) { 1748 ctl->unit = block_group->sectorsize;
1696 info = rb_entry(n, struct btrfs_free_space, offset_index); 1749 ctl->start = block_group->key.objectid;
1697 ret += info->bytes; 1750 ctl->private = block_group;
1698 } 1751 ctl->op = &free_space_op;
1699 1752
1700 return ret; 1753 /*
1754 * we only want to have 32k of ram per block group for keeping
1755 * track of free space, and if we pass 1/2 of that we want to
1756 * start converting things over to using bitmaps
1757 */
1758 ctl->extents_thresh = ((1024 * 32) / 2) /
1759 sizeof(struct btrfs_free_space);
1701} 1760}
1702 1761
1703/* 1762/*
@@ -1711,6 +1770,7 @@ __btrfs_return_cluster_to_free_space(
1711 struct btrfs_block_group_cache *block_group, 1770 struct btrfs_block_group_cache *block_group,
1712 struct btrfs_free_cluster *cluster) 1771 struct btrfs_free_cluster *cluster)
1713{ 1772{
1773 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1714 struct btrfs_free_space *entry; 1774 struct btrfs_free_space *entry;
1715 struct rb_node *node; 1775 struct rb_node *node;
1716 1776
@@ -1732,8 +1792,8 @@ __btrfs_return_cluster_to_free_space(
1732 1792
1733 bitmap = (entry->bitmap != NULL); 1793 bitmap = (entry->bitmap != NULL);
1734 if (!bitmap) 1794 if (!bitmap)
1735 try_merge_free_space(block_group, entry, false); 1795 try_merge_free_space(ctl, entry, false);
1736 tree_insert_offset(&block_group->free_space_offset, 1796 tree_insert_offset(&ctl->free_space_offset,
1737 entry->offset, &entry->offset_index, bitmap); 1797 entry->offset, &entry->offset_index, bitmap);
1738 } 1798 }
1739 cluster->root = RB_ROOT; 1799 cluster->root = RB_ROOT;
@@ -1744,14 +1804,38 @@ out:
1744 return 0; 1804 return 0;
1745} 1805}
1746 1806
1747void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 1807void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1748{ 1808{
1749 struct btrfs_free_space *info; 1809 struct btrfs_free_space *info;
1750 struct rb_node *node; 1810 struct rb_node *node;
1811
1812 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1813 info = rb_entry(node, struct btrfs_free_space, offset_index);
1814 unlink_free_space(ctl, info);
1815 kfree(info->bitmap);
1816 kmem_cache_free(btrfs_free_space_cachep, info);
1817 if (need_resched()) {
1818 spin_unlock(&ctl->tree_lock);
1819 cond_resched();
1820 spin_lock(&ctl->tree_lock);
1821 }
1822 }
1823}
1824
1825void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1826{
1827 spin_lock(&ctl->tree_lock);
1828 __btrfs_remove_free_space_cache_locked(ctl);
1829 spin_unlock(&ctl->tree_lock);
1830}
1831
1832void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1833{
1834 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1751 struct btrfs_free_cluster *cluster; 1835 struct btrfs_free_cluster *cluster;
1752 struct list_head *head; 1836 struct list_head *head;
1753 1837
1754 spin_lock(&block_group->tree_lock); 1838 spin_lock(&ctl->tree_lock);
1755 while ((head = block_group->cluster_list.next) != 1839 while ((head = block_group->cluster_list.next) !=
1756 &block_group->cluster_list) { 1840 &block_group->cluster_list) {
1757 cluster = list_entry(head, struct btrfs_free_cluster, 1841 cluster = list_entry(head, struct btrfs_free_cluster,
@@ -1760,60 +1844,46 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1760 WARN_ON(cluster->block_group != block_group); 1844 WARN_ON(cluster->block_group != block_group);
1761 __btrfs_return_cluster_to_free_space(block_group, cluster); 1845 __btrfs_return_cluster_to_free_space(block_group, cluster);
1762 if (need_resched()) { 1846 if (need_resched()) {
1763 spin_unlock(&block_group->tree_lock); 1847 spin_unlock(&ctl->tree_lock);
1764 cond_resched(); 1848 cond_resched();
1765 spin_lock(&block_group->tree_lock); 1849 spin_lock(&ctl->tree_lock);
1766 } 1850 }
1767 } 1851 }
1852 __btrfs_remove_free_space_cache_locked(ctl);
1853 spin_unlock(&ctl->tree_lock);
1768 1854
1769 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1770 info = rb_entry(node, struct btrfs_free_space, offset_index);
1771 if (!info->bitmap) {
1772 unlink_free_space(block_group, info);
1773 kmem_cache_free(btrfs_free_space_cachep, info);
1774 } else {
1775 free_bitmap(block_group, info);
1776 }
1777
1778 if (need_resched()) {
1779 spin_unlock(&block_group->tree_lock);
1780 cond_resched();
1781 spin_lock(&block_group->tree_lock);
1782 }
1783 }
1784
1785 spin_unlock(&block_group->tree_lock);
1786} 1855}
1787 1856
1788u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 1857u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1789 u64 offset, u64 bytes, u64 empty_size) 1858 u64 offset, u64 bytes, u64 empty_size)
1790{ 1859{
1860 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1791 struct btrfs_free_space *entry = NULL; 1861 struct btrfs_free_space *entry = NULL;
1792 u64 bytes_search = bytes + empty_size; 1862 u64 bytes_search = bytes + empty_size;
1793 u64 ret = 0; 1863 u64 ret = 0;
1794 1864
1795 spin_lock(&block_group->tree_lock); 1865 spin_lock(&ctl->tree_lock);
1796 entry = find_free_space(block_group, &offset, &bytes_search, 0); 1866 entry = find_free_space(ctl, &offset, &bytes_search);
1797 if (!entry) 1867 if (!entry)
1798 goto out; 1868 goto out;
1799 1869
1800 ret = offset; 1870 ret = offset;
1801 if (entry->bitmap) { 1871 if (entry->bitmap) {
1802 bitmap_clear_bits(block_group, entry, offset, bytes); 1872 bitmap_clear_bits(ctl, entry, offset, bytes);
1803 if (!entry->bytes) 1873 if (!entry->bytes)
1804 free_bitmap(block_group, entry); 1874 free_bitmap(ctl, entry);
1805 } else { 1875 } else {
1806 unlink_free_space(block_group, entry); 1876 unlink_free_space(ctl, entry);
1807 entry->offset += bytes; 1877 entry->offset += bytes;
1808 entry->bytes -= bytes; 1878 entry->bytes -= bytes;
1809 if (!entry->bytes) 1879 if (!entry->bytes)
1810 kmem_cache_free(btrfs_free_space_cachep, entry); 1880 kmem_cache_free(btrfs_free_space_cachep, entry);
1811 else 1881 else
1812 link_free_space(block_group, entry); 1882 link_free_space(ctl, entry);
1813 } 1883 }
1814 1884
1815out: 1885out:
1816 spin_unlock(&block_group->tree_lock); 1886 spin_unlock(&ctl->tree_lock);
1817 1887
1818 return ret; 1888 return ret;
1819} 1889}
@@ -1830,6 +1900,7 @@ int btrfs_return_cluster_to_free_space(
1830 struct btrfs_block_group_cache *block_group, 1900 struct btrfs_block_group_cache *block_group,
1831 struct btrfs_free_cluster *cluster) 1901 struct btrfs_free_cluster *cluster)
1832{ 1902{
1903 struct btrfs_free_space_ctl *ctl;
1833 int ret; 1904 int ret;
1834 1905
1835 /* first, get a safe pointer to the block group */ 1906 /* first, get a safe pointer to the block group */
@@ -1848,10 +1919,12 @@ int btrfs_return_cluster_to_free_space(
1848 atomic_inc(&block_group->count); 1919 atomic_inc(&block_group->count);
1849 spin_unlock(&cluster->lock); 1920 spin_unlock(&cluster->lock);
1850 1921
1922 ctl = block_group->free_space_ctl;
1923
1851 /* now return any extents the cluster had on it */ 1924 /* now return any extents the cluster had on it */
1852 spin_lock(&block_group->tree_lock); 1925 spin_lock(&ctl->tree_lock);
1853 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 1926 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1854 spin_unlock(&block_group->tree_lock); 1927 spin_unlock(&ctl->tree_lock);
1855 1928
1856 /* finally drop our ref */ 1929 /* finally drop our ref */
1857 btrfs_put_block_group(block_group); 1930 btrfs_put_block_group(block_group);
@@ -1863,6 +1936,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1863 struct btrfs_free_space *entry, 1936 struct btrfs_free_space *entry,
1864 u64 bytes, u64 min_start) 1937 u64 bytes, u64 min_start)
1865{ 1938{
1939 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1866 int err; 1940 int err;
1867 u64 search_start = cluster->window_start; 1941 u64 search_start = cluster->window_start;
1868 u64 search_bytes = bytes; 1942 u64 search_bytes = bytes;
@@ -1871,13 +1945,12 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1871 search_start = min_start; 1945 search_start = min_start;
1872 search_bytes = bytes; 1946 search_bytes = bytes;
1873 1947
1874 err = search_bitmap(block_group, entry, &search_start, 1948 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1875 &search_bytes);
1876 if (err) 1949 if (err)
1877 return 0; 1950 return 0;
1878 1951
1879 ret = search_start; 1952 ret = search_start;
1880 bitmap_clear_bits(block_group, entry, ret, bytes); 1953 bitmap_clear_bits(ctl, entry, ret, bytes);
1881 1954
1882 return ret; 1955 return ret;
1883} 1956}
@@ -1891,6 +1964,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1891 struct btrfs_free_cluster *cluster, u64 bytes, 1964 struct btrfs_free_cluster *cluster, u64 bytes,
1892 u64 min_start) 1965 u64 min_start)
1893{ 1966{
1967 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1894 struct btrfs_free_space *entry = NULL; 1968 struct btrfs_free_space *entry = NULL;
1895 struct rb_node *node; 1969 struct rb_node *node;
1896 u64 ret = 0; 1970 u64 ret = 0;
@@ -1910,8 +1984,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1910 while(1) { 1984 while(1) {
1911 if (entry->bytes < bytes || 1985 if (entry->bytes < bytes ||
1912 (!entry->bitmap && entry->offset < min_start)) { 1986 (!entry->bitmap && entry->offset < min_start)) {
1913 struct rb_node *node;
1914
1915 node = rb_next(&entry->offset_index); 1987 node = rb_next(&entry->offset_index);
1916 if (!node) 1988 if (!node)
1917 break; 1989 break;
@@ -1925,7 +1997,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1925 cluster, entry, bytes, 1997 cluster, entry, bytes,
1926 min_start); 1998 min_start);
1927 if (ret == 0) { 1999 if (ret == 0) {
1928 struct rb_node *node;
1929 node = rb_next(&entry->offset_index); 2000 node = rb_next(&entry->offset_index);
1930 if (!node) 2001 if (!node)
1931 break; 2002 break;
@@ -1951,20 +2022,20 @@ out:
1951 if (!ret) 2022 if (!ret)
1952 return 0; 2023 return 0;
1953 2024
1954 spin_lock(&block_group->tree_lock); 2025 spin_lock(&ctl->tree_lock);
1955 2026
1956 block_group->free_space -= bytes; 2027 ctl->free_space -= bytes;
1957 if (entry->bytes == 0) { 2028 if (entry->bytes == 0) {
1958 block_group->free_extents--; 2029 ctl->free_extents--;
1959 if (entry->bitmap) { 2030 if (entry->bitmap) {
1960 kfree(entry->bitmap); 2031 kfree(entry->bitmap);
1961 block_group->total_bitmaps--; 2032 ctl->total_bitmaps--;
1962 recalculate_thresholds(block_group); 2033 ctl->op->recalc_thresholds(ctl);
1963 } 2034 }
1964 kmem_cache_free(btrfs_free_space_cachep, entry); 2035 kmem_cache_free(btrfs_free_space_cachep, entry);
1965 } 2036 }
1966 2037
1967 spin_unlock(&block_group->tree_lock); 2038 spin_unlock(&ctl->tree_lock);
1968 2039
1969 return ret; 2040 return ret;
1970} 2041}
@@ -1974,6 +2045,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1974 struct btrfs_free_cluster *cluster, 2045 struct btrfs_free_cluster *cluster,
1975 u64 offset, u64 bytes, u64 min_bytes) 2046 u64 offset, u64 bytes, u64 min_bytes)
1976{ 2047{
2048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1977 unsigned long next_zero; 2049 unsigned long next_zero;
1978 unsigned long i; 2050 unsigned long i;
1979 unsigned long search_bits; 2051 unsigned long search_bits;
@@ -2028,7 +2100,7 @@ again:
2028 2100
2029 cluster->window_start = start * block_group->sectorsize + 2101 cluster->window_start = start * block_group->sectorsize +
2030 entry->offset; 2102 entry->offset;
2031 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2103 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2032 ret = tree_insert_offset(&cluster->root, entry->offset, 2104 ret = tree_insert_offset(&cluster->root, entry->offset,
2033 &entry->offset_index, 1); 2105 &entry->offset_index, 1);
2034 BUG_ON(ret); 2106 BUG_ON(ret);
@@ -2043,6 +2115,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2043 struct btrfs_free_cluster *cluster, 2115 struct btrfs_free_cluster *cluster,
2044 u64 offset, u64 bytes, u64 min_bytes) 2116 u64 offset, u64 bytes, u64 min_bytes)
2045{ 2117{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2046 struct btrfs_free_space *first = NULL; 2119 struct btrfs_free_space *first = NULL;
2047 struct btrfs_free_space *entry = NULL; 2120 struct btrfs_free_space *entry = NULL;
2048 struct btrfs_free_space *prev = NULL; 2121 struct btrfs_free_space *prev = NULL;
@@ -2053,7 +2126,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2053 u64 max_extent; 2126 u64 max_extent;
2054 u64 max_gap = 128 * 1024; 2127 u64 max_gap = 128 * 1024;
2055 2128
2056 entry = tree_search_offset(block_group, offset, 0, 1); 2129 entry = tree_search_offset(ctl, offset, 0, 1);
2057 if (!entry) 2130 if (!entry)
2058 return -ENOSPC; 2131 return -ENOSPC;
2059 2132
@@ -2119,7 +2192,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2119 if (entry->bitmap) 2192 if (entry->bitmap)
2120 continue; 2193 continue;
2121 2194
2122 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2195 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2123 ret = tree_insert_offset(&cluster->root, entry->offset, 2196 ret = tree_insert_offset(&cluster->root, entry->offset,
2124 &entry->offset_index, 0); 2197 &entry->offset_index, 0);
2125 BUG_ON(ret); 2198 BUG_ON(ret);
@@ -2138,16 +2211,15 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2138 struct btrfs_free_cluster *cluster, 2211 struct btrfs_free_cluster *cluster,
2139 u64 offset, u64 bytes, u64 min_bytes) 2212 u64 offset, u64 bytes, u64 min_bytes)
2140{ 2213{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2141 struct btrfs_free_space *entry; 2215 struct btrfs_free_space *entry;
2142 struct rb_node *node; 2216 struct rb_node *node;
2143 int ret = -ENOSPC; 2217 int ret = -ENOSPC;
2144 2218
2145 if (block_group->total_bitmaps == 0) 2219 if (ctl->total_bitmaps == 0)
2146 return -ENOSPC; 2220 return -ENOSPC;
2147 2221
2148 entry = tree_search_offset(block_group, 2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2149 offset_to_bitmap(block_group, offset),
2150 0, 1);
2151 if (!entry) 2223 if (!entry)
2152 return -ENOSPC; 2224 return -ENOSPC;
2153 2225
@@ -2180,6 +2252,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2180 struct btrfs_free_cluster *cluster, 2252 struct btrfs_free_cluster *cluster,
2181 u64 offset, u64 bytes, u64 empty_size) 2253 u64 offset, u64 bytes, u64 empty_size)
2182{ 2254{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2183 u64 min_bytes; 2256 u64 min_bytes;
2184 int ret; 2257 int ret;
2185 2258
@@ -2199,14 +2272,14 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2199 } else 2272 } else
2200 min_bytes = max(bytes, (bytes + empty_size) >> 2); 2273 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2201 2274
2202 spin_lock(&block_group->tree_lock); 2275 spin_lock(&ctl->tree_lock);
2203 2276
2204 /* 2277 /*
2205 * If we know we don't have enough space to make a cluster don't even 2278 * If we know we don't have enough space to make a cluster don't even
2206 * bother doing all the work to try and find one. 2279 * bother doing all the work to try and find one.
2207 */ 2280 */
2208 if (block_group->free_space < min_bytes) { 2281 if (ctl->free_space < min_bytes) {
2209 spin_unlock(&block_group->tree_lock); 2282 spin_unlock(&ctl->tree_lock);
2210 return -ENOSPC; 2283 return -ENOSPC;
2211 } 2284 }
2212 2285
@@ -2232,7 +2305,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2232 } 2305 }
2233out: 2306out:
2234 spin_unlock(&cluster->lock); 2307 spin_unlock(&cluster->lock);
2235 spin_unlock(&block_group->tree_lock); 2308 spin_unlock(&ctl->tree_lock);
2236 2309
2237 return ret; 2310 return ret;
2238} 2311}
@@ -2253,6 +2326,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2253int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2326int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2254 u64 *trimmed, u64 start, u64 end, u64 minlen) 2327 u64 *trimmed, u64 start, u64 end, u64 minlen)
2255{ 2328{
2329 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2256 struct btrfs_free_space *entry = NULL; 2330 struct btrfs_free_space *entry = NULL;
2257 struct btrfs_fs_info *fs_info = block_group->fs_info; 2331 struct btrfs_fs_info *fs_info = block_group->fs_info;
2258 u64 bytes = 0; 2332 u64 bytes = 0;
@@ -2262,52 +2336,50 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2262 *trimmed = 0; 2336 *trimmed = 0;
2263 2337
2264 while (start < end) { 2338 while (start < end) {
2265 spin_lock(&block_group->tree_lock); 2339 spin_lock(&ctl->tree_lock);
2266 2340
2267 if (block_group->free_space < minlen) { 2341 if (ctl->free_space < minlen) {
2268 spin_unlock(&block_group->tree_lock); 2342 spin_unlock(&ctl->tree_lock);
2269 break; 2343 break;
2270 } 2344 }
2271 2345
2272 entry = tree_search_offset(block_group, start, 0, 1); 2346 entry = tree_search_offset(ctl, start, 0, 1);
2273 if (!entry) 2347 if (!entry)
2274 entry = tree_search_offset(block_group, 2348 entry = tree_search_offset(ctl,
2275 offset_to_bitmap(block_group, 2349 offset_to_bitmap(ctl, start),
2276 start),
2277 1, 1); 2350 1, 1);
2278 2351
2279 if (!entry || entry->offset >= end) { 2352 if (!entry || entry->offset >= end) {
2280 spin_unlock(&block_group->tree_lock); 2353 spin_unlock(&ctl->tree_lock);
2281 break; 2354 break;
2282 } 2355 }
2283 2356
2284 if (entry->bitmap) { 2357 if (entry->bitmap) {
2285 ret = search_bitmap(block_group, entry, &start, &bytes); 2358 ret = search_bitmap(ctl, entry, &start, &bytes);
2286 if (!ret) { 2359 if (!ret) {
2287 if (start >= end) { 2360 if (start >= end) {
2288 spin_unlock(&block_group->tree_lock); 2361 spin_unlock(&ctl->tree_lock);
2289 break; 2362 break;
2290 } 2363 }
2291 bytes = min(bytes, end - start); 2364 bytes = min(bytes, end - start);
2292 bitmap_clear_bits(block_group, entry, 2365 bitmap_clear_bits(ctl, entry, start, bytes);
2293 start, bytes);
2294 if (entry->bytes == 0) 2366 if (entry->bytes == 0)
2295 free_bitmap(block_group, entry); 2367 free_bitmap(ctl, entry);
2296 } else { 2368 } else {
2297 start = entry->offset + BITS_PER_BITMAP * 2369 start = entry->offset + BITS_PER_BITMAP *
2298 block_group->sectorsize; 2370 block_group->sectorsize;
2299 spin_unlock(&block_group->tree_lock); 2371 spin_unlock(&ctl->tree_lock);
2300 ret = 0; 2372 ret = 0;
2301 continue; 2373 continue;
2302 } 2374 }
2303 } else { 2375 } else {
2304 start = entry->offset; 2376 start = entry->offset;
2305 bytes = min(entry->bytes, end - start); 2377 bytes = min(entry->bytes, end - start);
2306 unlink_free_space(block_group, entry); 2378 unlink_free_space(ctl, entry);
2307 kmem_cache_free(btrfs_free_space_cachep, entry); 2379 kmem_cache_free(btrfs_free_space_cachep, entry);
2308 } 2380 }
2309 2381
2310 spin_unlock(&block_group->tree_lock); 2382 spin_unlock(&ctl->tree_lock);
2311 2383
2312 if (bytes >= minlen) { 2384 if (bytes >= minlen) {
2313 int update_ret; 2385 int update_ret;
@@ -2319,8 +2391,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2319 bytes, 2391 bytes,
2320 &actually_trimmed); 2392 &actually_trimmed);
2321 2393
2322 btrfs_add_free_space(block_group, 2394 btrfs_add_free_space(block_group, start, bytes);
2323 start, bytes);
2324 if (!update_ret) 2395 if (!update_ret)
2325 btrfs_update_reserved_bytes(block_group, 2396 btrfs_update_reserved_bytes(block_group,
2326 bytes, 0, 1); 2397 bytes, 0, 1);
@@ -2342,3 +2413,145 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2342 2413
2343 return ret; 2414 return ret;
2344} 2415}
2416
2417/*
2418 * Find the left-most item in the cache tree, and then return the
2419 * smallest inode number in the item.
2420 *
2421 * Note: the returned inode number may not be the smallest one in
2422 * the tree, if the left-most item is a bitmap.
2423 */
2424u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2425{
2426 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2427 struct btrfs_free_space *entry = NULL;
2428 u64 ino = 0;
2429
2430 spin_lock(&ctl->tree_lock);
2431
2432 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2433 goto out;
2434
2435 entry = rb_entry(rb_first(&ctl->free_space_offset),
2436 struct btrfs_free_space, offset_index);
2437
2438 if (!entry->bitmap) {
2439 ino = entry->offset;
2440
2441 unlink_free_space(ctl, entry);
2442 entry->offset++;
2443 entry->bytes--;
2444 if (!entry->bytes)
2445 kmem_cache_free(btrfs_free_space_cachep, entry);
2446 else
2447 link_free_space(ctl, entry);
2448 } else {
2449 u64 offset = 0;
2450 u64 count = 1;
2451 int ret;
2452
2453 ret = search_bitmap(ctl, entry, &offset, &count);
2454 BUG_ON(ret);
2455
2456 ino = offset;
2457 bitmap_clear_bits(ctl, entry, offset, 1);
2458 if (entry->bytes == 0)
2459 free_bitmap(ctl, entry);
2460 }
2461out:
2462 spin_unlock(&ctl->tree_lock);
2463
2464 return ino;
2465}
2466
2467struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2468 struct btrfs_path *path)
2469{
2470 struct inode *inode = NULL;
2471
2472 spin_lock(&root->cache_lock);
2473 if (root->cache_inode)
2474 inode = igrab(root->cache_inode);
2475 spin_unlock(&root->cache_lock);
2476 if (inode)
2477 return inode;
2478
2479 inode = __lookup_free_space_inode(root, path, 0);
2480 if (IS_ERR(inode))
2481 return inode;
2482
2483 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing)
2485 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock);
2487
2488 return inode;
2489}
2490
2491int create_free_ino_inode(struct btrfs_root *root,
2492 struct btrfs_trans_handle *trans,
2493 struct btrfs_path *path)
2494{
2495 return __create_free_space_inode(root, trans, path,
2496 BTRFS_FREE_INO_OBJECTID, 0);
2497}
2498
2499int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2500{
2501 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2502 struct btrfs_path *path;
2503 struct inode *inode;
2504 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item);
2506
2507 /*
2508 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock.
2510 */
2511 smp_mb();
2512 if (fs_info->closing)
2513 return 0;
2514
2515 path = btrfs_alloc_path();
2516 if (!path)
2517 return 0;
2518
2519 inode = lookup_free_ino_inode(root, path);
2520 if (IS_ERR(inode))
2521 goto out;
2522
2523 if (root_gen != BTRFS_I(inode)->generation)
2524 goto out_put;
2525
2526 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2527
2528 if (ret < 0)
2529 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2530 "root %llu\n", root->root_key.objectid);
2531out_put:
2532 iput(inode);
2533out:
2534 btrfs_free_path(path);
2535 return ret;
2536}
2537
2538int btrfs_write_out_ino_cache(struct btrfs_root *root,
2539 struct btrfs_trans_handle *trans,
2540 struct btrfs_path *path)
2541{
2542 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2543 struct inode *inode;
2544 int ret;
2545
2546 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode))
2548 return 0;
2549
2550 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2551 if (ret < 0)
2552 printk(KERN_ERR "btrfs: failed to write free ino cache "
2553 "for root %llu\n", root->root_key.objectid);
2554
2555 iput(inode);
2556 return ret;
2557}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 65c3b935289f..8f2613f779ed 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,25 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct btrfs_free_space_ctl {
31 spinlock_t tree_lock;
32 struct rb_root free_space_offset;
33 u64 free_space;
34 int extents_thresh;
35 int free_extents;
36 int total_bitmaps;
37 int unit;
38 u64 start;
39 struct btrfs_free_space_op *op;
40 void *private;
41};
42
43struct btrfs_free_space_op {
44 void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
45 bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
46 struct btrfs_free_space *info);
47};
48
30struct inode *lookup_free_space_inode(struct btrfs_root *root, 49struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache 50 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path); 51 *block_group, struct btrfs_path *path);
@@ -45,17 +64,38 @@ int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group, 65 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path); 66 struct btrfs_path *path);
48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 67
49 u64 bytenr, u64 size); 68struct inode *lookup_free_ino_inode(struct btrfs_root *root,
69 struct btrfs_path *path);
70int create_free_ino_inode(struct btrfs_root *root,
71 struct btrfs_trans_handle *trans,
72 struct btrfs_path *path);
73int load_free_ino_cache(struct btrfs_fs_info *fs_info,
74 struct btrfs_root *root);
75int btrfs_write_out_ino_cache(struct btrfs_root *root,
76 struct btrfs_trans_handle *trans,
77 struct btrfs_path *path);
78
79void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
80int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
81 u64 bytenr, u64 size);
82static inline int
83btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
84 u64 bytenr, u64 size)
85{
86 return __btrfs_add_free_space(block_group->free_space_ctl,
87 bytenr, size);
88}
50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 89int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
51 u64 bytenr, u64 size); 90 u64 bytenr, u64 size);
91void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
52void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 92void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
53 *block_group); 93 *block_group);
54u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 94u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
55 u64 offset, u64 bytes, u64 empty_size); 95 u64 offset, u64 bytes, u64 empty_size);
96u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
56void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 97void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
57 u64 bytes); 98 u64 bytes);
58u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
59int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 99int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 100 struct btrfs_root *root,
61 struct btrfs_block_group_cache *block_group, 101 struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 64f1150bb48d..baa74f3db691 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -130,7 +130,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
130 item_size - (ptr + sub_item_len - item_start)); 130 item_size - (ptr + sub_item_len - item_start));
131 ret = btrfs_truncate_item(trans, root, path, 131 ret = btrfs_truncate_item(trans, root, path,
132 item_size - sub_item_len, 1); 132 item_size - sub_item_len, 1);
133 BUG_ON(ret);
134out: 133out:
135 btrfs_free_path(path); 134 btrfs_free_path(path);
136 return ret; 135 return ret;
@@ -167,7 +166,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
167 166
168 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 167 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
169 ret = btrfs_extend_item(trans, root, path, ins_len); 168 ret = btrfs_extend_item(trans, root, path, ins_len);
170 BUG_ON(ret);
171 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 169 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
172 struct btrfs_inode_ref); 170 struct btrfs_inode_ref);
173 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 171 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c05a08f4c411..3262cd17a12f 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -16,11 +16,446 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/delay.h>
20#include <linux/kthread.h>
21#include <linux/pagemap.h>
22
19#include "ctree.h" 23#include "ctree.h"
20#include "disk-io.h" 24#include "disk-io.h"
25#include "free-space-cache.h"
26#include "inode-map.h"
21#include "transaction.h" 27#include "transaction.h"
22 28
23int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) 29static int caching_kthread(void *data)
30{
31 struct btrfs_root *root = data;
32 struct btrfs_fs_info *fs_info = root->fs_info;
33 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
34 struct btrfs_key key;
35 struct btrfs_path *path;
36 struct extent_buffer *leaf;
37 u64 last = (u64)-1;
38 int slot;
39 int ret;
40
41 path = btrfs_alloc_path();
42 if (!path)
43 return -ENOMEM;
44
45 /* Since the commit root is read-only, we can safely skip locking. */
46 path->skip_locking = 1;
47 path->search_commit_root = 1;
48 path->reada = 2;
49
50 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
51 key.offset = 0;
52 key.type = BTRFS_INODE_ITEM_KEY;
53again:
54 /* need to make sure the commit_root doesn't disappear */
55 mutex_lock(&root->fs_commit_mutex);
56
57 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
58 if (ret < 0)
59 goto out;
60
61 while (1) {
62 smp_mb();
63 if (fs_info->closing)
64 goto out;
65
66 leaf = path->nodes[0];
67 slot = path->slots[0];
68 if (slot >= btrfs_header_nritems(leaf)) {
69 ret = btrfs_next_leaf(root, path);
70 if (ret < 0)
71 goto out;
72 else if (ret > 0)
73 break;
74
75 if (need_resched() ||
76 btrfs_transaction_in_commit(fs_info)) {
77 leaf = path->nodes[0];
78
79 if (btrfs_header_nritems(leaf) == 0) {
80 WARN_ON(1);
81 break;
82 }
83
84 /*
 85 * Save the key so we can advance forward
86 * in the next search.
87 */
88 btrfs_item_key_to_cpu(leaf, &key, 0);
89 btrfs_release_path(path);
90 root->cache_progress = last;
91 mutex_unlock(&root->fs_commit_mutex);
92 schedule_timeout(1);
93 goto again;
94 } else
95 continue;
96 }
97
98 btrfs_item_key_to_cpu(leaf, &key, slot);
99
100 if (key.type != BTRFS_INODE_ITEM_KEY)
101 goto next;
102
103 if (key.objectid >= root->highest_objectid)
104 break;
105
106 if (last != (u64)-1 && last + 1 != key.objectid) {
107 __btrfs_add_free_space(ctl, last + 1,
108 key.objectid - last - 1);
109 wake_up(&root->cache_wait);
110 }
111
112 last = key.objectid;
113next:
114 path->slots[0]++;
115 }
116
117 if (last < root->highest_objectid - 1) {
118 __btrfs_add_free_space(ctl, last + 1,
119 root->highest_objectid - last - 1);
120 }
121
122 spin_lock(&root->cache_lock);
123 root->cached = BTRFS_CACHE_FINISHED;
124 spin_unlock(&root->cache_lock);
125
126 root->cache_progress = (u64)-1;
127 btrfs_unpin_free_ino(root);
128out:
129 wake_up(&root->cache_wait);
130 mutex_unlock(&root->fs_commit_mutex);
131
132 btrfs_free_path(path);
133
134 return ret;
135}
136
137static void start_caching(struct btrfs_root *root)
138{
139 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
140 struct task_struct *tsk;
141 int ret;
142 u64 objectid;
143
144 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock);
147 return;
148 }
149
150 root->cached = BTRFS_CACHE_STARTED;
151 spin_unlock(&root->cache_lock);
152
153 ret = load_free_ino_cache(root->fs_info, root);
154 if (ret == 1) {
155 spin_lock(&root->cache_lock);
156 root->cached = BTRFS_CACHE_FINISHED;
157 spin_unlock(&root->cache_lock);
158 return;
159 }
160
161 /*
162 * It can be quite time-consuming to fill the cache by searching
163 * through the extent tree, and this can keep ino allocation path
164 * waiting. Therefore at start we quickly find out the highest
165 * inode number and we know we can use inode numbers which fall in
166 * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
167 */
168 ret = btrfs_find_free_objectid(root, &objectid);
169 if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
170 __btrfs_add_free_space(ctl, objectid,
171 BTRFS_LAST_FREE_OBJECTID - objectid + 1);
172 }
173
174 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
175 root->root_key.objectid);
176 BUG_ON(IS_ERR(tsk));
177}
178
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{
181again:
182 *objectid = btrfs_find_ino_for_alloc(root);
183
184 if (*objectid != 0)
185 return 0;
186
187 start_caching(root);
188
189 wait_event(root->cache_wait,
190 root->cached == BTRFS_CACHE_FINISHED ||
191 root->free_ino_ctl->free_space > 0);
192
193 if (root->cached == BTRFS_CACHE_FINISHED &&
194 root->free_ino_ctl->free_space == 0)
195 return -ENOSPC;
196 else
197 goto again;
198}
199
200void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
204again:
205 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1);
207 } else {
208 /*
209 * If we are in the process of caching free ino chunks,
210 * to avoid adding the same inode number to the free_ino
211 * tree twice due to cross transaction, we'll leave it
212 * in the pinned tree until a transaction is committed
213 * or the caching work is done.
214 */
215
216 mutex_lock(&root->fs_commit_mutex);
217 spin_lock(&root->cache_lock);
218 if (root->cached == BTRFS_CACHE_FINISHED) {
219 spin_unlock(&root->cache_lock);
220 mutex_unlock(&root->fs_commit_mutex);
221 goto again;
222 }
223 spin_unlock(&root->cache_lock);
224
225 start_caching(root);
226
227 if (objectid <= root->cache_progress ||
228 objectid > root->highest_objectid)
229 __btrfs_add_free_space(ctl, objectid, 1);
230 else
231 __btrfs_add_free_space(pinned, objectid, 1);
232
233 mutex_unlock(&root->fs_commit_mutex);
234 }
235}
236
237/*
238 * When a transaction is committed, we'll move those inode numbers which
239 * are smaller than root->cache_progress from pinned tree to free_ino tree,
240 * and others will just be dropped, because the commit root we were
241 * searching has changed.
242 *
243 * Must be called with root->fs_commit_mutex held
244 */
245void btrfs_unpin_free_ino(struct btrfs_root *root)
246{
247 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
248 struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
249 struct btrfs_free_space *info;
250 struct rb_node *n;
251 u64 count;
252
253 while (1) {
254 n = rb_first(rbroot);
255 if (!n)
256 break;
257
258 info = rb_entry(n, struct btrfs_free_space, offset_index);
259 BUG_ON(info->bitmap);
260
261 if (info->offset > root->cache_progress)
262 goto free;
263 else if (info->offset + info->bytes > root->cache_progress)
264 count = root->cache_progress - info->offset + 1;
265 else
266 count = info->bytes;
267
268 __btrfs_add_free_space(ctl, info->offset, count);
269free:
270 rb_erase(&info->offset_index, rbroot);
271 kfree(info);
272 }
273}
274
275#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
276#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
277
278/*
 279 * The goal is to keep the memory used by the free_ino tree from
 280 * exceeding what we would use if we stored everything in bitmaps.
281 */
282static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
283{
284 struct btrfs_free_space *info;
285 struct rb_node *n;
286 int max_ino;
287 int max_bitmaps;
288
289 n = rb_last(&ctl->free_space_offset);
290 if (!n) {
291 ctl->extents_thresh = INIT_THRESHOLD;
292 return;
293 }
294 info = rb_entry(n, struct btrfs_free_space, offset_index);
295
296 /*
297 * Find the maximum inode number in the filesystem. Note we
298 * ignore the fact that this can be a bitmap, because we are
299 * not doing precise calculation.
300 */
301 max_ino = info->bytes - 1;
302
303 max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
304 if (max_bitmaps <= ctl->total_bitmaps) {
305 ctl->extents_thresh = 0;
306 return;
307 }
308
309 ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
310 PAGE_CACHE_SIZE / sizeof(*info);
311}
312
313/*
 314 * We don't fall back to bitmaps if we are below the extents threshold
315 * or this chunk of inode numbers is a big one.
316 */
317static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
318 struct btrfs_free_space *info)
319{
320 if (ctl->free_extents < ctl->extents_thresh ||
321 info->bytes > INODES_PER_BITMAP / 10)
322 return false;
323
324 return true;
325}
326
327static struct btrfs_free_space_op free_ino_op = {
328 .recalc_thresholds = recalculate_thresholds,
329 .use_bitmap = use_bitmap,
330};
331
332static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
333{
334}
335
336static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
337 struct btrfs_free_space *info)
338{
339 /*
340 * We always use extents for two reasons:
341 *
342 * - The pinned tree is only used during the process of caching
343 * work.
344 * - Make code simpler. See btrfs_unpin_free_ino().
345 */
346 return false;
347}
348
349static struct btrfs_free_space_op pinned_free_ino_op = {
350 .recalc_thresholds = pinned_recalc_thresholds,
351 .use_bitmap = pinned_use_bitmap,
352};
353
354void btrfs_init_free_ino_ctl(struct btrfs_root *root)
355{
356 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
357 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
358
359 spin_lock_init(&ctl->tree_lock);
360 ctl->unit = 1;
361 ctl->start = 0;
362 ctl->private = NULL;
363 ctl->op = &free_ino_op;
364
365 /*
 366 * Initially we allow using 16K of RAM to cache chunks of
367 * inode numbers before we resort to bitmaps. This is somewhat
368 * arbitrary, but it will be adjusted in runtime.
369 */
370 ctl->extents_thresh = INIT_THRESHOLD;
371
372 spin_lock_init(&pinned->tree_lock);
373 pinned->unit = 1;
374 pinned->start = 0;
375 pinned->private = NULL;
376 pinned->extents_thresh = 0;
377 pinned->op = &pinned_free_ino_op;
378}
379
380int btrfs_save_ino_cache(struct btrfs_root *root,
381 struct btrfs_trans_handle *trans)
382{
383 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
384 struct btrfs_path *path;
385 struct inode *inode;
386 u64 alloc_hint = 0;
387 int ret;
388 int prealloc;
389 bool retry = false;
390
391 path = btrfs_alloc_path();
392 if (!path)
393 return -ENOMEM;
394again:
395 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
397 ret = PTR_ERR(inode);
398 goto out;
399 }
400
401 if (IS_ERR(inode)) {
402 BUG_ON(retry);
403 retry = true;
404
405 ret = create_free_ino_inode(root, trans, path);
406 if (ret)
407 goto out;
408 goto again;
409 }
410
411 BTRFS_I(inode)->generation = 0;
412 ret = btrfs_update_inode(trans, root, inode);
413 WARN_ON(ret);
414
415 if (i_size_read(inode) > 0) {
416 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
417 if (ret)
418 goto out_put;
419 }
420
421 spin_lock(&root->cache_lock);
422 if (root->cached != BTRFS_CACHE_FINISHED) {
423 ret = -1;
424 spin_unlock(&root->cache_lock);
425 goto out_put;
426 }
427 spin_unlock(&root->cache_lock);
428
429 spin_lock(&ctl->tree_lock);
430 prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
431 prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
432 prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
433 spin_unlock(&ctl->tree_lock);
434
435 /* Just to make sure we have enough space */
436 prealloc += 8 * PAGE_CACHE_SIZE;
437
438 ret = btrfs_check_data_free_space(inode, prealloc);
439 if (ret)
440 goto out_put;
441
442 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
443 prealloc, prealloc, &alloc_hint);
444 if (ret)
445 goto out_put;
446 btrfs_free_reserved_data_space(inode, prealloc);
447
448out_put:
449 iput(inode);
450out:
451 if (ret == 0)
452 ret = btrfs_write_out_ino_cache(root, trans, path);
453
454 btrfs_free_path(path);
455 return ret;
456}
457
458static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
24{ 459{
25 struct btrfs_path *path; 460 struct btrfs_path *path;
26 int ret; 461 int ret;
@@ -55,15 +490,14 @@ error:
55 return ret; 490 return ret;
56} 491}
57 492
58int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 493int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
59 struct btrfs_root *root,
60 u64 dirid, u64 *objectid)
61{ 494{
62 int ret; 495 int ret;
63 mutex_lock(&root->objectid_mutex); 496 mutex_lock(&root->objectid_mutex);
64 497
65 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { 498 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
66 ret = btrfs_find_highest_inode(root, &root->highest_objectid); 499 ret = btrfs_find_highest_objectid(root,
500 &root->highest_objectid);
67 if (ret) 501 if (ret)
68 goto out; 502 goto out;
69 } 503 }
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h
new file mode 100644
index 000000000000..ddb347bfee23
--- /dev/null
+++ b/fs/btrfs/inode-map.h
@@ -0,0 +1,13 @@
1#ifndef __BTRFS_INODE_MAP
2#define __BTRFS_INODE_MAP
3
4void btrfs_init_free_ino_ctl(struct btrfs_root *root);
5void btrfs_unpin_free_ino(struct btrfs_root *root);
6void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
7int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
8int btrfs_save_ino_cache(struct btrfs_root *root,
9 struct btrfs_trans_handle *trans);
10
11int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
12
13#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7cd8ab0ef04d..bb51bb1fa44f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h>
40#include "compat.h" 41#include "compat.h"
41#include "ctree.h" 42#include "ctree.h"
42#include "disk-io.h" 43#include "disk-io.h"
@@ -51,6 +52,7 @@
51#include "compression.h" 52#include "compression.h"
52#include "locking.h" 53#include "locking.h"
53#include "free-space-cache.h" 54#include "free-space-cache.h"
55#include "inode-map.h"
54 56
55struct btrfs_iget_args { 57struct btrfs_iget_args {
56 u64 ino; 58 u64 ino;
@@ -138,7 +140,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 path->leave_spinning = 1; 140 path->leave_spinning = 1;
139 btrfs_set_trans_block_group(trans, inode); 141 btrfs_set_trans_block_group(trans, inode);
140 142
141 key.objectid = inode->i_ino; 143 key.objectid = btrfs_ino(inode);
142 key.offset = start; 144 key.offset = start;
143 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 145 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
144 datasize = btrfs_file_extent_calc_inline_size(cur_size); 146 datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -340,6 +342,10 @@ static noinline int compress_file_range(struct inode *inode,
340 int will_compress; 342 int will_compress;
341 int compress_type = root->fs_info->compress_type; 343 int compress_type = root->fs_info->compress_type;
342 344
345 /* if this is a small write inside eof, kick off a defragbot */
346 if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
347 btrfs_add_inode_defrag(NULL, inode);
348
343 actual_end = min_t(u64, isize, end + 1); 349 actual_end = min_t(u64, isize, end + 1);
344again: 350again:
345 will_compress = 0; 351 will_compress = 0;
@@ -649,7 +655,7 @@ retry:
649 async_extent->start + 655 async_extent->start +
650 async_extent->ram_size - 1, 0); 656 async_extent->ram_size - 1, 0);
651 657
652 em = alloc_extent_map(GFP_NOFS); 658 em = alloc_extent_map();
653 BUG_ON(!em); 659 BUG_ON(!em);
654 em->start = async_extent->start; 660 em->start = async_extent->start;
655 em->len = async_extent->ram_size; 661 em->len = async_extent->ram_size;
@@ -745,6 +751,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
745 return alloc_hint; 751 return alloc_hint;
746} 752}
747 753
754static inline bool is_free_space_inode(struct btrfs_root *root,
755 struct inode *inode)
756{
757 if (root == root->fs_info->tree_root ||
758 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
759 return true;
760 return false;
761}
762
748/* 763/*
749 * when extent_io.c finds a delayed allocation range in the file, 764 * when extent_io.c finds a delayed allocation range in the file,
750 * the call backs end up in this code. The basic idea is to 765 * the call backs end up in this code. The basic idea is to
@@ -777,7 +792,7 @@ static noinline int cow_file_range(struct inode *inode,
777 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 792 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
778 int ret = 0; 793 int ret = 0;
779 794
780 BUG_ON(root == root->fs_info->tree_root); 795 BUG_ON(is_free_space_inode(root, inode));
781 trans = btrfs_join_transaction(root, 1); 796 trans = btrfs_join_transaction(root, 1);
782 BUG_ON(IS_ERR(trans)); 797 BUG_ON(IS_ERR(trans));
783 btrfs_set_trans_block_group(trans, inode); 798 btrfs_set_trans_block_group(trans, inode);
@@ -788,6 +803,10 @@ static noinline int cow_file_range(struct inode *inode,
788 disk_num_bytes = num_bytes; 803 disk_num_bytes = num_bytes;
789 ret = 0; 804 ret = 0;
790 805
806 /* if this is a small write inside eof, kick off defrag */
807 if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024)
808 btrfs_add_inode_defrag(trans, inode);
809
791 if (start == 0) { 810 if (start == 0) {
792 /* lets try to make an inline extent */ 811 /* lets try to make an inline extent */
793 ret = cow_file_range_inline(trans, root, inode, 812 ret = cow_file_range_inline(trans, root, inode,
@@ -826,7 +845,7 @@ static noinline int cow_file_range(struct inode *inode,
826 (u64)-1, &ins, 1); 845 (u64)-1, &ins, 1);
827 BUG_ON(ret); 846 BUG_ON(ret);
828 847
829 em = alloc_extent_map(GFP_NOFS); 848 em = alloc_extent_map();
830 BUG_ON(!em); 849 BUG_ON(!em);
831 em->start = start; 850 em->start = start;
832 em->orig_start = em->start; 851 em->orig_start = em->start;
@@ -1008,7 +1027,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
1008 LIST_HEAD(list); 1027 LIST_HEAD(list);
1009 1028
1010 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, 1029 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1011 bytenr + num_bytes - 1, &list); 1030 bytenr + num_bytes - 1, &list, 0);
1012 if (ret == 0 && list_empty(&list)) 1031 if (ret == 0 && list_empty(&list))
1013 return 0; 1032 return 0;
1014 1033
@@ -1049,29 +1068,31 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1049 int type; 1068 int type;
1050 int nocow; 1069 int nocow;
1051 int check_prev = 1; 1070 int check_prev = 1;
1052 bool nolock = false; 1071 bool nolock;
1072 u64 ino = btrfs_ino(inode);
1053 1073
1054 path = btrfs_alloc_path(); 1074 path = btrfs_alloc_path();
1055 BUG_ON(!path); 1075 BUG_ON(!path);
1056 if (root == root->fs_info->tree_root) { 1076
1057 nolock = true; 1077 nolock = is_free_space_inode(root, inode);
1078
1079 if (nolock)
1058 trans = btrfs_join_transaction_nolock(root, 1); 1080 trans = btrfs_join_transaction_nolock(root, 1);
1059 } else { 1081 else
1060 trans = btrfs_join_transaction(root, 1); 1082 trans = btrfs_join_transaction(root, 1);
1061 }
1062 BUG_ON(IS_ERR(trans)); 1083 BUG_ON(IS_ERR(trans));
1063 1084
1064 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1065 cur_offset = start; 1086 cur_offset = start;
1066 while (1) { 1087 while (1) {
1067 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 1088 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1068 cur_offset, 0); 1089 cur_offset, 0);
1069 BUG_ON(ret < 0); 1090 BUG_ON(ret < 0);
1070 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1091 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1071 leaf = path->nodes[0]; 1092 leaf = path->nodes[0];
1072 btrfs_item_key_to_cpu(leaf, &found_key, 1093 btrfs_item_key_to_cpu(leaf, &found_key,
1073 path->slots[0] - 1); 1094 path->slots[0] - 1);
1074 if (found_key.objectid == inode->i_ino && 1095 if (found_key.objectid == ino &&
1075 found_key.type == BTRFS_EXTENT_DATA_KEY) 1096 found_key.type == BTRFS_EXTENT_DATA_KEY)
1076 path->slots[0]--; 1097 path->slots[0]--;
1077 } 1098 }
@@ -1092,7 +1113,7 @@ next_slot:
1092 num_bytes = 0; 1113 num_bytes = 0;
1093 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1114 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1094 1115
1095 if (found_key.objectid > inode->i_ino || 1116 if (found_key.objectid > ino ||
1096 found_key.type > BTRFS_EXTENT_DATA_KEY || 1117 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1097 found_key.offset > end) 1118 found_key.offset > end)
1098 break; 1119 break;
@@ -1127,7 +1148,7 @@ next_slot:
1127 goto out_check; 1148 goto out_check;
1128 if (btrfs_extent_readonly(root, disk_bytenr)) 1149 if (btrfs_extent_readonly(root, disk_bytenr))
1129 goto out_check; 1150 goto out_check;
1130 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1151 if (btrfs_cross_ref_exist(trans, root, ino,
1131 found_key.offset - 1152 found_key.offset -
1132 extent_offset, disk_bytenr)) 1153 extent_offset, disk_bytenr))
1133 goto out_check; 1154 goto out_check;
@@ -1164,7 +1185,7 @@ out_check:
1164 goto next_slot; 1185 goto next_slot;
1165 } 1186 }
1166 1187
1167 btrfs_release_path(root, path); 1188 btrfs_release_path(path);
1168 if (cow_start != (u64)-1) { 1189 if (cow_start != (u64)-1) {
1169 ret = cow_file_range(inode, locked_page, cow_start, 1190 ret = cow_file_range(inode, locked_page, cow_start,
1170 found_key.offset - 1, page_started, 1191 found_key.offset - 1, page_started,
@@ -1177,7 +1198,7 @@ out_check:
1177 struct extent_map *em; 1198 struct extent_map *em;
1178 struct extent_map_tree *em_tree; 1199 struct extent_map_tree *em_tree;
1179 em_tree = &BTRFS_I(inode)->extent_tree; 1200 em_tree = &BTRFS_I(inode)->extent_tree;
1180 em = alloc_extent_map(GFP_NOFS); 1201 em = alloc_extent_map();
1181 BUG_ON(!em); 1202 BUG_ON(!em);
1182 em->start = cur_offset; 1203 em->start = cur_offset;
1183 em->orig_start = em->start; 1204 em->orig_start = em->start;
@@ -1222,7 +1243,7 @@ out_check:
1222 if (cur_offset > end) 1243 if (cur_offset > end)
1223 break; 1244 break;
1224 } 1245 }
1225 btrfs_release_path(root, path); 1246 btrfs_release_path(path);
1226 1247
1227 if (cur_offset <= end && cow_start == (u64)-1) 1248 if (cur_offset <= end && cow_start == (u64)-1)
1228 cow_start = cur_offset; 1249 cow_start = cur_offset;
@@ -1310,14 +1331,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1310 1331
1311 /* 1332 /*
1312 * set_bit and clear bit hooks normally require _irqsave/restore 1333 * set_bit and clear bit hooks normally require _irqsave/restore
1313 * but in this case, we are only testeing for the DELALLOC 1334 * but in this case, we are only testing for the DELALLOC
1314 * bit, which is only set or cleared with irqs on 1335 * bit, which is only set or cleared with irqs on
1315 */ 1336 */
1316 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1337 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1317 struct btrfs_root *root = BTRFS_I(inode)->root; 1338 struct btrfs_root *root = BTRFS_I(inode)->root;
1318 u64 len = state->end + 1 - state->start; 1339 u64 len = state->end + 1 - state->start;
1319 int do_list = (root->root_key.objectid != 1340 bool do_list = !is_free_space_inode(root, inode);
1320 BTRFS_ROOT_TREE_OBJECTID);
1321 1341
1322 if (*bits & EXTENT_FIRST_DELALLOC) 1342 if (*bits & EXTENT_FIRST_DELALLOC)
1323 *bits &= ~EXTENT_FIRST_DELALLOC; 1343 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1344,14 +1364,13 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1344{ 1364{
1345 /* 1365 /*
1346 * set_bit and clear bit hooks normally require _irqsave/restore 1366 * set_bit and clear bit hooks normally require _irqsave/restore
1347 * but in this case, we are only testeing for the DELALLOC 1367 * but in this case, we are only testing for the DELALLOC
1348 * bit, which is only set or cleared with irqs on 1368 * bit, which is only set or cleared with irqs on
1349 */ 1369 */
1350 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1370 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1351 struct btrfs_root *root = BTRFS_I(inode)->root; 1371 struct btrfs_root *root = BTRFS_I(inode)->root;
1352 u64 len = state->end + 1 - state->start; 1372 u64 len = state->end + 1 - state->start;
1353 int do_list = (root->root_key.objectid != 1373 bool do_list = !is_free_space_inode(root, inode);
1354 BTRFS_ROOT_TREE_OBJECTID);
1355 1374
1356 if (*bits & EXTENT_FIRST_DELALLOC) 1375 if (*bits & EXTENT_FIRST_DELALLOC)
1357 *bits &= ~EXTENT_FIRST_DELALLOC; 1376 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1458,7 +1477,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1458 1477
1459 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1478 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1460 1479
1461 if (root == root->fs_info->tree_root) 1480 if (is_free_space_inode(root, inode))
1462 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1481 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1463 else 1482 else
1464 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1483 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1644,7 +1663,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1644 &hint, 0); 1663 &hint, 0);
1645 BUG_ON(ret); 1664 BUG_ON(ret);
1646 1665
1647 ins.objectid = inode->i_ino; 1666 ins.objectid = btrfs_ino(inode);
1648 ins.offset = file_pos; 1667 ins.offset = file_pos;
1649 ins.type = BTRFS_EXTENT_DATA_KEY; 1668 ins.type = BTRFS_EXTENT_DATA_KEY;
1650 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1669 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
@@ -1675,7 +1694,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1675 ins.type = BTRFS_EXTENT_ITEM_KEY; 1694 ins.type = BTRFS_EXTENT_ITEM_KEY;
1676 ret = btrfs_alloc_reserved_file_extent(trans, root, 1695 ret = btrfs_alloc_reserved_file_extent(trans, root,
1677 root->root_key.objectid, 1696 root->root_key.objectid,
1678 inode->i_ino, file_pos, &ins); 1697 btrfs_ino(inode), file_pos, &ins);
1679 BUG_ON(ret); 1698 BUG_ON(ret);
1680 btrfs_free_path(path); 1699 btrfs_free_path(path);
1681 1700
@@ -1701,7 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1701 struct extent_state *cached_state = NULL; 1720 struct extent_state *cached_state = NULL;
1702 int compress_type = 0; 1721 int compress_type = 0;
1703 int ret; 1722 int ret;
1704 bool nolock = false; 1723 bool nolock;
1705 1724
1706 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1725 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1707 end - start + 1); 1726 end - start + 1);
@@ -1709,7 +1728,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1709 return 0; 1728 return 0;
1710 BUG_ON(!ordered_extent); 1729 BUG_ON(!ordered_extent);
1711 1730
1712 nolock = (root == root->fs_info->tree_root); 1731 nolock = is_free_space_inode(root, inode);
1713 1732
1714 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1733 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1715 BUG_ON(!list_empty(&ordered_extent->list)); 1734 BUG_ON(!list_empty(&ordered_extent->list));
@@ -1855,7 +1874,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1855 } 1874 }
1856 read_unlock(&em_tree->lock); 1875 read_unlock(&em_tree->lock);
1857 1876
1858 if (!em || IS_ERR(em)) { 1877 if (IS_ERR_OR_NULL(em)) {
1859 kfree(failrec); 1878 kfree(failrec);
1860 return -EIO; 1879 return -EIO;
1861 } 1880 }
@@ -2004,12 +2023,11 @@ good:
2004 return 0; 2023 return 0;
2005 2024
2006zeroit: 2025zeroit:
2007 if (printk_ratelimit()) { 2026 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
2008 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 2027 "private %llu\n",
2009 "private %llu\n", page->mapping->host->i_ino, 2028 (unsigned long long)btrfs_ino(page->mapping->host),
2010 (unsigned long long)start, csum, 2029 (unsigned long long)start, csum,
2011 (unsigned long long)private); 2030 (unsigned long long)private);
2012 }
2013 memset(kaddr + offset, 1, end - start + 1); 2031 memset(kaddr + offset, 1, end - start + 1);
2014 flush_dcache_page(page); 2032 flush_dcache_page(page);
2015 kunmap_atomic(kaddr, KM_USER0); 2033 kunmap_atomic(kaddr, KM_USER0);
@@ -2244,7 +2262,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2244 2262
2245 /* insert an orphan item to track this unlinked/truncated file */ 2263 /* insert an orphan item to track this unlinked/truncated file */
2246 if (insert >= 1) { 2264 if (insert >= 1) {
2247 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2265 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2248 BUG_ON(ret); 2266 BUG_ON(ret);
2249 } 2267 }
2250 2268
@@ -2281,7 +2299,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2281 spin_unlock(&root->orphan_lock); 2299 spin_unlock(&root->orphan_lock);
2282 2300
2283 if (trans && delete_item) { 2301 if (trans && delete_item) {
2284 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2302 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2285 BUG_ON(ret); 2303 BUG_ON(ret);
2286 } 2304 }
2287 2305
@@ -2346,7 +2364,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2346 break; 2364 break;
2347 2365
2348 /* release the path since we're done with it */ 2366 /* release the path since we're done with it */
2349 btrfs_release_path(root, path); 2367 btrfs_release_path(path);
2350 2368
2351 /* 2369 /*
2352 * this is where we are basically btrfs_lookup, without the 2370 * this is where we are basically btrfs_lookup, without the
@@ -2543,7 +2561,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2543 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2544 * any xattrs or acls 2562 * any xattrs or acls
2545 */ 2563 */
2546 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2564 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
2565 btrfs_ino(inode));
2547 if (!maybe_acls) 2566 if (!maybe_acls)
2548 cache_no_acl(inode); 2567 cache_no_acl(inode);
2549 2568
@@ -2647,11 +2666,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2647 struct extent_buffer *leaf; 2666 struct extent_buffer *leaf;
2648 int ret; 2667 int ret;
2649 2668
2669 /*
2670 * If root is tree root, it means this inode is used to
2671 * store free space information. And these inodes are updated
2672 * when committing the transaction, so they needn't delaye to
2673 * be updated, or deadlock will occured.
2674 */
2675 if (!is_free_space_inode(root, inode)) {
2676 ret = btrfs_delayed_update_inode(trans, root, inode);
2677 if (!ret)
2678 btrfs_set_inode_last_trans(trans, inode);
2679 return ret;
2680 }
2681
2650 path = btrfs_alloc_path(); 2682 path = btrfs_alloc_path();
2651 BUG_ON(!path); 2683 if (!path)
2684 return -ENOMEM;
2685
2652 path->leave_spinning = 1; 2686 path->leave_spinning = 1;
2653 ret = btrfs_lookup_inode(trans, root, path, 2687 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
2654 &BTRFS_I(inode)->location, 1); 2688 1);
2655 if (ret) { 2689 if (ret) {
2656 if (ret > 0) 2690 if (ret > 0)
2657 ret = -ENOENT; 2691 ret = -ENOENT;
@@ -2661,7 +2695,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2661 btrfs_unlock_up_safe(path, 1); 2695 btrfs_unlock_up_safe(path, 1);
2662 leaf = path->nodes[0]; 2696 leaf = path->nodes[0];
2663 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2697 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2664 struct btrfs_inode_item); 2698 struct btrfs_inode_item);
2665 2699
2666 fill_inode_item(trans, leaf, inode_item, inode); 2700 fill_inode_item(trans, leaf, inode_item, inode);
2667 btrfs_mark_buffer_dirty(leaf); 2701 btrfs_mark_buffer_dirty(leaf);
@@ -2672,7 +2706,6 @@ failed:
2672 return ret; 2706 return ret;
2673} 2707}
2674 2708
2675
2676/* 2709/*
2677 * unlink helper that gets used here in inode.c and in the tree logging 2710 * unlink helper that gets used here in inode.c and in the tree logging
2678 * recovery code. It remove a link in a directory with a given name, and 2711 * recovery code. It remove a link in a directory with a given name, and
@@ -2689,6 +2722,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2689 struct btrfs_dir_item *di; 2722 struct btrfs_dir_item *di;
2690 struct btrfs_key key; 2723 struct btrfs_key key;
2691 u64 index; 2724 u64 index;
2725 u64 ino = btrfs_ino(inode);
2726 u64 dir_ino = btrfs_ino(dir);
2692 2727
2693 path = btrfs_alloc_path(); 2728 path = btrfs_alloc_path();
2694 if (!path) { 2729 if (!path) {
@@ -2697,7 +2732,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2697 } 2732 }
2698 2733
2699 path->leave_spinning = 1; 2734 path->leave_spinning = 1;
2700 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2735 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2701 name, name_len, -1); 2736 name, name_len, -1);
2702 if (IS_ERR(di)) { 2737 if (IS_ERR(di)) {
2703 ret = PTR_ERR(di); 2738 ret = PTR_ERR(di);
@@ -2712,33 +2747,23 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2712 ret = btrfs_delete_one_dir_name(trans, root, path, di); 2747 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2713 if (ret) 2748 if (ret)
2714 goto err; 2749 goto err;
2715 btrfs_release_path(root, path); 2750 btrfs_release_path(path);
2716 2751
2717 ret = btrfs_del_inode_ref(trans, root, name, name_len, 2752 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
2718 inode->i_ino, 2753 dir_ino, &index);
2719 dir->i_ino, &index);
2720 if (ret) { 2754 if (ret) {
2721 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2755 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2722 "inode %lu parent %lu\n", name_len, name, 2756 "inode %llu parent %llu\n", name_len, name,
2723 inode->i_ino, dir->i_ino); 2757 (unsigned long long)ino, (unsigned long long)dir_ino);
2724 goto err; 2758 goto err;
2725 } 2759 }
2726 2760
2727 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 2761 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2728 index, name, name_len, -1); 2762 if (ret)
2729 if (IS_ERR(di)) {
2730 ret = PTR_ERR(di);
2731 goto err;
2732 }
2733 if (!di) {
2734 ret = -ENOENT;
2735 goto err; 2763 goto err;
2736 }
2737 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2738 btrfs_release_path(root, path);
2739 2764
2740 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2765 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2741 inode, dir->i_ino); 2766 inode, dir_ino);
2742 BUG_ON(ret != 0 && ret != -ENOENT); 2767 BUG_ON(ret != 0 && ret != -ENOENT);
2743 2768
2744 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2769 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
@@ -2816,12 +2841,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2816 int check_link = 1; 2841 int check_link = 1;
2817 int err = -ENOSPC; 2842 int err = -ENOSPC;
2818 int ret; 2843 int ret;
2844 u64 ino = btrfs_ino(inode);
2845 u64 dir_ino = btrfs_ino(dir);
2819 2846
2820 trans = btrfs_start_transaction(root, 10); 2847 trans = btrfs_start_transaction(root, 10);
2821 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 2848 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2822 return trans; 2849 return trans;
2823 2850
2824 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 2851 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2825 return ERR_PTR(-ENOSPC); 2852 return ERR_PTR(-ENOSPC);
2826 2853
2827 /* check if there is someone else holds reference */ 2854 /* check if there is someone else holds reference */
@@ -2862,7 +2889,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2862 } else { 2889 } else {
2863 check_link = 0; 2890 check_link = 0;
2864 } 2891 }
2865 btrfs_release_path(root, path); 2892 btrfs_release_path(path);
2866 2893
2867 ret = btrfs_lookup_inode(trans, root, path, 2894 ret = btrfs_lookup_inode(trans, root, path,
2868 &BTRFS_I(inode)->location, 0); 2895 &BTRFS_I(inode)->location, 0);
@@ -2876,11 +2903,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2876 } else { 2903 } else {
2877 check_link = 0; 2904 check_link = 0;
2878 } 2905 }
2879 btrfs_release_path(root, path); 2906 btrfs_release_path(path);
2880 2907
2881 if (ret == 0 && S_ISREG(inode->i_mode)) { 2908 if (ret == 0 && S_ISREG(inode->i_mode)) {
2882 ret = btrfs_lookup_file_extent(trans, root, path, 2909 ret = btrfs_lookup_file_extent(trans, root, path,
2883 inode->i_ino, (u64)-1, 0); 2910 ino, (u64)-1, 0);
2884 if (ret < 0) { 2911 if (ret < 0) {
2885 err = ret; 2912 err = ret;
2886 goto out; 2913 goto out;
@@ -2888,7 +2915,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2888 BUG_ON(ret == 0); 2915 BUG_ON(ret == 0);
2889 if (check_path_shared(root, path)) 2916 if (check_path_shared(root, path))
2890 goto out; 2917 goto out;
2891 btrfs_release_path(root, path); 2918 btrfs_release_path(path);
2892 } 2919 }
2893 2920
2894 if (!check_link) { 2921 if (!check_link) {
@@ -2896,7 +2923,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2896 goto out; 2923 goto out;
2897 } 2924 }
2898 2925
2899 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2926 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2900 dentry->d_name.name, dentry->d_name.len, 0); 2927 dentry->d_name.name, dentry->d_name.len, 0);
2901 if (IS_ERR(di)) { 2928 if (IS_ERR(di)) {
2902 err = PTR_ERR(di); 2929 err = PTR_ERR(di);
@@ -2909,11 +2936,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2909 err = 0; 2936 err = 0;
2910 goto out; 2937 goto out;
2911 } 2938 }
2912 btrfs_release_path(root, path); 2939 btrfs_release_path(path);
2913 2940
2914 ref = btrfs_lookup_inode_ref(trans, root, path, 2941 ref = btrfs_lookup_inode_ref(trans, root, path,
2915 dentry->d_name.name, dentry->d_name.len, 2942 dentry->d_name.name, dentry->d_name.len,
2916 inode->i_ino, dir->i_ino, 0); 2943 ino, dir_ino, 0);
2917 if (IS_ERR(ref)) { 2944 if (IS_ERR(ref)) {
2918 err = PTR_ERR(ref); 2945 err = PTR_ERR(ref);
2919 goto out; 2946 goto out;
@@ -2922,9 +2949,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2922 if (check_path_shared(root, path)) 2949 if (check_path_shared(root, path))
2923 goto out; 2950 goto out;
2924 index = btrfs_inode_ref_index(path->nodes[0], ref); 2951 index = btrfs_inode_ref_index(path->nodes[0], ref);
2925 btrfs_release_path(root, path); 2952 btrfs_release_path(path);
2926 2953
2927 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, 2954 /*
2955 * This is a commit root search, if we can lookup inode item and other
2956 * relative items in the commit root, it means the transaction of
2957 * dir/file creation has been committed, and the dir index item that we
2958 * delay to insert has also been inserted into the commit root. So
2959 * we needn't worry about the delayed insertion of the dir index item
2960 * here.
2961 */
2962 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
2928 dentry->d_name.name, dentry->d_name.len, 0); 2963 dentry->d_name.name, dentry->d_name.len, 0);
2929 if (IS_ERR(di)) { 2964 if (IS_ERR(di)) {
2930 err = PTR_ERR(di); 2965 err = PTR_ERR(di);
@@ -2999,54 +3034,47 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2999 struct btrfs_key key; 3034 struct btrfs_key key;
3000 u64 index; 3035 u64 index;
3001 int ret; 3036 int ret;
3037 u64 dir_ino = btrfs_ino(dir);
3002 3038
3003 path = btrfs_alloc_path(); 3039 path = btrfs_alloc_path();
3004 if (!path) 3040 if (!path)
3005 return -ENOMEM; 3041 return -ENOMEM;
3006 3042
3007 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 3043 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3008 name, name_len, -1); 3044 name, name_len, -1);
3009 BUG_ON(!di || IS_ERR(di)); 3045 BUG_ON(IS_ERR_OR_NULL(di));
3010 3046
3011 leaf = path->nodes[0]; 3047 leaf = path->nodes[0];
3012 btrfs_dir_item_key_to_cpu(leaf, di, &key); 3048 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3013 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 3049 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3014 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3050 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3015 BUG_ON(ret); 3051 BUG_ON(ret);
3016 btrfs_release_path(root, path); 3052 btrfs_release_path(path);
3017 3053
3018 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3054 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3019 objectid, root->root_key.objectid, 3055 objectid, root->root_key.objectid,
3020 dir->i_ino, &index, name, name_len); 3056 dir_ino, &index, name, name_len);
3021 if (ret < 0) { 3057 if (ret < 0) {
3022 BUG_ON(ret != -ENOENT); 3058 BUG_ON(ret != -ENOENT);
3023 di = btrfs_search_dir_index_item(root, path, dir->i_ino, 3059 di = btrfs_search_dir_index_item(root, path, dir_ino,
3024 name, name_len); 3060 name, name_len);
3025 BUG_ON(!di || IS_ERR(di)); 3061 BUG_ON(IS_ERR_OR_NULL(di));
3026 3062
3027 leaf = path->nodes[0]; 3063 leaf = path->nodes[0];
3028 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3064 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3029 btrfs_release_path(root, path); 3065 btrfs_release_path(path);
3030 index = key.offset; 3066 index = key.offset;
3031 } 3067 }
3068 btrfs_release_path(path);
3032 3069
3033 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 3070 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3034 index, name, name_len, -1);
3035 BUG_ON(!di || IS_ERR(di));
3036
3037 leaf = path->nodes[0];
3038 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3039 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3040 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3041 BUG_ON(ret); 3071 BUG_ON(ret);
3042 btrfs_release_path(root, path);
3043 3072
3044 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3073 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3045 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3074 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3046 ret = btrfs_update_inode(trans, root, dir); 3075 ret = btrfs_update_inode(trans, root, dir);
3047 BUG_ON(ret); 3076 BUG_ON(ret);
3048 3077
3049 btrfs_free_path(path);
3050 return 0; 3078 return 0;
3051} 3079}
3052 3080
@@ -3059,7 +3087,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3059 unsigned long nr = 0; 3087 unsigned long nr = 0;
3060 3088
3061 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || 3089 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
3062 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 3090 btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3063 return -ENOTEMPTY; 3091 return -ENOTEMPTY;
3064 3092
3065 trans = __unlink_start_trans(dir, dentry); 3093 trans = __unlink_start_trans(dir, dentry);
@@ -3068,7 +3096,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3068 3096
3069 btrfs_set_trans_block_group(trans, dir); 3097 btrfs_set_trans_block_group(trans, dir);
3070 3098
3071 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3072 err = btrfs_unlink_subvol(trans, root, dir, 3100 err = btrfs_unlink_subvol(trans, root, dir,
3073 BTRFS_I(inode)->location.objectid, 3101 BTRFS_I(inode)->location.objectid,
3074 dentry->d_name.name, 3102 dentry->d_name.name,
@@ -3093,178 +3121,6 @@ out:
3093 return err; 3121 return err;
3094} 3122}
3095 3123
3096#if 0
3097/*
3098 * when truncating bytes in a file, it is possible to avoid reading
3099 * the leaves that contain only checksum items. This can be the
3100 * majority of the IO required to delete a large file, but it must
3101 * be done carefully.
3102 *
3103 * The keys in the level just above the leaves are checked to make sure
3104 * the lowest key in a given leaf is a csum key, and starts at an offset
3105 * after the new size.
3106 *
3107 * Then the key for the next leaf is checked to make sure it also has
3108 * a checksum item for the same file. If it does, we know our target leaf
3109 * contains only checksum items, and it can be safely freed without reading
3110 * it.
3111 *
3112 * This is just an optimization targeted at large files. It may do
3113 * nothing. It will return 0 unless things went badly.
3114 */
3115static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
3116 struct btrfs_root *root,
3117 struct btrfs_path *path,
3118 struct inode *inode, u64 new_size)
3119{
3120 struct btrfs_key key;
3121 int ret;
3122 int nritems;
3123 struct btrfs_key found_key;
3124 struct btrfs_key other_key;
3125 struct btrfs_leaf_ref *ref;
3126 u64 leaf_gen;
3127 u64 leaf_start;
3128
3129 path->lowest_level = 1;
3130 key.objectid = inode->i_ino;
3131 key.type = BTRFS_CSUM_ITEM_KEY;
3132 key.offset = new_size;
3133again:
3134 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3135 if (ret < 0)
3136 goto out;
3137
3138 if (path->nodes[1] == NULL) {
3139 ret = 0;
3140 goto out;
3141 }
3142 ret = 0;
3143 btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
3144 nritems = btrfs_header_nritems(path->nodes[1]);
3145
3146 if (!nritems)
3147 goto out;
3148
3149 if (path->slots[1] >= nritems)
3150 goto next_node;
3151
3152 /* did we find a key greater than anything we want to delete? */
3153 if (found_key.objectid > inode->i_ino ||
3154 (found_key.objectid == inode->i_ino && found_key.type > key.type))
3155 goto out;
3156
3157 /* we check the next key in the node to make sure the leave contains
3158 * only checksum items. This comparison doesn't work if our
3159 * leaf is the last one in the node
3160 */
3161 if (path->slots[1] + 1 >= nritems) {
3162next_node:
3163 /* search forward from the last key in the node, this
3164 * will bring us into the next node in the tree
3165 */
3166 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
3167
3168 /* unlikely, but we inc below, so check to be safe */
3169 if (found_key.offset == (u64)-1)
3170 goto out;
3171
3172 /* search_forward needs a path with locks held, do the
3173 * search again for the original key. It is possible
3174 * this will race with a balance and return a path that
3175 * we could modify, but this drop is just an optimization
3176 * and is allowed to miss some leaves.
3177 */
3178 btrfs_release_path(root, path);
3179 found_key.offset++;
3180
3181 /* setup a max key for search_forward */
3182 other_key.offset = (u64)-1;
3183 other_key.type = key.type;
3184 other_key.objectid = key.objectid;
3185
3186 path->keep_locks = 1;
3187 ret = btrfs_search_forward(root, &found_key, &other_key,
3188 path, 0, 0);
3189 path->keep_locks = 0;
3190 if (ret || found_key.objectid != key.objectid ||
3191 found_key.type != key.type) {
3192 ret = 0;
3193 goto out;
3194 }
3195
3196 key.offset = found_key.offset;
3197 btrfs_release_path(root, path);
3198 cond_resched();
3199 goto again;
3200 }
3201
3202 /* we know there's one more slot after us in the tree,
3203 * read that key so we can verify it is also a checksum item
3204 */
3205 btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
3206
3207 if (found_key.objectid < inode->i_ino)
3208 goto next_key;
3209
3210 if (found_key.type != key.type || found_key.offset < new_size)
3211 goto next_key;
3212
3213 /*
3214 * if the key for the next leaf isn't a csum key from this objectid,
3215 * we can't be sure there aren't good items inside this leaf.
3216 * Bail out
3217 */
3218 if (other_key.objectid != inode->i_ino || other_key.type != key.type)
3219 goto out;
3220
3221 leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
3222 leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
3223 /*
3224 * it is safe to delete this leaf, it contains only
3225 * csum items from this inode at an offset >= new_size
3226 */
3227 ret = btrfs_del_leaf(trans, root, path, leaf_start);
3228 BUG_ON(ret);
3229
3230 if (root->ref_cows && leaf_gen < trans->transid) {
3231 ref = btrfs_alloc_leaf_ref(root, 0);
3232 if (ref) {
3233 ref->root_gen = root->root_key.offset;
3234 ref->bytenr = leaf_start;
3235 ref->owner = 0;
3236 ref->generation = leaf_gen;
3237 ref->nritems = 0;
3238
3239 btrfs_sort_leaf_ref(ref);
3240
3241 ret = btrfs_add_leaf_ref(root, ref, 0);
3242 WARN_ON(ret);
3243 btrfs_free_leaf_ref(root, ref);
3244 } else {
3245 WARN_ON(1);
3246 }
3247 }
3248next_key:
3249 btrfs_release_path(root, path);
3250
3251 if (other_key.objectid == inode->i_ino &&
3252 other_key.type == key.type && other_key.offset > key.offset) {
3253 key.offset = other_key.offset;
3254 cond_resched();
3255 goto again;
3256 }
3257 ret = 0;
3258out:
3259 /* fixup any changes we've made to the path */
3260 path->lowest_level = 0;
3261 path->keep_locks = 0;
3262 btrfs_release_path(root, path);
3263 return ret;
3264}
3265
3266#endif
3267
3268/* 3124/*
3269 * this can truncate away extent items, csum items and directory items. 3125 * this can truncate away extent items, csum items and directory items.
3270 * It starts at a high offset and removes keys until it can't find 3126 * It starts at a high offset and removes keys until it can't find
@@ -3300,17 +3156,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3300 int encoding; 3156 int encoding;
3301 int ret; 3157 int ret;
3302 int err = 0; 3158 int err = 0;
3159 u64 ino = btrfs_ino(inode);
3303 3160
3304 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3161 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3305 3162
3306 if (root->ref_cows || root == root->fs_info->tree_root) 3163 if (root->ref_cows || root == root->fs_info->tree_root)
3307 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3164 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3308 3165
3166 /*
3167 * This function is also used to drop the items in the log tree before
3168 * we relog the inode, so if root != BTRFS_I(inode)->root, it means
3169 * it is used to drop the loged items. So we shouldn't kill the delayed
3170 * items.
3171 */
3172 if (min_type == 0 && root == BTRFS_I(inode)->root)
3173 btrfs_kill_delayed_inode_items(inode);
3174
3309 path = btrfs_alloc_path(); 3175 path = btrfs_alloc_path();
3310 BUG_ON(!path); 3176 BUG_ON(!path);
3311 path->reada = -1; 3177 path->reada = -1;
3312 3178
3313 key.objectid = inode->i_ino; 3179 key.objectid = ino;
3314 key.offset = (u64)-1; 3180 key.offset = (u64)-1;
3315 key.type = (u8)-1; 3181 key.type = (u8)-1;
3316 3182
@@ -3338,7 +3204,7 @@ search_again:
3338 found_type = btrfs_key_type(&found_key); 3204 found_type = btrfs_key_type(&found_key);
3339 encoding = 0; 3205 encoding = 0;
3340 3206
3341 if (found_key.objectid != inode->i_ino) 3207 if (found_key.objectid != ino)
3342 break; 3208 break;
3343 3209
3344 if (found_type < min_type) 3210 if (found_type < min_type)
@@ -3428,7 +3294,6 @@ search_again:
3428 btrfs_file_extent_calc_inline_size(size); 3294 btrfs_file_extent_calc_inline_size(size);
3429 ret = btrfs_truncate_item(trans, root, path, 3295 ret = btrfs_truncate_item(trans, root, path,
3430 size, 1); 3296 size, 1);
3431 BUG_ON(ret);
3432 } else if (root->ref_cows) { 3297 } else if (root->ref_cows) {
3433 inode_sub_bytes(inode, item_end + 1 - 3298 inode_sub_bytes(inode, item_end + 1 -
3434 found_key.offset); 3299 found_key.offset);
@@ -3457,7 +3322,7 @@ delete:
3457 ret = btrfs_free_extent(trans, root, extent_start, 3322 ret = btrfs_free_extent(trans, root, extent_start,
3458 extent_num_bytes, 0, 3323 extent_num_bytes, 0,
3459 btrfs_header_owner(leaf), 3324 btrfs_header_owner(leaf),
3460 inode->i_ino, extent_offset); 3325 ino, extent_offset);
3461 BUG_ON(ret); 3326 BUG_ON(ret);
3462 } 3327 }
3463 3328
@@ -3466,7 +3331,9 @@ delete:
3466 3331
3467 if (path->slots[0] == 0 || 3332 if (path->slots[0] == 0 ||
3468 path->slots[0] != pending_del_slot) { 3333 path->slots[0] != pending_del_slot) {
3469 if (root->ref_cows) { 3334 if (root->ref_cows &&
3335 BTRFS_I(inode)->location.objectid !=
3336 BTRFS_FREE_INO_OBJECTID) {
3470 err = -EAGAIN; 3337 err = -EAGAIN;
3471 goto out; 3338 goto out;
3472 } 3339 }
@@ -3477,7 +3344,7 @@ delete:
3477 BUG_ON(ret); 3344 BUG_ON(ret);
3478 pending_del_nr = 0; 3345 pending_del_nr = 0;
3479 } 3346 }
3480 btrfs_release_path(root, path); 3347 btrfs_release_path(path);
3481 goto search_again; 3348 goto search_again;
3482 } else { 3349 } else {
3483 path->slots[0]--; 3350 path->slots[0]--;
@@ -3635,7 +3502,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3635 while (1) { 3502 while (1) {
3636 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3503 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
3637 block_end - cur_offset, 0); 3504 block_end - cur_offset, 0);
3638 BUG_ON(IS_ERR(em) || !em); 3505 BUG_ON(IS_ERR_OR_NULL(em));
3639 last_byte = min(extent_map_end(em), block_end); 3506 last_byte = min(extent_map_end(em), block_end);
3640 last_byte = (last_byte + mask) & ~mask; 3507 last_byte = (last_byte + mask) & ~mask;
3641 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3508 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3656,7 +3523,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3656 break; 3523 break;
3657 3524
3658 err = btrfs_insert_file_extent(trans, root, 3525 err = btrfs_insert_file_extent(trans, root,
3659 inode->i_ino, cur_offset, 0, 3526 btrfs_ino(inode), cur_offset, 0,
3660 0, hole_size, 0, hole_size, 3527 0, hole_size, 0, hole_size,
3661 0, 0, 0); 3528 0, 0, 0);
3662 if (err) 3529 if (err)
@@ -3758,7 +3625,7 @@ void btrfs_evict_inode(struct inode *inode)
3758 3625
3759 truncate_inode_pages(&inode->i_data, 0); 3626 truncate_inode_pages(&inode->i_data, 0);
3760 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3627 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3761 root == root->fs_info->tree_root)) 3628 is_free_space_inode(root, inode)))
3762 goto no_delete; 3629 goto no_delete;
3763 3630
3764 if (is_bad_inode(inode)) { 3631 if (is_bad_inode(inode)) {
@@ -3811,6 +3678,10 @@ void btrfs_evict_inode(struct inode *inode)
3811 BUG_ON(ret); 3678 BUG_ON(ret);
3812 } 3679 }
3813 3680
3681 if (!(root == root->fs_info->tree_root ||
3682 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
3683 btrfs_return_ino(root, btrfs_ino(inode));
3684
3814 nr = trans->blocks_used; 3685 nr = trans->blocks_used;
3815 btrfs_end_transaction(trans, root); 3686 btrfs_end_transaction(trans, root);
3816 btrfs_btree_balance_dirty(root, nr); 3687 btrfs_btree_balance_dirty(root, nr);
@@ -3836,12 +3707,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3836 path = btrfs_alloc_path(); 3707 path = btrfs_alloc_path();
3837 BUG_ON(!path); 3708 BUG_ON(!path);
3838 3709
3839 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, 3710 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3840 namelen, 0); 3711 namelen, 0);
3841 if (IS_ERR(di)) 3712 if (IS_ERR(di))
3842 ret = PTR_ERR(di); 3713 ret = PTR_ERR(di);
3843 3714
3844 if (!di || IS_ERR(di)) 3715 if (IS_ERR_OR_NULL(di))
3845 goto out_err; 3716 goto out_err;
3846 3717
3847 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); 3718 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
@@ -3889,7 +3760,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3889 3760
3890 leaf = path->nodes[0]; 3761 leaf = path->nodes[0];
3891 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 3762 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
3892 if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || 3763 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
3893 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) 3764 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
3894 goto out; 3765 goto out;
3895 3766
@@ -3899,7 +3770,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3899 if (ret) 3770 if (ret)
3900 goto out; 3771 goto out;
3901 3772
3902 btrfs_release_path(root->fs_info->tree_root, path); 3773 btrfs_release_path(path);
3903 3774
3904 new_root = btrfs_read_fs_root_no_name(root->fs_info, location); 3775 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
3905 if (IS_ERR(new_root)) { 3776 if (IS_ERR(new_root)) {
@@ -3928,6 +3799,7 @@ static void inode_tree_add(struct inode *inode)
3928 struct btrfs_inode *entry; 3799 struct btrfs_inode *entry;
3929 struct rb_node **p; 3800 struct rb_node **p;
3930 struct rb_node *parent; 3801 struct rb_node *parent;
3802 u64 ino = btrfs_ino(inode);
3931again: 3803again:
3932 p = &root->inode_tree.rb_node; 3804 p = &root->inode_tree.rb_node;
3933 parent = NULL; 3805 parent = NULL;
@@ -3940,9 +3812,9 @@ again:
3940 parent = *p; 3812 parent = *p;
3941 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3813 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3942 3814
3943 if (inode->i_ino < entry->vfs_inode.i_ino) 3815 if (ino < btrfs_ino(&entry->vfs_inode))
3944 p = &parent->rb_left; 3816 p = &parent->rb_left;
3945 else if (inode->i_ino > entry->vfs_inode.i_ino) 3817 else if (ino > btrfs_ino(&entry->vfs_inode))
3946 p = &parent->rb_right; 3818 p = &parent->rb_right;
3947 else { 3819 else {
3948 WARN_ON(!(entry->vfs_inode.i_state & 3820 WARN_ON(!(entry->vfs_inode.i_state &
@@ -4006,9 +3878,9 @@ again:
4006 prev = node; 3878 prev = node;
4007 entry = rb_entry(node, struct btrfs_inode, rb_node); 3879 entry = rb_entry(node, struct btrfs_inode, rb_node);
4008 3880
4009 if (objectid < entry->vfs_inode.i_ino) 3881 if (objectid < btrfs_ino(&entry->vfs_inode))
4010 node = node->rb_left; 3882 node = node->rb_left;
4011 else if (objectid > entry->vfs_inode.i_ino) 3883 else if (objectid > btrfs_ino(&entry->vfs_inode))
4012 node = node->rb_right; 3884 node = node->rb_right;
4013 else 3885 else
4014 break; 3886 break;
@@ -4016,7 +3888,7 @@ again:
4016 if (!node) { 3888 if (!node) {
4017 while (prev) { 3889 while (prev) {
4018 entry = rb_entry(prev, struct btrfs_inode, rb_node); 3890 entry = rb_entry(prev, struct btrfs_inode, rb_node);
4019 if (objectid <= entry->vfs_inode.i_ino) { 3891 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
4020 node = prev; 3892 node = prev;
4021 break; 3893 break;
4022 } 3894 }
@@ -4025,7 +3897,7 @@ again:
4025 } 3897 }
4026 while (node) { 3898 while (node) {
4027 entry = rb_entry(node, struct btrfs_inode, rb_node); 3899 entry = rb_entry(node, struct btrfs_inode, rb_node);
4028 objectid = entry->vfs_inode.i_ino + 1; 3900 objectid = btrfs_ino(&entry->vfs_inode) + 1;
4029 inode = igrab(&entry->vfs_inode); 3901 inode = igrab(&entry->vfs_inode);
4030 if (inode) { 3902 if (inode) {
4031 spin_unlock(&root->inode_lock); 3903 spin_unlock(&root->inode_lock);
@@ -4063,7 +3935,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
4063static int btrfs_find_actor(struct inode *inode, void *opaque) 3935static int btrfs_find_actor(struct inode *inode, void *opaque)
4064{ 3936{
4065 struct btrfs_iget_args *args = opaque; 3937 struct btrfs_iget_args *args = opaque;
4066 return args->ino == inode->i_ino && 3938 return args->ino == btrfs_ino(inode) &&
4067 args->root == BTRFS_I(inode)->root; 3939 args->root == BTRFS_I(inode)->root;
4068} 3940}
4069 3941
@@ -4208,7 +4080,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4208 return d_splice_alias(inode, dentry); 4080 return d_splice_alias(inode, dentry);
4209} 4081}
4210 4082
4211static unsigned char btrfs_filetype_table[] = { 4083unsigned char btrfs_filetype_table[] = {
4212 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 4084 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
4213}; 4085};
4214 4086
@@ -4222,6 +4094,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4222 struct btrfs_key key; 4094 struct btrfs_key key;
4223 struct btrfs_key found_key; 4095 struct btrfs_key found_key;
4224 struct btrfs_path *path; 4096 struct btrfs_path *path;
4097 struct list_head ins_list;
4098 struct list_head del_list;
4225 int ret; 4099 int ret;
4226 struct extent_buffer *leaf; 4100 struct extent_buffer *leaf;
4227 int slot; 4101 int slot;
@@ -4234,6 +4108,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4234 char tmp_name[32]; 4108 char tmp_name[32];
4235 char *name_ptr; 4109 char *name_ptr;
4236 int name_len; 4110 int name_len;
4111 int is_curr = 0; /* filp->f_pos points to the current index? */
4237 4112
4238 /* FIXME, use a real flag for deciding about the key type */ 4113 /* FIXME, use a real flag for deciding about the key type */
4239 if (root->fs_info->tree_root == root) 4114 if (root->fs_info->tree_root == root)
@@ -4241,9 +4116,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4241 4116
4242 /* special case for "." */ 4117 /* special case for "." */
4243 if (filp->f_pos == 0) { 4118 if (filp->f_pos == 0) {
4244 over = filldir(dirent, ".", 1, 4119 over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
4245 1, inode->i_ino,
4246 DT_DIR);
4247 if (over) 4120 if (over)
4248 return 0; 4121 return 0;
4249 filp->f_pos = 1; 4122 filp->f_pos = 1;
@@ -4258,11 +4131,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4258 filp->f_pos = 2; 4131 filp->f_pos = 2;
4259 } 4132 }
4260 path = btrfs_alloc_path(); 4133 path = btrfs_alloc_path();
4134 if (!path)
4135 return -ENOMEM;
4261 path->reada = 2; 4136 path->reada = 2;
4262 4137
4138 if (key_type == BTRFS_DIR_INDEX_KEY) {
4139 INIT_LIST_HEAD(&ins_list);
4140 INIT_LIST_HEAD(&del_list);
4141 btrfs_get_delayed_items(inode, &ins_list, &del_list);
4142 }
4143
4263 btrfs_set_key_type(&key, key_type); 4144 btrfs_set_key_type(&key, key_type);
4264 key.offset = filp->f_pos; 4145 key.offset = filp->f_pos;
4265 key.objectid = inode->i_ino; 4146 key.objectid = btrfs_ino(inode);
4266 4147
4267 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4148 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4268 if (ret < 0) 4149 if (ret < 0)
@@ -4289,8 +4170,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4289 break; 4170 break;
4290 if (found_key.offset < filp->f_pos) 4171 if (found_key.offset < filp->f_pos)
4291 goto next; 4172 goto next;
4173 if (key_type == BTRFS_DIR_INDEX_KEY &&
4174 btrfs_should_delete_dir_index(&del_list,
4175 found_key.offset))
4176 goto next;
4292 4177
4293 filp->f_pos = found_key.offset; 4178 filp->f_pos = found_key.offset;
4179 is_curr = 1;
4294 4180
4295 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 4181 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
4296 di_cur = 0; 4182 di_cur = 0;
@@ -4345,6 +4231,15 @@ next:
4345 path->slots[0]++; 4231 path->slots[0]++;
4346 } 4232 }
4347 4233
4234 if (key_type == BTRFS_DIR_INDEX_KEY) {
4235 if (is_curr)
4236 filp->f_pos++;
4237 ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
4238 &ins_list);
4239 if (ret)
4240 goto nopos;
4241 }
4242
4348 /* Reached end of directory/root. Bump pos past the last item. */ 4243 /* Reached end of directory/root. Bump pos past the last item. */
4349 if (key_type == BTRFS_DIR_INDEX_KEY) 4244 if (key_type == BTRFS_DIR_INDEX_KEY)
4350 /* 4245 /*
@@ -4357,6 +4252,8 @@ next:
4357nopos: 4252nopos:
4358 ret = 0; 4253 ret = 0;
4359err: 4254err:
4255 if (key_type == BTRFS_DIR_INDEX_KEY)
4256 btrfs_put_delayed_items(&ins_list, &del_list);
4360 btrfs_free_path(path); 4257 btrfs_free_path(path);
4361 return ret; 4258 return ret;
4362} 4259}
@@ -4372,7 +4269,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4372 return 0; 4269 return 0;
4373 4270
4374 smp_mb(); 4271 smp_mb();
4375 nolock = (root->fs_info->closing && root == root->fs_info->tree_root); 4272 if (root->fs_info->closing && is_free_space_inode(root, inode))
4273 nolock = true;
4376 4274
4377 if (wbc->sync_mode == WB_SYNC_ALL) { 4275 if (wbc->sync_mode == WB_SYNC_ALL) {
4378 if (nolock) 4276 if (nolock)
@@ -4415,25 +4313,25 @@ void btrfs_dirty_inode(struct inode *inode)
4415 btrfs_end_transaction(trans, root); 4313 btrfs_end_transaction(trans, root);
4416 trans = btrfs_start_transaction(root, 1); 4314 trans = btrfs_start_transaction(root, 1);
4417 if (IS_ERR(trans)) { 4315 if (IS_ERR(trans)) {
4418 if (printk_ratelimit()) { 4316 printk_ratelimited(KERN_ERR "btrfs: fail to "
4419 printk(KERN_ERR "btrfs: fail to " 4317 "dirty inode %llu error %ld\n",
4420 "dirty inode %lu error %ld\n", 4318 (unsigned long long)btrfs_ino(inode),
4421 inode->i_ino, PTR_ERR(trans)); 4319 PTR_ERR(trans));
4422 }
4423 return; 4320 return;
4424 } 4321 }
4425 btrfs_set_trans_block_group(trans, inode); 4322 btrfs_set_trans_block_group(trans, inode);
4426 4323
4427 ret = btrfs_update_inode(trans, root, inode); 4324 ret = btrfs_update_inode(trans, root, inode);
4428 if (ret) { 4325 if (ret) {
4429 if (printk_ratelimit()) { 4326 printk_ratelimited(KERN_ERR "btrfs: fail to "
4430 printk(KERN_ERR "btrfs: fail to " 4327 "dirty inode %llu error %d\n",
4431 "dirty inode %lu error %d\n", 4328 (unsigned long long)btrfs_ino(inode),
4432 inode->i_ino, ret); 4329 ret);
4433 }
4434 } 4330 }
4435 } 4331 }
4436 btrfs_end_transaction(trans, root); 4332 btrfs_end_transaction(trans, root);
4333 if (BTRFS_I(inode)->delayed_node)
4334 btrfs_balance_delayed_items(root);
4437} 4335}
4438 4336
4439/* 4337/*
@@ -4449,7 +4347,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4449 struct extent_buffer *leaf; 4347 struct extent_buffer *leaf;
4450 int ret; 4348 int ret;
4451 4349
4452 key.objectid = inode->i_ino; 4350 key.objectid = btrfs_ino(inode);
4453 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); 4351 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
4454 key.offset = (u64)-1; 4352 key.offset = (u64)-1;
4455 4353
@@ -4481,7 +4379,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4481 leaf = path->nodes[0]; 4379 leaf = path->nodes[0];
4482 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4380 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4483 4381
4484 if (found_key.objectid != inode->i_ino || 4382 if (found_key.objectid != btrfs_ino(inode) ||
4485 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { 4383 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
4486 BTRFS_I(inode)->index_cnt = 2; 4384 BTRFS_I(inode)->index_cnt = 2;
4487 goto out; 4385 goto out;
@@ -4502,9 +4400,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
4502 int ret = 0; 4400 int ret = 0;
4503 4401
4504 if (BTRFS_I(dir)->index_cnt == (u64)-1) { 4402 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
4505 ret = btrfs_set_inode_index_count(dir); 4403 ret = btrfs_inode_delayed_dir_index_count(dir);
4506 if (ret) 4404 if (ret) {
4507 return ret; 4405 ret = btrfs_set_inode_index_count(dir);
4406 if (ret)
4407 return ret;
4408 }
4508 } 4409 }
4509 4410
4510 *index = BTRFS_I(dir)->index_cnt; 4411 *index = BTRFS_I(dir)->index_cnt;
@@ -4540,6 +4441,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4540 return ERR_PTR(-ENOMEM); 4441 return ERR_PTR(-ENOMEM);
4541 } 4442 }
4542 4443
4444 /*
4445 * we have to initialize this early, so we can reclaim the inode
4446 * number if we fail afterwards in this function.
4447 */
4448 inode->i_ino = objectid;
4449
4543 if (dir) { 4450 if (dir) {
4544 trace_btrfs_inode_request(dir); 4451 trace_btrfs_inode_request(dir);
4545 4452
@@ -4585,7 +4492,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4585 goto fail; 4492 goto fail;
4586 4493
4587 inode_init_owner(inode, dir, mode); 4494 inode_init_owner(inode, dir, mode);
4588 inode->i_ino = objectid;
4589 inode_set_bytes(inode, 0); 4495 inode_set_bytes(inode, 0);
4590 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4496 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
4591 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4497 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -4649,29 +4555,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4649 int ret = 0; 4555 int ret = 0;
4650 struct btrfs_key key; 4556 struct btrfs_key key;
4651 struct btrfs_root *root = BTRFS_I(parent_inode)->root; 4557 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
4558 u64 ino = btrfs_ino(inode);
4559 u64 parent_ino = btrfs_ino(parent_inode);
4652 4560
4653 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4561 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4654 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); 4562 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
4655 } else { 4563 } else {
4656 key.objectid = inode->i_ino; 4564 key.objectid = ino;
4657 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 4565 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
4658 key.offset = 0; 4566 key.offset = 0;
4659 } 4567 }
4660 4568
4661 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4569 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4662 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 4570 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
4663 key.objectid, root->root_key.objectid, 4571 key.objectid, root->root_key.objectid,
4664 parent_inode->i_ino, 4572 parent_ino, index, name, name_len);
4665 index, name, name_len);
4666 } else if (add_backref) { 4573 } else if (add_backref) {
4667 ret = btrfs_insert_inode_ref(trans, root, 4574 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
4668 name, name_len, inode->i_ino, 4575 parent_ino, index);
4669 parent_inode->i_ino, index);
4670 } 4576 }
4671 4577
4672 if (ret == 0) { 4578 if (ret == 0) {
4673 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4579 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4674 parent_inode->i_ino, &key, 4580 parent_inode, &key,
4675 btrfs_inode_type(inode), index); 4581 btrfs_inode_type(inode), index);
4676 BUG_ON(ret); 4582 BUG_ON(ret);
4677 4583
@@ -4714,10 +4620,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4714 if (!new_valid_dev(rdev)) 4620 if (!new_valid_dev(rdev))
4715 return -EINVAL; 4621 return -EINVAL;
4716 4622
4717 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4718 if (err)
4719 return err;
4720
4721 /* 4623 /*
4722 * 2 for inode item and ref 4624 * 2 for inode item and ref
4723 * 2 for dir items 4625 * 2 for dir items
@@ -4729,8 +4631,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4729 4631
4730 btrfs_set_trans_block_group(trans, dir); 4632 btrfs_set_trans_block_group(trans, dir);
4731 4633
4634 err = btrfs_find_free_ino(root, &objectid);
4635 if (err)
4636 goto out_unlock;
4637
4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4638 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4733 dentry->d_name.len, dir->i_ino, objectid, 4639 dentry->d_name.len, btrfs_ino(dir), objectid,
4734 BTRFS_I(dir)->block_group, mode, &index); 4640 BTRFS_I(dir)->block_group, mode, &index);
4735 if (IS_ERR(inode)) { 4641 if (IS_ERR(inode)) {
4736 err = PTR_ERR(inode); 4642 err = PTR_ERR(inode);
@@ -4777,9 +4683,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4777 u64 objectid; 4683 u64 objectid;
4778 u64 index = 0; 4684 u64 index = 0;
4779 4685
4780 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4781 if (err)
4782 return err;
4783 /* 4686 /*
4784 * 2 for inode item and ref 4687 * 2 for inode item and ref
4785 * 2 for dir items 4688 * 2 for dir items
@@ -4791,8 +4694,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4791 4694
4792 btrfs_set_trans_block_group(trans, dir); 4695 btrfs_set_trans_block_group(trans, dir);
4793 4696
4697 err = btrfs_find_free_ino(root, &objectid);
4698 if (err)
4699 goto out_unlock;
4700
4794 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4701 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4795 dentry->d_name.len, dir->i_ino, objectid, 4702 dentry->d_name.len, btrfs_ino(dir), objectid,
4796 BTRFS_I(dir)->block_group, mode, &index); 4703 BTRFS_I(dir)->block_group, mode, &index);
4797 if (IS_ERR(inode)) { 4704 if (IS_ERR(inode)) {
4798 err = PTR_ERR(inode); 4705 err = PTR_ERR(inode);
@@ -4903,10 +4810,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4903 u64 index = 0; 4810 u64 index = 0;
4904 unsigned long nr = 1; 4811 unsigned long nr = 1;
4905 4812
4906 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4907 if (err)
4908 return err;
4909
4910 /* 4813 /*
4911 * 2 items for inode and ref 4814 * 2 items for inode and ref
4912 * 2 items for dir items 4815 * 2 items for dir items
@@ -4917,8 +4820,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4917 return PTR_ERR(trans); 4820 return PTR_ERR(trans);
4918 btrfs_set_trans_block_group(trans, dir); 4821 btrfs_set_trans_block_group(trans, dir);
4919 4822
4823 err = btrfs_find_free_ino(root, &objectid);
4824 if (err)
4825 goto out_fail;
4826
4920 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4827 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4921 dentry->d_name.len, dir->i_ino, objectid, 4828 dentry->d_name.len, btrfs_ino(dir), objectid,
4922 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4829 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4923 &index); 4830 &index);
4924 if (IS_ERR(inode)) { 4831 if (IS_ERR(inode)) {
@@ -5041,7 +4948,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
5041 u64 bytenr; 4948 u64 bytenr;
5042 u64 extent_start = 0; 4949 u64 extent_start = 0;
5043 u64 extent_end = 0; 4950 u64 extent_end = 0;
5044 u64 objectid = inode->i_ino; 4951 u64 objectid = btrfs_ino(inode);
5045 u32 found_type; 4952 u32 found_type;
5046 struct btrfs_path *path = NULL; 4953 struct btrfs_path *path = NULL;
5047 struct btrfs_root *root = BTRFS_I(inode)->root; 4954 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5069,7 +4976,7 @@ again:
5069 else 4976 else
5070 goto out; 4977 goto out;
5071 } 4978 }
5072 em = alloc_extent_map(GFP_NOFS); 4979 em = alloc_extent_map();
5073 if (!em) { 4980 if (!em) {
5074 err = -ENOMEM; 4981 err = -ENOMEM;
5075 goto out; 4982 goto out;
@@ -5223,7 +5130,7 @@ again:
5223 kunmap(page); 5130 kunmap(page);
5224 free_extent_map(em); 5131 free_extent_map(em);
5225 em = NULL; 5132 em = NULL;
5226 btrfs_release_path(root, path); 5133 btrfs_release_path(path);
5227 trans = btrfs_join_transaction(root, 1); 5134 trans = btrfs_join_transaction(root, 1);
5228 if (IS_ERR(trans)) 5135 if (IS_ERR(trans))
5229 return ERR_CAST(trans); 5136 return ERR_CAST(trans);
@@ -5249,7 +5156,7 @@ not_found_em:
5249 em->block_start = EXTENT_MAP_HOLE; 5156 em->block_start = EXTENT_MAP_HOLE;
5250 set_bit(EXTENT_FLAG_VACANCY, &em->flags); 5157 set_bit(EXTENT_FLAG_VACANCY, &em->flags);
5251insert: 5158insert:
5252 btrfs_release_path(root, path); 5159 btrfs_release_path(path);
5253 if (em->start > start || extent_map_end(em) <= start) { 5160 if (em->start > start || extent_map_end(em) <= start) {
5254 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " 5161 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
5255 "[%llu %llu]\n", (unsigned long long)em->start, 5162 "[%llu %llu]\n", (unsigned long long)em->start,
@@ -5382,7 +5289,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5382 u64 hole_start = start; 5289 u64 hole_start = start;
5383 u64 hole_len = len; 5290 u64 hole_len = len;
5384 5291
5385 em = alloc_extent_map(GFP_NOFS); 5292 em = alloc_extent_map();
5386 if (!em) { 5293 if (!em) {
5387 err = -ENOMEM; 5294 err = -ENOMEM;
5388 goto out; 5295 goto out;
@@ -5472,6 +5379,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5472 if (IS_ERR(trans)) 5379 if (IS_ERR(trans))
5473 return ERR_CAST(trans); 5380 return ERR_CAST(trans);
5474 5381
5382 if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
5383 btrfs_add_inode_defrag(trans, inode);
5384
5475 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 5385 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5476 5386
5477 alloc_hint = get_extent_allocation_hint(inode, start, len); 5387 alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5483,7 +5393,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5483 } 5393 }
5484 5394
5485 if (!em) { 5395 if (!em) {
5486 em = alloc_extent_map(GFP_NOFS); 5396 em = alloc_extent_map();
5487 if (!em) { 5397 if (!em) {
5488 em = ERR_PTR(-ENOMEM); 5398 em = ERR_PTR(-ENOMEM);
5489 goto out; 5399 goto out;
@@ -5549,7 +5459,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5549 if (!path) 5459 if (!path)
5550 return -ENOMEM; 5460 return -ENOMEM;
5551 5461
5552 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 5462 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
5553 offset, 0); 5463 offset, 0);
5554 if (ret < 0) 5464 if (ret < 0)
5555 goto out; 5465 goto out;
@@ -5566,7 +5476,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5566 ret = 0; 5476 ret = 0;
5567 leaf = path->nodes[0]; 5477 leaf = path->nodes[0];
5568 btrfs_item_key_to_cpu(leaf, &key, slot); 5478 btrfs_item_key_to_cpu(leaf, &key, slot);
5569 if (key.objectid != inode->i_ino || 5479 if (key.objectid != btrfs_ino(inode) ||
5570 key.type != BTRFS_EXTENT_DATA_KEY) { 5480 key.type != BTRFS_EXTENT_DATA_KEY) {
5571 /* not our file or wrong item type, must cow */ 5481 /* not our file or wrong item type, must cow */
5572 goto out; 5482 goto out;
@@ -5600,7 +5510,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5600 * look for other files referencing this extent, if we 5510 * look for other files referencing this extent, if we
5601 * find any we must cow 5511 * find any we must cow
5602 */ 5512 */
5603 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 5513 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
5604 key.offset - backref_offset, disk_bytenr)) 5514 key.offset - backref_offset, disk_bytenr))
5605 goto out; 5515 goto out;
5606 5516
@@ -5790,9 +5700,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5790 5700
5791 flush_dcache_page(bvec->bv_page); 5701 flush_dcache_page(bvec->bv_page);
5792 if (csum != *private) { 5702 if (csum != *private) {
5793 printk(KERN_ERR "btrfs csum failed ino %lu off" 5703 printk(KERN_ERR "btrfs csum failed ino %llu off"
5794 " %llu csum %u private %u\n", 5704 " %llu csum %u private %u\n",
5795 inode->i_ino, (unsigned long long)start, 5705 (unsigned long long)btrfs_ino(inode),
5706 (unsigned long long)start,
5796 csum, *private); 5707 csum, *private);
5797 err = -EIO; 5708 err = -EIO;
5798 } 5709 }
@@ -5939,9 +5850,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5939 struct btrfs_dio_private *dip = bio->bi_private; 5850 struct btrfs_dio_private *dip = bio->bi_private;
5940 5851
5941 if (err) { 5852 if (err) {
5942 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5853 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
5943 "sector %#Lx len %u err no %d\n", 5854 "sector %#Lx len %u err no %d\n",
5944 dip->inode->i_ino, bio->bi_rw, 5855 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
5945 (unsigned long long)bio->bi_sector, bio->bi_size, err); 5856 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5946 dip->errors = 1; 5857 dip->errors = 1;
5947 5858
@@ -6782,12 +6693,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6782 ei->ordered_data_close = 0; 6693 ei->ordered_data_close = 0;
6783 ei->orphan_meta_reserved = 0; 6694 ei->orphan_meta_reserved = 0;
6784 ei->dummy_inode = 0; 6695 ei->dummy_inode = 0;
6696 ei->in_defrag = 0;
6785 ei->force_compress = BTRFS_COMPRESS_NONE; 6697 ei->force_compress = BTRFS_COMPRESS_NONE;
6786 6698
6699 ei->delayed_node = NULL;
6700
6787 inode = &ei->vfs_inode; 6701 inode = &ei->vfs_inode;
6788 extent_map_tree_init(&ei->extent_tree, GFP_NOFS); 6702 extent_map_tree_init(&ei->extent_tree);
6789 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); 6703 extent_io_tree_init(&ei->io_tree, &inode->i_data);
6790 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); 6704 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
6791 mutex_init(&ei->log_mutex); 6705 mutex_init(&ei->log_mutex);
6792 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6706 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6793 INIT_LIST_HEAD(&ei->i_orphan); 6707 INIT_LIST_HEAD(&ei->i_orphan);
@@ -6851,8 +6765,8 @@ void btrfs_destroy_inode(struct inode *inode)
6851 6765
6852 spin_lock(&root->orphan_lock); 6766 spin_lock(&root->orphan_lock);
6853 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6767 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6854 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6768 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6855 inode->i_ino); 6769 (unsigned long long)btrfs_ino(inode));
6856 list_del_init(&BTRFS_I(inode)->i_orphan); 6770 list_del_init(&BTRFS_I(inode)->i_orphan);
6857 } 6771 }
6858 spin_unlock(&root->orphan_lock); 6772 spin_unlock(&root->orphan_lock);
@@ -6874,6 +6788,7 @@ void btrfs_destroy_inode(struct inode *inode)
6874 inode_tree_del(inode); 6788 inode_tree_del(inode);
6875 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6789 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6876free: 6790free:
6791 btrfs_remove_delayed_node(inode);
6877 call_rcu(&inode->i_rcu, btrfs_i_callback); 6792 call_rcu(&inode->i_rcu, btrfs_i_callback);
6878} 6793}
6879 6794
@@ -6882,7 +6797,7 @@ int btrfs_drop_inode(struct inode *inode)
6882 struct btrfs_root *root = BTRFS_I(inode)->root; 6797 struct btrfs_root *root = BTRFS_I(inode)->root;
6883 6798
6884 if (btrfs_root_refs(&root->root_item) == 0 && 6799 if (btrfs_root_refs(&root->root_item) == 0 &&
6885 root != root->fs_info->tree_root) 6800 !is_free_space_inode(root, inode))
6886 return 1; 6801 return 1;
6887 else 6802 else
6888 return generic_drop_inode(inode); 6803 return generic_drop_inode(inode);
@@ -6991,16 +6906,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6991 u64 index = 0; 6906 u64 index = 0;
6992 u64 root_objectid; 6907 u64 root_objectid;
6993 int ret; 6908 int ret;
6909 u64 old_ino = btrfs_ino(old_inode);
6994 6910
6995 if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 6911 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
6996 return -EPERM; 6912 return -EPERM;
6997 6913
6998 /* we only allow rename subvolume link between subvolumes */ 6914 /* we only allow rename subvolume link between subvolumes */
6999 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) 6915 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
7000 return -EXDEV; 6916 return -EXDEV;
7001 6917
7002 if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || 6918 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
7003 (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) 6919 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
7004 return -ENOTEMPTY; 6920 return -ENOTEMPTY;
7005 6921
7006 if (S_ISDIR(old_inode->i_mode) && new_inode && 6922 if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -7016,7 +6932,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7016 filemap_flush(old_inode->i_mapping); 6932 filemap_flush(old_inode->i_mapping);
7017 6933
7018 /* close the racy window with snapshot create/destroy ioctl */ 6934 /* close the racy window with snapshot create/destroy ioctl */
7019 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6935 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7020 down_read(&root->fs_info->subvol_sem); 6936 down_read(&root->fs_info->subvol_sem);
7021 /* 6937 /*
7022 * We want to reserve the absolute worst case amount of items. So if 6938 * We want to reserve the absolute worst case amount of items. So if
@@ -7041,15 +6957,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7041 if (ret) 6957 if (ret)
7042 goto out_fail; 6958 goto out_fail;
7043 6959
7044 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6960 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7045 /* force full log commit if subvolume involved. */ 6961 /* force full log commit if subvolume involved. */
7046 root->fs_info->last_trans_log_full_commit = trans->transid; 6962 root->fs_info->last_trans_log_full_commit = trans->transid;
7047 } else { 6963 } else {
7048 ret = btrfs_insert_inode_ref(trans, dest, 6964 ret = btrfs_insert_inode_ref(trans, dest,
7049 new_dentry->d_name.name, 6965 new_dentry->d_name.name,
7050 new_dentry->d_name.len, 6966 new_dentry->d_name.len,
7051 old_inode->i_ino, 6967 old_ino,
7052 new_dir->i_ino, index); 6968 btrfs_ino(new_dir), index);
7053 if (ret) 6969 if (ret)
7054 goto out_fail; 6970 goto out_fail;
7055 /* 6971 /*
@@ -7065,10 +6981,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7065 * make sure the inode gets flushed if it is replacing 6981 * make sure the inode gets flushed if it is replacing
7066 * something. 6982 * something.
7067 */ 6983 */
7068 if (new_inode && new_inode->i_size && 6984 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
7069 old_inode && S_ISREG(old_inode->i_mode)) {
7070 btrfs_add_ordered_operation(trans, root, old_inode); 6985 btrfs_add_ordered_operation(trans, root, old_inode);
7071 }
7072 6986
7073 old_dir->i_ctime = old_dir->i_mtime = ctime; 6987 old_dir->i_ctime = old_dir->i_mtime = ctime;
7074 new_dir->i_ctime = new_dir->i_mtime = ctime; 6988 new_dir->i_ctime = new_dir->i_mtime = ctime;
@@ -7077,7 +6991,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7077 if (old_dentry->d_parent != new_dentry->d_parent) 6991 if (old_dentry->d_parent != new_dentry->d_parent)
7078 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 6992 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
7079 6993
7080 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6994 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
7081 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 6995 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
7082 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 6996 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
7083 old_dentry->d_name.name, 6997 old_dentry->d_name.name,
@@ -7094,7 +7008,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7094 7008
7095 if (new_inode) { 7009 if (new_inode) {
7096 new_inode->i_ctime = CURRENT_TIME; 7010 new_inode->i_ctime = CURRENT_TIME;
7097 if (unlikely(new_inode->i_ino == 7011 if (unlikely(btrfs_ino(new_inode) ==
7098 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7012 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
7099 root_objectid = BTRFS_I(new_inode)->location.objectid; 7013 root_objectid = BTRFS_I(new_inode)->location.objectid;
7100 ret = btrfs_unlink_subvol(trans, dest, new_dir, 7014 ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -7122,7 +7036,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7122 new_dentry->d_name.len, 0, index); 7036 new_dentry->d_name.len, 0, index);
7123 BUG_ON(ret); 7037 BUG_ON(ret);
7124 7038
7125 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 7039 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7126 struct dentry *parent = dget_parent(new_dentry); 7040 struct dentry *parent = dget_parent(new_dentry);
7127 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7041 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7128 dput(parent); 7042 dput(parent);
@@ -7131,7 +7045,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7131out_fail: 7045out_fail:
7132 btrfs_end_transaction_throttle(trans, root); 7046 btrfs_end_transaction_throttle(trans, root);
7133out_notrans: 7047out_notrans:
7134 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7048 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
7135 up_read(&root->fs_info->subvol_sem); 7049 up_read(&root->fs_info->subvol_sem);
7136 7050
7137 return ret; 7051 return ret;
@@ -7185,58 +7099,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7185 return 0; 7099 return 0;
7186} 7100}
7187 7101
7188int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
7189 int sync)
7190{
7191 struct btrfs_inode *binode;
7192 struct inode *inode = NULL;
7193
7194 spin_lock(&root->fs_info->delalloc_lock);
7195 while (!list_empty(&root->fs_info->delalloc_inodes)) {
7196 binode = list_entry(root->fs_info->delalloc_inodes.next,
7197 struct btrfs_inode, delalloc_inodes);
7198 inode = igrab(&binode->vfs_inode);
7199 if (inode) {
7200 list_move_tail(&binode->delalloc_inodes,
7201 &root->fs_info->delalloc_inodes);
7202 break;
7203 }
7204
7205 list_del_init(&binode->delalloc_inodes);
7206 cond_resched_lock(&root->fs_info->delalloc_lock);
7207 }
7208 spin_unlock(&root->fs_info->delalloc_lock);
7209
7210 if (inode) {
7211 if (sync) {
7212 filemap_write_and_wait(inode->i_mapping);
7213 /*
7214 * We have to do this because compression doesn't
7215 * actually set PG_writeback until it submits the pages
7216 * for IO, which happens in an async thread, so we could
7217 * race and not actually wait for any writeback pages
7218 * because they've not been submitted yet. Technically
7219 * this could still be the case for the ordered stuff
7220 * since the async thread may not have started to do its
7221 * work yet. If this becomes the case then we need to
7222 * figure out a way to make sure that in writepage we
7223 * wait for any async pages to be submitted before
7224 * returning so that fdatawait does what its supposed to
7225 * do.
7226 */
7227 btrfs_wait_ordered_range(inode, 0, (u64)-1);
7228 } else {
7229 filemap_flush(inode->i_mapping);
7230 }
7231 if (delay_iput)
7232 btrfs_add_delayed_iput(inode);
7233 else
7234 iput(inode);
7235 return 1;
7236 }
7237 return 0;
7238}
7239
7240static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 7102static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7241 const char *symname) 7103 const char *symname)
7242{ 7104{
@@ -7260,9 +7122,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7260 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 7122 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
7261 return -ENAMETOOLONG; 7123 return -ENAMETOOLONG;
7262 7124
7263 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
7264 if (err)
7265 return err;
7266 /* 7125 /*
7267 * 2 items for inode item and ref 7126 * 2 items for inode item and ref
7268 * 2 items for dir items 7127 * 2 items for dir items
@@ -7274,8 +7133,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7274 7133
7275 btrfs_set_trans_block_group(trans, dir); 7134 btrfs_set_trans_block_group(trans, dir);
7276 7135
7136 err = btrfs_find_free_ino(root, &objectid);
7137 if (err)
7138 goto out_unlock;
7139
7277 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7140 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7278 dentry->d_name.len, dir->i_ino, objectid, 7141 dentry->d_name.len, btrfs_ino(dir), objectid,
7279 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7142 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
7280 &index); 7143 &index);
7281 if (IS_ERR(inode)) { 7144 if (IS_ERR(inode)) {
@@ -7307,7 +7170,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7307 7170
7308 path = btrfs_alloc_path(); 7171 path = btrfs_alloc_path();
7309 BUG_ON(!path); 7172 BUG_ON(!path);
7310 key.objectid = inode->i_ino; 7173 key.objectid = btrfs_ino(inode);
7311 key.offset = 0; 7174 key.offset = 0;
7312 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7175 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
7313 datasize = btrfs_file_extent_calc_inline_size(name_len); 7176 datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -7315,6 +7178,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7315 datasize); 7178 datasize);
7316 if (err) { 7179 if (err) {
7317 drop_inode = 1; 7180 drop_inode = 1;
7181 btrfs_free_path(path);
7318 goto out_unlock; 7182 goto out_unlock;
7319 } 7183 }
7320 leaf = path->nodes[0]; 7184 leaf = path->nodes[0];
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2616f7ed4799..85e818ce00c5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,6 +50,7 @@
50#include "print-tree.h" 50#include "print-tree.h"
51#include "volumes.h" 51#include "volumes.h"
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h"
53 54
54/* Mask out flags that are inappropriate for the given type of inode. */ 55/* Mask out flags that are inappropriate for the given type of inode. */
55static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -281,8 +282,9 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
281 if (!capable(CAP_SYS_ADMIN)) 282 if (!capable(CAP_SYS_ADMIN))
282 return -EPERM; 283 return -EPERM;
283 284
284 mutex_lock(&fs_info->fs_devices->device_list_mutex); 285 rcu_read_lock();
285 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 286 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
287 dev_list) {
286 if (!device->bdev) 288 if (!device->bdev)
287 continue; 289 continue;
288 q = bdev_get_queue(device->bdev); 290 q = bdev_get_queue(device->bdev);
@@ -292,7 +294,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
292 minlen); 294 minlen);
293 } 295 }
294 } 296 }
295 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 297 rcu_read_unlock();
296 if (!num_devices) 298 if (!num_devices)
297 return -EOPNOTSUPP; 299 return -EOPNOTSUPP;
298 300
@@ -329,8 +331,7 @@ static noinline int create_subvol(struct btrfs_root *root,
329 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 331 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
330 u64 index = 0; 332 u64 index = 0;
331 333
332 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
333 0, &objectid);
334 if (ret) { 335 if (ret) {
335 dput(parent); 336 dput(parent);
336 return ret; 337 return ret;
@@ -422,7 +423,7 @@ static noinline int create_subvol(struct btrfs_root *root,
422 BUG_ON(ret); 423 BUG_ON(ret);
423 424
424 ret = btrfs_insert_dir_item(trans, root, 425 ret = btrfs_insert_dir_item(trans, root,
425 name, namelen, dir->i_ino, &key, 426 name, namelen, dir, &key,
426 BTRFS_FT_DIR, index); 427 BTRFS_FT_DIR, index);
427 if (ret) 428 if (ret)
428 goto fail; 429 goto fail;
@@ -433,7 +434,7 @@ static noinline int create_subvol(struct btrfs_root *root,
433 434
434 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 435 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
435 objectid, root->root_key.objectid, 436 objectid, root->root_key.objectid,
436 dir->i_ino, index, name, namelen); 437 btrfs_ino(dir), index, name, namelen);
437 438
438 BUG_ON(ret); 439 BUG_ON(ret);
439 440
@@ -655,6 +656,106 @@ out_unlock:
655 return error; 656 return error;
656} 657}
657 658
659/*
660 * When we're defragging a range, we don't want to kick it off again
661 * if it is really just waiting for delalloc to send it down.
662 * If we find a nice big extent or delalloc range for the bytes in the
663 * file you want to defrag, we return 0 to let you know to skip this
664 * part of the file
665 */
666static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
667{
668 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
669 struct extent_map *em = NULL;
670 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
671 u64 end;
672
673 read_lock(&em_tree->lock);
674 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
675 read_unlock(&em_tree->lock);
676
677 if (em) {
678 end = extent_map_end(em);
679 free_extent_map(em);
680 if (end - offset > thresh)
681 return 0;
682 }
683 /* if we already have a nice delalloc here, just stop */
684 thresh /= 2;
685 end = count_range_bits(io_tree, &offset, offset + thresh,
686 thresh, EXTENT_DELALLOC, 1);
687 if (end >= thresh)
688 return 0;
689 return 1;
690}
691
692/*
693 * helper function to walk through a file and find extents
694 * newer than a specific transid, and smaller than thresh.
695 *
696 * This is used by the defragging code to find new and small
697 * extents
698 */
699static int find_new_extents(struct btrfs_root *root,
700 struct inode *inode, u64 newer_than,
701 u64 *off, int thresh)
702{
703 struct btrfs_path *path;
704 struct btrfs_key min_key;
705 struct btrfs_key max_key;
706 struct extent_buffer *leaf;
707 struct btrfs_file_extent_item *extent;
708 int type;
709 int ret;
710
711 path = btrfs_alloc_path();
712 if (!path)
713 return -ENOMEM;
714
715 min_key.objectid = inode->i_ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off;
718
719 max_key.objectid = inode->i_ino;
720 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1;
722
723 path->keep_locks = 1;
724
725 while(1) {
726 ret = btrfs_search_forward(root, &min_key, &max_key,
727 path, 0, newer_than);
728 if (ret != 0)
729 goto none;
730 if (min_key.objectid != inode->i_ino)
731 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none;
734
735 leaf = path->nodes[0];
736 extent = btrfs_item_ptr(leaf, path->slots[0],
737 struct btrfs_file_extent_item);
738
739 type = btrfs_file_extent_type(leaf, extent);
740 if (type == BTRFS_FILE_EXTENT_REG &&
741 btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
742 check_defrag_in_cache(inode, min_key.offset, thresh)) {
743 *off = min_key.offset;
744 btrfs_free_path(path);
745 return 0;
746 }
747
748 if (min_key.offset == (u64)-1)
749 goto none;
750
751 min_key.offset++;
752 btrfs_release_path(path);
753 }
754none:
755 btrfs_free_path(path);
756 return -ENOENT;
757}
758
658static int should_defrag_range(struct inode *inode, u64 start, u64 len, 759static int should_defrag_range(struct inode *inode, u64 start, u64 len,
659 int thresh, u64 *last_len, u64 *skip, 760 int thresh, u64 *last_len, u64 *skip,
660 u64 *defrag_end) 761 u64 *defrag_end)
@@ -664,10 +765,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
664 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 765 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
665 int ret = 1; 766 int ret = 1;
666 767
667
668 if (thresh == 0)
669 thresh = 256 * 1024;
670
671 /* 768 /*
672 * make sure that once we start defragging and extent, we keep on 769 * make sure that once we start defragging and extent, we keep on
673 * defragging it 770 * defragging it
@@ -726,27 +823,176 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
726 return ret; 823 return ret;
727} 824}
728 825
729static int btrfs_defrag_file(struct file *file, 826/*
730 struct btrfs_ioctl_defrag_range_args *range) 827 * it doesn't do much good to defrag one or two pages
828 * at a time. This pulls in a nice chunk of pages
829 * to COW and defrag.
830 *
831 * It also makes sure the delalloc code has enough
832 * dirty data to avoid making new small extents as part
833 * of the defrag
834 *
835 * It's a good idea to start RA on this range
836 * before calling this.
837 */
838static int cluster_pages_for_defrag(struct inode *inode,
839 struct page **pages,
840 unsigned long start_index,
841 int num_pages)
731{ 842{
732 struct inode *inode = fdentry(file)->d_inode; 843 unsigned long file_end;
733 struct btrfs_root *root = BTRFS_I(inode)->root; 844 u64 isize = i_size_read(inode);
734 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 845 u64 page_start;
846 u64 page_end;
847 int ret;
848 int i;
849 int i_done;
735 struct btrfs_ordered_extent *ordered; 850 struct btrfs_ordered_extent *ordered;
736 struct page *page; 851 struct extent_state *cached_state = NULL;
852
853 if (isize == 0)
854 return 0;
855 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
856
857 ret = btrfs_delalloc_reserve_space(inode,
858 num_pages << PAGE_CACHE_SHIFT);
859 if (ret)
860 return ret;
861again:
862 ret = 0;
863 i_done = 0;
864
865 /* step one, lock all the pages */
866 for (i = 0; i < num_pages; i++) {
867 struct page *page;
868 page = grab_cache_page(inode->i_mapping,
869 start_index + i);
870 if (!page)
871 break;
872
873 if (!PageUptodate(page)) {
874 btrfs_readpage(NULL, page);
875 lock_page(page);
876 if (!PageUptodate(page)) {
877 unlock_page(page);
878 page_cache_release(page);
879 ret = -EIO;
880 break;
881 }
882 }
883 isize = i_size_read(inode);
884 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
885 if (!isize || page->index > file_end ||
886 page->mapping != inode->i_mapping) {
887 /* whoops, we blew past eof, skip this page */
888 unlock_page(page);
889 page_cache_release(page);
890 break;
891 }
892 pages[i] = page;
893 i_done++;
894 }
895 if (!i_done || ret)
896 goto out;
897
898 if (!(inode->i_sb->s_flags & MS_ACTIVE))
899 goto out;
900
901 /*
902 * so now we have a nice long stream of locked
903 * and up to date pages, lets wait on them
904 */
905 for (i = 0; i < i_done; i++)
906 wait_on_page_writeback(pages[i]);
907
908 page_start = page_offset(pages[0]);
909 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
910
911 lock_extent_bits(&BTRFS_I(inode)->io_tree,
912 page_start, page_end - 1, 0, &cached_state,
913 GFP_NOFS);
914 ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
915 if (ordered &&
916 ordered->file_offset + ordered->len > page_start &&
917 ordered->file_offset < page_end) {
918 btrfs_put_ordered_extent(ordered);
919 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
920 page_start, page_end - 1,
921 &cached_state, GFP_NOFS);
922 for (i = 0; i < i_done; i++) {
923 unlock_page(pages[i]);
924 page_cache_release(pages[i]);
925 }
926 btrfs_wait_ordered_range(inode, page_start,
927 page_end - page_start);
928 goto again;
929 }
930 if (ordered)
931 btrfs_put_ordered_extent(ordered);
932
933 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
934 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
935 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
936 GFP_NOFS);
937
938 if (i_done != num_pages) {
939 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
940 btrfs_delalloc_release_space(inode,
941 (num_pages - i_done) << PAGE_CACHE_SHIFT);
942 }
943
944
945 btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
946 &cached_state);
947
948 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
949 page_start, page_end - 1, &cached_state,
950 GFP_NOFS);
951
952 for (i = 0; i < i_done; i++) {
953 clear_page_dirty_for_io(pages[i]);
954 ClearPageChecked(pages[i]);
955 set_page_extent_mapped(pages[i]);
956 set_page_dirty(pages[i]);
957 unlock_page(pages[i]);
958 page_cache_release(pages[i]);
959 }
960 return i_done;
961out:
962 for (i = 0; i < i_done; i++) {
963 unlock_page(pages[i]);
964 page_cache_release(pages[i]);
965 }
966 btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT);
967 return ret;
968
969}
970
971int btrfs_defrag_file(struct inode *inode, struct file *file,
972 struct btrfs_ioctl_defrag_range_args *range,
973 u64 newer_than, unsigned long max_to_defrag)
974{
975 struct btrfs_root *root = BTRFS_I(inode)->root;
737 struct btrfs_super_block *disk_super; 976 struct btrfs_super_block *disk_super;
977 struct file_ra_state *ra = NULL;
738 unsigned long last_index; 978 unsigned long last_index;
739 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
740 unsigned long total_read = 0;
741 u64 features; 979 u64 features;
742 u64 page_start;
743 u64 page_end;
744 u64 last_len = 0; 980 u64 last_len = 0;
745 u64 skip = 0; 981 u64 skip = 0;
746 u64 defrag_end = 0; 982 u64 defrag_end = 0;
983 u64 newer_off = range->start;
984 int newer_left = 0;
747 unsigned long i; 985 unsigned long i;
748 int ret; 986 int ret;
987 int defrag_count = 0;
749 int compress_type = BTRFS_COMPRESS_ZLIB; 988 int compress_type = BTRFS_COMPRESS_ZLIB;
989 int extent_thresh = range->extent_thresh;
990 int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
991 u64 new_align = ~((u64)128 * 1024 - 1);
992 struct page **pages = NULL;
993
994 if (extent_thresh == 0)
995 extent_thresh = 256 * 1024;
750 996
751 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 997 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
752 if (range->compress_type > BTRFS_COMPRESS_TYPES) 998 if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -758,6 +1004,27 @@ static int btrfs_defrag_file(struct file *file,
758 if (inode->i_size == 0) 1004 if (inode->i_size == 0)
759 return 0; 1005 return 0;
760 1006
1007 /*
1008 * if we were not given a file, allocate a readahead
1009 * context
1010 */
1011 if (!file) {
1012 ra = kzalloc(sizeof(*ra), GFP_NOFS);
1013 if (!ra)
1014 return -ENOMEM;
1015 file_ra_state_init(ra, inode->i_mapping);
1016 } else {
1017 ra = &file->f_ra;
1018 }
1019
1020 pages = kmalloc(sizeof(struct page *) * newer_cluster,
1021 GFP_NOFS);
1022 if (!pages) {
1023 ret = -ENOMEM;
1024 goto out_ra;
1025 }
1026
1027 /* find the last page to defrag */
761 if (range->start + range->len > range->start) { 1028 if (range->start + range->len > range->start) {
762 last_index = min_t(u64, inode->i_size - 1, 1029 last_index = min_t(u64, inode->i_size - 1,
763 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1030 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
@@ -765,11 +1032,37 @@ static int btrfs_defrag_file(struct file *file,
765 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 1032 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
766 } 1033 }
767 1034
768 i = range->start >> PAGE_CACHE_SHIFT; 1035 if (newer_than) {
769 while (i <= last_index) { 1036 ret = find_new_extents(root, inode, newer_than,
770 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1037 &newer_off, 64 * 1024);
1038 if (!ret) {
1039 range->start = newer_off;
1040 /*
1041 * we always align our defrag to help keep
1042 * the extents in the file evenly spaced
1043 */
1044 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1045 newer_left = newer_cluster;
1046 } else
1047 goto out_ra;
1048 } else {
1049 i = range->start >> PAGE_CACHE_SHIFT;
1050 }
1051 if (!max_to_defrag)
1052 max_to_defrag = last_index - 1;
1053
1054 while (i <= last_index && defrag_count < max_to_defrag) {
1055 /*
1056 * make sure we stop running if someone unmounts
1057 * the FS
1058 */
1059 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1060 break;
1061
1062 if (!newer_than &&
1063 !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
771 PAGE_CACHE_SIZE, 1064 PAGE_CACHE_SIZE,
772 range->extent_thresh, 1065 extent_thresh,
773 &last_len, &skip, 1066 &last_len, &skip,
774 &defrag_end)) { 1067 &defrag_end)) {
775 unsigned long next; 1068 unsigned long next;
@@ -781,92 +1074,39 @@ static int btrfs_defrag_file(struct file *file,
781 i = max(i + 1, next); 1074 i = max(i + 1, next);
782 continue; 1075 continue;
783 } 1076 }
784
785 if (total_read % ra_pages == 0) {
786 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
787 min(last_index, i + ra_pages - 1));
788 }
789 total_read++;
790 mutex_lock(&inode->i_mutex);
791 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1077 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
792 BTRFS_I(inode)->force_compress = compress_type; 1078 BTRFS_I(inode)->force_compress = compress_type;
793 1079
794 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 1080 btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster);
795 if (ret)
796 goto err_unlock;
797again:
798 if (inode->i_size == 0 ||
799 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
800 ret = 0;
801 goto err_reservations;
802 }
803 1081
804 page = grab_cache_page(inode->i_mapping, i); 1082 ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster);
805 if (!page) { 1083 if (ret < 0)
806 ret = -ENOMEM; 1084 goto out_ra;
807 goto err_reservations;
808 }
809
810 if (!PageUptodate(page)) {
811 btrfs_readpage(NULL, page);
812 lock_page(page);
813 if (!PageUptodate(page)) {
814 unlock_page(page);
815 page_cache_release(page);
816 ret = -EIO;
817 goto err_reservations;
818 }
819 }
820
821 if (page->mapping != inode->i_mapping) {
822 unlock_page(page);
823 page_cache_release(page);
824 goto again;
825 }
826 1085
827 wait_on_page_writeback(page); 1086 defrag_count += ret;
1087 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1088 i += ret;
828 1089
829 if (PageDirty(page)) { 1090 if (newer_than) {
830 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1091 if (newer_off == (u64)-1)
831 goto loop_unlock; 1092 break;
832 }
833
834 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
835 page_end = page_start + PAGE_CACHE_SIZE - 1;
836 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
837 1093
838 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1094 newer_off = max(newer_off + 1,
839 if (ordered) { 1095 (u64)i << PAGE_CACHE_SHIFT);
840 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1096
841 unlock_page(page); 1097 ret = find_new_extents(root, inode,
842 page_cache_release(page); 1098 newer_than, &newer_off,
843 btrfs_start_ordered_extent(inode, ordered, 1); 1099 64 * 1024);
844 btrfs_put_ordered_extent(ordered); 1100 if (!ret) {
845 goto again; 1101 range->start = newer_off;
1102 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1103 newer_left = newer_cluster;
1104 } else {
1105 break;
1106 }
1107 } else {
1108 i++;
846 } 1109 }
847 set_page_extent_mapped(page);
848
849 /*
850 * this makes sure page_mkwrite is called on the
851 * page if it is dirtied again later
852 */
853 clear_page_dirty_for_io(page);
854 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
855 page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
856 EXTENT_DO_ACCOUNTING, GFP_NOFS);
857
858 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
859 ClearPageChecked(page);
860 set_page_dirty(page);
861 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
862
863loop_unlock:
864 unlock_page(page);
865 page_cache_release(page);
866 mutex_unlock(&inode->i_mutex);
867
868 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
869 i++;
870 } 1110 }
871 1111
872 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 1112 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
@@ -898,12 +1138,14 @@ loop_unlock:
898 btrfs_set_super_incompat_flags(disk_super, features); 1138 btrfs_set_super_incompat_flags(disk_super, features);
899 } 1139 }
900 1140
901 return 0; 1141 if (!file)
1142 kfree(ra);
1143 return defrag_count;
902 1144
903err_reservations: 1145out_ra:
904 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1146 if (!file)
905err_unlock: 1147 kfree(ra);
906 mutex_unlock(&inode->i_mutex); 1148 kfree(pages);
907 return ret; 1149 return ret;
908} 1150}
909 1151
@@ -1129,7 +1371,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1129 int ret = 0; 1371 int ret = 0;
1130 u64 flags = 0; 1372 u64 flags = 0;
1131 1373
1132 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1374 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1133 return -EINVAL; 1375 return -EINVAL;
1134 1376
1135 down_read(&root->fs_info->subvol_sem); 1377 down_read(&root->fs_info->subvol_sem);
@@ -1156,7 +1398,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1156 if (root->fs_info->sb->s_flags & MS_RDONLY) 1398 if (root->fs_info->sb->s_flags & MS_RDONLY)
1157 return -EROFS; 1399 return -EROFS;
1158 1400
1159 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1401 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1160 return -EINVAL; 1402 return -EINVAL;
1161 1403
1162 if (copy_from_user(&flags, arg, sizeof(flags))) 1404 if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1279,7 +1521,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1279 int nritems; 1521 int nritems;
1280 int i; 1522 int i;
1281 int slot; 1523 int slot;
1282 int found = 0;
1283 int ret = 0; 1524 int ret = 0;
1284 1525
1285 leaf = path->nodes[0]; 1526 leaf = path->nodes[0];
@@ -1326,7 +1567,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1326 item_off, item_len); 1567 item_off, item_len);
1327 *sk_offset += item_len; 1568 *sk_offset += item_len;
1328 } 1569 }
1329 found++; 1570 (*num_found)++;
1330 1571
1331 if (*num_found >= sk->nr_items) 1572 if (*num_found >= sk->nr_items)
1332 break; 1573 break;
@@ -1345,7 +1586,6 @@ advance_key:
1345 } else 1586 } else
1346 ret = 1; 1587 ret = 1;
1347overflow: 1588overflow:
1348 *num_found += found;
1349 return ret; 1589 return ret;
1350} 1590}
1351 1591
@@ -1402,7 +1642,7 @@ static noinline int search_ioctl(struct inode *inode,
1402 } 1642 }
1403 ret = copy_to_sk(root, path, &key, sk, args->buf, 1643 ret = copy_to_sk(root, path, &key, sk, args->buf,
1404 &sk_offset, &num_found); 1644 &sk_offset, &num_found);
1405 btrfs_release_path(root, path); 1645 btrfs_release_path(path);
1406 if (ret || num_found >= sk->nr_items) 1646 if (ret || num_found >= sk->nr_items)
1407 break; 1647 break;
1408 1648
@@ -1509,7 +1749,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1509 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1749 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1510 break; 1750 break;
1511 1751
1512 btrfs_release_path(root, path); 1752 btrfs_release_path(path);
1513 key.objectid = key.offset; 1753 key.objectid = key.offset;
1514 key.offset = (u64)-1; 1754 key.offset = (u64)-1;
1515 dirid = key.objectid; 1755 dirid = key.objectid;
@@ -1639,7 +1879,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1639 goto out_dput; 1879 goto out_dput;
1640 } 1880 }
1641 1881
1642 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1882 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1643 err = -EINVAL; 1883 err = -EINVAL;
1644 goto out_dput; 1884 goto out_dput;
1645 } 1885 }
@@ -1757,7 +1997,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1757 /* the rest are all set to zero by kzalloc */ 1997 /* the rest are all set to zero by kzalloc */
1758 range->len = (u64)-1; 1998 range->len = (u64)-1;
1759 } 1999 }
1760 ret = btrfs_defrag_file(file, range); 2000 ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2001 range, 0, 0);
2002 if (ret > 0)
2003 ret = 0;
1761 kfree(range); 2004 kfree(range);
1762 break; 2005 break;
1763 default: 2006 default:
@@ -1809,6 +2052,75 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
1809 return ret; 2052 return ret;
1810} 2053}
1811 2054
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{
2057 struct btrfs_ioctl_fs_info_args fi_args;
2058 struct btrfs_device *device;
2059 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2061
2062 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM;
2064
2065 fi_args.num_devices = fs_devices->num_devices;
2066 fi_args.max_id = 0;
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
2068
2069 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id)
2072 fi_args.max_id = device->devid;
2073 }
2074 mutex_unlock(&fs_devices->device_list_mutex);
2075
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
2077 return -EFAULT;
2078
2079 return 0;
2080}
2081
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2083{
2084 struct btrfs_ioctl_dev_info_args *di_args;
2085 struct btrfs_device *dev;
2086 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2087 int ret = 0;
2088 char *s_uuid = NULL;
2089 char empty_uuid[BTRFS_UUID_SIZE] = {0};
2090
2091 if (!capable(CAP_SYS_ADMIN))
2092 return -EPERM;
2093
2094 di_args = memdup_user(arg, sizeof(*di_args));
2095 if (IS_ERR(di_args))
2096 return PTR_ERR(di_args);
2097
2098 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2099 s_uuid = di_args->uuid;
2100
2101 mutex_lock(&fs_devices->device_list_mutex);
2102 dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2103 mutex_unlock(&fs_devices->device_list_mutex);
2104
2105 if (!dev) {
2106 ret = -ENODEV;
2107 goto out;
2108 }
2109
2110 di_args->devid = dev->devid;
2111 di_args->bytes_used = dev->bytes_used;
2112 di_args->total_bytes = dev->total_bytes;
2113 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2114 strncpy(di_args->path, dev->name, sizeof(di_args->path));
2115
2116out:
2117 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2118 ret = -EFAULT;
2119
2120 kfree(di_args);
2121 return ret;
2122}
2123
1812static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2124static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1813 u64 off, u64 olen, u64 destoff) 2125 u64 off, u64 olen, u64 destoff)
1814{ 2126{
@@ -1925,7 +2237,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1925 } 2237 }
1926 2238
1927 /* clone data */ 2239 /* clone data */
1928 key.objectid = src->i_ino; 2240 key.objectid = btrfs_ino(src);
1929 key.type = BTRFS_EXTENT_DATA_KEY; 2241 key.type = BTRFS_EXTENT_DATA_KEY;
1930 key.offset = 0; 2242 key.offset = 0;
1931 2243
@@ -1952,7 +2264,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1952 2264
1953 btrfs_item_key_to_cpu(leaf, &key, slot); 2265 btrfs_item_key_to_cpu(leaf, &key, slot);
1954 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2266 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1955 key.objectid != src->i_ino) 2267 key.objectid != btrfs_ino(src))
1956 break; 2268 break;
1957 2269
1958 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2270 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
@@ -1988,14 +2300,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1988 datal = btrfs_file_extent_ram_bytes(leaf, 2300 datal = btrfs_file_extent_ram_bytes(leaf,
1989 extent); 2301 extent);
1990 } 2302 }
1991 btrfs_release_path(root, path); 2303 btrfs_release_path(path);
1992 2304
1993 if (key.offset + datal <= off || 2305 if (key.offset + datal <= off ||
1994 key.offset >= off+len) 2306 key.offset >= off+len)
1995 goto next; 2307 goto next;
1996 2308
1997 memcpy(&new_key, &key, sizeof(new_key)); 2309 memcpy(&new_key, &key, sizeof(new_key));
1998 new_key.objectid = inode->i_ino; 2310 new_key.objectid = btrfs_ino(inode);
1999 if (off <= key.offset) 2311 if (off <= key.offset)
2000 new_key.offset = key.offset + destoff - off; 2312 new_key.offset = key.offset + destoff - off;
2001 else 2313 else
@@ -2049,7 +2361,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2049 ret = btrfs_inc_extent_ref(trans, root, 2361 ret = btrfs_inc_extent_ref(trans, root,
2050 disko, diskl, 0, 2362 disko, diskl, 0,
2051 root->root_key.objectid, 2363 root->root_key.objectid,
2052 inode->i_ino, 2364 btrfs_ino(inode),
2053 new_key.offset - datao); 2365 new_key.offset - datao);
2054 BUG_ON(ret); 2366 BUG_ON(ret);
2055 } 2367 }
@@ -2098,7 +2410,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2098 } 2410 }
2099 2411
2100 btrfs_mark_buffer_dirty(leaf); 2412 btrfs_mark_buffer_dirty(leaf);
2101 btrfs_release_path(root, path); 2413 btrfs_release_path(path);
2102 2414
2103 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2415 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2104 2416
@@ -2119,12 +2431,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2119 btrfs_end_transaction(trans, root); 2431 btrfs_end_transaction(trans, root);
2120 } 2432 }
2121next: 2433next:
2122 btrfs_release_path(root, path); 2434 btrfs_release_path(path);
2123 key.offset++; 2435 key.offset++;
2124 } 2436 }
2125 ret = 0; 2437 ret = 0;
2126out: 2438out:
2127 btrfs_release_path(root, path); 2439 btrfs_release_path(path);
2128 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2440 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
2129out_unlock: 2441out_unlock:
2130 mutex_unlock(&src->i_mutex); 2442 mutex_unlock(&src->i_mutex);
@@ -2471,6 +2783,58 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2471 return btrfs_wait_for_commit(root, transid); 2783 return btrfs_wait_for_commit(root, transid);
2472} 2784}
2473 2785
2786static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
2787{
2788 int ret;
2789 struct btrfs_ioctl_scrub_args *sa;
2790
2791 if (!capable(CAP_SYS_ADMIN))
2792 return -EPERM;
2793
2794 sa = memdup_user(arg, sizeof(*sa));
2795 if (IS_ERR(sa))
2796 return PTR_ERR(sa);
2797
2798 ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
2799 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
2800
2801 if (copy_to_user(arg, sa, sizeof(*sa)))
2802 ret = -EFAULT;
2803
2804 kfree(sa);
2805 return ret;
2806}
2807
2808static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
2809{
2810 if (!capable(CAP_SYS_ADMIN))
2811 return -EPERM;
2812
2813 return btrfs_scrub_cancel(root);
2814}
2815
2816static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
2817 void __user *arg)
2818{
2819 struct btrfs_ioctl_scrub_args *sa;
2820 int ret;
2821
2822 if (!capable(CAP_SYS_ADMIN))
2823 return -EPERM;
2824
2825 sa = memdup_user(arg, sizeof(*sa));
2826 if (IS_ERR(sa))
2827 return PTR_ERR(sa);
2828
2829 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
2830
2831 if (copy_to_user(arg, sa, sizeof(*sa)))
2832 ret = -EFAULT;
2833
2834 kfree(sa);
2835 return ret;
2836}
2837
2474long btrfs_ioctl(struct file *file, unsigned int 2838long btrfs_ioctl(struct file *file, unsigned int
2475 cmd, unsigned long arg) 2839 cmd, unsigned long arg)
2476{ 2840{
@@ -2510,6 +2874,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2510 return btrfs_ioctl_add_dev(root, argp); 2874 return btrfs_ioctl_add_dev(root, argp);
2511 case BTRFS_IOC_RM_DEV: 2875 case BTRFS_IOC_RM_DEV:
2512 return btrfs_ioctl_rm_dev(root, argp); 2876 return btrfs_ioctl_rm_dev(root, argp);
2877 case BTRFS_IOC_FS_INFO:
2878 return btrfs_ioctl_fs_info(root, argp);
2879 case BTRFS_IOC_DEV_INFO:
2880 return btrfs_ioctl_dev_info(root, argp);
2513 case BTRFS_IOC_BALANCE: 2881 case BTRFS_IOC_BALANCE:
2514 return btrfs_balance(root->fs_info->dev_root); 2882 return btrfs_balance(root->fs_info->dev_root);
2515 case BTRFS_IOC_CLONE: 2883 case BTRFS_IOC_CLONE:
@@ -2533,6 +2901,12 @@ long btrfs_ioctl(struct file *file, unsigned int
2533 return btrfs_ioctl_start_sync(file, argp); 2901 return btrfs_ioctl_start_sync(file, argp);
2534 case BTRFS_IOC_WAIT_SYNC: 2902 case BTRFS_IOC_WAIT_SYNC:
2535 return btrfs_ioctl_wait_sync(file, argp); 2903 return btrfs_ioctl_wait_sync(file, argp);
2904 case BTRFS_IOC_SCRUB:
2905 return btrfs_ioctl_scrub(root, argp);
2906 case BTRFS_IOC_SCRUB_CANCEL:
2907 return btrfs_ioctl_scrub_cancel(root, argp);
2908 case BTRFS_IOC_SCRUB_PROGRESS:
2909 return btrfs_ioctl_scrub_progress(root, argp);
2536 } 2910 }
2537 2911
2538 return -ENOTTY; 2912 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8fb382167b13..ad1ea789fcb4 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -32,6 +32,8 @@ struct btrfs_ioctl_vol_args {
32 32
33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34#define BTRFS_SUBVOL_RDONLY (1ULL << 1) 34#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
35#define BTRFS_FSID_SIZE 16
36#define BTRFS_UUID_SIZE 16
35 37
36#define BTRFS_SUBVOL_NAME_MAX 4039 38#define BTRFS_SUBVOL_NAME_MAX 4039
37struct btrfs_ioctl_vol_args_v2 { 39struct btrfs_ioctl_vol_args_v2 {
@@ -42,6 +44,71 @@ struct btrfs_ioctl_vol_args_v2 {
42 char name[BTRFS_SUBVOL_NAME_MAX + 1]; 44 char name[BTRFS_SUBVOL_NAME_MAX + 1];
43}; 45};
44 46
47/*
48 * structure to report errors and progress to userspace, either as a
49 * result of a finished scrub, a canceled scrub or a progress inquiry
50 */
51struct btrfs_scrub_progress {
52 __u64 data_extents_scrubbed; /* # of data extents scrubbed */
53 __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
54 __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
55 __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
56 __u64 read_errors; /* # of read errors encountered (EIO) */
57 __u64 csum_errors; /* # of failed csum checks */
58 __u64 verify_errors; /* # of occurences, where the metadata
59 * of a tree block did not match the
60 * expected values, like generation or
61 * logical */
62 __u64 no_csum; /* # of 4k data block for which no csum
63 * is present, probably the result of
64 * data written with nodatasum */
65 __u64 csum_discards; /* # of csum for which no data was found
66 * in the extent tree. */
67 __u64 super_errors; /* # of bad super blocks encountered */
68 __u64 malloc_errors; /* # of internal kmalloc errors. These
69 * will likely cause an incomplete
70 * scrub */
71 __u64 uncorrectable_errors; /* # of errors where either no intact
72 * copy was found or the writeback
73 * failed */
74 __u64 corrected_errors; /* # of errors corrected */
75 __u64 last_physical; /* last physical address scrubbed. In
76 * case a scrub was aborted, this can
77 * be used to restart the scrub */
78 __u64 unverified_errors; /* # of occurences where a read for a
79 * full (64k) bio failed, but the re-
80 * check succeeded for each 4k piece.
81 * Intermittent error. */
82};
83
84#define BTRFS_SCRUB_READONLY 1
85struct btrfs_ioctl_scrub_args {
86 __u64 devid; /* in */
87 __u64 start; /* in */
88 __u64 end; /* in */
89 __u64 flags; /* in */
90 struct btrfs_scrub_progress progress; /* out */
91 /* pad to 1k */
92 __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
93};
94
95#define BTRFS_DEVICE_PATH_NAME_MAX 1024
96struct btrfs_ioctl_dev_info_args {
97 __u64 devid; /* in/out */
98 __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
99 __u64 bytes_used; /* out */
100 __u64 total_bytes; /* out */
101 __u64 unused[379]; /* pad to 4k */
102 __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
103};
104
105struct btrfs_ioctl_fs_info_args {
106 __u64 max_id; /* out */
107 __u64 num_devices; /* out */
108 __u8 fsid[BTRFS_FSID_SIZE]; /* out */
109 __u64 reserved[124]; /* pad to 1k */
110};
111
45#define BTRFS_INO_LOOKUP_PATH_MAX 4080 112#define BTRFS_INO_LOOKUP_PATH_MAX 4080
46struct btrfs_ioctl_ino_lookup_args { 113struct btrfs_ioctl_ino_lookup_args {
47 __u64 treeid; 114 __u64 treeid;
@@ -114,37 +181,6 @@ struct btrfs_ioctl_clone_range_args {
114#define BTRFS_DEFRAG_RANGE_COMPRESS 1 181#define BTRFS_DEFRAG_RANGE_COMPRESS 1
115#define BTRFS_DEFRAG_RANGE_START_IO 2 182#define BTRFS_DEFRAG_RANGE_START_IO 2
116 183
117struct btrfs_ioctl_defrag_range_args {
118 /* start of the defrag operation */
119 __u64 start;
120
121 /* number of bytes to defrag, use (u64)-1 to say all */
122 __u64 len;
123
124 /*
125 * flags for the operation, which can include turning
126 * on compression for this one defrag
127 */
128 __u64 flags;
129
130 /*
131 * any extent bigger than this will be considered
132 * already defragged. Use 0 to take the kernel default
133 * Use 1 to say every single extent must be rewritten
134 */
135 __u32 extent_thresh;
136
137 /*
138 * which compression method to use if turning on compression
139 * for this defrag operation. If unspecified, zlib will
140 * be used
141 */
142 __u32 compress_type;
143
144 /* spare for later */
145 __u32 unused[4];
146};
147
148struct btrfs_ioctl_space_info { 184struct btrfs_ioctl_space_info {
149 __u64 flags; 185 __u64 flags;
150 __u64 total_bytes; 186 __u64 total_bytes;
@@ -203,4 +239,13 @@ struct btrfs_ioctl_space_args {
203 struct btrfs_ioctl_vol_args_v2) 239 struct btrfs_ioctl_vol_args_v2)
204#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 240#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
205#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 241#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
242#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
243 struct btrfs_ioctl_scrub_args)
244#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
245#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
246 struct btrfs_ioctl_scrub_args)
247#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
248 struct btrfs_ioctl_dev_info_args)
249#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
250 struct btrfs_ioctl_fs_info_args)
206#endif 251#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 6151f2ea38bb..66fa43dc3f0f 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -185,31 +185,6 @@ sleep:
185 return 0; 185 return 0;
186} 186}
187 187
188/*
189 * Very quick trylock, this does not spin or schedule. It returns
190 * 1 with the spinlock held if it was able to take the lock, or it
191 * returns zero if it was unable to take the lock.
192 *
193 * After this call, scheduling is not safe without first calling
194 * btrfs_set_lock_blocking()
195 */
196int btrfs_try_tree_lock(struct extent_buffer *eb)
197{
198 if (spin_trylock(&eb->lock)) {
199 if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
200 /*
201 * we've got the spinlock, but the real owner is
202 * blocking. Drop the spinlock and return failure
203 */
204 spin_unlock(&eb->lock);
205 return 0;
206 }
207 return 1;
208 }
209 /* someone else has the spinlock giveup */
210 return 0;
211}
212
213int btrfs_tree_unlock(struct extent_buffer *eb) 188int btrfs_tree_unlock(struct extent_buffer *eb)
214{ 189{
215 /* 190 /*
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6c4ce457168c..5c33a560a2f1 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,8 +21,6 @@
21 21
22int btrfs_tree_lock(struct extent_buffer *eb); 22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb); 23int btrfs_tree_unlock(struct extent_buffer *eb);
24
25int btrfs_try_tree_lock(struct extent_buffer *eb);
26int btrfs_try_spin_lock(struct extent_buffer *eb); 24int btrfs_try_spin_lock(struct extent_buffer *eb);
27 25
28void btrfs_set_lock_blocking(struct extent_buffer *eb); 26void btrfs_set_lock_blocking(struct extent_buffer *eb);
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index a97314cf6bd6..82d569cb6267 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -23,56 +23,6 @@
23#include "ref-cache.h" 23#include "ref-cache.h"
24#include "transaction.h" 24#include "transaction.h"
25 25
26/*
27 * leaf refs are used to cache the information about which extents
28 * a given leaf has references on. This allows us to process that leaf
29 * in btrfs_drop_snapshot without needing to read it back from disk.
30 */
31
32/*
33 * kmalloc a leaf reference struct and update the counters for the
34 * total ref cache size
35 */
36struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
37 int nr_extents)
38{
39 struct btrfs_leaf_ref *ref;
40 size_t size = btrfs_leaf_ref_size(nr_extents);
41
42 ref = kmalloc(size, GFP_NOFS);
43 if (ref) {
44 spin_lock(&root->fs_info->ref_cache_lock);
45 root->fs_info->total_ref_cache_size += size;
46 spin_unlock(&root->fs_info->ref_cache_lock);
47
48 memset(ref, 0, sizeof(*ref));
49 atomic_set(&ref->usage, 1);
50 INIT_LIST_HEAD(&ref->list);
51 }
52 return ref;
53}
54
55/*
56 * free a leaf reference struct and update the counters for the
57 * total ref cache size
58 */
59void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
60{
61 if (!ref)
62 return;
63 WARN_ON(atomic_read(&ref->usage) == 0);
64 if (atomic_dec_and_test(&ref->usage)) {
65 size_t size = btrfs_leaf_ref_size(ref->nritems);
66
67 BUG_ON(ref->in_tree);
68 kfree(ref);
69
70 spin_lock(&root->fs_info->ref_cache_lock);
71 root->fs_info->total_ref_cache_size -= size;
72 spin_unlock(&root->fs_info->ref_cache_lock);
73 }
74}
75
76static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 26static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
77 struct rb_node *node) 27 struct rb_node *node)
78{ 28{
@@ -116,117 +66,3 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
116 } 66 }
117 return NULL; 67 return NULL;
118} 68}
119
120int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
121 int shared)
122{
123 struct btrfs_leaf_ref *ref = NULL;
124 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
125
126 if (shared)
127 tree = &root->fs_info->shared_ref_tree;
128 if (!tree)
129 return 0;
130
131 spin_lock(&tree->lock);
132 while (!list_empty(&tree->list)) {
133 ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
134 BUG_ON(ref->tree != tree);
135 if (ref->root_gen > max_root_gen)
136 break;
137 if (!xchg(&ref->in_tree, 0)) {
138 cond_resched_lock(&tree->lock);
139 continue;
140 }
141
142 rb_erase(&ref->rb_node, &tree->root);
143 list_del_init(&ref->list);
144
145 spin_unlock(&tree->lock);
146 btrfs_free_leaf_ref(root, ref);
147 cond_resched();
148 spin_lock(&tree->lock);
149 }
150 spin_unlock(&tree->lock);
151 return 0;
152}
153
154/*
155 * find the leaf ref for a given extent. This returns the ref struct with
156 * a usage reference incremented
157 */
158struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
159 u64 bytenr)
160{
161 struct rb_node *rb;
162 struct btrfs_leaf_ref *ref = NULL;
163 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
164again:
165 if (tree) {
166 spin_lock(&tree->lock);
167 rb = tree_search(&tree->root, bytenr);
168 if (rb)
169 ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
170 if (ref)
171 atomic_inc(&ref->usage);
172 spin_unlock(&tree->lock);
173 if (ref)
174 return ref;
175 }
176 if (tree != &root->fs_info->shared_ref_tree) {
177 tree = &root->fs_info->shared_ref_tree;
178 goto again;
179 }
180 return NULL;
181}
182
183/*
184 * add a fully filled in leaf ref struct
185 * remove all the refs older than a given root generation
186 */
187int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
188 int shared)
189{
190 int ret = 0;
191 struct rb_node *rb;
192 struct btrfs_leaf_ref_tree *tree = root->ref_tree;
193
194 if (shared)
195 tree = &root->fs_info->shared_ref_tree;
196
197 spin_lock(&tree->lock);
198 rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
199 if (rb) {
200 ret = -EEXIST;
201 } else {
202 atomic_inc(&ref->usage);
203 ref->tree = tree;
204 ref->in_tree = 1;
205 list_add_tail(&ref->list, &tree->list);
206 }
207 spin_unlock(&tree->lock);
208 return ret;
209}
210
211/*
212 * remove a single leaf ref from the tree. This drops the ref held by the tree
213 * only
214 */
215int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
216{
217 struct btrfs_leaf_ref_tree *tree;
218
219 if (!xchg(&ref->in_tree, 0))
220 return 0;
221
222 tree = ref->tree;
223 spin_lock(&tree->lock);
224
225 rb_erase(&ref->rb_node, &tree->root);
226 list_del_init(&ref->list);
227
228 spin_unlock(&tree->lock);
229
230 btrfs_free_leaf_ref(root, ref);
231 return 0;
232}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
index e2a55cb2072b..24f7001f6387 100644
--- a/fs/btrfs/ref-cache.h
+++ b/fs/btrfs/ref-cache.h
@@ -49,28 +49,4 @@ static inline size_t btrfs_leaf_ref_size(int nr_extents)
49 return sizeof(struct btrfs_leaf_ref) + 49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents; 50 sizeof(struct btrfs_extent_info) * nr_extents;
51} 51}
52
53static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
54{
55 tree->root = RB_ROOT;
56 INIT_LIST_HEAD(&tree->list);
57 spin_lock_init(&tree->lock);
58}
59
60static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
61{
62 return RB_EMPTY_ROOT(&tree->root);
63}
64
65void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree);
66struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
67 int nr_extents);
68void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
69struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
70 u64 bytenr);
71int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
72 int shared);
73int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
74 int shared);
75int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
76#endif 52#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f340f7c99d09..ca38eca70af0 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -30,6 +30,7 @@
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h"
33 34
34/* 35/*
35 * backref_node, mapping_node and tree_block start with this 36 * backref_node, mapping_node and tree_block start with this
@@ -507,6 +508,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
507 return 1; 508 return 1;
508} 509}
509 510
511
510static int should_ignore_root(struct btrfs_root *root) 512static int should_ignore_root(struct btrfs_root *root)
511{ 513{
512 struct btrfs_root *reloc_root; 514 struct btrfs_root *reloc_root;
@@ -529,7 +531,6 @@ static int should_ignore_root(struct btrfs_root *root)
529 */ 531 */
530 return 1; 532 return 1;
531} 533}
532
533/* 534/*
534 * find reloc tree by address of tree root 535 * find reloc tree by address of tree root
535 */ 536 */
@@ -961,7 +962,7 @@ again:
961 lower = upper; 962 lower = upper;
962 upper = NULL; 963 upper = NULL;
963 } 964 }
964 btrfs_release_path(root, path2); 965 btrfs_release_path(path2);
965next: 966next:
966 if (ptr < end) { 967 if (ptr < end) {
967 ptr += btrfs_extent_inline_ref_size(key.type); 968 ptr += btrfs_extent_inline_ref_size(key.type);
@@ -974,7 +975,7 @@ next:
974 if (ptr >= end) 975 if (ptr >= end)
975 path1->slots[0]++; 976 path1->slots[0]++;
976 } 977 }
977 btrfs_release_path(rc->extent_root, path1); 978 btrfs_release_path(path1);
978 979
979 cur->checked = 1; 980 cur->checked = 1;
980 WARN_ON(exist); 981 WARN_ON(exist);
@@ -1409,9 +1410,9 @@ again:
1409 prev = node; 1410 prev = node;
1410 entry = rb_entry(node, struct btrfs_inode, rb_node); 1411 entry = rb_entry(node, struct btrfs_inode, rb_node);
1411 1412
1412 if (objectid < entry->vfs_inode.i_ino) 1413 if (objectid < btrfs_ino(&entry->vfs_inode))
1413 node = node->rb_left; 1414 node = node->rb_left;
1414 else if (objectid > entry->vfs_inode.i_ino) 1415 else if (objectid > btrfs_ino(&entry->vfs_inode))
1415 node = node->rb_right; 1416 node = node->rb_right;
1416 else 1417 else
1417 break; 1418 break;
@@ -1419,7 +1420,7 @@ again:
1419 if (!node) { 1420 if (!node) {
1420 while (prev) { 1421 while (prev) {
1421 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1422 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1422 if (objectid <= entry->vfs_inode.i_ino) { 1423 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
1423 node = prev; 1424 node = prev;
1424 break; 1425 break;
1425 } 1426 }
@@ -1434,7 +1435,7 @@ again:
1434 return inode; 1435 return inode;
1435 } 1436 }
1436 1437
1437 objectid = entry->vfs_inode.i_ino + 1; 1438 objectid = btrfs_ino(&entry->vfs_inode) + 1;
1438 if (cond_resched_lock(&root->inode_lock)) 1439 if (cond_resched_lock(&root->inode_lock))
1439 goto again; 1440 goto again;
1440 1441
@@ -1470,7 +1471,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1470 return -ENOMEM; 1471 return -ENOMEM;
1471 1472
1472 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1473 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1473 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1474 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
1474 bytenr, 0); 1475 bytenr, 0);
1475 if (ret < 0) 1476 if (ret < 0)
1476 goto out; 1477 goto out;
@@ -1558,11 +1559,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1558 if (first) { 1559 if (first) {
1559 inode = find_next_inode(root, key.objectid); 1560 inode = find_next_inode(root, key.objectid);
1560 first = 0; 1561 first = 0;
1561 } else if (inode && inode->i_ino < key.objectid) { 1562 } else if (inode && btrfs_ino(inode) < key.objectid) {
1562 btrfs_add_delayed_iput(inode); 1563 btrfs_add_delayed_iput(inode);
1563 inode = find_next_inode(root, key.objectid); 1564 inode = find_next_inode(root, key.objectid);
1564 } 1565 }
1565 if (inode && inode->i_ino == key.objectid) { 1566 if (inode && btrfs_ino(inode) == key.objectid) {
1566 end = key.offset + 1567 end = key.offset +
1567 btrfs_file_extent_num_bytes(leaf, fi); 1568 btrfs_file_extent_num_bytes(leaf, fi);
1568 WARN_ON(!IS_ALIGNED(key.offset, 1569 WARN_ON(!IS_ALIGNED(key.offset,
@@ -1749,7 +1750,7 @@ again:
1749 1750
1750 btrfs_node_key_to_cpu(path->nodes[level], &key, 1751 btrfs_node_key_to_cpu(path->nodes[level], &key,
1751 path->slots[level]); 1752 path->slots[level]);
1752 btrfs_release_path(src, path); 1753 btrfs_release_path(path);
1753 1754
1754 path->lowest_level = level; 1755 path->lowest_level = level;
1755 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1756 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
@@ -1893,6 +1894,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1893 struct inode *inode = NULL; 1894 struct inode *inode = NULL;
1894 u64 objectid; 1895 u64 objectid;
1895 u64 start, end; 1896 u64 start, end;
1897 u64 ino;
1896 1898
1897 objectid = min_key->objectid; 1899 objectid = min_key->objectid;
1898 while (1) { 1900 while (1) {
@@ -1905,17 +1907,18 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1905 inode = find_next_inode(root, objectid); 1907 inode = find_next_inode(root, objectid);
1906 if (!inode) 1908 if (!inode)
1907 break; 1909 break;
1910 ino = btrfs_ino(inode);
1908 1911
1909 if (inode->i_ino > max_key->objectid) { 1912 if (ino > max_key->objectid) {
1910 iput(inode); 1913 iput(inode);
1911 break; 1914 break;
1912 } 1915 }
1913 1916
1914 objectid = inode->i_ino + 1; 1917 objectid = ino + 1;
1915 if (!S_ISREG(inode->i_mode)) 1918 if (!S_ISREG(inode->i_mode))
1916 continue; 1919 continue;
1917 1920
1918 if (unlikely(min_key->objectid == inode->i_ino)) { 1921 if (unlikely(min_key->objectid == ino)) {
1919 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1922 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1920 continue; 1923 continue;
1921 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1924 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
@@ -1928,7 +1931,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1928 start = 0; 1931 start = 0;
1929 } 1932 }
1930 1933
1931 if (unlikely(max_key->objectid == inode->i_ino)) { 1934 if (unlikely(max_key->objectid == ino)) {
1932 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1935 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1933 continue; 1936 continue;
1934 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1937 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
@@ -2496,7 +2499,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2496 path->locks[upper->level] = 0; 2499 path->locks[upper->level] = 0;
2497 2500
2498 slot = path->slots[upper->level]; 2501 slot = path->slots[upper->level];
2499 btrfs_release_path(NULL, path); 2502 btrfs_release_path(path);
2500 } else { 2503 } else {
2501 ret = btrfs_bin_search(upper->eb, key, upper->level, 2504 ret = btrfs_bin_search(upper->eb, key, upper->level,
2502 &slot); 2505 &slot);
@@ -2737,7 +2740,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2737 } else { 2740 } else {
2738 path->lowest_level = node->level; 2741 path->lowest_level = node->level;
2739 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2742 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2740 btrfs_release_path(root, path); 2743 btrfs_release_path(path);
2741 if (ret > 0) 2744 if (ret > 0)
2742 ret = 0; 2745 ret = 0;
2743 } 2746 }
@@ -2870,7 +2873,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2870 struct extent_map *em; 2873 struct extent_map *em;
2871 int ret = 0; 2874 int ret = 0;
2872 2875
2873 em = alloc_extent_map(GFP_NOFS); 2876 em = alloc_extent_map();
2874 if (!em) 2877 if (!em)
2875 return -ENOMEM; 2878 return -ENOMEM;
2876 2879
@@ -3119,7 +3122,7 @@ static int add_tree_block(struct reloc_control *rc,
3119#endif 3122#endif
3120 } 3123 }
3121 3124
3122 btrfs_release_path(rc->extent_root, path); 3125 btrfs_release_path(path);
3123 3126
3124 BUG_ON(level == -1); 3127 BUG_ON(level == -1);
3125 3128
@@ -3220,7 +3223,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3220 key.offset = 0; 3223 key.offset = 0;
3221 3224
3222 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3225 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3223 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { 3226 if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
3224 if (inode && !IS_ERR(inode)) 3227 if (inode && !IS_ERR(inode))
3225 iput(inode); 3228 iput(inode);
3226 return -ENOENT; 3229 return -ENOENT;
@@ -3505,7 +3508,7 @@ int add_data_references(struct reloc_control *rc,
3505 } 3508 }
3506 path->slots[0]++; 3509 path->slots[0]++;
3507 } 3510 }
3508 btrfs_release_path(rc->extent_root, path); 3511 btrfs_release_path(path);
3509 if (err) 3512 if (err)
3510 free_block_list(blocks); 3513 free_block_list(blocks);
3511 return err; 3514 return err;
@@ -3568,7 +3571,7 @@ next:
3568 EXTENT_DIRTY); 3571 EXTENT_DIRTY);
3569 3572
3570 if (ret == 0 && start <= key.objectid) { 3573 if (ret == 0 && start <= key.objectid) {
3571 btrfs_release_path(rc->extent_root, path); 3574 btrfs_release_path(path);
3572 rc->search_start = end + 1; 3575 rc->search_start = end + 1;
3573 } else { 3576 } else {
3574 rc->search_start = key.objectid + key.offset; 3577 rc->search_start = key.objectid + key.offset;
@@ -3576,7 +3579,7 @@ next:
3576 return 0; 3579 return 0;
3577 } 3580 }
3578 } 3581 }
3579 btrfs_release_path(rc->extent_root, path); 3582 btrfs_release_path(path);
3580 return ret; 3583 return ret;
3581} 3584}
3582 3585
@@ -3713,7 +3716,7 @@ restart:
3713 flags = BTRFS_EXTENT_FLAG_DATA; 3716 flags = BTRFS_EXTENT_FLAG_DATA;
3714 3717
3715 if (path_change) { 3718 if (path_change) {
3716 btrfs_release_path(rc->extent_root, path); 3719 btrfs_release_path(path);
3717 3720
3718 path->search_commit_root = 1; 3721 path->search_commit_root = 1;
3719 path->skip_locking = 1; 3722 path->skip_locking = 1;
@@ -3736,7 +3739,7 @@ restart:
3736 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3739 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3737 ret = add_data_references(rc, &key, path, &blocks); 3740 ret = add_data_references(rc, &key, path, &blocks);
3738 } else { 3741 } else {
3739 btrfs_release_path(rc->extent_root, path); 3742 btrfs_release_path(path);
3740 ret = 0; 3743 ret = 0;
3741 } 3744 }
3742 if (ret < 0) { 3745 if (ret < 0) {
@@ -3799,7 +3802,7 @@ restart:
3799 } 3802 }
3800 } 3803 }
3801 3804
3802 btrfs_release_path(rc->extent_root, path); 3805 btrfs_release_path(path);
3803 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3806 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3804 GFP_NOFS); 3807 GFP_NOFS);
3805 3808
@@ -3867,7 +3870,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3867 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3870 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3868 BTRFS_INODE_PREALLOC); 3871 BTRFS_INODE_PREALLOC);
3869 btrfs_mark_buffer_dirty(leaf); 3872 btrfs_mark_buffer_dirty(leaf);
3870 btrfs_release_path(root, path); 3873 btrfs_release_path(path);
3871out: 3874out:
3872 btrfs_free_path(path); 3875 btrfs_free_path(path);
3873 return ret; 3876 return ret;
@@ -3897,7 +3900,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3897 if (IS_ERR(trans)) 3900 if (IS_ERR(trans))
3898 return ERR_CAST(trans); 3901 return ERR_CAST(trans);
3899 3902
3900 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3903 err = btrfs_find_free_objectid(root, &objectid);
3901 if (err) 3904 if (err)
3902 goto out; 3905 goto out;
3903 3906
@@ -3935,7 +3938,7 @@ static struct reloc_control *alloc_reloc_control(void)
3935 INIT_LIST_HEAD(&rc->reloc_roots); 3938 INIT_LIST_HEAD(&rc->reloc_roots);
3936 backref_cache_init(&rc->backref_cache); 3939 backref_cache_init(&rc->backref_cache);
3937 mapping_tree_init(&rc->reloc_root_tree); 3940 mapping_tree_init(&rc->reloc_root_tree);
3938 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3941 extent_io_tree_init(&rc->processed_blocks, NULL);
3939 return rc; 3942 return rc;
3940} 3943}
3941 3944
@@ -4109,7 +4112,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4109 } 4112 }
4110 leaf = path->nodes[0]; 4113 leaf = path->nodes[0];
4111 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4114 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4112 btrfs_release_path(root->fs_info->tree_root, path); 4115 btrfs_release_path(path);
4113 4116
4114 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4117 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4115 key.type != BTRFS_ROOT_ITEM_KEY) 4118 key.type != BTRFS_ROOT_ITEM_KEY)
@@ -4141,7 +4144,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4141 4144
4142 key.offset--; 4145 key.offset--;
4143 } 4146 }
4144 btrfs_release_path(root->fs_info->tree_root, path); 4147 btrfs_release_path(path);
4145 4148
4146 if (list_empty(&reloc_roots)) 4149 if (list_empty(&reloc_roots))
4147 goto out; 4150 goto out;
@@ -4242,7 +4245,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4242 4245
4243 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4246 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4244 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4247 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4245 disk_bytenr + len - 1, &list); 4248 disk_bytenr + len - 1, &list, 0);
4246 4249
4247 while (!list_empty(&list)) { 4250 while (!list_empty(&list)) {
4248 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4251 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6928bff62daa..ebe45443de06 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -22,53 +22,6 @@
22#include "print-tree.h" 22#include "print-tree.h"
23 23
24/* 24/*
25 * search forward for a root, starting with objectid 'search_start'
26 * if a root key is found, the objectid we find is filled into 'found_objectid'
27 * and 0 is returned. < 0 is returned on error, 1 if there is nothing
28 * left in the tree.
29 */
30int btrfs_search_root(struct btrfs_root *root, u64 search_start,
31 u64 *found_objectid)
32{
33 struct btrfs_path *path;
34 struct btrfs_key search_key;
35 int ret;
36
37 root = root->fs_info->tree_root;
38 search_key.objectid = search_start;
39 search_key.type = (u8)-1;
40 search_key.offset = (u64)-1;
41
42 path = btrfs_alloc_path();
43 BUG_ON(!path);
44again:
45 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
46 if (ret < 0)
47 goto out;
48 if (ret == 0) {
49 ret = 1;
50 goto out;
51 }
52 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
53 ret = btrfs_next_leaf(root, path);
54 if (ret)
55 goto out;
56 }
57 btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
58 if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
59 search_key.offset++;
60 btrfs_release_path(root, path);
61 goto again;
62 }
63 ret = 0;
64 *found_objectid = search_key.objectid;
65
66out:
67 btrfs_free_path(path);
68 return ret;
69}
70
71/*
72 * lookup the root with the highest offset for a given objectid. The key we do 25 * lookup the root with the highest offset for a given objectid. The key we do
73 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 26 * find is copied into 'key'. If we find something return 0, otherwise 1, < 0
74 * on error. 27 * on error.
@@ -230,7 +183,7 @@ again:
230 183
231 memcpy(&found_key, &key, sizeof(key)); 184 memcpy(&found_key, &key, sizeof(key));
232 key.offset++; 185 key.offset++;
233 btrfs_release_path(root, path); 186 btrfs_release_path(path);
234 dead_root = 187 dead_root =
235 btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 188 btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
236 &found_key); 189 &found_key);
@@ -292,7 +245,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
292 } 245 }
293 246
294 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 247 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
295 btrfs_release_path(tree_root, path); 248 btrfs_release_path(path);
296 249
297 if (key.objectid != BTRFS_ORPHAN_OBJECTID || 250 if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
298 key.type != BTRFS_ORPHAN_ITEM_KEY) 251 key.type != BTRFS_ORPHAN_ITEM_KEY)
@@ -385,18 +338,22 @@ again:
385 *sequence = btrfs_root_ref_sequence(leaf, ref); 338 *sequence = btrfs_root_ref_sequence(leaf, ref);
386 339
387 ret = btrfs_del_item(trans, tree_root, path); 340 ret = btrfs_del_item(trans, tree_root, path);
388 BUG_ON(ret); 341 if (ret) {
342 err = ret;
343 goto out;
344 }
389 } else 345 } else
390 err = -ENOENT; 346 err = -ENOENT;
391 347
392 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 348 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
393 btrfs_release_path(tree_root, path); 349 btrfs_release_path(path);
394 key.objectid = ref_id; 350 key.objectid = ref_id;
395 key.type = BTRFS_ROOT_REF_KEY; 351 key.type = BTRFS_ROOT_REF_KEY;
396 key.offset = root_id; 352 key.offset = root_id;
397 goto again; 353 goto again;
398 } 354 }
399 355
356out:
400 btrfs_free_path(path); 357 btrfs_free_path(path);
401 return err; 358 return err;
402} 359}
@@ -463,7 +420,7 @@ again:
463 btrfs_mark_buffer_dirty(leaf); 420 btrfs_mark_buffer_dirty(leaf);
464 421
465 if (key.type == BTRFS_ROOT_BACKREF_KEY) { 422 if (key.type == BTRFS_ROOT_BACKREF_KEY) {
466 btrfs_release_path(tree_root, path); 423 btrfs_release_path(path);
467 key.objectid = ref_id; 424 key.objectid = ref_id;
468 key.type = BTRFS_ROOT_REF_KEY; 425 key.type = BTRFS_ROOT_REF_KEY;
469 key.offset = root_id; 426 key.offset = root_id;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
new file mode 100644
index 000000000000..6dfed0c27ac3
--- /dev/null
+++ b/fs/btrfs/scrub.c
@@ -0,0 +1,1369 @@
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h"
27#include "volumes.h"
28#include "disk-io.h"
29#include "ordered-data.h"
30
31/*
32 * This is only the first step towards a full-features scrub. It reads all
33 * extent and super block and verifies the checksums. In case a bad checksum
34 * is found or the extent cannot be read, good data will be written back if
35 * any can be found.
36 *
37 * Future enhancements:
38 * - To enhance the performance, better read-ahead strategies for the
39 * extent-tree can be employed.
40 * - In case an unrepairable extent is encountered, track which files are
41 * affected and report them
42 * - In case of a read error on files with nodatasum, map the file and read
43 * the extent to trigger a writeback of the good copy
44 * - track and record media errors, throw out bad devices
45 * - add a mode to also read unallocated space
46 * - make the prefetch cancellable
47 */
48
49struct scrub_bio;
50struct scrub_page;
51struct scrub_dev;
52static void scrub_bio_end_io(struct bio *bio, int err);
53static void scrub_checksum(struct btrfs_work *work);
54static int scrub_checksum_data(struct scrub_dev *sdev,
55 struct scrub_page *spag, void *buffer);
56static int scrub_checksum_tree_block(struct scrub_dev *sdev,
57 struct scrub_page *spag, u64 logical,
58 void *buffer);
59static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
60static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
61static void scrub_fixup_end_io(struct bio *bio, int err);
62static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
63 struct page *page);
64static void scrub_fixup(struct scrub_bio *sbio, int ix);
65
66#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
67#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
68
69struct scrub_page {
70 u64 flags; /* extent flags */
71 u64 generation;
72 u64 mirror_num;
73 int have_csum;
74 u8 csum[BTRFS_CSUM_SIZE];
75};
76
77struct scrub_bio {
78 int index;
79 struct scrub_dev *sdev;
80 struct bio *bio;
81 int err;
82 u64 logical;
83 u64 physical;
84 struct scrub_page spag[SCRUB_PAGES_PER_BIO];
85 u64 count;
86 int next_free;
87 struct btrfs_work work;
88};
89
90struct scrub_dev {
91 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
92 struct btrfs_device *dev;
93 int first_free;
94 int curr;
95 atomic_t in_flight;
96 spinlock_t list_lock;
97 wait_queue_head_t list_wait;
98 u16 csum_size;
99 struct list_head csum_list;
100 atomic_t cancel_req;
101 int readonly;
102 /*
103 * statistics
104 */
105 struct btrfs_scrub_progress stat;
106 spinlock_t stat_lock;
107};
108
109static void scrub_free_csums(struct scrub_dev *sdev)
110{
111 while (!list_empty(&sdev->csum_list)) {
112 struct btrfs_ordered_sum *sum;
113 sum = list_first_entry(&sdev->csum_list,
114 struct btrfs_ordered_sum, list);
115 list_del(&sum->list);
116 kfree(sum);
117 }
118}
119
/*
 * tear down a scrub_dev built by scrub_setup_dev(): free the pages
 * attached to each bio, the bios themselves, any leftover checksums
 * and finally the context.  Tolerates a partially constructed sdev
 * (it is the error-unwind path of scrub_setup_dev as well).
 */
static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
	int i;
	int j;
	struct page *last_page;

	if (!sdev)
		return;

	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct scrub_bio *sbio = sdev->bios[i];
		struct bio *bio;

		/* bios are allocated in order; first NULL ends the pool */
		if (!sbio)
			break;

		bio = sbio->bio;
		if (bio) {
			/*
			 * consecutive bvecs may reference the same page
			 * (bio_add_page can merge); skip duplicates so no
			 * page is freed twice
			 */
			last_page = NULL;
			for (j = 0; j < bio->bi_vcnt; ++j) {
				if (bio->bi_io_vec[j].bv_page == last_page)
					continue;
				last_page = bio->bi_io_vec[j].bv_page;
				__free_page(last_page);
			}
			bio_put(bio);
		}
		kfree(sbio);
	}

	scrub_free_csums(sdev);
	kfree(sdev);
}
153
/*
 * allocate the per-device scrub context together with a fixed pool of
 * SCRUB_BIOS_PER_DEV bios, each pre-loaded with SCRUB_PAGES_PER_BIO
 * pages.  On any allocation failure everything built so far is torn
 * down via scrub_free_dev() and ERR_PTR(-ENOMEM) is returned.
 */
static noinline_for_stack
struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
{
	struct scrub_dev *sdev;
	int i;
	int j;
	int ret;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;

	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
	if (!sdev)
		goto nomem;
	sdev->dev = dev;
	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct bio *bio;
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sdev->bios[i] = sbio;

		bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
		if (!bio)
			goto nomem;

		sbio->index = i;
		sbio->sdev = sdev;
		sbio->bio = bio;
		sbio->count = 0;
		/* checksum verification runs from a worker thread */
		sbio->work.func = scrub_checksum;
		bio->bi_private = sdev->bios[i];
		bio->bi_end_io = scrub_bio_end_io;
		bio->bi_sector = 0;
		bio->bi_bdev = dev->bdev;
		bio->bi_size = 0;

		for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
			struct page *page;
			page = alloc_page(GFP_NOFS);
			if (!page)
				goto nomem;

			ret = bio_add_page(bio, page, PAGE_SIZE, 0);
			if (!ret)
				goto nomem;
		}
		WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);

		/* link the bios into the free list; -1 terminates it */
		if (i != SCRUB_BIOS_PER_DEV-1)
			sdev->bios[i]->next_free = i + 1;
		else
			sdev->bios[i]->next_free = -1;
	}
	sdev->first_free = 0;
	sdev->curr = -1;	/* no bio is currently being filled */
	atomic_set(&sdev->in_flight, 0);
	atomic_set(&sdev->cancel_req, 0);
	sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
	INIT_LIST_HEAD(&sdev->csum_list);

	spin_lock_init(&sdev->list_lock);
	spin_lock_init(&sdev->stat_lock);
	init_waitqueue_head(&sdev->list_wait);
	return sdev;

nomem:
	scrub_free_dev(sdev);
	return ERR_PTR(-ENOMEM);
}
224
225/*
226 * scrub_recheck_error gets called when either verification of the page
227 * failed or the bio failed to read, e.g. with EIO. In the latter case,
228 * recheck_error gets called for every page in the bio, even though only
229 * one may be bad
230 */
static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
	/*
	 * on a read error, re-read just this page synchronously; if the
	 * re-read succeeds and the page verifies, the error was transient
	 * and nothing needs fixing.  Otherwise fall through to the repair
	 * path.
	 */
	if (sbio->err) {
		if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
				   (sbio->physical + ix * PAGE_SIZE) >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page) == 0) {
			if (scrub_fixup_check(sbio, ix) == 0)
				return;
		}
	}

	scrub_fixup(sbio, ix);
}
244
/*
 * re-verify one page of a bio against its recorded checksum/metadata.
 * Returns 0 if the page checks out, non-zero on verification failure.
 */
static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
{
	int ret = 1;
	struct page *page;
	void *buffer;
	u64 flags = sbio->spag[ix].flags;

	page = sbio->bio->bi_io_vec[ix].bv_page;
	buffer = kmap_atomic(page, KM_USER0);
	/* dispatch on the extent type recorded when the page was queued */
	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		ret = scrub_checksum_data(sbio->sdev,
					  sbio->spag + ix, buffer);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = scrub_checksum_tree_block(sbio->sdev,
						sbio->spag + ix,
						sbio->logical + ix * PAGE_SIZE,
						buffer);
	} else {
		WARN_ON(1);
	}
	kunmap_atomic(buffer, KM_USER0);

	return ret;
}
269
270static void scrub_fixup_end_io(struct bio *bio, int err)
271{
272 complete((struct completion *)bio->bi_private);
273}
274
275static void scrub_fixup(struct scrub_bio *sbio, int ix)
276{
277 struct scrub_dev *sdev = sbio->sdev;
278 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
279 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
280 struct btrfs_multi_bio *multi = NULL;
281 u64 logical = sbio->logical + ix * PAGE_SIZE;
282 u64 length;
283 int i;
284 int ret;
285 DECLARE_COMPLETION_ONSTACK(complete);
286
287 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
288 (sbio->spag[ix].have_csum == 0)) {
289 /*
290 * nodatasum, don't try to fix anything
291 * FIXME: we can do better, open the inode and trigger a
292 * writeback
293 */
294 goto uncorrectable;
295 }
296
297 length = PAGE_SIZE;
298 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
299 &multi, 0);
300 if (ret || !multi || length < PAGE_SIZE) {
301 printk(KERN_ERR
302 "scrub_fixup: btrfs_map_block failed us for %llu\n",
303 (unsigned long long)logical);
304 WARN_ON(1);
305 return;
306 }
307
308 if (multi->num_stripes == 1)
309 /* there aren't any replicas */
310 goto uncorrectable;
311
312 /*
313 * first find a good copy
314 */
315 for (i = 0; i < multi->num_stripes; ++i) {
316 if (i == sbio->spag[ix].mirror_num)
317 continue;
318
319 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
320 multi->stripes[i].physical >> 9,
321 sbio->bio->bi_io_vec[ix].bv_page)) {
322 /* I/O-error, this is not a good copy */
323 continue;
324 }
325
326 if (scrub_fixup_check(sbio, ix) == 0)
327 break;
328 }
329 if (i == multi->num_stripes)
330 goto uncorrectable;
331
332 if (!sdev->readonly) {
333 /*
334 * bi_io_vec[ix].bv_page now contains good data, write it back
335 */
336 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
337 (sbio->physical + ix * PAGE_SIZE) >> 9,
338 sbio->bio->bi_io_vec[ix].bv_page)) {
339 /* I/O-error, writeback failed, give up */
340 goto uncorrectable;
341 }
342 }
343
344 kfree(multi);
345 spin_lock(&sdev->stat_lock);
346 ++sdev->stat.corrected_errors;
347 spin_unlock(&sdev->stat_lock);
348
349 if (printk_ratelimit())
350 printk(KERN_ERR "btrfs: fixed up at %llu\n",
351 (unsigned long long)logical);
352 return;
353
354uncorrectable:
355 kfree(multi);
356 spin_lock(&sdev->stat_lock);
357 ++sdev->stat.uncorrectable_errors;
358 spin_unlock(&sdev->stat_lock);
359
360 if (printk_ratelimit())
361 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
362 (unsigned long long)logical);
363}
364
365static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
366 struct page *page)
367{
368 struct bio *bio = NULL;
369 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete);
371
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev;
377 bio->bi_sector = sector;
378 bio_add_page(bio, page, PAGE_SIZE, 0);
379 bio->bi_end_io = scrub_fixup_end_io;
380 bio->bi_private = &complete;
381 submit_bio(rw, bio);
382
383 wait_for_completion(&complete);
384
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
386 bio_put(bio);
387 return ret;
388}
389
390static void scrub_bio_end_io(struct bio *bio, int err)
391{
392 struct scrub_bio *sbio = bio->bi_private;
393 struct scrub_dev *sdev = sbio->sdev;
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395
396 sbio->err = err;
397
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399}
400
/*
 * worker entry point: verify all pages of a completed read bio, trigger
 * repair for bad ones, then return the bio to the device's free list.
 */
static void scrub_checksum(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_dev *sdev = sbio->sdev;
	struct page *page;
	void *buffer;
	int i;
	u64 flags;
	u64 logical;
	int ret;

	if (sbio->err) {
		/* the whole read failed; recheck every page individually */
		for (i = 0; i < sbio->count; ++i)
			scrub_recheck_error(sbio, i);

		/*
		 * reset the bio state that the failed submission clobbered
		 * so the bio can be reused for the next request
		 */
		sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
		sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
		sbio->bio->bi_phys_segments = 0;
		sbio->bio->bi_idx = 0;

		for (i = 0; i < sbio->count; i++) {
			struct bio_vec *bi;
			bi = &sbio->bio->bi_io_vec[i];
			bi->bv_offset = 0;
			bi->bv_len = PAGE_SIZE;
		}

		spin_lock(&sdev->stat_lock);
		++sdev->stat.read_errors;
		spin_unlock(&sdev->stat_lock);
		goto out;
	}
	for (i = 0; i < sbio->count; ++i) {
		page = sbio->bio->bi_io_vec[i].bv_page;
		buffer = kmap_atomic(page, KM_USER0);
		flags = sbio->spag[i].flags;
		logical = sbio->logical + i * PAGE_SIZE;
		ret = 0;
		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
							logical, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
			/* super blocks are submitted one page per bio */
			BUG_ON(i);
			/* super errors are only reported, never repaired */
			(void)scrub_checksum_super(sbio, buffer);
		} else {
			WARN_ON(1);
		}
		kunmap_atomic(buffer, KM_USER0);
		if (ret)
			scrub_recheck_error(sbio, i);
	}

out:
	/* give the bio back to the free list and wake any waiter */
	spin_lock(&sdev->list_lock);
	sbio->next_free = sdev->first_free;
	sdev->first_free = sbio->index;
	spin_unlock(&sdev->list_lock);
	atomic_dec(&sdev->in_flight);
	wake_up(&sdev->list_wait);
}
463
464static int scrub_checksum_data(struct scrub_dev *sdev,
465 struct scrub_page *spag, void *buffer)
466{
467 u8 csum[BTRFS_CSUM_SIZE];
468 u32 crc = ~(u32)0;
469 int fail = 0;
470 struct btrfs_root *root = sdev->dev->dev_root;
471
472 if (!spag->have_csum)
473 return 0;
474
475 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
476 btrfs_csum_final(crc, csum);
477 if (memcmp(csum, spag->csum, sdev->csum_size))
478 fail = 1;
479
480 spin_lock(&sdev->stat_lock);
481 ++sdev->stat.data_extents_scrubbed;
482 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
483 if (fail)
484 ++sdev->stat.csum_errors;
485 spin_unlock(&sdev->stat_lock);
486
487 return fail;
488}
489
490static int scrub_checksum_tree_block(struct scrub_dev *sdev,
491 struct scrub_page *spag, u64 logical,
492 void *buffer)
493{
494 struct btrfs_header *h;
495 struct btrfs_root *root = sdev->dev->dev_root;
496 struct btrfs_fs_info *fs_info = root->fs_info;
497 u8 csum[BTRFS_CSUM_SIZE];
498 u32 crc = ~(u32)0;
499 int fail = 0;
500 int crc_fail = 0;
501
502 /*
503 * we don't use the getter functions here, as we
504 * a) don't have an extent buffer and
505 * b) the page is already kmapped
506 */
507 h = (struct btrfs_header *)buffer;
508
509 if (logical != le64_to_cpu(h->bytenr))
510 ++fail;
511
512 if (spag->generation != le64_to_cpu(h->generation))
513 ++fail;
514
515 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
516 ++fail;
517
518 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
519 BTRFS_UUID_SIZE))
520 ++fail;
521
522 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
523 PAGE_SIZE - BTRFS_CSUM_SIZE);
524 btrfs_csum_final(crc, csum);
525 if (memcmp(csum, h->csum, sdev->csum_size))
526 ++crc_fail;
527
528 spin_lock(&sdev->stat_lock);
529 ++sdev->stat.tree_extents_scrubbed;
530 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
531 if (crc_fail)
532 ++sdev->stat.csum_errors;
533 if (fail)
534 ++sdev->stat.verify_errors;
535 spin_unlock(&sdev->stat_lock);
536
537 return fail || crc_fail;
538}
539
/*
 * verify a super block page: bytenr, generation, fsid and checksum.
 * Failures are only counted, not repaired — the supers are rewritten
 * on the next transaction commit anyway.
 */
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
{
	struct btrfs_super_block *s;
	u64 logical;
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;

	s = (struct btrfs_super_block *)buffer;
	logical = sbio->logical;

	if (logical != le64_to_cpu(s->bytenr))
		++fail;

	if (sbio->spag[0].generation != le64_to_cpu(s->generation))
		++fail;

	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
		++fail;

	/* the checksum covers everything after the csum field itself */
	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
			      PAGE_SIZE - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, s->csum, sbio->sdev->csum_size))
		++fail;

	if (fail) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sdev->stat_lock);
		++sdev->stat.super_errors;
		spin_unlock(&sdev->stat_lock);
	}

	return fail;
}
582
/*
 * submit the bio currently being filled for this device (if any) and
 * mark it in flight.  scrub_checksum() returns it to the free list
 * once verification is done.
 */
static int scrub_submit(struct scrub_dev *sdev)
{
	struct scrub_bio *sbio;

	/* nothing queued */
	if (sdev->curr == -1)
		return 0;

	sbio = sdev->bios[sdev->curr];

	/* re-initialize the fields a previous submission may have changed */
	sbio->bio->bi_sector = sbio->physical >> 9;
	sbio->bio->bi_size = sbio->count * PAGE_SIZE;
	sbio->bio->bi_next = NULL;
	sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
	sbio->bio->bi_comp_cpu = -1;
	sbio->bio->bi_bdev = sdev->dev->bdev;
	sbio->err = 0;
	sdev->curr = -1;
	atomic_inc(&sdev->in_flight);

	submit_bio(0, sbio->bio);

	return 0;
}
606
/*
 * queue one page for scrubbing.  Pages are batched into the device's
 * current bio as long as they are physically and logically contiguous;
 * a full or non-contiguous bio is submitted first.  'force' submits
 * immediately (used e.g. for super blocks).
 */
static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
		      u64 physical, u64 flags, u64 gen, u64 mirror_num,
		      u8 *csum, int force)
{
	struct scrub_bio *sbio;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sdev->curr == -1) {
		spin_lock(&sdev->list_lock);
		sdev->curr = sdev->first_free;
		if (sdev->curr != -1) {
			/* pop the head of the free list under the lock */
			sdev->first_free = sdev->bios[sdev->curr]->next_free;
			sdev->bios[sdev->curr]->next_free = -1;
			sdev->bios[sdev->curr]->count = 0;
			spin_unlock(&sdev->list_lock);
		} else {
			spin_unlock(&sdev->list_lock);
			wait_event(sdev->list_wait, sdev->first_free != -1);
		}
	}
	sbio = sdev->bios[sdev->curr];
	if (sbio->count == 0) {
		/* first page of a fresh bio defines its start addresses */
		sbio->physical = physical;
		sbio->logical = logical;
	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
		/* not contiguous with the batch; flush and start over */
		scrub_submit(sdev);
		goto again;
	}
	sbio->spag[sbio->count].flags = flags;
	sbio->spag[sbio->count].generation = gen;
	sbio->spag[sbio->count].have_csum = 0;
	sbio->spag[sbio->count].mirror_num = mirror_num;
	if (csum) {
		sbio->spag[sbio->count].have_csum = 1;
		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
	}
	++sbio->count;
	if (sbio->count == SCRUB_PAGES_PER_BIO || force)
		scrub_submit(sdev);

	return 0;
}
653
654static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
655 u8 *csum)
656{
657 struct btrfs_ordered_sum *sum = NULL;
658 int ret = 0;
659 unsigned long i;
660 unsigned long num_sectors;
661 u32 sectorsize = sdev->dev->dev_root->sectorsize;
662
663 while (!list_empty(&sdev->csum_list)) {
664 sum = list_first_entry(&sdev->csum_list,
665 struct btrfs_ordered_sum, list);
666 if (sum->bytenr > logical)
667 return 0;
668 if (sum->bytenr + sum->len > logical)
669 break;
670
671 ++sdev->stat.csum_discards;
672 list_del(&sum->list);
673 kfree(sum);
674 sum = NULL;
675 }
676 if (!sum)
677 return 0;
678
679 num_sectors = sum->len / sectorsize;
680 for (i = 0; i < num_sectors; ++i) {
681 if (sum->sums[i].bytenr == logical) {
682 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
683 ret = 1;
684 break;
685 }
686 }
687 if (ret && i == num_sectors - 1) {
688 list_del(&sum->list);
689 kfree(sum);
690 }
691 return ret;
692}
693
694/* scrub extent tries to collect up to 64 kB for each bio */
695static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
696 u64 physical, u64 flags, u64 gen, u64 mirror_num)
697{
698 int ret;
699 u8 csum[BTRFS_CSUM_SIZE];
700
701 while (len) {
702 u64 l = min_t(u64, len, PAGE_SIZE);
703 int have_csum = 0;
704
705 if (flags & BTRFS_EXTENT_FLAG_DATA) {
706 /* push csums to sbio */
707 have_csum = scrub_find_csum(sdev, logical, l, csum);
708 if (have_csum == 0)
709 ++sdev->stat.no_csum;
710 }
711 ret = scrub_page(sdev, logical, l, physical, flags, gen,
712 mirror_num, have_csum ? csum : NULL, 0);
713 if (ret)
714 return ret;
715 len -= l;
716 logical += l;
717 physical += l;
718 }
719 return 0;
720}
721
/*
 * Scrub one stripe of the chunk described by @map that lives on this
 * device: @num is the index into map->stripes, @base the chunk's logical
 * start, @length the dev extent length.
 *
 * Three passes over the stripe's logical range:
 *   1. walk the extent tree once just to pull the relevant leaves into
 *      the page cache (prefetch),
 *   2. collect all data checksums into sdev->csum_list,
 *   3. walk the extents again and feed them to scrub_extent().
 * Cancel and pause requests are honored between stripe_len units; after
 * a pause the csum list is rebuilt starting from the current stripe.
 */
static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
	struct map_lookup *map, int num, u64 base, u64 length)
{
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	u64 flags;
	int ret;
	int slot;
	int i;
	u64 nstripes;
	int start_stripe;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 physical;
	u64 logical;
	u64 generation;
	u64 mirror_num;

	u64 increment = map->stripe_len;
	u64 offset;

	/* number of stripe_len units covered by this dev extent */
	nstripes = length;
	offset = 0;
	do_div(nstripes, map->stripe_len);
	/*
	 * per-profile geometry: offset is this stripe's displacement into
	 * the chunk's logical space, increment the logical distance
	 * between consecutive units on this device
	 */
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 0;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else {
		increment = map->stripe_len;
		mirror_num = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* searches go through the commit root, so no tree locks needed */
	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * find all extents for each stripe and just read them to get
	 * them into the page cache
	 * FIXME: we can do better. build a more intelligent prefetching
	 */
	logical = base + offset;
	physical = map->stripes[num].physical;
	ret = 0;
	for (i = 0; i < nstripes; ++i) {
		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, slot);
		if (key.objectid != logical) {
			/* an extent may begin before this stripe */
			ret = btrfs_previous_item(root, path, 0,
						  BTRFS_EXTENT_ITEM_KEY);
			if (ret < 0)
				goto out;
		}

		/* step over every item inside this stripe_len unit */
		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.objectid >= logical + map->stripe_len)
				break;

			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
		cond_resched();
	}

	/*
	 * collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB
	 */
	start_stripe = 0;
again:
	logical = base + offset + start_stripe * increment;
	for (i = start_stripe; i < nstripes; ++i) {
		ret = btrfs_lookup_csums_range(csum_root, logical,
					       logical + map->stripe_len - 1,
					       &sdev->csum_list, 1);
		if (ret)
			goto out;

		logical += increment;
		cond_resched();
	}
	/*
	 * now find all extents for each stripe and scrub them
	 */
	logical = base + offset + start_stripe * increment;
	physical = map->stripes[num].physical + start_stripe * map->stripe_len;
	ret = 0;
	for (i = start_stripe; i < nstripes; ++i) {
		/*
		 * canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sdev->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}
		/*
		 * check to see if we have to pause
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
			scrub_submit(sdev);
			wait_event(sdev->list_wait,
				   atomic_read(&sdev->in_flight) == 0);
			/* announce the pause; btrfs_scrub_pause() waits
			 * for scrubs_paused to reach scrubs_running */
			atomic_inc(&fs_info->scrubs_paused);
			wake_up(&fs_info->scrub_pause_wait);
			mutex_lock(&fs_info->scrub_lock);
			while (atomic_read(&fs_info->scrub_pause_req)) {
				mutex_unlock(&fs_info->scrub_lock);
				wait_event(fs_info->scrub_pause_wait,
				   atomic_read(&fs_info->scrub_pause_req) == 0);
				mutex_lock(&fs_info->scrub_lock);
			}
			atomic_dec(&fs_info->scrubs_paused);
			mutex_unlock(&fs_info->scrub_lock);
			wake_up(&fs_info->scrub_pause_wait);
			/* the commit may have changed csums; re-collect
			 * from the current stripe onwards */
			scrub_free_csums(sdev);
			start_stripe = i;
			goto again;
		}

		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, slot);
		if (key.objectid != logical) {
			/* an extent may begin before this stripe */
			ret = btrfs_previous_item(root, path, 0,
						  BTRFS_EXTENT_ITEM_KEY);
			if (ret < 0)
				goto out;
		}

		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			/* extent ends before this stripe unit */
			if (key.objectid + key.offset <= logical)
				goto next;

			/* past the end of this stripe unit */
			if (key.objectid >= logical + map->stripe_len)
				break;

			if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
				goto next;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			/* tree blocks must not cross stripe boundaries;
			 * report and skip if one does */
			if (key.objectid < logical &&
			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
				printk(KERN_ERR
				       "btrfs scrub: tree block %llu spanning "
				       "stripes, ignored. logical=%llu\n",
				       (unsigned long long)key.objectid,
				       (unsigned long long)logical);
				goto next;
			}

			/*
			 * trim extent to this stripe
			 */
			if (key.objectid < logical) {
				key.offset -= logical - key.objectid;
				key.objectid = logical;
			}
			if (key.objectid + key.offset >
			    logical + map->stripe_len) {
				key.offset = logical + map->stripe_len -
					     key.objectid;
			}

			ret = scrub_extent(sdev, key.objectid, key.offset,
					   key.objectid - logical + physical,
					   flags, generation, mirror_num);
			if (ret)
				goto out;

next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sdev->stat_lock);
		sdev->stat.last_physical = physical;
		spin_unlock(&sdev->stat_lock);
	}
	/* push queued extents */
	scrub_submit(sdev);

out:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
978
979static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
980 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
981{
982 struct btrfs_mapping_tree *map_tree =
983 &sdev->dev->dev_root->fs_info->mapping_tree;
984 struct map_lookup *map;
985 struct extent_map *em;
986 int i;
987 int ret = -EINVAL;
988
989 read_lock(&map_tree->map_tree.lock);
990 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
991 read_unlock(&map_tree->map_tree.lock);
992
993 if (!em)
994 return -EINVAL;
995
996 map = (struct map_lookup *)em->bdev;
997 if (em->start != chunk_offset)
998 goto out;
999
1000 if (em->len < length)
1001 goto out;
1002
1003 for (i = 0; i < map->num_stripes; ++i) {
1004 if (map->stripes[i].dev == sdev->dev) {
1005 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1006 if (ret)
1007 goto out;
1008 }
1009 }
1010out:
1011 free_extent_map(em);
1012
1013 return ret;
1014}
1015
1016static noinline_for_stack
1017int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1018{
1019 struct btrfs_dev_extent *dev_extent = NULL;
1020 struct btrfs_path *path;
1021 struct btrfs_root *root = sdev->dev->dev_root;
1022 struct btrfs_fs_info *fs_info = root->fs_info;
1023 u64 length;
1024 u64 chunk_tree;
1025 u64 chunk_objectid;
1026 u64 chunk_offset;
1027 int ret;
1028 int slot;
1029 struct extent_buffer *l;
1030 struct btrfs_key key;
1031 struct btrfs_key found_key;
1032 struct btrfs_block_group_cache *cache;
1033
1034 path = btrfs_alloc_path();
1035 if (!path)
1036 return -ENOMEM;
1037
1038 path->reada = 2;
1039 path->search_commit_root = 1;
1040 path->skip_locking = 1;
1041
1042 key.objectid = sdev->dev->devid;
1043 key.offset = 0ull;
1044 key.type = BTRFS_DEV_EXTENT_KEY;
1045
1046
1047 while (1) {
1048 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1049 if (ret < 0)
1050 goto out;
1051 ret = 0;
1052
1053 l = path->nodes[0];
1054 slot = path->slots[0];
1055
1056 btrfs_item_key_to_cpu(l, &found_key, slot);
1057
1058 if (found_key.objectid != sdev->dev->devid)
1059 break;
1060
1061 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
1062 break;
1063
1064 if (found_key.offset >= end)
1065 break;
1066
1067 if (found_key.offset < key.offset)
1068 break;
1069
1070 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1071 length = btrfs_dev_extent_length(l, dev_extent);
1072
1073 if (found_key.offset + length <= start) {
1074 key.offset = found_key.offset + length;
1075 btrfs_release_path(path);
1076 continue;
1077 }
1078
1079 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1080 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1081 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1082
1083 /*
1084 * get a reference on the corresponding block group to prevent
1085 * the chunk from going away while we scrub it
1086 */
1087 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1088 if (!cache) {
1089 ret = -ENOENT;
1090 goto out;
1091 }
1092 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1093 chunk_offset, length);
1094 btrfs_put_block_group(cache);
1095 if (ret)
1096 break;
1097
1098 key.offset = found_key.offset + length;
1099 btrfs_release_path(path);
1100 }
1101
1102out:
1103 btrfs_free_path(path);
1104 return ret;
1105}
1106
1107static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1108{
1109 int i;
1110 u64 bytenr;
1111 u64 gen;
1112 int ret;
1113 struct btrfs_device *device = sdev->dev;
1114 struct btrfs_root *root = device->dev_root;
1115
1116 gen = root->fs_info->last_trans_committed;
1117
1118 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1119 bytenr = btrfs_sb_offset(i);
1120 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1121 break;
1122
1123 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1124 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1125 if (ret)
1126 return ret;
1127 }
1128 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1129
1130 return 0;
1131}
1132
1133/*
1134 * get a reference count on fs_info->scrub_workers. start worker if necessary
1135 */
1136static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1137{
1138 struct btrfs_fs_info *fs_info = root->fs_info;
1139
1140 mutex_lock(&fs_info->scrub_lock);
1141 if (fs_info->scrub_workers_refcnt == 0)
1142 btrfs_start_workers(&fs_info->scrub_workers, 1);
1143 ++fs_info->scrub_workers_refcnt;
1144 mutex_unlock(&fs_info->scrub_lock);
1145
1146 return 0;
1147}
1148
1149static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1150{
1151 struct btrfs_fs_info *fs_info = root->fs_info;
1152
1153 mutex_lock(&fs_info->scrub_lock);
1154 if (--fs_info->scrub_workers_refcnt == 0)
1155 btrfs_stop_workers(&fs_info->scrub_workers);
1156 WARN_ON(fs_info->scrub_workers_refcnt < 0);
1157 mutex_unlock(&fs_info->scrub_lock);
1158}
1159
1160
1161int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1162 struct btrfs_scrub_progress *progress, int readonly)
1163{
1164 struct scrub_dev *sdev;
1165 struct btrfs_fs_info *fs_info = root->fs_info;
1166 int ret;
1167 struct btrfs_device *dev;
1168
1169 if (root->fs_info->closing)
1170 return -EINVAL;
1171
1172 /*
1173 * check some assumptions
1174 */
1175 if (root->sectorsize != PAGE_SIZE ||
1176 root->sectorsize != root->leafsize ||
1177 root->sectorsize != root->nodesize) {
1178 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1179 return -EINVAL;
1180 }
1181
1182 ret = scrub_workers_get(root);
1183 if (ret)
1184 return ret;
1185
1186 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1187 dev = btrfs_find_device(root, devid, NULL, NULL);
1188 if (!dev || dev->missing) {
1189 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1190 scrub_workers_put(root);
1191 return -ENODEV;
1192 }
1193 mutex_lock(&fs_info->scrub_lock);
1194
1195 if (!dev->in_fs_metadata) {
1196 mutex_unlock(&fs_info->scrub_lock);
1197 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1198 scrub_workers_put(root);
1199 return -ENODEV;
1200 }
1201
1202 if (dev->scrub_device) {
1203 mutex_unlock(&fs_info->scrub_lock);
1204 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1205 scrub_workers_put(root);
1206 return -EINPROGRESS;
1207 }
1208 sdev = scrub_setup_dev(dev);
1209 if (IS_ERR(sdev)) {
1210 mutex_unlock(&fs_info->scrub_lock);
1211 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1212 scrub_workers_put(root);
1213 return PTR_ERR(sdev);
1214 }
1215 sdev->readonly = readonly;
1216 dev->scrub_device = sdev;
1217
1218 atomic_inc(&fs_info->scrubs_running);
1219 mutex_unlock(&fs_info->scrub_lock);
1220 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1221
1222 down_read(&fs_info->scrub_super_lock);
1223 ret = scrub_supers(sdev);
1224 up_read(&fs_info->scrub_super_lock);
1225
1226 if (!ret)
1227 ret = scrub_enumerate_chunks(sdev, start, end);
1228
1229 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1230
1231 atomic_dec(&fs_info->scrubs_running);
1232 wake_up(&fs_info->scrub_pause_wait);
1233
1234 if (progress)
1235 memcpy(progress, &sdev->stat, sizeof(*progress));
1236
1237 mutex_lock(&fs_info->scrub_lock);
1238 dev->scrub_device = NULL;
1239 mutex_unlock(&fs_info->scrub_lock);
1240
1241 scrub_free_dev(sdev);
1242 scrub_workers_put(root);
1243
1244 return ret;
1245}
1246
/*
 * Request all running scrubs to pause and wait until each of them has
 * reached the paused state (scrubs_paused == scrubs_running).
 * Paired with btrfs_scrub_continue().
 */
int btrfs_scrub_pause(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		/* drop the lock so scrubbers can mark themselves paused */
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
1265
1266int btrfs_scrub_continue(struct btrfs_root *root)
1267{
1268 struct btrfs_fs_info *fs_info = root->fs_info;
1269
1270 atomic_dec(&fs_info->scrub_pause_req);
1271 wake_up(&fs_info->scrub_pause_wait);
1272 return 0;
1273}
1274
1275int btrfs_scrub_pause_super(struct btrfs_root *root)
1276{
1277 down_write(&root->fs_info->scrub_super_lock);
1278 return 0;
1279}
1280
1281int btrfs_scrub_continue_super(struct btrfs_root *root)
1282{
1283 up_write(&root->fs_info->scrub_super_lock);
1284 return 0;
1285}
1286
/*
 * Cancel all running scrubs and wait until the last one has exited.
 * Returns -ENOTCONN when no scrub is running.
 */
int btrfs_scrub_cancel(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		/* drop the lock so scrubbers can finish and decrement */
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
1309
/*
 * Cancel the scrub running on @dev (if any) and wait until it has torn
 * down (dev->scrub_device cleared by btrfs_scrub_dev()).
 * Returns -ENOTCONN when no scrub is running on this device.
 */
int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct scrub_dev *sdev;

	mutex_lock(&fs_info->scrub_lock);
	sdev = dev->scrub_device;
	if (!sdev) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sdev->cancel_req);
	while (dev->scrub_device) {
		/* drop the lock so the scrub can observe the request */
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_device == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
1332int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1333{
1334 struct btrfs_fs_info *fs_info = root->fs_info;
1335 struct btrfs_device *dev;
1336 int ret;
1337
1338 /*
1339 * we have to hold the device_list_mutex here so the device
1340 * does not go away in cancel_dev. FIXME: find a better solution
1341 */
1342 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1343 dev = btrfs_find_device(root, devid, NULL, NULL);
1344 if (!dev) {
1345 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1346 return -ENODEV;
1347 }
1348 ret = btrfs_scrub_cancel_dev(root, dev);
1349 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1350
1351 return ret;
1352}
1353
1354int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1355 struct btrfs_scrub_progress *progress)
1356{
1357 struct btrfs_device *dev;
1358 struct scrub_dev *sdev = NULL;
1359
1360 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1361 dev = btrfs_find_device(root, devid, NULL, NULL);
1362 if (dev)
1363 sdev = dev->scrub_device;
1364 if (sdev)
1365 memcpy(progress, &sdev->stat, sizeof(*progress));
1366 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1367
1368 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1369}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index be4ffa12f3ef..9b2e7e5bc3ef 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,6 +41,7 @@
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include "compat.h" 43#include "compat.h"
44#include "delayed-inode.h"
44#include "ctree.h" 45#include "ctree.h"
45#include "disk-io.h" 46#include "disk-io.h"
46#include "transaction.h" 47#include "transaction.h"
@@ -160,7 +161,7 @@ enum {
160 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
161 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
162 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
163 Opt_enospc_debug, Opt_subvolrootid, Opt_err, 164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err,
164}; 165};
165 166
166static match_table_t tokens = { 167static match_table_t tokens = {
@@ -191,6 +192,7 @@ static match_table_t tokens = {
191 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 192 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
192 {Opt_enospc_debug, "enospc_debug"}, 193 {Opt_enospc_debug, "enospc_debug"},
193 {Opt_subvolrootid, "subvolrootid=%d"}, 194 {Opt_subvolrootid, "subvolrootid=%d"},
195 {Opt_defrag, "autodefrag"},
194 {Opt_err, NULL}, 196 {Opt_err, NULL},
195}; 197};
196 198
@@ -369,6 +371,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
369 case Opt_enospc_debug: 371 case Opt_enospc_debug:
370 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 372 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
371 break; 373 break;
374 case Opt_defrag:
375 printk(KERN_INFO "btrfs: enabling auto defrag");
376 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
377 break;
372 case Opt_err: 378 case Opt_err:
373 printk(KERN_INFO "btrfs: unrecognized mount option " 379 printk(KERN_INFO "btrfs: unrecognized mount option "
374 "'%s'\n", p); 380 "'%s'\n", p);
@@ -507,8 +513,10 @@ static struct dentry *get_default_root(struct super_block *sb,
507 */ 513 */
508 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 514 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
509 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); 515 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
510 if (IS_ERR(di)) 516 if (IS_ERR(di)) {
517 btrfs_free_path(path);
511 return ERR_CAST(di); 518 return ERR_CAST(di);
519 }
512 if (!di) { 520 if (!di) {
513 /* 521 /*
514 * Ok the default dir item isn't there. This is weird since 522 * Ok the default dir item isn't there. This is weird since
@@ -741,7 +749,7 @@ static int btrfs_set_super(struct super_block *s, void *data)
741 * for multiple device setup. Make sure to keep it in sync. 749 * for multiple device setup. Make sure to keep it in sync.
742 */ 750 */
743static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, 751static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
744 const char *dev_name, void *data) 752 const char *device_name, void *data)
745{ 753{
746 struct block_device *bdev = NULL; 754 struct block_device *bdev = NULL;
747 struct super_block *s; 755 struct super_block *s;
@@ -764,7 +772,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
764 if (error) 772 if (error)
765 return ERR_PTR(error); 773 return ERR_PTR(error);
766 774
767 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 775 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
768 if (error) 776 if (error)
769 goto error_free_subvol_name; 777 goto error_free_subvol_name;
770 778
@@ -915,6 +923,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
915 return 0; 923 return 0;
916} 924}
917 925
926/* Used to sort the devices by max_avail(descending sort) */
927static int btrfs_cmp_device_free_bytes(const void *dev_info1,
928 const void *dev_info2)
929{
930 if (((struct btrfs_device_info *)dev_info1)->max_avail >
931 ((struct btrfs_device_info *)dev_info2)->max_avail)
932 return -1;
933 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
934 ((struct btrfs_device_info *)dev_info2)->max_avail)
935 return 1;
936 else
937 return 0;
938}
939
940/*
941 * sort the devices by max_avail, in which max free extent size of each device
942 * is stored.(Descending Sort)
943 */
944static inline void btrfs_descending_sort_devices(
945 struct btrfs_device_info *devices,
946 size_t nr_devices)
947{
948 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
949 btrfs_cmp_device_free_bytes, NULL);
950}
951
918/* 952/*
919 * The helper to calc the free space on the devices that can be used to store 953 * The helper to calc the free space on the devices that can be used to store
920 * file data. 954 * file data.
@@ -1208,10 +1242,14 @@ static int __init init_btrfs_fs(void)
1208 if (err) 1242 if (err)
1209 goto free_extent_io; 1243 goto free_extent_io;
1210 1244
1211 err = btrfs_interface_init(); 1245 err = btrfs_delayed_inode_init();
1212 if (err) 1246 if (err)
1213 goto free_extent_map; 1247 goto free_extent_map;
1214 1248
1249 err = btrfs_interface_init();
1250 if (err)
1251 goto free_delayed_inode;
1252
1215 err = register_filesystem(&btrfs_fs_type); 1253 err = register_filesystem(&btrfs_fs_type);
1216 if (err) 1254 if (err)
1217 goto unregister_ioctl; 1255 goto unregister_ioctl;
@@ -1221,6 +1259,8 @@ static int __init init_btrfs_fs(void)
1221 1259
1222unregister_ioctl: 1260unregister_ioctl:
1223 btrfs_interface_exit(); 1261 btrfs_interface_exit();
1262free_delayed_inode:
1263 btrfs_delayed_inode_exit();
1224free_extent_map: 1264free_extent_map:
1225 extent_map_exit(); 1265 extent_map_exit();
1226free_extent_io: 1266free_extent_io:
@@ -1237,6 +1277,7 @@ free_sysfs:
1237static void __exit exit_btrfs_fs(void) 1277static void __exit exit_btrfs_fs(void)
1238{ 1278{
1239 btrfs_destroy_cachep(); 1279 btrfs_destroy_cachep();
1280 btrfs_delayed_inode_exit();
1240 extent_map_exit(); 1281 extent_map_exit();
1241 extent_io_exit(); 1282 extent_io_exit();
1242 btrfs_interface_exit(); 1283 btrfs_interface_exit();
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4ce16ef702a3..c3c223ae6691 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -174,86 +174,9 @@ static const struct sysfs_ops btrfs_root_attr_ops = {
174 .store = btrfs_root_attr_store, 174 .store = btrfs_root_attr_store,
175}; 175};
176 176
177static struct kobj_type btrfs_root_ktype = {
178 .default_attrs = btrfs_root_attrs,
179 .sysfs_ops = &btrfs_root_attr_ops,
180 .release = btrfs_root_release,
181};
182
183static struct kobj_type btrfs_super_ktype = {
184 .default_attrs = btrfs_super_attrs,
185 .sysfs_ops = &btrfs_super_attr_ops,
186 .release = btrfs_super_release,
187};
188
189/* /sys/fs/btrfs/ entry */ 177/* /sys/fs/btrfs/ entry */
190static struct kset *btrfs_kset; 178static struct kset *btrfs_kset;
191 179
192int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
193{
194 int error;
195 char *name;
196 char c;
197 int len = strlen(fs->sb->s_id) + 1;
198 int i;
199
200 name = kmalloc(len, GFP_NOFS);
201 if (!name) {
202 error = -ENOMEM;
203 goto fail;
204 }
205
206 for (i = 0; i < len; i++) {
207 c = fs->sb->s_id[i];
208 if (c == '/' || c == '\\')
209 c = '!';
210 name[i] = c;
211 }
212 name[len] = '\0';
213
214 fs->super_kobj.kset = btrfs_kset;
215 error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
216 NULL, "%s", name);
217 kfree(name);
218 if (error)
219 goto fail;
220
221 return 0;
222
223fail:
224 printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
225 return error;
226}
227
228int btrfs_sysfs_add_root(struct btrfs_root *root)
229{
230 int error;
231
232 error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
233 &root->fs_info->super_kobj,
234 "%s", root->name);
235 if (error)
236 goto fail;
237
238 return 0;
239
240fail:
241 printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
242 return error;
243}
244
245void btrfs_sysfs_del_root(struct btrfs_root *root)
246{
247 kobject_put(&root->root_kobj);
248 wait_for_completion(&root->kobj_unregister);
249}
250
251void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
252{
253 kobject_put(&fs->super_kobj);
254 wait_for_completion(&fs->kobj_unregister);
255}
256
257int btrfs_init_sysfs(void) 180int btrfs_init_sysfs(void)
258{ 181{
259 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); 182 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c571734d5e5a..dc80f7156923 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -27,6 +27,7 @@
27#include "transaction.h" 27#include "transaction.h"
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h"
30 31
31#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
32 33
@@ -80,8 +81,7 @@ static noinline int join_transaction(struct btrfs_root *root)
80 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
82 extent_io_tree_init(&cur_trans->dirty_pages, 83 extent_io_tree_init(&cur_trans->dirty_pages,
83 root->fs_info->btree_inode->i_mapping, 84 root->fs_info->btree_inode->i_mapping);
84 GFP_NOFS);
85 spin_lock(&root->fs_info->new_trans_lock); 85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
@@ -347,49 +347,6 @@ out_unlock:
347 return ret; 347 return ret;
348} 348}
349 349
350#if 0
351/*
352 * rate limit against the drop_snapshot code. This helps to slow down new
353 * operations if the drop_snapshot code isn't able to keep up.
354 */
355static void throttle_on_drops(struct btrfs_root *root)
356{
357 struct btrfs_fs_info *info = root->fs_info;
358 int harder_count = 0;
359
360harder:
361 if (atomic_read(&info->throttles)) {
362 DEFINE_WAIT(wait);
363 int thr;
364 thr = atomic_read(&info->throttle_gen);
365
366 do {
367 prepare_to_wait(&info->transaction_throttle,
368 &wait, TASK_UNINTERRUPTIBLE);
369 if (!atomic_read(&info->throttles)) {
370 finish_wait(&info->transaction_throttle, &wait);
371 break;
372 }
373 schedule();
374 finish_wait(&info->transaction_throttle, &wait);
375 } while (thr == atomic_read(&info->throttle_gen));
376 harder_count++;
377
378 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
379 harder_count < 2)
380 goto harder;
381
382 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
383 harder_count < 10)
384 goto harder;
385
386 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
387 harder_count < 20)
388 goto harder;
389 }
390}
391#endif
392
393void btrfs_throttle(struct btrfs_root *root) 350void btrfs_throttle(struct btrfs_root *root)
394{ 351{
395 mutex_lock(&root->fs_info->trans_mutex); 352 mutex_lock(&root->fs_info->trans_mutex);
@@ -487,19 +444,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
487int btrfs_end_transaction(struct btrfs_trans_handle *trans, 444int btrfs_end_transaction(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root) 445 struct btrfs_root *root)
489{ 446{
490 return __btrfs_end_transaction(trans, root, 0, 1); 447 int ret;
448
449 ret = __btrfs_end_transaction(trans, root, 0, 1);
450 if (ret)
451 return ret;
452 return 0;
491} 453}
492 454
493int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 455int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
494 struct btrfs_root *root) 456 struct btrfs_root *root)
495{ 457{
496 return __btrfs_end_transaction(trans, root, 1, 1); 458 int ret;
459
460 ret = __btrfs_end_transaction(trans, root, 1, 1);
461 if (ret)
462 return ret;
463 return 0;
497} 464}
498 465
499int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 466int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
500 struct btrfs_root *root) 467 struct btrfs_root *root)
501{ 468{
502 return __btrfs_end_transaction(trans, root, 0, 0); 469 int ret;
470
471 ret = __btrfs_end_transaction(trans, root, 0, 0);
472 if (ret)
473 return ret;
474 return 0;
475}
476
477int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
478 struct btrfs_root *root)
479{
480 return __btrfs_end_transaction(trans, root, 1, 1);
503} 481}
504 482
505/* 483/*
@@ -760,8 +738,14 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
760 btrfs_update_reloc_root(trans, root); 738 btrfs_update_reloc_root(trans, root);
761 btrfs_orphan_commit_root(trans, root); 739 btrfs_orphan_commit_root(trans, root);
762 740
741 btrfs_save_ino_cache(root, trans);
742
763 if (root->commit_root != root->node) { 743 if (root->commit_root != root->node) {
744 mutex_lock(&root->fs_commit_mutex);
764 switch_commit_root(root); 745 switch_commit_root(root);
746 btrfs_unpin_free_ino(root);
747 mutex_unlock(&root->fs_commit_mutex);
748
765 btrfs_set_root_node(&root->root_item, 749 btrfs_set_root_node(&root->root_item,
766 root->node); 750 root->node);
767 } 751 }
@@ -809,97 +793,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
809 return ret; 793 return ret;
810} 794}
811 795
812#if 0
813/*
814 * when dropping snapshots, we generate a ton of delayed refs, and it makes
815 * sense not to join the transaction while it is trying to flush the current
816 * queue of delayed refs out.
817 *
818 * This is used by the drop snapshot code only
819 */
820static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
821{
822 DEFINE_WAIT(wait);
823
824 mutex_lock(&info->trans_mutex);
825 while (info->running_transaction &&
826 info->running_transaction->delayed_refs.flushing) {
827 prepare_to_wait(&info->transaction_wait, &wait,
828 TASK_UNINTERRUPTIBLE);
829 mutex_unlock(&info->trans_mutex);
830
831 schedule();
832
833 mutex_lock(&info->trans_mutex);
834 finish_wait(&info->transaction_wait, &wait);
835 }
836 mutex_unlock(&info->trans_mutex);
837 return 0;
838}
839
840/*
841 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
842 * all of them
843 */
844int btrfs_drop_dead_root(struct btrfs_root *root)
845{
846 struct btrfs_trans_handle *trans;
847 struct btrfs_root *tree_root = root->fs_info->tree_root;
848 unsigned long nr;
849 int ret;
850
851 while (1) {
852 /*
853 * we don't want to jump in and create a bunch of
854 * delayed refs if the transaction is starting to close
855 */
856 wait_transaction_pre_flush(tree_root->fs_info);
857 trans = btrfs_start_transaction(tree_root, 1);
858
859 /*
860 * we've joined a transaction, make sure it isn't
861 * closing right now
862 */
863 if (trans->transaction->delayed_refs.flushing) {
864 btrfs_end_transaction(trans, tree_root);
865 continue;
866 }
867
868 ret = btrfs_drop_snapshot(trans, root);
869 if (ret != -EAGAIN)
870 break;
871
872 ret = btrfs_update_root(trans, tree_root,
873 &root->root_key,
874 &root->root_item);
875 if (ret)
876 break;
877
878 nr = trans->blocks_used;
879 ret = btrfs_end_transaction(trans, tree_root);
880 BUG_ON(ret);
881
882 btrfs_btree_balance_dirty(tree_root, nr);
883 cond_resched();
884 }
885 BUG_ON(ret);
886
887 ret = btrfs_del_root(trans, tree_root, &root->root_key);
888 BUG_ON(ret);
889
890 nr = trans->blocks_used;
891 ret = btrfs_end_transaction(trans, tree_root);
892 BUG_ON(ret);
893
894 free_extent_buffer(root->node);
895 free_extent_buffer(root->commit_root);
896 kfree(root);
897
898 btrfs_btree_balance_dirty(tree_root, nr);
899 return ret;
900}
901#endif
902
903/* 796/*
904 * new snapshots need to be created at a very specific time in the 797 * new snapshots need to be created at a very specific time in the
905 * transaction commit. This does the actual creation 798 * transaction commit. This does the actual creation
@@ -930,7 +823,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
930 goto fail; 823 goto fail;
931 } 824 }
932 825
933 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 826 ret = btrfs_find_free_objectid(tree_root, &objectid);
934 if (ret) { 827 if (ret) {
935 pending->error = ret; 828 pending->error = ret;
936 goto fail; 829 goto fail;
@@ -967,7 +860,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
967 BUG_ON(ret); 860 BUG_ON(ret);
968 ret = btrfs_insert_dir_item(trans, parent_root, 861 ret = btrfs_insert_dir_item(trans, parent_root,
969 dentry->d_name.name, dentry->d_name.len, 862 dentry->d_name.name, dentry->d_name.len,
970 parent_inode->i_ino, &key, 863 parent_inode, &key,
971 BTRFS_FT_DIR, index); 864 BTRFS_FT_DIR, index);
972 BUG_ON(ret); 865 BUG_ON(ret);
973 866
@@ -1009,7 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1009 */ 902 */
1010 ret = btrfs_add_root_ref(trans, tree_root, objectid, 903 ret = btrfs_add_root_ref(trans, tree_root, objectid,
1011 parent_root->root_key.objectid, 904 parent_root->root_key.objectid,
1012 parent_inode->i_ino, index, 905 btrfs_ino(parent_inode), index,
1013 dentry->d_name.name, dentry->d_name.len); 906 dentry->d_name.name, dentry->d_name.len);
1014 BUG_ON(ret); 907 BUG_ON(ret);
1015 dput(parent); 908 dput(parent);
@@ -1037,6 +930,14 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1037 int ret; 930 int ret;
1038 931
1039 list_for_each_entry(pending, head, list) { 932 list_for_each_entry(pending, head, list) {
933 /*
934 * We must deal with the delayed items before creating
935 * snapshots, or we will create a snapthot with inconsistent
936 * information.
937 */
938 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
939 BUG_ON(ret);
940
1040 ret = create_pending_snapshot(trans, fs_info, pending); 941 ret = create_pending_snapshot(trans, fs_info, pending);
1041 BUG_ON(ret); 942 BUG_ON(ret);
1042 } 943 }
@@ -1290,6 +1191,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1290 BUG_ON(ret); 1191 BUG_ON(ret);
1291 } 1192 }
1292 1193
1194 ret = btrfs_run_delayed_items(trans, root);
1195 BUG_ON(ret);
1196
1293 /* 1197 /*
1294 * rename don't use btrfs_join_transaction, so, once we 1198 * rename don't use btrfs_join_transaction, so, once we
1295 * set the transaction to blocked above, we aren't going 1199 * set the transaction to blocked above, we aren't going
@@ -1316,11 +1220,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1316 ret = create_pending_snapshots(trans, root->fs_info); 1220 ret = create_pending_snapshots(trans, root->fs_info);
1317 BUG_ON(ret); 1221 BUG_ON(ret);
1318 1222
1223 ret = btrfs_run_delayed_items(trans, root);
1224 BUG_ON(ret);
1225
1319 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1226 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1320 BUG_ON(ret); 1227 BUG_ON(ret);
1321 1228
1322 WARN_ON(cur_trans != trans->transaction); 1229 WARN_ON(cur_trans != trans->transaction);
1323 1230
1231 btrfs_scrub_pause(root);
1324 /* btrfs_commit_tree_roots is responsible for getting the 1232 /* btrfs_commit_tree_roots is responsible for getting the
1325 * various roots consistent with each other. Every pointer 1233 * various roots consistent with each other. Every pointer
1326 * in the tree of tree roots has to point to the most up to date 1234 * in the tree of tree roots has to point to the most up to date
@@ -1405,6 +1313,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1405 1313
1406 mutex_unlock(&root->fs_info->trans_mutex); 1314 mutex_unlock(&root->fs_info->trans_mutex);
1407 1315
1316 btrfs_scrub_continue(root);
1317
1408 if (current->journal_info == trans) 1318 if (current->journal_info == trans)
1409 current->journal_info = NULL; 1319 current->journal_info = NULL;
1410 1320
@@ -1432,6 +1342,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1432 root = list_entry(list.next, struct btrfs_root, root_list); 1342 root = list_entry(list.next, struct btrfs_root, root_list);
1433 list_del(&root->root_list); 1343 list_del(&root->root_list);
1434 1344
1345 btrfs_kill_all_delayed_nodes(root);
1346
1435 if (btrfs_header_backref_rev(root->node) < 1347 if (btrfs_header_backref_rev(root->node) <
1436 BTRFS_MIXED_BACKREF_REV) 1348 BTRFS_MIXED_BACKREF_REV)
1437 btrfs_drop_snapshot(root, NULL, 0); 1349 btrfs_drop_snapshot(root, NULL, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e441acc6c584..804c88639e5d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -101,11 +101,8 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 103 struct btrfs_root *root);
104int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
105 struct btrfs_root *root);
106 104
107int btrfs_add_dead_root(struct btrfs_root *root); 105int btrfs_add_dead_root(struct btrfs_root *root);
108int btrfs_drop_dead_root(struct btrfs_root *root);
109int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 106int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
110int btrfs_clean_old_snapshots(struct btrfs_root *root); 107int btrfs_clean_old_snapshots(struct btrfs_root *root);
111int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 108int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -115,6 +112,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
115 int wait_for_unblock); 112 int wait_for_unblock);
116int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 113int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
117 struct btrfs_root *root); 114 struct btrfs_root *root);
115int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
116 struct btrfs_root *root);
118int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root); 118 struct btrfs_root *root);
120void btrfs_throttle(struct btrfs_root *root); 119void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 992ab425599d..3b580ee8ab1d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -97,7 +97,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
97 ret = 0; 97 ret = 0;
98 goto out; 98 goto out;
99 } 99 }
100 btrfs_release_path(root, path); 100 btrfs_release_path(path);
101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1); 101 wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
102 102
103 if (wret < 0) { 103 if (wret < 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f997ec0c1ba4..592396c6dc47 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -333,13 +333,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
333 goto insert; 333 goto insert;
334 334
335 if (item_size == 0) { 335 if (item_size == 0) {
336 btrfs_release_path(root, path); 336 btrfs_release_path(path);
337 return 0; 337 return 0;
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) { 341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path); 342 btrfs_release_path(path);
343 kfree(dst_copy); 343 kfree(dst_copy);
344 kfree(src_copy); 344 kfree(src_copy);
345 return -ENOMEM; 345 return -ENOMEM;
@@ -361,13 +361,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
361 * sync 361 * sync
362 */ 362 */
363 if (ret == 0) { 363 if (ret == 0) {
364 btrfs_release_path(root, path); 364 btrfs_release_path(path);
365 return 0; 365 return 0;
366 } 366 }
367 367
368 } 368 }
369insert: 369insert:
370 btrfs_release_path(root, path); 370 btrfs_release_path(path);
371 /* try to insert the key into the destination tree */ 371 /* try to insert the key into the destination tree */
372 ret = btrfs_insert_empty_item(trans, root, path, 372 ret = btrfs_insert_empty_item(trans, root, path,
373 key, item_size); 373 key, item_size);
@@ -382,7 +382,6 @@ insert:
382 } else if (found_size < item_size) { 382 } else if (found_size < item_size) {
383 ret = btrfs_extend_item(trans, root, path, 383 ret = btrfs_extend_item(trans, root, path,
384 item_size - found_size); 384 item_size - found_size);
385 BUG_ON(ret);
386 } 385 }
387 } else if (ret) { 386 } else if (ret) {
388 return ret; 387 return ret;
@@ -438,7 +437,7 @@ insert:
438 } 437 }
439no_copy: 438no_copy:
440 btrfs_mark_buffer_dirty(path->nodes[0]); 439 btrfs_mark_buffer_dirty(path->nodes[0]);
441 btrfs_release_path(root, path); 440 btrfs_release_path(path);
442 return 0; 441 return 0;
443} 442}
444 443
@@ -519,7 +518,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
519 * file. This must be done before the btrfs_drop_extents run 518 * file. This must be done before the btrfs_drop_extents run
520 * so we don't try to drop this extent. 519 * so we don't try to drop this extent.
521 */ 520 */
522 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 521 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
523 start, 0); 522 start, 0);
524 523
525 if (ret == 0 && 524 if (ret == 0 &&
@@ -544,11 +543,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
544 * we don't have to do anything 543 * we don't have to do anything
545 */ 544 */
546 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 545 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
547 btrfs_release_path(root, path); 546 btrfs_release_path(path);
548 goto out; 547 goto out;
549 } 548 }
550 } 549 }
551 btrfs_release_path(root, path); 550 btrfs_release_path(path);
552 551
553 saved_nbytes = inode_get_bytes(inode); 552 saved_nbytes = inode_get_bytes(inode);
554 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
@@ -590,6 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
590 ins.objectid, ins.offset, 589 ins.objectid, ins.offset,
591 0, root->root_key.objectid, 590 0, root->root_key.objectid,
592 key->objectid, offset); 591 key->objectid, offset);
592 BUG_ON(ret);
593 } else { 593 } else {
594 /* 594 /*
595 * insert the extent pointer in the extent 595 * insert the extent pointer in the extent
@@ -600,7 +600,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
600 key->objectid, offset, &ins); 600 key->objectid, offset, &ins);
601 BUG_ON(ret); 601 BUG_ON(ret);
602 } 602 }
603 btrfs_release_path(root, path); 603 btrfs_release_path(path);
604 604
605 if (btrfs_file_extent_compression(eb, item)) { 605 if (btrfs_file_extent_compression(eb, item)) {
606 csum_start = ins.objectid; 606 csum_start = ins.objectid;
@@ -614,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
614 614
615 ret = btrfs_lookup_csums_range(root->log_root, 615 ret = btrfs_lookup_csums_range(root->log_root,
616 csum_start, csum_end - 1, 616 csum_start, csum_end - 1,
617 &ordered_sums); 617 &ordered_sums, 0);
618 BUG_ON(ret); 618 BUG_ON(ret);
619 while (!list_empty(&ordered_sums)) { 619 while (!list_empty(&ordered_sums)) {
620 struct btrfs_ordered_sum *sums; 620 struct btrfs_ordered_sum *sums;
@@ -629,7 +629,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
629 kfree(sums); 629 kfree(sums);
630 } 630 }
631 } else { 631 } else {
632 btrfs_release_path(root, path); 632 btrfs_release_path(path);
633 } 633 }
634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
635 /* inline extents are easy, we just overwrite them */ 635 /* inline extents are easy, we just overwrite them */
@@ -675,10 +675,13 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
675 return -ENOMEM; 675 return -ENOMEM;
676 676
677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
678 btrfs_release_path(root, path); 678 btrfs_release_path(path);
679 679
680 inode = read_one_inode(root, location.objectid); 680 inode = read_one_inode(root, location.objectid);
681 BUG_ON(!inode); 681 if (!inode) {
682 kfree(name);
683 return -EIO;
684 }
682 685
683 ret = link_to_fixup_dir(trans, root, path, location.objectid); 686 ret = link_to_fixup_dir(trans, root, path, location.objectid);
684 BUG_ON(ret); 687 BUG_ON(ret);
@@ -713,7 +716,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
713 goto out; 716 goto out;
714 } else 717 } else
715 goto out; 718 goto out;
716 btrfs_release_path(root, path); 719 btrfs_release_path(path);
717 720
718 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); 721 di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
719 if (di && !IS_ERR(di)) { 722 if (di && !IS_ERR(di)) {
@@ -724,7 +727,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
724 goto out; 727 goto out;
725 match = 1; 728 match = 1;
726out: 729out:
727 btrfs_release_path(root, path); 730 btrfs_release_path(path);
728 return match; 731 return match;
729} 732}
730 733
@@ -817,7 +820,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
817 return -ENOENT; 820 return -ENOENT;
818 821
819 inode = read_one_inode(root, key->objectid); 822 inode = read_one_inode(root, key->objectid);
820 BUG_ON(!inode); 823 if (!inode) {
824 iput(dir);
825 return -EIO;
826 }
821 827
822 ref_ptr = btrfs_item_ptr_offset(eb, slot); 828 ref_ptr = btrfs_item_ptr_offset(eb, slot);
823 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 829 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
@@ -832,7 +838,7 @@ again:
832 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 838 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
833 839
834 /* if we already have a perfect match, we're done */ 840 /* if we already have a perfect match, we're done */
835 if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, 841 if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
836 btrfs_inode_ref_index(eb, ref), 842 btrfs_inode_ref_index(eb, ref),
837 name, namelen)) { 843 name, namelen)) {
838 goto out; 844 goto out;
@@ -884,7 +890,7 @@ again:
884 if (!backref_in_log(log, key, victim_name, 890 if (!backref_in_log(log, key, victim_name,
885 victim_name_len)) { 891 victim_name_len)) {
886 btrfs_inc_nlink(inode); 892 btrfs_inc_nlink(inode);
887 btrfs_release_path(root, path); 893 btrfs_release_path(path);
888 894
889 ret = btrfs_unlink_inode(trans, root, dir, 895 ret = btrfs_unlink_inode(trans, root, dir,
890 inode, victim_name, 896 inode, victim_name,
@@ -901,7 +907,7 @@ again:
901 */ 907 */
902 search_done = 1; 908 search_done = 1;
903 } 909 }
904 btrfs_release_path(root, path); 910 btrfs_release_path(path);
905 911
906insert: 912insert:
907 /* insert our name */ 913 /* insert our name */
@@ -922,7 +928,7 @@ out:
922 BUG_ON(ret); 928 BUG_ON(ret);
923 929
924out_nowrite: 930out_nowrite:
925 btrfs_release_path(root, path); 931 btrfs_release_path(path);
926 iput(dir); 932 iput(dir);
927 iput(inode); 933 iput(inode);
928 return 0; 934 return 0;
@@ -960,8 +966,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
960 unsigned long ptr; 966 unsigned long ptr;
961 unsigned long ptr_end; 967 unsigned long ptr_end;
962 int name_len; 968 int name_len;
969 u64 ino = btrfs_ino(inode);
963 970
964 key.objectid = inode->i_ino; 971 key.objectid = ino;
965 key.type = BTRFS_INODE_REF_KEY; 972 key.type = BTRFS_INODE_REF_KEY;
966 key.offset = (u64)-1; 973 key.offset = (u64)-1;
967 974
@@ -980,7 +987,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
980 } 987 }
981 btrfs_item_key_to_cpu(path->nodes[0], &key, 988 btrfs_item_key_to_cpu(path->nodes[0], &key,
982 path->slots[0]); 989 path->slots[0]);
983 if (key.objectid != inode->i_ino || 990 if (key.objectid != ino ||
984 key.type != BTRFS_INODE_REF_KEY) 991 key.type != BTRFS_INODE_REF_KEY)
985 break; 992 break;
986 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 993 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
@@ -999,9 +1006,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
999 if (key.offset == 0) 1006 if (key.offset == 0)
1000 break; 1007 break;
1001 key.offset--; 1008 key.offset--;
1002 btrfs_release_path(root, path); 1009 btrfs_release_path(path);
1003 } 1010 }
1004 btrfs_release_path(root, path); 1011 btrfs_release_path(path);
1005 if (nlink != inode->i_nlink) { 1012 if (nlink != inode->i_nlink) {
1006 inode->i_nlink = nlink; 1013 inode->i_nlink = nlink;
1007 btrfs_update_inode(trans, root, inode); 1014 btrfs_update_inode(trans, root, inode);
@@ -1011,10 +1018,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1011 if (inode->i_nlink == 0) { 1018 if (inode->i_nlink == 0) {
1012 if (S_ISDIR(inode->i_mode)) { 1019 if (S_ISDIR(inode->i_mode)) {
1013 ret = replay_dir_deletes(trans, root, NULL, path, 1020 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1); 1021 ino, 1);
1015 BUG_ON(ret); 1022 BUG_ON(ret);
1016 } 1023 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino); 1024 ret = insert_orphan_item(trans, root, ino);
1018 BUG_ON(ret); 1025 BUG_ON(ret);
1019 } 1026 }
1020 btrfs_free_path(path); 1027 btrfs_free_path(path);
@@ -1050,11 +1057,13 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1050 break; 1057 break;
1051 1058
1052 ret = btrfs_del_item(trans, root, path); 1059 ret = btrfs_del_item(trans, root, path);
1053 BUG_ON(ret); 1060 if (ret)
1061 goto out;
1054 1062
1055 btrfs_release_path(root, path); 1063 btrfs_release_path(path);
1056 inode = read_one_inode(root, key.offset); 1064 inode = read_one_inode(root, key.offset);
1057 BUG_ON(!inode); 1065 if (!inode)
1066 return -EIO;
1058 1067
1059 ret = fixup_inode_link_count(trans, root, inode); 1068 ret = fixup_inode_link_count(trans, root, inode);
1060 BUG_ON(ret); 1069 BUG_ON(ret);
@@ -1068,8 +1077,10 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1068 */ 1077 */
1069 key.offset = (u64)-1; 1078 key.offset = (u64)-1;
1070 } 1079 }
1071 btrfs_release_path(root, path); 1080 ret = 0;
1072 return 0; 1081out:
1082 btrfs_release_path(path);
1083 return ret;
1073} 1084}
1074 1085
1075 1086
@@ -1088,7 +1099,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1088 struct inode *inode; 1099 struct inode *inode;
1089 1100
1090 inode = read_one_inode(root, objectid); 1101 inode = read_one_inode(root, objectid);
1091 BUG_ON(!inode); 1102 if (!inode)
1103 return -EIO;
1092 1104
1093 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1105 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
1094 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 1106 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
@@ -1096,7 +1108,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1096 1108
1097 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1109 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1098 1110
1099 btrfs_release_path(root, path); 1111 btrfs_release_path(path);
1100 if (ret == 0) { 1112 if (ret == 0) {
1101 btrfs_inc_nlink(inode); 1113 btrfs_inc_nlink(inode);
1102 btrfs_update_inode(trans, root, inode); 1114 btrfs_update_inode(trans, root, inode);
@@ -1175,7 +1187,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1175 int ret; 1187 int ret;
1176 1188
1177 dir = read_one_inode(root, key->objectid); 1189 dir = read_one_inode(root, key->objectid);
1178 BUG_ON(!dir); 1190 if (!dir)
1191 return -EIO;
1179 1192
1180 name_len = btrfs_dir_name_len(eb, di); 1193 name_len = btrfs_dir_name_len(eb, di);
1181 name = kmalloc(name_len, GFP_NOFS); 1194 name = kmalloc(name_len, GFP_NOFS);
@@ -1192,7 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1192 exists = 1; 1205 exists = 1;
1193 else 1206 else
1194 exists = 0; 1207 exists = 0;
1195 btrfs_release_path(root, path); 1208 btrfs_release_path(path);
1196 1209
1197 if (key->type == BTRFS_DIR_ITEM_KEY) { 1210 if (key->type == BTRFS_DIR_ITEM_KEY) {
1198 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, 1211 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1205,7 +1218,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1205 } else { 1218 } else {
1206 BUG(); 1219 BUG();
1207 } 1220 }
1208 if (!dst_di || IS_ERR(dst_di)) { 1221 if (IS_ERR_OR_NULL(dst_di)) {
1209 /* we need a sequence number to insert, so we only 1222 /* we need a sequence number to insert, so we only
1210 * do inserts for the BTRFS_DIR_INDEX_KEY types 1223 * do inserts for the BTRFS_DIR_INDEX_KEY types
1211 */ 1224 */
@@ -1236,13 +1249,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1236 if (key->type == BTRFS_DIR_INDEX_KEY) 1249 if (key->type == BTRFS_DIR_INDEX_KEY)
1237 goto insert; 1250 goto insert;
1238out: 1251out:
1239 btrfs_release_path(root, path); 1252 btrfs_release_path(path);
1240 kfree(name); 1253 kfree(name);
1241 iput(dir); 1254 iput(dir);
1242 return 0; 1255 return 0;
1243 1256
1244insert: 1257insert:
1245 btrfs_release_path(root, path); 1258 btrfs_release_path(path);
1246 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1259 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1247 name, name_len, log_type, &log_key); 1260 name, name_len, log_type, &log_key);
1248 1261
@@ -1363,7 +1376,7 @@ next:
1363 *end_ret = found_end; 1376 *end_ret = found_end;
1364 ret = 0; 1377 ret = 0;
1365out: 1378out:
1366 btrfs_release_path(root, path); 1379 btrfs_release_path(path);
1367 return ret; 1380 return ret;
1368} 1381}
1369 1382
@@ -1426,12 +1439,15 @@ again:
1426 dir_key->offset, 1439 dir_key->offset,
1427 name, name_len, 0); 1440 name, name_len, 0);
1428 } 1441 }
1429 if (!log_di || IS_ERR(log_di)) { 1442 if (IS_ERR_OR_NULL(log_di)) {
1430 btrfs_dir_item_key_to_cpu(eb, di, &location); 1443 btrfs_dir_item_key_to_cpu(eb, di, &location);
1431 btrfs_release_path(root, path); 1444 btrfs_release_path(path);
1432 btrfs_release_path(log, log_path); 1445 btrfs_release_path(log_path);
1433 inode = read_one_inode(root, location.objectid); 1446 inode = read_one_inode(root, location.objectid);
1434 BUG_ON(!inode); 1447 if (!inode) {
1448 kfree(name);
1449 return -EIO;
1450 }
1435 1451
1436 ret = link_to_fixup_dir(trans, root, 1452 ret = link_to_fixup_dir(trans, root,
1437 path, location.objectid); 1453 path, location.objectid);
@@ -1453,7 +1469,7 @@ again:
1453 ret = 0; 1469 ret = 0;
1454 goto out; 1470 goto out;
1455 } 1471 }
1456 btrfs_release_path(log, log_path); 1472 btrfs_release_path(log_path);
1457 kfree(name); 1473 kfree(name);
1458 1474
1459 ptr = (unsigned long)(di + 1); 1475 ptr = (unsigned long)(di + 1);
@@ -1461,8 +1477,8 @@ again:
1461 } 1477 }
1462 ret = 0; 1478 ret = 0;
1463out: 1479out:
1464 btrfs_release_path(root, path); 1480 btrfs_release_path(path);
1465 btrfs_release_path(log, log_path); 1481 btrfs_release_path(log_path);
1466 return ret; 1482 return ret;
1467} 1483}
1468 1484
@@ -1550,7 +1566,7 @@ again:
1550 break; 1566 break;
1551 dir_key.offset = found_key.offset + 1; 1567 dir_key.offset = found_key.offset + 1;
1552 } 1568 }
1553 btrfs_release_path(root, path); 1569 btrfs_release_path(path);
1554 if (range_end == (u64)-1) 1570 if (range_end == (u64)-1)
1555 break; 1571 break;
1556 range_start = range_end + 1; 1572 range_start = range_end + 1;
@@ -1561,11 +1577,11 @@ next_type:
1561 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { 1577 if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
1562 key_type = BTRFS_DIR_LOG_INDEX_KEY; 1578 key_type = BTRFS_DIR_LOG_INDEX_KEY;
1563 dir_key.type = BTRFS_DIR_INDEX_KEY; 1579 dir_key.type = BTRFS_DIR_INDEX_KEY;
1564 btrfs_release_path(root, path); 1580 btrfs_release_path(path);
1565 goto again; 1581 goto again;
1566 } 1582 }
1567out: 1583out:
1568 btrfs_release_path(root, path); 1584 btrfs_release_path(path);
1569 btrfs_free_path(log_path); 1585 btrfs_free_path(log_path);
1570 iput(dir); 1586 iput(dir);
1571 return ret; 1587 return ret;
@@ -2093,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2093 * the running transaction open, so a full commit can't hop 2109 * the running transaction open, so a full commit can't hop
2094 * in and cause problems either. 2110 * in and cause problems either.
2095 */ 2111 */
2112 btrfs_scrub_pause_super(root);
2096 write_ctree_super(trans, root->fs_info->tree_root, 1); 2113 write_ctree_super(trans, root->fs_info->tree_root, 1);
2114 btrfs_scrub_continue_super(root);
2097 ret = 0; 2115 ret = 0;
2098 2116
2099 mutex_lock(&root->log_mutex); 2117 mutex_lock(&root->log_mutex);
@@ -2197,6 +2215,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2197 int ret; 2215 int ret;
2198 int err = 0; 2216 int err = 0;
2199 int bytes_del = 0; 2217 int bytes_del = 0;
2218 u64 dir_ino = btrfs_ino(dir);
2200 2219
2201 if (BTRFS_I(dir)->logged_trans < trans->transid) 2220 if (BTRFS_I(dir)->logged_trans < trans->transid)
2202 return 0; 2221 return 0;
@@ -2214,7 +2233,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2214 goto out_unlock; 2233 goto out_unlock;
2215 } 2234 }
2216 2235
2217 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2236 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
2218 name, name_len, -1); 2237 name, name_len, -1);
2219 if (IS_ERR(di)) { 2238 if (IS_ERR(di)) {
2220 err = PTR_ERR(di); 2239 err = PTR_ERR(di);
@@ -2225,8 +2244,8 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2225 bytes_del += name_len; 2244 bytes_del += name_len;
2226 BUG_ON(ret); 2245 BUG_ON(ret);
2227 } 2246 }
2228 btrfs_release_path(log, path); 2247 btrfs_release_path(path);
2229 di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, 2248 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
2230 index, name, name_len, -1); 2249 index, name, name_len, -1);
2231 if (IS_ERR(di)) { 2250 if (IS_ERR(di)) {
2232 err = PTR_ERR(di); 2251 err = PTR_ERR(di);
@@ -2244,10 +2263,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2244 if (bytes_del) { 2263 if (bytes_del) {
2245 struct btrfs_key key; 2264 struct btrfs_key key;
2246 2265
2247 key.objectid = dir->i_ino; 2266 key.objectid = dir_ino;
2248 key.offset = 0; 2267 key.offset = 0;
2249 key.type = BTRFS_INODE_ITEM_KEY; 2268 key.type = BTRFS_INODE_ITEM_KEY;
2250 btrfs_release_path(log, path); 2269 btrfs_release_path(path);
2251 2270
2252 ret = btrfs_search_slot(trans, log, &key, path, 0, 1); 2271 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
2253 if (ret < 0) { 2272 if (ret < 0) {
@@ -2269,7 +2288,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2269 btrfs_mark_buffer_dirty(path->nodes[0]); 2288 btrfs_mark_buffer_dirty(path->nodes[0]);
2270 } else 2289 } else
2271 ret = 0; 2290 ret = 0;
2272 btrfs_release_path(log, path); 2291 btrfs_release_path(path);
2273 } 2292 }
2274fail: 2293fail:
2275 btrfs_free_path(path); 2294 btrfs_free_path(path);
@@ -2303,7 +2322,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2303 log = root->log_root; 2322 log = root->log_root;
2304 mutex_lock(&BTRFS_I(inode)->log_mutex); 2323 mutex_lock(&BTRFS_I(inode)->log_mutex);
2305 2324
2306 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2325 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
2307 dirid, &index); 2326 dirid, &index);
2308 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2327 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2309 if (ret == -ENOSPC) { 2328 if (ret == -ENOSPC) {
@@ -2344,7 +2363,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
2344 struct btrfs_dir_log_item); 2363 struct btrfs_dir_log_item);
2345 btrfs_set_dir_log_end(path->nodes[0], item, last_offset); 2364 btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
2346 btrfs_mark_buffer_dirty(path->nodes[0]); 2365 btrfs_mark_buffer_dirty(path->nodes[0]);
2347 btrfs_release_path(log, path); 2366 btrfs_release_path(path);
2348 return 0; 2367 return 0;
2349} 2368}
2350 2369
@@ -2369,13 +2388,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2369 int nritems; 2388 int nritems;
2370 u64 first_offset = min_offset; 2389 u64 first_offset = min_offset;
2371 u64 last_offset = (u64)-1; 2390 u64 last_offset = (u64)-1;
2391 u64 ino = btrfs_ino(inode);
2372 2392
2373 log = root->log_root; 2393 log = root->log_root;
2374 max_key.objectid = inode->i_ino; 2394 max_key.objectid = ino;
2375 max_key.offset = (u64)-1; 2395 max_key.offset = (u64)-1;
2376 max_key.type = key_type; 2396 max_key.type = key_type;
2377 2397
2378 min_key.objectid = inode->i_ino; 2398 min_key.objectid = ino;
2379 min_key.type = key_type; 2399 min_key.type = key_type;
2380 min_key.offset = min_offset; 2400 min_key.offset = min_offset;
2381 2401
@@ -2388,18 +2408,17 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2388 * we didn't find anything from this transaction, see if there 2408 * we didn't find anything from this transaction, see if there
2389 * is anything at all 2409 * is anything at all
2390 */ 2410 */
2391 if (ret != 0 || min_key.objectid != inode->i_ino || 2411 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2392 min_key.type != key_type) { 2412 min_key.objectid = ino;
2393 min_key.objectid = inode->i_ino;
2394 min_key.type = key_type; 2413 min_key.type = key_type;
2395 min_key.offset = (u64)-1; 2414 min_key.offset = (u64)-1;
2396 btrfs_release_path(root, path); 2415 btrfs_release_path(path);
2397 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2416 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
2398 if (ret < 0) { 2417 if (ret < 0) {
2399 btrfs_release_path(root, path); 2418 btrfs_release_path(path);
2400 return ret; 2419 return ret;
2401 } 2420 }
2402 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2421 ret = btrfs_previous_item(root, path, ino, key_type);
2403 2422
2404 /* if ret == 0 there are items for this type, 2423 /* if ret == 0 there are items for this type,
2405 * create a range to tell us the last key of this type. 2424 * create a range to tell us the last key of this type.
@@ -2417,7 +2436,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2417 } 2436 }
2418 2437
2419 /* go backward to find any previous key */ 2438 /* go backward to find any previous key */
2420 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2439 ret = btrfs_previous_item(root, path, ino, key_type);
2421 if (ret == 0) { 2440 if (ret == 0) {
2422 struct btrfs_key tmp; 2441 struct btrfs_key tmp;
2423 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2442 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -2432,7 +2451,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2432 } 2451 }
2433 } 2452 }
2434 } 2453 }
2435 btrfs_release_path(root, path); 2454 btrfs_release_path(path);
2436 2455
2437 /* find the first key from this transaction again */ 2456 /* find the first key from this transaction again */
2438 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); 2457 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
@@ -2452,8 +2471,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2452 for (i = path->slots[0]; i < nritems; i++) { 2471 for (i = path->slots[0]; i < nritems; i++) {
2453 btrfs_item_key_to_cpu(src, &min_key, i); 2472 btrfs_item_key_to_cpu(src, &min_key, i);
2454 2473
2455 if (min_key.objectid != inode->i_ino || 2474 if (min_key.objectid != ino || min_key.type != key_type)
2456 min_key.type != key_type)
2457 goto done; 2475 goto done;
2458 ret = overwrite_item(trans, log, dst_path, src, i, 2476 ret = overwrite_item(trans, log, dst_path, src, i,
2459 &min_key); 2477 &min_key);
@@ -2474,7 +2492,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2474 goto done; 2492 goto done;
2475 } 2493 }
2476 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2494 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2477 if (tmp.objectid != inode->i_ino || tmp.type != key_type) { 2495 if (tmp.objectid != ino || tmp.type != key_type) {
2478 last_offset = (u64)-1; 2496 last_offset = (u64)-1;
2479 goto done; 2497 goto done;
2480 } 2498 }
@@ -2490,8 +2508,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2490 } 2508 }
2491 } 2509 }
2492done: 2510done:
2493 btrfs_release_path(root, path); 2511 btrfs_release_path(path);
2494 btrfs_release_path(log, dst_path); 2512 btrfs_release_path(dst_path);
2495 2513
2496 if (err == 0) { 2514 if (err == 0) {
2497 *last_offset_ret = last_offset; 2515 *last_offset_ret = last_offset;
@@ -2500,8 +2518,7 @@ done:
2500 * is valid 2518 * is valid
2501 */ 2519 */
2502 ret = insert_dir_log_key(trans, log, path, key_type, 2520 ret = insert_dir_log_key(trans, log, path, key_type,
2503 inode->i_ino, first_offset, 2521 ino, first_offset, last_offset);
2504 last_offset);
2505 if (ret) 2522 if (ret)
2506 err = ret; 2523 err = ret;
2507 } 2524 }
@@ -2587,10 +2604,11 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2587 break; 2604 break;
2588 2605
2589 ret = btrfs_del_item(trans, log, path); 2606 ret = btrfs_del_item(trans, log, path);
2590 BUG_ON(ret); 2607 if (ret)
2591 btrfs_release_path(log, path); 2608 break;
2609 btrfs_release_path(path);
2592 } 2610 }
2593 btrfs_release_path(log, path); 2611 btrfs_release_path(path);
2594 return ret; 2612 return ret;
2595} 2613}
2596 2614
@@ -2665,6 +2683,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2665 extent = btrfs_item_ptr(src, start_slot + i, 2683 extent = btrfs_item_ptr(src, start_slot + i,
2666 struct btrfs_file_extent_item); 2684 struct btrfs_file_extent_item);
2667 2685
2686 if (btrfs_file_extent_generation(src, extent) < trans->transid)
2687 continue;
2688
2668 found_type = btrfs_file_extent_type(src, extent); 2689 found_type = btrfs_file_extent_type(src, extent);
2669 if (found_type == BTRFS_FILE_EXTENT_REG || 2690 if (found_type == BTRFS_FILE_EXTENT_REG ||
2670 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2691 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -2689,14 +2710,14 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2689 ret = btrfs_lookup_csums_range( 2710 ret = btrfs_lookup_csums_range(
2690 log->fs_info->csum_root, 2711 log->fs_info->csum_root,
2691 ds + cs, ds + cs + cl - 1, 2712 ds + cs, ds + cs + cl - 1,
2692 &ordered_sums); 2713 &ordered_sums, 0);
2693 BUG_ON(ret); 2714 BUG_ON(ret);
2694 } 2715 }
2695 } 2716 }
2696 } 2717 }
2697 2718
2698 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2719 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
2699 btrfs_release_path(log, dst_path); 2720 btrfs_release_path(dst_path);
2700 kfree(ins_data); 2721 kfree(ins_data);
2701 2722
2702 /* 2723 /*
@@ -2745,6 +2766,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2745 int nritems; 2766 int nritems;
2746 int ins_start_slot = 0; 2767 int ins_start_slot = 0;
2747 int ins_nr; 2768 int ins_nr;
2769 u64 ino = btrfs_ino(inode);
2748 2770
2749 log = root->log_root; 2771 log = root->log_root;
2750 2772
@@ -2757,11 +2779,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2757 return -ENOMEM; 2779 return -ENOMEM;
2758 } 2780 }
2759 2781
2760 min_key.objectid = inode->i_ino; 2782 min_key.objectid = ino;
2761 min_key.type = BTRFS_INODE_ITEM_KEY; 2783 min_key.type = BTRFS_INODE_ITEM_KEY;
2762 min_key.offset = 0; 2784 min_key.offset = 0;
2763 2785
2764 max_key.objectid = inode->i_ino; 2786 max_key.objectid = ino;
2765 2787
2766 /* today the code can only do partial logging of directories */ 2788 /* today the code can only do partial logging of directories */
2767 if (!S_ISDIR(inode->i_mode)) 2789 if (!S_ISDIR(inode->i_mode))
@@ -2773,6 +2795,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2773 max_key.type = (u8)-1; 2795 max_key.type = (u8)-1;
2774 max_key.offset = (u64)-1; 2796 max_key.offset = (u64)-1;
2775 2797
2798 ret = btrfs_commit_inode_delayed_items(trans, inode);
2799 if (ret) {
2800 btrfs_free_path(path);
2801 btrfs_free_path(dst_path);
2802 return ret;
2803 }
2804
2776 mutex_lock(&BTRFS_I(inode)->log_mutex); 2805 mutex_lock(&BTRFS_I(inode)->log_mutex);
2777 2806
2778 /* 2807 /*
@@ -2784,8 +2813,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2784 2813
2785 if (inode_only == LOG_INODE_EXISTS) 2814 if (inode_only == LOG_INODE_EXISTS)
2786 max_key_type = BTRFS_XATTR_ITEM_KEY; 2815 max_key_type = BTRFS_XATTR_ITEM_KEY;
2787 ret = drop_objectid_items(trans, log, path, 2816 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2788 inode->i_ino, max_key_type);
2789 } else { 2817 } else {
2790 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 2818 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
2791 } 2819 }
@@ -2803,7 +2831,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2803 break; 2831 break;
2804again: 2832again:
2805 /* note, ins_nr might be > 0 here, cleanup outside the loop */ 2833 /* note, ins_nr might be > 0 here, cleanup outside the loop */
2806 if (min_key.objectid != inode->i_ino) 2834 if (min_key.objectid != ino)
2807 break; 2835 break;
2808 if (min_key.type > max_key.type) 2836 if (min_key.type > max_key.type)
2809 break; 2837 break;
@@ -2845,7 +2873,7 @@ next_slot:
2845 } 2873 }
2846 ins_nr = 0; 2874 ins_nr = 0;
2847 } 2875 }
2848 btrfs_release_path(root, path); 2876 btrfs_release_path(path);
2849 2877
2850 if (min_key.offset < (u64)-1) 2878 if (min_key.offset < (u64)-1)
2851 min_key.offset++; 2879 min_key.offset++;
@@ -2868,8 +2896,8 @@ next_slot:
2868 } 2896 }
2869 WARN_ON(ins_nr); 2897 WARN_ON(ins_nr);
2870 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 2898 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2871 btrfs_release_path(root, path); 2899 btrfs_release_path(path);
2872 btrfs_release_path(log, dst_path); 2900 btrfs_release_path(dst_path);
2873 ret = log_directory_changes(trans, root, inode, path, dst_path); 2901 ret = log_directory_changes(trans, root, inode, path, dst_path);
2874 if (ret) { 2902 if (ret) {
2875 err = ret; 2903 err = ret;
@@ -3136,7 +3164,7 @@ again:
3136 } 3164 }
3137 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 3165 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3138 path->slots[0]); 3166 path->slots[0]);
3139 btrfs_release_path(log_root_tree, path); 3167 btrfs_release_path(path);
3140 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) 3168 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
3141 break; 3169 break;
3142 3170
@@ -3171,7 +3199,7 @@ again:
3171 if (found_key.offset == 0) 3199 if (found_key.offset == 0)
3172 break; 3200 break;
3173 } 3201 }
3174 btrfs_release_path(log_root_tree, path); 3202 btrfs_release_path(path);
3175 3203
3176 /* step one is to pin it all, step two is to replay just inodes */ 3204 /* step one is to pin it all, step two is to replay just inodes */
3177 if (wc.pin) { 3205 if (wc.pin) {
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 3dfae84c8cc8..2270ac58d746 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -38,7 +38,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 38 struct btrfs_root *root,
39 const char *name, int name_len, 39 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41int btrfs_join_running_log_trans(struct btrfs_root *root);
42int btrfs_end_log_trans(struct btrfs_root *root); 41int btrfs_end_log_trans(struct btrfs_root *root);
43int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
44int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh
deleted file mode 100644
index 1ca1952fd917..000000000000
--- a/fs/btrfs/version.sh
+++ /dev/null
@@ -1,43 +0,0 @@
1#!/bin/bash
2#
3# determine-version -- report a useful version for releases
4#
5# Copyright 2008, Aron Griffis <agriffis@n01se.net>
6# Copyright 2008, Oracle
7# Released under the GNU GPLv2
8
9v="v0.16"
10
11which git &> /dev/null
12if [ $? == 0 ]; then
13 git branch >& /dev/null
14 if [ $? == 0 ]; then
15 if head=`git rev-parse --verify HEAD 2>/dev/null`; then
16 if tag=`git describe --tags 2>/dev/null`; then
17 v="$tag"
18 fi
19
20 # Are there uncommitted changes?
21 git update-index --refresh --unmerged > /dev/null
22 if git diff-index --name-only HEAD | \
23 grep -v "^scripts/package" \
24 | read dummy; then
25 v="$v"-dirty
26 fi
27 fi
28 fi
29fi
30
31echo "#ifndef __BUILD_VERSION" > .build-version.h
32echo "#define __BUILD_VERSION" >> .build-version.h
33echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h
34echo "#endif" >> .build-version.h
35
36diff -q version.h .build-version.h >& /dev/null
37
38if [ $? == 0 ]; then
39 rm .build-version.h
40 exit 0
41fi
42
43mv .build-version.h version.h
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c7367ae5a3e6..c48214ef5c09 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,22 +38,9 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device); 38 struct btrfs_device *device);
39static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 39static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
40 40
41#define map_lookup_size(n) (sizeof(struct map_lookup) + \
42 (sizeof(struct btrfs_bio_stripe) * (n)))
43
44static DEFINE_MUTEX(uuid_mutex); 41static DEFINE_MUTEX(uuid_mutex);
45static LIST_HEAD(fs_uuids); 42static LIST_HEAD(fs_uuids);
46 43
47void btrfs_lock_volumes(void)
48{
49 mutex_lock(&uuid_mutex);
50}
51
52void btrfs_unlock_volumes(void)
53{
54 mutex_unlock(&uuid_mutex);
55}
56
57static void lock_chunks(struct btrfs_root *root) 44static void lock_chunks(struct btrfs_root *root)
58{ 45{
59 mutex_lock(&root->fs_info->chunk_mutex); 46 mutex_lock(&root->fs_info->chunk_mutex);
@@ -363,7 +350,7 @@ static noinline int device_list_add(const char *path,
363 INIT_LIST_HEAD(&device->dev_alloc_list); 350 INIT_LIST_HEAD(&device->dev_alloc_list);
364 351
365 mutex_lock(&fs_devices->device_list_mutex); 352 mutex_lock(&fs_devices->device_list_mutex);
366 list_add(&device->dev_list, &fs_devices->devices); 353 list_add_rcu(&device->dev_list, &fs_devices->devices);
367 mutex_unlock(&fs_devices->device_list_mutex); 354 mutex_unlock(&fs_devices->device_list_mutex);
368 355
369 device->fs_devices = fs_devices; 356 device->fs_devices = fs_devices;
@@ -406,7 +393,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
406 fs_devices->latest_trans = orig->latest_trans; 393 fs_devices->latest_trans = orig->latest_trans;
407 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); 394 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
408 395
409 mutex_lock(&orig->device_list_mutex); 396 /* We have held the volume lock, it is safe to get the devices. */
410 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 397 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
411 device = kzalloc(sizeof(*device), GFP_NOFS); 398 device = kzalloc(sizeof(*device), GFP_NOFS);
412 if (!device) 399 if (!device)
@@ -429,10 +416,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
429 device->fs_devices = fs_devices; 416 device->fs_devices = fs_devices;
430 fs_devices->num_devices++; 417 fs_devices->num_devices++;
431 } 418 }
432 mutex_unlock(&orig->device_list_mutex);
433 return fs_devices; 419 return fs_devices;
434error: 420error:
435 mutex_unlock(&orig->device_list_mutex);
436 free_fs_devices(fs_devices); 421 free_fs_devices(fs_devices);
437 return ERR_PTR(-ENOMEM); 422 return ERR_PTR(-ENOMEM);
438} 423}
@@ -443,7 +428,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
443 428
444 mutex_lock(&uuid_mutex); 429 mutex_lock(&uuid_mutex);
445again: 430again:
446 mutex_lock(&fs_devices->device_list_mutex); 431 /* This is the initialized path, it is safe to release the devices. */
447 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 432 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
448 if (device->in_fs_metadata) 433 if (device->in_fs_metadata)
449 continue; 434 continue;
@@ -463,7 +448,6 @@ again:
463 kfree(device->name); 448 kfree(device->name);
464 kfree(device); 449 kfree(device);
465 } 450 }
466 mutex_unlock(&fs_devices->device_list_mutex);
467 451
468 if (fs_devices->seed) { 452 if (fs_devices->seed) {
469 fs_devices = fs_devices->seed; 453 fs_devices = fs_devices->seed;
@@ -474,6 +458,29 @@ again:
474 return 0; 458 return 0;
475} 459}
476 460
461static void __free_device(struct work_struct *work)
462{
463 struct btrfs_device *device;
464
465 device = container_of(work, struct btrfs_device, rcu_work);
466
467 if (device->bdev)
468 blkdev_put(device->bdev, device->mode);
469
470 kfree(device->name);
471 kfree(device);
472}
473
474static void free_device(struct rcu_head *head)
475{
476 struct btrfs_device *device;
477
478 device = container_of(head, struct btrfs_device, rcu);
479
480 INIT_WORK(&device->rcu_work, __free_device);
481 schedule_work(&device->rcu_work);
482}
483
477static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 484static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
478{ 485{
479 struct btrfs_device *device; 486 struct btrfs_device *device;
@@ -481,20 +488,32 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
481 if (--fs_devices->opened > 0) 488 if (--fs_devices->opened > 0)
482 return 0; 489 return 0;
483 490
491 mutex_lock(&fs_devices->device_list_mutex);
484 list_for_each_entry(device, &fs_devices->devices, dev_list) { 492 list_for_each_entry(device, &fs_devices->devices, dev_list) {
485 if (device->bdev) { 493 struct btrfs_device *new_device;
486 blkdev_put(device->bdev, device->mode); 494
495 if (device->bdev)
487 fs_devices->open_devices--; 496 fs_devices->open_devices--;
488 } 497
489 if (device->writeable) { 498 if (device->writeable) {
490 list_del_init(&device->dev_alloc_list); 499 list_del_init(&device->dev_alloc_list);
491 fs_devices->rw_devices--; 500 fs_devices->rw_devices--;
492 } 501 }
493 502
494 device->bdev = NULL; 503 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
495 device->writeable = 0; 504 BUG_ON(!new_device);
496 device->in_fs_metadata = 0; 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name);
508 new_device->bdev = NULL;
509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0;
511 list_replace_rcu(&device->dev_list, &new_device->dev_list);
512
513 call_rcu(&device->rcu, free_device);
497 } 514 }
515 mutex_unlock(&fs_devices->device_list_mutex);
516
498 WARN_ON(fs_devices->open_devices); 517 WARN_ON(fs_devices->open_devices);
499 WARN_ON(fs_devices->rw_devices); 518 WARN_ON(fs_devices->rw_devices);
500 fs_devices->opened = 0; 519 fs_devices->opened = 0;
@@ -597,6 +616,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
597 list_add(&device->dev_alloc_list, 616 list_add(&device->dev_alloc_list,
598 &fs_devices->alloc_list); 617 &fs_devices->alloc_list);
599 } 618 }
619 brelse(bh);
600 continue; 620 continue;
601 621
602error_brelse: 622error_brelse:
@@ -815,10 +835,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
815 /* we don't want to overwrite the superblock on the drive, 835 /* we don't want to overwrite the superblock on the drive,
816 * so we make sure to start at an offset of at least 1MB 836 * so we make sure to start at an offset of at least 1MB
817 */ 837 */
818 search_start = 1024 * 1024; 838 search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
819
820 if (root->fs_info->alloc_start + num_bytes <= search_end)
821 search_start = max(root->fs_info->alloc_start, search_start);
822 839
823 max_hole_start = search_start; 840 max_hole_start = search_start;
824 max_hole_size = 0; 841 max_hole_size = 0;
@@ -949,14 +966,14 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
949 if (ret > 0) { 966 if (ret > 0) {
950 ret = btrfs_previous_item(root, path, key.objectid, 967 ret = btrfs_previous_item(root, path, key.objectid,
951 BTRFS_DEV_EXTENT_KEY); 968 BTRFS_DEV_EXTENT_KEY);
952 BUG_ON(ret); 969 if (ret)
970 goto out;
953 leaf = path->nodes[0]; 971 leaf = path->nodes[0];
954 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 972 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
955 extent = btrfs_item_ptr(leaf, path->slots[0], 973 extent = btrfs_item_ptr(leaf, path->slots[0],
956 struct btrfs_dev_extent); 974 struct btrfs_dev_extent);
957 BUG_ON(found_key.offset > start || found_key.offset + 975 BUG_ON(found_key.offset > start || found_key.offset +
958 btrfs_dev_extent_length(leaf, extent) < start); 976 btrfs_dev_extent_length(leaf, extent) < start);
959 ret = 0;
960 } else if (ret == 0) { 977 } else if (ret == 0) {
961 leaf = path->nodes[0]; 978 leaf = path->nodes[0];
962 extent = btrfs_item_ptr(leaf, path->slots[0], 979 extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -967,8 +984,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
967 if (device->bytes_used > 0) 984 if (device->bytes_used > 0)
968 device->bytes_used -= btrfs_dev_extent_length(leaf, extent); 985 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
969 ret = btrfs_del_item(trans, root, path); 986 ret = btrfs_del_item(trans, root, path);
970 BUG_ON(ret);
971 987
988out:
972 btrfs_free_path(path); 989 btrfs_free_path(path);
973 return ret; 990 return ret;
974} 991}
@@ -1203,11 +1220,13 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1203 struct block_device *bdev; 1220 struct block_device *bdev;
1204 struct buffer_head *bh = NULL; 1221 struct buffer_head *bh = NULL;
1205 struct btrfs_super_block *disk_super; 1222 struct btrfs_super_block *disk_super;
1223 struct btrfs_fs_devices *cur_devices;
1206 u64 all_avail; 1224 u64 all_avail;
1207 u64 devid; 1225 u64 devid;
1208 u64 num_devices; 1226 u64 num_devices;
1209 u8 *dev_uuid; 1227 u8 *dev_uuid;
1210 int ret = 0; 1228 int ret = 0;
1229 bool clear_super = false;
1211 1230
1212 mutex_lock(&uuid_mutex); 1231 mutex_lock(&uuid_mutex);
1213 mutex_lock(&root->fs_info->volume_mutex); 1232 mutex_lock(&root->fs_info->volume_mutex);
@@ -1238,14 +1257,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1238 1257
1239 device = NULL; 1258 device = NULL;
1240 devices = &root->fs_info->fs_devices->devices; 1259 devices = &root->fs_info->fs_devices->devices;
1241 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1260 /*
1261 * It is safe to read the devices since the volume_mutex
1262 * is held.
1263 */
1242 list_for_each_entry(tmp, devices, dev_list) { 1264 list_for_each_entry(tmp, devices, dev_list) {
1243 if (tmp->in_fs_metadata && !tmp->bdev) { 1265 if (tmp->in_fs_metadata && !tmp->bdev) {
1244 device = tmp; 1266 device = tmp;
1245 break; 1267 break;
1246 } 1268 }
1247 } 1269 }
1248 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1249 bdev = NULL; 1270 bdev = NULL;
1250 bh = NULL; 1271 bh = NULL;
1251 disk_super = NULL; 1272 disk_super = NULL;
@@ -1287,8 +1308,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1287 } 1308 }
1288 1309
1289 if (device->writeable) { 1310 if (device->writeable) {
1311 lock_chunks(root);
1290 list_del_init(&device->dev_alloc_list); 1312 list_del_init(&device->dev_alloc_list);
1313 unlock_chunks(root);
1291 root->fs_info->fs_devices->rw_devices--; 1314 root->fs_info->fs_devices->rw_devices--;
1315 clear_super = true;
1292 } 1316 }
1293 1317
1294 ret = btrfs_shrink_device(device, 0); 1318 ret = btrfs_shrink_device(device, 0);
@@ -1300,15 +1324,17 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1300 goto error_undo; 1324 goto error_undo;
1301 1325
1302 device->in_fs_metadata = 0; 1326 device->in_fs_metadata = 0;
1327 btrfs_scrub_cancel_dev(root, device);
1303 1328
1304 /* 1329 /*
1305 * the device list mutex makes sure that we don't change 1330 * the device list mutex makes sure that we don't change
1306 * the device list while someone else is writing out all 1331 * the device list while someone else is writing out all
1307 * the device supers. 1332 * the device supers.
1308 */ 1333 */
1334
1335 cur_devices = device->fs_devices;
1309 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1336 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1310 list_del_init(&device->dev_list); 1337 list_del_rcu(&device->dev_list);
1311 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1312 1338
1313 device->fs_devices->num_devices--; 1339 device->fs_devices->num_devices--;
1314 1340
@@ -1322,34 +1348,36 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1322 if (device->bdev == root->fs_info->fs_devices->latest_bdev) 1348 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1323 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1349 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1324 1350
1325 if (device->bdev) { 1351 if (device->bdev)
1326 blkdev_put(device->bdev, device->mode);
1327 device->bdev = NULL;
1328 device->fs_devices->open_devices--; 1352 device->fs_devices->open_devices--;
1329 } 1353
1354 call_rcu(&device->rcu, free_device);
1355 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1330 1356
1331 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 1357 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1332 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); 1358 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1333 1359
1334 if (device->fs_devices->open_devices == 0) { 1360 if (cur_devices->open_devices == 0) {
1335 struct btrfs_fs_devices *fs_devices; 1361 struct btrfs_fs_devices *fs_devices;
1336 fs_devices = root->fs_info->fs_devices; 1362 fs_devices = root->fs_info->fs_devices;
1337 while (fs_devices) { 1363 while (fs_devices) {
1338 if (fs_devices->seed == device->fs_devices) 1364 if (fs_devices->seed == cur_devices)
1339 break; 1365 break;
1340 fs_devices = fs_devices->seed; 1366 fs_devices = fs_devices->seed;
1341 } 1367 }
1342 fs_devices->seed = device->fs_devices->seed; 1368 fs_devices->seed = cur_devices->seed;
1343 device->fs_devices->seed = NULL; 1369 cur_devices->seed = NULL;
1344 __btrfs_close_devices(device->fs_devices); 1370 lock_chunks(root);
1345 free_fs_devices(device->fs_devices); 1371 __btrfs_close_devices(cur_devices);
1372 unlock_chunks(root);
1373 free_fs_devices(cur_devices);
1346 } 1374 }
1347 1375
1348 /* 1376 /*
1349 * at this point, the device is zero sized. We want to 1377 * at this point, the device is zero sized. We want to
1350 * remove it from the devices list and zero out the old super 1378 * remove it from the devices list and zero out the old super
1351 */ 1379 */
1352 if (device->writeable) { 1380 if (clear_super) {
1353 /* make sure this device isn't detected as part of 1381 /* make sure this device isn't detected as part of
1354 * the FS anymore 1382 * the FS anymore
1355 */ 1383 */
@@ -1358,8 +1386,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1358 sync_dirty_buffer(bh); 1386 sync_dirty_buffer(bh);
1359 } 1387 }
1360 1388
1361 kfree(device->name);
1362 kfree(device);
1363 ret = 0; 1389 ret = 0;
1364 1390
1365error_brelse: 1391error_brelse:
@@ -1373,8 +1399,10 @@ out:
1373 return ret; 1399 return ret;
1374error_undo: 1400error_undo:
1375 if (device->writeable) { 1401 if (device->writeable) {
1402 lock_chunks(root);
1376 list_add(&device->dev_alloc_list, 1403 list_add(&device->dev_alloc_list,
1377 &root->fs_info->fs_devices->alloc_list); 1404 &root->fs_info->fs_devices->alloc_list);
1405 unlock_chunks(root);
1378 root->fs_info->fs_devices->rw_devices++; 1406 root->fs_info->fs_devices->rw_devices++;
1379 } 1407 }
1380 goto error_brelse; 1408 goto error_brelse;
@@ -1414,7 +1442,12 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1414 INIT_LIST_HEAD(&seed_devices->devices); 1442 INIT_LIST_HEAD(&seed_devices->devices);
1415 INIT_LIST_HEAD(&seed_devices->alloc_list); 1443 INIT_LIST_HEAD(&seed_devices->alloc_list);
1416 mutex_init(&seed_devices->device_list_mutex); 1444 mutex_init(&seed_devices->device_list_mutex);
1417 list_splice_init(&fs_devices->devices, &seed_devices->devices); 1445
1446 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1447 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
1448 synchronize_rcu);
1449 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1450
1418 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1451 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1419 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1452 list_for_each_entry(device, &seed_devices->devices, dev_list) {
1420 device->fs_devices = seed_devices; 1453 device->fs_devices = seed_devices;
@@ -1475,7 +1508,7 @@ next_slot:
1475 goto error; 1508 goto error;
1476 leaf = path->nodes[0]; 1509 leaf = path->nodes[0];
1477 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1510 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1478 btrfs_release_path(root, path); 1511 btrfs_release_path(path);
1479 continue; 1512 continue;
1480 } 1513 }
1481 1514
@@ -1611,7 +1644,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1611 * half setup 1644 * half setup
1612 */ 1645 */
1613 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1646 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1614 list_add(&device->dev_list, &root->fs_info->fs_devices->devices); 1647 list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
1615 list_add(&device->dev_alloc_list, 1648 list_add(&device->dev_alloc_list,
1616 &root->fs_info->fs_devices->alloc_list); 1649 &root->fs_info->fs_devices->alloc_list);
1617 root->fs_info->fs_devices->num_devices++; 1650 root->fs_info->fs_devices->num_devices++;
@@ -1769,10 +1802,9 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1769 BUG_ON(ret); 1802 BUG_ON(ret);
1770 1803
1771 ret = btrfs_del_item(trans, root, path); 1804 ret = btrfs_del_item(trans, root, path);
1772 BUG_ON(ret);
1773 1805
1774 btrfs_free_path(path); 1806 btrfs_free_path(path);
1775 return 0; 1807 return ret;
1776} 1808}
1777 1809
1778static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 1810static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
@@ -1947,7 +1979,7 @@ again:
1947 chunk = btrfs_item_ptr(leaf, path->slots[0], 1979 chunk = btrfs_item_ptr(leaf, path->slots[0],
1948 struct btrfs_chunk); 1980 struct btrfs_chunk);
1949 chunk_type = btrfs_chunk_type(leaf, chunk); 1981 chunk_type = btrfs_chunk_type(leaf, chunk);
1950 btrfs_release_path(chunk_root, path); 1982 btrfs_release_path(path);
1951 1983
1952 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { 1984 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1953 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 1985 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
@@ -2065,7 +2097,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2065 if (found_key.offset == 0) 2097 if (found_key.offset == 0)
2066 break; 2098 break;
2067 2099
2068 btrfs_release_path(chunk_root, path); 2100 btrfs_release_path(path);
2069 ret = btrfs_relocate_chunk(chunk_root, 2101 ret = btrfs_relocate_chunk(chunk_root,
2070 chunk_root->root_key.objectid, 2102 chunk_root->root_key.objectid,
2071 found_key.objectid, 2103 found_key.objectid,
@@ -2137,7 +2169,7 @@ again:
2137 goto done; 2169 goto done;
2138 if (ret) { 2170 if (ret) {
2139 ret = 0; 2171 ret = 0;
2140 btrfs_release_path(root, path); 2172 btrfs_release_path(path);
2141 break; 2173 break;
2142 } 2174 }
2143 2175
@@ -2146,7 +2178,7 @@ again:
2146 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2178 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2147 2179
2148 if (key.objectid != device->devid) { 2180 if (key.objectid != device->devid) {
2149 btrfs_release_path(root, path); 2181 btrfs_release_path(path);
2150 break; 2182 break;
2151 } 2183 }
2152 2184
@@ -2154,14 +2186,14 @@ again:
2154 length = btrfs_dev_extent_length(l, dev_extent); 2186 length = btrfs_dev_extent_length(l, dev_extent);
2155 2187
2156 if (key.offset + length <= new_size) { 2188 if (key.offset + length <= new_size) {
2157 btrfs_release_path(root, path); 2189 btrfs_release_path(path);
2158 break; 2190 break;
2159 } 2191 }
2160 2192
2161 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2193 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2162 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2194 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2163 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 2195 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2164 btrfs_release_path(root, path); 2196 btrfs_release_path(path);
2165 2197
2166 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 2198 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2167 chunk_offset); 2199 chunk_offset);
@@ -2237,275 +2269,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
2237 return 0; 2269 return 0;
2238} 2270}
2239 2271
2240static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, 2272/*
2241 int num_stripes, int sub_stripes) 2273 * sort the devices in descending order by max_avail, total_avail
2274 */
2275static int btrfs_cmp_device_info(const void *a, const void *b)
2242{ 2276{
2243 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) 2277 const struct btrfs_device_info *di_a = a;
2244 return calc_size; 2278 const struct btrfs_device_info *di_b = b;
2245 else if (type & BTRFS_BLOCK_GROUP_RAID10)
2246 return calc_size * (num_stripes / sub_stripes);
2247 else
2248 return calc_size * num_stripes;
2249}
2250 2279
2251/* Used to sort the devices by max_avail(descending sort) */ 2280 if (di_a->max_avail > di_b->max_avail)
2252int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
2253{
2254 if (((struct btrfs_device_info *)dev_info1)->max_avail >
2255 ((struct btrfs_device_info *)dev_info2)->max_avail)
2256 return -1; 2281 return -1;
2257 else if (((struct btrfs_device_info *)dev_info1)->max_avail < 2282 if (di_a->max_avail < di_b->max_avail)
2258 ((struct btrfs_device_info *)dev_info2)->max_avail)
2259 return 1; 2283 return 1;
2260 else 2284 if (di_a->total_avail > di_b->total_avail)
2261 return 0; 2285 return -1;
2286 if (di_a->total_avail < di_b->total_avail)
2287 return 1;
2288 return 0;
2262} 2289}
2263 2290
2264static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, 2291static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2265 int *num_stripes, int *min_stripes, 2292 struct btrfs_root *extent_root,
2266 int *sub_stripes) 2293 struct map_lookup **map_ret,
2294 u64 *num_bytes_out, u64 *stripe_size_out,
2295 u64 start, u64 type)
2267{ 2296{
2268 *num_stripes = 1; 2297 struct btrfs_fs_info *info = extent_root->fs_info;
2269 *min_stripes = 1; 2298 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2270 *sub_stripes = 0; 2299 struct list_head *cur;
2300 struct map_lookup *map = NULL;
2301 struct extent_map_tree *em_tree;
2302 struct extent_map *em;
2303 struct btrfs_device_info *devices_info = NULL;
2304 u64 total_avail;
2305 int num_stripes; /* total number of stripes to allocate */
2306 int sub_stripes; /* sub_stripes info for map */
2307 int dev_stripes; /* stripes per dev */
2308 int devs_max; /* max devs to use */
2309 int devs_min; /* min devs needed */
2310 int devs_increment; /* ndevs has to be a multiple of this */
2311 int ncopies; /* how many copies to data has */
2312 int ret;
2313 u64 max_stripe_size;
2314 u64 max_chunk_size;
2315 u64 stripe_size;
2316 u64 num_bytes;
2317 int ndevs;
2318 int i;
2319 int j;
2271 2320
2272 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2321 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
2273 *num_stripes = fs_devices->rw_devices; 2322 (type & BTRFS_BLOCK_GROUP_DUP)) {
2274 *min_stripes = 2; 2323 WARN_ON(1);
2275 } 2324 type &= ~BTRFS_BLOCK_GROUP_DUP;
2276 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2277 *num_stripes = 2;
2278 *min_stripes = 2;
2279 }
2280 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2281 if (fs_devices->rw_devices < 2)
2282 return -ENOSPC;
2283 *num_stripes = 2;
2284 *min_stripes = 2;
2285 }
2286 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2287 *num_stripes = fs_devices->rw_devices;
2288 if (*num_stripes < 4)
2289 return -ENOSPC;
2290 *num_stripes &= ~(u32)1;
2291 *sub_stripes = 2;
2292 *min_stripes = 4;
2293 } 2325 }
2294 2326
2295 return 0; 2327 if (list_empty(&fs_devices->alloc_list))
2296} 2328 return -ENOSPC;
2297 2329
2298static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, 2330 sub_stripes = 1;
2299 u64 proposed_size, u64 type, 2331 dev_stripes = 1;
2300 int num_stripes, int small_stripe) 2332 devs_increment = 1;
2301{ 2333 ncopies = 1;
2302 int min_stripe_size = 1 * 1024 * 1024; 2334 devs_max = 0; /* 0 == as many as possible */
2303 u64 calc_size = proposed_size; 2335 devs_min = 1;
2304 u64 max_chunk_size = calc_size;
2305 int ncopies = 1;
2306 2336
2307 if (type & (BTRFS_BLOCK_GROUP_RAID1 | 2337 /*
2308 BTRFS_BLOCK_GROUP_DUP | 2338 * define the properties of each RAID type.
2309 BTRFS_BLOCK_GROUP_RAID10)) 2339 * FIXME: move this to a global table and use it in all RAID
2340 * calculation code
2341 */
2342 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2343 dev_stripes = 2;
2344 ncopies = 2;
2345 devs_max = 1;
2346 } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2347 devs_min = 2;
2348 } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2349 devs_increment = 2;
2310 ncopies = 2; 2350 ncopies = 2;
2351 devs_max = 2;
2352 devs_min = 2;
2353 } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2354 sub_stripes = 2;
2355 devs_increment = 2;
2356 ncopies = 2;
2357 devs_min = 4;
2358 } else {
2359 devs_max = 1;
2360 }
2311 2361
2312 if (type & BTRFS_BLOCK_GROUP_DATA) { 2362 if (type & BTRFS_BLOCK_GROUP_DATA) {
2313 max_chunk_size = 10 * calc_size; 2363 max_stripe_size = 1024 * 1024 * 1024;
2314 min_stripe_size = 64 * 1024 * 1024; 2364 max_chunk_size = 10 * max_stripe_size;
2315 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2365 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2316 max_chunk_size = 256 * 1024 * 1024; 2366 max_stripe_size = 256 * 1024 * 1024;
2317 min_stripe_size = 32 * 1024 * 1024; 2367 max_chunk_size = max_stripe_size;
2318 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2368 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2319 calc_size = 8 * 1024 * 1024; 2369 max_stripe_size = 8 * 1024 * 1024;
2320 max_chunk_size = calc_size * 2; 2370 max_chunk_size = 2 * max_stripe_size;
2321 min_stripe_size = 1 * 1024 * 1024; 2371 } else {
2372 printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
2373 type);
2374 BUG_ON(1);
2322 } 2375 }
2323 2376
2324 /* we don't want a chunk larger than 10% of writeable space */ 2377 /* we don't want a chunk larger than 10% of writeable space */
2325 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2378 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
2326 max_chunk_size); 2379 max_chunk_size);
2327 2380
2328 if (calc_size * num_stripes > max_chunk_size * ncopies) { 2381 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
2329 calc_size = max_chunk_size * ncopies; 2382 GFP_NOFS);
2330 do_div(calc_size, num_stripes); 2383 if (!devices_info)
2331 do_div(calc_size, BTRFS_STRIPE_LEN); 2384 return -ENOMEM;
2332 calc_size *= BTRFS_STRIPE_LEN;
2333 }
2334 2385
2335 /* we don't want tiny stripes */ 2386 cur = fs_devices->alloc_list.next;
2336 if (!small_stripe)
2337 calc_size = max_t(u64, min_stripe_size, calc_size);
2338 2387
2339 /* 2388 /*
2340 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure 2389 * in the first pass through the devices list, we gather information
2341 * we end up with something bigger than a stripe 2390 * about the available holes on each device.
2342 */ 2391 */
2343 calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); 2392 ndevs = 0;
2344 2393 while (cur != &fs_devices->alloc_list) {
2345 do_div(calc_size, BTRFS_STRIPE_LEN); 2394 struct btrfs_device *device;
2346 calc_size *= BTRFS_STRIPE_LEN; 2395 u64 max_avail;
2347 2396 u64 dev_offset;
2348 return calc_size;
2349}
2350
2351static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
2352 int num_stripes)
2353{
2354 struct map_lookup *new;
2355 size_t len = map_lookup_size(num_stripes);
2356
2357 BUG_ON(map->num_stripes < num_stripes);
2358
2359 if (map->num_stripes == num_stripes)
2360 return map;
2361
2362 new = kmalloc(len, GFP_NOFS);
2363 if (!new) {
2364 /* just change map->num_stripes */
2365 map->num_stripes = num_stripes;
2366 return map;
2367 }
2368
2369 memcpy(new, map, len);
2370 new->num_stripes = num_stripes;
2371 kfree(map);
2372 return new;
2373}
2374 2397
2375/* 2398 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2376 * helper to allocate device space from btrfs_device_info, in which we stored
2377 * max free space information of every device. It is used when we can not
2378 * allocate chunks by default size.
2379 *
2380 * By this helper, we can allocate a new chunk as larger as possible.
2381 */
2382static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
2383 struct btrfs_fs_devices *fs_devices,
2384 struct btrfs_device_info *devices,
2385 int nr_device, u64 type,
2386 struct map_lookup **map_lookup,
2387 int min_stripes, u64 *stripe_size)
2388{
2389 int i, index, sort_again = 0;
2390 int min_devices = min_stripes;
2391 u64 max_avail, min_free;
2392 struct map_lookup *map = *map_lookup;
2393 int ret;
2394 2399
2395 if (nr_device < min_stripes) 2400 cur = cur->next;
2396 return -ENOSPC;
2397 2401
2398 btrfs_descending_sort_devices(devices, nr_device); 2402 if (!device->writeable) {
2403 printk(KERN_ERR
2404 "btrfs: read-only device in alloc_list\n");
2405 WARN_ON(1);
2406 continue;
2407 }
2399 2408
2400 max_avail = devices[0].max_avail; 2409 if (!device->in_fs_metadata)
2401 if (!max_avail) 2410 continue;
2402 return -ENOSPC;
2403 2411
2404 for (i = 0; i < nr_device; i++) { 2412 if (device->total_bytes > device->bytes_used)
2405 /* 2413 total_avail = device->total_bytes - device->bytes_used;
2406 * if dev_offset = 0, it means the free space of this device 2414 else
2407 * is less than what we need, and we didn't search max avail 2415 total_avail = 0;
2408 * extent on this device, so do it now. 2416 /* avail is off by max(alloc_start, 1MB), but that is the same
2417 * for all devices, so it doesn't hurt the sorting later on
2409 */ 2418 */
2410 if (!devices[i].dev_offset) {
2411 ret = find_free_dev_extent(trans, devices[i].dev,
2412 max_avail,
2413 &devices[i].dev_offset,
2414 &devices[i].max_avail);
2415 if (ret != 0 && ret != -ENOSPC)
2416 return ret;
2417 sort_again = 1;
2418 }
2419 }
2420
2421 /* we update the max avail free extent of each devices, sort again */
2422 if (sort_again)
2423 btrfs_descending_sort_devices(devices, nr_device);
2424 2419
2425 if (type & BTRFS_BLOCK_GROUP_DUP) 2420 ret = find_free_dev_extent(trans, device,
2426 min_devices = 1; 2421 max_stripe_size * dev_stripes,
2422 &dev_offset, &max_avail);
2423 if (ret && ret != -ENOSPC)
2424 goto error;
2427 2425
2428 if (!devices[min_devices - 1].max_avail) 2426 if (ret == 0)
2429 return -ENOSPC; 2427 max_avail = max_stripe_size * dev_stripes;
2430 2428
2431 max_avail = devices[min_devices - 1].max_avail; 2429 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
2432 if (type & BTRFS_BLOCK_GROUP_DUP) 2430 continue;
2433 do_div(max_avail, 2);
2434 2431
2435 max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, 2432 devices_info[ndevs].dev_offset = dev_offset;
2436 min_stripes, 1); 2433 devices_info[ndevs].max_avail = max_avail;
2437 if (type & BTRFS_BLOCK_GROUP_DUP) 2434 devices_info[ndevs].total_avail = total_avail;
2438 min_free = max_avail * 2; 2435 devices_info[ndevs].dev = device;
2439 else 2436 ++ndevs;
2440 min_free = max_avail; 2437 }
2441 2438
2442 if (min_free > devices[min_devices - 1].max_avail) 2439 /*
2443 return -ENOSPC; 2440 * now sort the devices by hole size / available space
2441 */
2442 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
2443 btrfs_cmp_device_info, NULL);
2444 2444
2445 map = __shrink_map_lookup_stripes(map, min_stripes); 2445 /* round down to number of usable stripes */
2446 *stripe_size = max_avail; 2446 ndevs -= ndevs % devs_increment;
2447 2447
2448 index = 0; 2448 if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
2449 for (i = 0; i < min_stripes; i++) { 2449 ret = -ENOSPC;
2450 map->stripes[i].dev = devices[index].dev; 2450 goto error;
2451 map->stripes[i].physical = devices[index].dev_offset;
2452 if (type & BTRFS_BLOCK_GROUP_DUP) {
2453 i++;
2454 map->stripes[i].dev = devices[index].dev;
2455 map->stripes[i].physical = devices[index].dev_offset +
2456 max_avail;
2457 }
2458 index++;
2459 } 2451 }
2460 *map_lookup = map;
2461 2452
2462 return 0; 2453 if (devs_max && ndevs > devs_max)
2463} 2454 ndevs = devs_max;
2464 2455 /*
2465static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2456 * the primary goal is to maximize the number of stripes, so use as many
2466 struct btrfs_root *extent_root, 2457 * devices as possible, even if the stripes are not maximum sized.
2467 struct map_lookup **map_ret, 2458 */
2468 u64 *num_bytes, u64 *stripe_size, 2459 stripe_size = devices_info[ndevs-1].max_avail;
2469 u64 start, u64 type) 2460 num_stripes = ndevs * dev_stripes;
2470{
2471 struct btrfs_fs_info *info = extent_root->fs_info;
2472 struct btrfs_device *device = NULL;
2473 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2474 struct list_head *cur;
2475 struct map_lookup *map;
2476 struct extent_map_tree *em_tree;
2477 struct extent_map *em;
2478 struct btrfs_device_info *devices_info;
2479 struct list_head private_devs;
2480 u64 calc_size = 1024 * 1024 * 1024;
2481 u64 min_free;
2482 u64 avail;
2483 u64 dev_offset;
2484 int num_stripes;
2485 int min_stripes;
2486 int sub_stripes;
2487 int min_devices; /* the min number of devices we need */
2488 int i;
2489 int ret;
2490 int index;
2491 2461
2492 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2462 if (stripe_size * num_stripes > max_chunk_size * ncopies) {
2493 (type & BTRFS_BLOCK_GROUP_DUP)) { 2463 stripe_size = max_chunk_size * ncopies;
2494 WARN_ON(1); 2464 do_div(stripe_size, num_stripes);
2495 type &= ~BTRFS_BLOCK_GROUP_DUP;
2496 } 2465 }
2497 if (list_empty(&fs_devices->alloc_list))
2498 return -ENOSPC;
2499
2500 ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
2501 &min_stripes, &sub_stripes);
2502 if (ret)
2503 return ret;
2504 2466
2505 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, 2467 do_div(stripe_size, dev_stripes);
2506 GFP_NOFS); 2468 do_div(stripe_size, BTRFS_STRIPE_LEN);
2507 if (!devices_info) 2469 stripe_size *= BTRFS_STRIPE_LEN;
2508 return -ENOMEM;
2509 2470
2510 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2471 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2511 if (!map) { 2472 if (!map) {
@@ -2514,85 +2475,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2514 } 2475 }
2515 map->num_stripes = num_stripes; 2476 map->num_stripes = num_stripes;
2516 2477
2517 cur = fs_devices->alloc_list.next; 2478 for (i = 0; i < ndevs; ++i) {
2518 index = 0; 2479 for (j = 0; j < dev_stripes; ++j) {
2519 i = 0; 2480 int s = i * dev_stripes + j;
2520 2481 map->stripes[s].dev = devices_info[i].dev;
2521 calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, 2482 map->stripes[s].physical = devices_info[i].dev_offset +
2522 num_stripes, 0); 2483 j * stripe_size;
2523
2524 if (type & BTRFS_BLOCK_GROUP_DUP) {
2525 min_free = calc_size * 2;
2526 min_devices = 1;
2527 } else {
2528 min_free = calc_size;
2529 min_devices = min_stripes;
2530 }
2531
2532 INIT_LIST_HEAD(&private_devs);
2533 while (index < num_stripes) {
2534 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
2535 BUG_ON(!device->writeable);
2536 if (device->total_bytes > device->bytes_used)
2537 avail = device->total_bytes - device->bytes_used;
2538 else
2539 avail = 0;
2540 cur = cur->next;
2541
2542 if (device->in_fs_metadata && avail >= min_free) {
2543 ret = find_free_dev_extent(trans, device, min_free,
2544 &devices_info[i].dev_offset,
2545 &devices_info[i].max_avail);
2546 if (ret == 0) {
2547 list_move_tail(&device->dev_alloc_list,
2548 &private_devs);
2549 map->stripes[index].dev = device;
2550 map->stripes[index].physical =
2551 devices_info[i].dev_offset;
2552 index++;
2553 if (type & BTRFS_BLOCK_GROUP_DUP) {
2554 map->stripes[index].dev = device;
2555 map->stripes[index].physical =
2556 devices_info[i].dev_offset +
2557 calc_size;
2558 index++;
2559 }
2560 } else if (ret != -ENOSPC)
2561 goto error;
2562
2563 devices_info[i].dev = device;
2564 i++;
2565 } else if (device->in_fs_metadata &&
2566 avail >= BTRFS_STRIPE_LEN) {
2567 devices_info[i].dev = device;
2568 devices_info[i].max_avail = avail;
2569 i++;
2570 }
2571
2572 if (cur == &fs_devices->alloc_list)
2573 break;
2574 }
2575
2576 list_splice(&private_devs, &fs_devices->alloc_list);
2577 if (index < num_stripes) {
2578 if (index >= min_stripes) {
2579 num_stripes = index;
2580 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2581 num_stripes /= sub_stripes;
2582 num_stripes *= sub_stripes;
2583 }
2584
2585 map = __shrink_map_lookup_stripes(map, num_stripes);
2586 } else if (i >= min_devices) {
2587 ret = __btrfs_alloc_tiny_space(trans, fs_devices,
2588 devices_info, i, type,
2589 &map, min_stripes,
2590 &calc_size);
2591 if (ret)
2592 goto error;
2593 } else {
2594 ret = -ENOSPC;
2595 goto error;
2596 } 2484 }
2597 } 2485 }
2598 map->sector_size = extent_root->sectorsize; 2486 map->sector_size = extent_root->sectorsize;
@@ -2603,20 +2491,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2603 map->sub_stripes = sub_stripes; 2491 map->sub_stripes = sub_stripes;
2604 2492
2605 *map_ret = map; 2493 *map_ret = map;
2606 *stripe_size = calc_size; 2494 num_bytes = stripe_size * (num_stripes / ncopies);
2607 *num_bytes = chunk_bytes_by_type(type, calc_size,
2608 map->num_stripes, sub_stripes);
2609 2495
2610 trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); 2496 *stripe_size_out = stripe_size;
2497 *num_bytes_out = num_bytes;
2611 2498
2612 em = alloc_extent_map(GFP_NOFS); 2499 trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
2500
2501 em = alloc_extent_map();
2613 if (!em) { 2502 if (!em) {
2614 ret = -ENOMEM; 2503 ret = -ENOMEM;
2615 goto error; 2504 goto error;
2616 } 2505 }
2617 em->bdev = (struct block_device *)map; 2506 em->bdev = (struct block_device *)map;
2618 em->start = start; 2507 em->start = start;
2619 em->len = *num_bytes; 2508 em->len = num_bytes;
2620 em->block_start = 0; 2509 em->block_start = 0;
2621 em->block_len = em->len; 2510 em->block_len = em->len;
2622 2511
@@ -2629,20 +2518,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2629 2518
2630 ret = btrfs_make_block_group(trans, extent_root, 0, type, 2519 ret = btrfs_make_block_group(trans, extent_root, 0, type,
2631 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 2520 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2632 start, *num_bytes); 2521 start, num_bytes);
2633 BUG_ON(ret); 2522 BUG_ON(ret);
2634 2523
2635 index = 0; 2524 for (i = 0; i < map->num_stripes; ++i) {
2636 while (index < map->num_stripes) { 2525 struct btrfs_device *device;
2637 device = map->stripes[index].dev; 2526 u64 dev_offset;
2638 dev_offset = map->stripes[index].physical; 2527
2528 device = map->stripes[i].dev;
2529 dev_offset = map->stripes[i].physical;
2639 2530
2640 ret = btrfs_alloc_dev_extent(trans, device, 2531 ret = btrfs_alloc_dev_extent(trans, device,
2641 info->chunk_root->root_key.objectid, 2532 info->chunk_root->root_key.objectid,
2642 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 2533 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2643 start, dev_offset, calc_size); 2534 start, dev_offset, stripe_size);
2644 BUG_ON(ret); 2535 BUG_ON(ret);
2645 index++;
2646 } 2536 }
2647 2537
2648 kfree(devices_info); 2538 kfree(devices_info);
@@ -2849,7 +2739,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2849 2739
2850void btrfs_mapping_init(struct btrfs_mapping_tree *tree) 2740void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
2851{ 2741{
2852 extent_map_tree_init(&tree->map_tree, GFP_NOFS); 2742 extent_map_tree_init(&tree->map_tree);
2853} 2743}
2854 2744
2855void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) 2745void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
@@ -3499,7 +3389,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3499 free_extent_map(em); 3389 free_extent_map(em);
3500 } 3390 }
3501 3391
3502 em = alloc_extent_map(GFP_NOFS); 3392 em = alloc_extent_map();
3503 if (!em) 3393 if (!em)
3504 return -ENOMEM; 3394 return -ENOMEM;
3505 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 3395 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
@@ -3688,15 +3578,6 @@ static int read_one_dev(struct btrfs_root *root,
3688 return ret; 3578 return ret;
3689} 3579}
3690 3580
3691int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
3692{
3693 struct btrfs_dev_item *dev_item;
3694
3695 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
3696 dev_item);
3697 return read_one_dev(root, buf, dev_item);
3698}
3699
3700int btrfs_read_sys_array(struct btrfs_root *root) 3581int btrfs_read_sys_array(struct btrfs_root *root)
3701{ 3582{
3702 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 3583 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
@@ -3813,7 +3694,7 @@ again:
3813 } 3694 }
3814 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 3695 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
3815 key.objectid = 0; 3696 key.objectid = 0;
3816 btrfs_release_path(root, path); 3697 btrfs_release_path(path);
3817 goto again; 3698 goto again;
3818 } 3699 }
3819 ret = 0; 3700 ret = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cc2eadaf7a27..7c12d61ae7ae 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -85,7 +85,12 @@ struct btrfs_device {
85 /* physical drive uuid (or lvm uuid) */ 85 /* physical drive uuid (or lvm uuid) */
86 u8 uuid[BTRFS_UUID_SIZE]; 86 u8 uuid[BTRFS_UUID_SIZE];
87 87
88 /* per-device scrub information */
89 struct scrub_dev *scrub_device;
90
88 struct btrfs_work work; 91 struct btrfs_work work;
92 struct rcu_head rcu;
93 struct work_struct rcu_work;
89}; 94};
90 95
91struct btrfs_fs_devices { 96struct btrfs_fs_devices {
@@ -144,6 +149,7 @@ struct btrfs_device_info {
144 struct btrfs_device *dev; 149 struct btrfs_device *dev;
145 u64 dev_offset; 150 u64 dev_offset;
146 u64 max_avail; 151 u64 max_avail;
152 u64 total_avail;
147}; 153};
148 154
149struct map_lookup { 155struct map_lookup {
@@ -157,20 +163,8 @@ struct map_lookup {
157 struct btrfs_bio_stripe stripes[]; 163 struct btrfs_bio_stripe stripes[];
158}; 164};
159 165
160/* Used to sort the devices by max_avail(descending sort) */ 166#define map_lookup_size(n) (sizeof(struct map_lookup) + \
161int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); 167 (sizeof(struct btrfs_bio_stripe) * (n)))
162
163/*
164 * sort the devices by max_avail, in which max free extent size of each device
165 * is stored.(Descending Sort)
166 */
167static inline void btrfs_descending_sort_devices(
168 struct btrfs_device_info *devices,
169 size_t nr_devices)
170{
171 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
172 btrfs_cmp_device_free_bytes, NULL);
173}
174 168
175int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 169int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
176 u64 end, u64 *length); 170 u64 end, u64 *length);
@@ -196,7 +190,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
196void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); 190void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
197int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 191int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
198 int mirror_num, int async_submit); 192 int mirror_num, int async_submit);
199int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
200int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 193int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
201 fmode_t flags, void *holder); 194 fmode_t flags, void *holder);
202int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 195int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
@@ -209,8 +202,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
209int btrfs_rm_device(struct btrfs_root *root, char *device_path); 202int btrfs_rm_device(struct btrfs_root *root, char *device_path);
210int btrfs_cleanup_fs_uuids(void); 203int btrfs_cleanup_fs_uuids(void);
211int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); 204int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
212int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
213 u64 logical, struct page *page);
214int btrfs_grow_device(struct btrfs_trans_handle *trans, 205int btrfs_grow_device(struct btrfs_trans_handle *trans,
215 struct btrfs_device *device, u64 new_size); 206 struct btrfs_device *device, u64 new_size);
216struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, 207struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
@@ -218,8 +209,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
218int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 209int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
219int btrfs_init_new_device(struct btrfs_root *root, char *path); 210int btrfs_init_new_device(struct btrfs_root *root, char *path);
220int btrfs_balance(struct btrfs_root *dev_root); 211int btrfs_balance(struct btrfs_root *dev_root);
221void btrfs_unlock_volumes(void);
222void btrfs_lock_volumes(void);
223int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 212int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
224int find_free_dev_extent(struct btrfs_trans_handle *trans, 213int find_free_dev_extent(struct btrfs_trans_handle *trans,
225 struct btrfs_device *device, u64 num_bytes, 214 struct btrfs_device *device, u64 num_bytes,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index cfd660550ded..f3107e4b4d56 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -44,7 +44,7 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
44 return -ENOMEM; 44 return -ENOMEM;
45 45
46 /* lookup the xattr by name */ 46 /* lookup the xattr by name */
47 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, 47 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
48 strlen(name), 0); 48 strlen(name), 0);
49 if (!di) { 49 if (!di) {
50 ret = -ENODATA; 50 ret = -ENODATA;
@@ -103,7 +103,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
103 return -ENOMEM; 103 return -ENOMEM;
104 104
105 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
107 strlen(name), -1); 107 strlen(name), -1);
108 if (IS_ERR(di)) { 108 if (IS_ERR(di)) {
109 ret = PTR_ERR(di); 109 ret = PTR_ERR(di);
@@ -120,13 +120,13 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
120 120
121 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
122 BUG_ON(ret); 122 BUG_ON(ret);
123 btrfs_release_path(root, path); 123 btrfs_release_path(path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) 126 if (!value)
127 goto out; 127 goto out;
128 } else { 128 } else {
129 btrfs_release_path(root, path); 129 btrfs_release_path(path);
130 130
131 if (flags & XATTR_REPLACE) { 131 if (flags & XATTR_REPLACE) {
132 /* we couldn't find the attr to replace */ 132 /* we couldn't find the attr to replace */
@@ -136,7 +136,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
136 } 136 }
137 137
138 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, 139 ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
140 name, name_len, value, size); 140 name, name_len, value, size);
141 BUG_ON(ret); 141 BUG_ON(ret);
142out: 142out:
@@ -190,7 +190,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
190 * NOTE: we set key.offset = 0; because we want to start with the 190 * NOTE: we set key.offset = 0; because we want to start with the
191 * first xattr that we find and walk forward 191 * first xattr that we find and walk forward
192 */ 192 */
193 key.objectid = inode->i_ino; 193 key.objectid = btrfs_ino(inode);
194 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 194 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
195 key.offset = 0; 195 key.offset = 0;
196 196
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 75c47cd8d086..1cd4c3a1862d 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -153,26 +153,6 @@ config CIFS_ACL
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
154 is handed over to the application/caller. 154 is handed over to the application/caller.
155 155
156config CIFS_SMB2
157 bool "SMB2 network file system support (EXPERIMENTAL)"
158 depends on EXPERIMENTAL && INET && BROKEN
159 select NLS
160 select KEYS
161 select FSCACHE
162 select DNS_RESOLVER
163
164 help
165 This enables experimental support for the SMB2 (Server Message Block
166 version 2) protocol. The SMB2 protocol is the successor to the
167 popular CIFS and SMB network file sharing protocols. SMB2 is the
168 native file sharing mechanism for recent versions of Windows
169 operating systems (since Vista). SMB2 enablement will eventually
170 allow users better performance, security and features, than would be
171 possible with cifs. Note that smb2 mount options also are simpler
172 (compared to cifs) due to protocol improvements.
173
174 Unless you are a developer or tester, say N.
175
176config CIFS_NFSD_EXPORT 156config CIFS_NFSD_EXPORT
177 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 157 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
178 depends on CIFS && EXPERIMENTAL 158 depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/README b/fs/cifs/README
index 4a3ca0e5ca24..c5c2c5e5f0f2 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -457,6 +457,9 @@ A partial list of the supported mount options follows:
457 otherwise - read from the server. All written data are stored 457 otherwise - read from the server. All written data are stored
458 in the cache, but if the client doesn't have Exclusive Oplock, 458 in the cache, but if the client doesn't have Exclusive Oplock,
459 it writes the data to the server. 459 it writes the data to the server.
460 rwpidforward Forward pid of a process who opened a file to any read or write
461 operation on that file. This prevent applications like WINE
462 from failing on read and write if we use mandatory brlock style.
460 acl Allow setfacl and getfacl to manage posix ACLs if server 463 acl Allow setfacl and getfacl to manage posix ACLs if server
461 supports them. (default) 464 supports them. (default)
462 noacl Do not allow setfacl and getfacl calls on this mount 465 noacl Do not allow setfacl and getfacl calls on this mount
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 53d57a3fe427..dd8584d35a14 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -146,7 +146,7 @@ static char *extract_sharename(const char *treename)
146static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, 146static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
147 uint16_t maxbuf) 147 uint16_t maxbuf)
148{ 148{
149 const struct cifsTconInfo *tcon = cookie_netfs_data; 149 const struct cifs_tcon *tcon = cookie_netfs_data;
150 char *sharename; 150 char *sharename;
151 uint16_t len; 151 uint16_t len;
152 152
@@ -173,7 +173,7 @@ cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
173 uint16_t maxbuf) 173 uint16_t maxbuf)
174{ 174{
175 struct cifs_fscache_super_auxdata auxdata; 175 struct cifs_fscache_super_auxdata auxdata;
176 const struct cifsTconInfo *tcon = cookie_netfs_data; 176 const struct cifs_tcon *tcon = cookie_netfs_data;
177 177
178 memset(&auxdata, 0, sizeof(auxdata)); 178 memset(&auxdata, 0, sizeof(auxdata));
179 auxdata.resource_id = tcon->resource_id; 179 auxdata.resource_id = tcon->resource_id;
@@ -192,7 +192,7 @@ fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
192 uint16_t datalen) 192 uint16_t datalen)
193{ 193{
194 struct cifs_fscache_super_auxdata auxdata; 194 struct cifs_fscache_super_auxdata auxdata;
195 const struct cifsTconInfo *tcon = cookie_netfs_data; 195 const struct cifs_tcon *tcon = cookie_netfs_data;
196 196
197 if (datalen != sizeof(auxdata)) 197 if (datalen != sizeof(auxdata))
198 return FSCACHE_CHECKAUX_OBSOLETE; 198 return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 18f4272d9047..2fe3cf13b2e9 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -110,8 +110,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
110 struct list_head *tmp1, *tmp2, *tmp3; 110 struct list_head *tmp1, *tmp2, *tmp3;
111 struct mid_q_entry *mid_entry; 111 struct mid_q_entry *mid_entry;
112 struct TCP_Server_Info *server; 112 struct TCP_Server_Info *server;
113 struct cifsSesInfo *ses; 113 struct cifs_ses *ses;
114 struct cifsTconInfo *tcon; 114 struct cifs_tcon *tcon;
115 int i, j; 115 int i, j;
116 __u32 dev_type; 116 __u32 dev_type;
117 117
@@ -152,7 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
152 tcp_ses_list); 152 tcp_ses_list);
153 i++; 153 i++;
154 list_for_each(tmp2, &server->smb_ses_list) { 154 list_for_each(tmp2, &server->smb_ses_list) {
155 ses = list_entry(tmp2, struct cifsSesInfo, 155 ses = list_entry(tmp2, struct cifs_ses,
156 smb_ses_list); 156 smb_ses_list);
157 if ((ses->serverDomain == NULL) || 157 if ((ses->serverDomain == NULL) ||
158 (ses->serverOS == NULL) || 158 (ses->serverOS == NULL) ||
@@ -171,7 +171,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
171 seq_printf(m, "TCP status: %d\n\tLocal Users To " 171 seq_printf(m, "TCP status: %d\n\tLocal Users To "
172 "Server: %d SecMode: 0x%x Req On Wire: %d", 172 "Server: %d SecMode: 0x%x Req On Wire: %d",
173 server->tcpStatus, server->srv_count, 173 server->tcpStatus, server->srv_count,
174 server->secMode, 174 server->sec_mode,
175 atomic_read(&server->inFlight)); 175 atomic_read(&server->inFlight));
176 176
177#ifdef CONFIG_CIFS_STATS2 177#ifdef CONFIG_CIFS_STATS2
@@ -183,7 +183,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
183 seq_puts(m, "\n\tShares:"); 183 seq_puts(m, "\n\tShares:");
184 j = 0; 184 j = 0;
185 list_for_each(tmp3, &ses->tcon_list) { 185 list_for_each(tmp3, &ses->tcon_list) {
186 tcon = list_entry(tmp3, struct cifsTconInfo, 186 tcon = list_entry(tmp3, struct cifs_tcon,
187 tcon_list); 187 tcon_list);
188 ++j; 188 ++j;
189 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); 189 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
@@ -256,8 +256,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
256 int rc; 256 int rc;
257 struct list_head *tmp1, *tmp2, *tmp3; 257 struct list_head *tmp1, *tmp2, *tmp3;
258 struct TCP_Server_Info *server; 258 struct TCP_Server_Info *server;
259 struct cifsSesInfo *ses; 259 struct cifs_ses *ses;
260 struct cifsTconInfo *tcon; 260 struct cifs_tcon *tcon;
261 261
262 rc = get_user(c, buffer); 262 rc = get_user(c, buffer);
263 if (rc) 263 if (rc)
@@ -273,11 +273,11 @@ static ssize_t cifs_stats_proc_write(struct file *file,
273 server = list_entry(tmp1, struct TCP_Server_Info, 273 server = list_entry(tmp1, struct TCP_Server_Info,
274 tcp_ses_list); 274 tcp_ses_list);
275 list_for_each(tmp2, &server->smb_ses_list) { 275 list_for_each(tmp2, &server->smb_ses_list) {
276 ses = list_entry(tmp2, struct cifsSesInfo, 276 ses = list_entry(tmp2, struct cifs_ses,
277 smb_ses_list); 277 smb_ses_list);
278 list_for_each(tmp3, &ses->tcon_list) { 278 list_for_each(tmp3, &ses->tcon_list) {
279 tcon = list_entry(tmp3, 279 tcon = list_entry(tmp3,
280 struct cifsTconInfo, 280 struct cifs_tcon,
281 tcon_list); 281 tcon_list);
282 atomic_set(&tcon->num_smbs_sent, 0); 282 atomic_set(&tcon->num_smbs_sent, 0);
283 atomic_set(&tcon->num_writes, 0); 283 atomic_set(&tcon->num_writes, 0);
@@ -312,8 +312,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
312 int i; 312 int i;
313 struct list_head *tmp1, *tmp2, *tmp3; 313 struct list_head *tmp1, *tmp2, *tmp3;
314 struct TCP_Server_Info *server; 314 struct TCP_Server_Info *server;
315 struct cifsSesInfo *ses; 315 struct cifs_ses *ses;
316 struct cifsTconInfo *tcon; 316 struct cifs_tcon *tcon;
317 317
318 seq_printf(m, 318 seq_printf(m,
319 "Resources in use\nCIFS Session: %d\n", 319 "Resources in use\nCIFS Session: %d\n",
@@ -346,11 +346,11 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
346 server = list_entry(tmp1, struct TCP_Server_Info, 346 server = list_entry(tmp1, struct TCP_Server_Info,
347 tcp_ses_list); 347 tcp_ses_list);
348 list_for_each(tmp2, &server->smb_ses_list) { 348 list_for_each(tmp2, &server->smb_ses_list) {
349 ses = list_entry(tmp2, struct cifsSesInfo, 349 ses = list_entry(tmp2, struct cifs_ses,
350 smb_ses_list); 350 smb_ses_list);
351 list_for_each(tmp3, &ses->tcon_list) { 351 list_for_each(tmp3, &ses->tcon_list) {
352 tcon = list_entry(tmp3, 352 tcon = list_entry(tmp3,
353 struct cifsTconInfo, 353 struct cifs_tcon,
354 tcon_list); 354 tcon_list);
355 i++; 355 i++;
356 seq_printf(m, "\n%d) %s", i, tcon->treeName); 356 seq_printf(m, "\n%d) %s", i, tcon->treeName);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 2b68ac57d97d..8d8f28c94c0f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -272,7 +272,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
272 struct dfs_info3_param *referrals = NULL; 272 struct dfs_info3_param *referrals = NULL;
273 unsigned int num_referrals = 0; 273 unsigned int num_referrals = 0;
274 struct cifs_sb_info *cifs_sb; 274 struct cifs_sb_info *cifs_sb;
275 struct cifsSesInfo *ses; 275 struct cifs_ses *ses;
276 char *full_path; 276 char *full_path;
277 int xid, i; 277 int xid, i;
278 int rc; 278 int rc;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index a9d5692e0c20..ffb1459dc6ec 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -41,6 +41,7 @@
41#define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */ 41#define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */
42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ 42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
43#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ 43#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */
44#define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */
44 45
45struct cifs_sb_info { 46struct cifs_sb_info {
46 struct rb_root tlink_tree; 47 struct rb_root tlink_tree;
@@ -56,8 +57,6 @@ struct cifs_sb_info {
56 mode_t mnt_file_mode; 57 mode_t mnt_file_mode;
57 mode_t mnt_dir_mode; 58 mode_t mnt_dir_mode;
58 unsigned int mnt_cifs_flags; 59 unsigned int mnt_cifs_flags;
59 int prepathlen;
60 char *prepath; /* relative path under the share to mount to */
61 char *mountdata; /* options received at mount time or via DFS refs */ 60 char *mountdata; /* options received at mount time or via DFS refs */
62 struct backing_dev_info bdi; 61 struct backing_dev_info bdi;
63 struct delayed_work prune_tlinks; 62 struct delayed_work prune_tlinks;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 33d221394aca..2272fd5fe5b7 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -95,7 +95,7 @@ struct key_type cifs_spnego_key_type = {
95 95
96/* get a key struct with a SPNEGO security blob, suitable for session setup */ 96/* get a key struct with a SPNEGO security blob, suitable for session setup */
97struct key * 97struct key *
98cifs_get_spnego_key(struct cifsSesInfo *sesInfo) 98cifs_get_spnego_key(struct cifs_ses *sesInfo)
99{ 99{
100 struct TCP_Server_Info *server = sesInfo->server; 100 struct TCP_Server_Info *server = sesInfo->server;
101 struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; 101 struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index e4041ec4d712..31bef9ee078b 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -41,7 +41,7 @@ struct cifs_spnego_msg {
41 41
42#ifdef __KERNEL__ 42#ifdef __KERNEL__
43extern struct key_type cifs_spnego_key_type; 43extern struct key_type cifs_spnego_key_type;
44extern struct key *cifs_get_spnego_key(struct cifsSesInfo *sesInfo); 44extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo);
45#endif /* KERNEL */ 45#endif /* KERNEL */
46 46
47#endif /* _CIFS_SPNEGO_H */ 47#endif /* _CIFS_SPNEGO_H */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index f3c6fb9942ac..8f1700623b41 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
38 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 38 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
39/* security id for Authenticated Users system group */ 39/* security id for Authenticated Users system group */
40static const struct cifs_sid sid_authusers = { 40static const struct cifs_sid sid_authusers = {
41 1, 1, {0, 0, 0, 0, 0, 5}, {11} }; 41 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
42/* group users */ 42/* group users */
43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; 43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
44 44
@@ -458,7 +458,8 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
458 if (num_subauth) { 458 if (num_subauth) {
459 for (i = 0; i < num_subauth; ++i) { 459 for (i = 0; i < num_subauth; ++i) {
460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) { 460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
461 if (ctsid->sub_auth[i] > cwsid->sub_auth[i]) 461 if (le32_to_cpu(ctsid->sub_auth[i]) >
462 le32_to_cpu(cwsid->sub_auth[i]))
462 return 1; 463 return 1;
463 else 464 else
464 return -1; 465 return -1;
@@ -945,7 +946,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
945 int oplock = 0; 946 int oplock = 0;
946 int xid, rc; 947 int xid, rc;
947 __u16 fid; 948 __u16 fid;
948 struct cifsTconInfo *tcon; 949 struct cifs_tcon *tcon;
949 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 950 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
950 951
951 if (IS_ERR(tlink)) 952 if (IS_ERR(tlink))
@@ -1013,7 +1014,7 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
1013 int oplock = 0; 1014 int oplock = 0;
1014 int xid, rc; 1015 int xid, rc;
1015 __u16 fid; 1016 __u16 fid;
1016 struct cifsTconInfo *tcon; 1017 struct cifs_tcon *tcon;
1017 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 1018 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
1018 1019
1019 if (IS_ERR(tlink)) 1020 if (IS_ERR(tlink))
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 45c3f78c8f81..dfbd9f1f373d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -229,7 +229,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
229} 229}
230 230
231/* first calculate 24 bytes ntlm response and then 16 byte session key */ 231/* first calculate 24 bytes ntlm response and then 16 byte session key */
232int setup_ntlm_response(struct cifsSesInfo *ses) 232int setup_ntlm_response(struct cifs_ses *ses)
233{ 233{
234 int rc = 0; 234 int rc = 0;
235 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; 235 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
@@ -312,7 +312,7 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
312 * Allocate domain name which gets freed when session struct is deallocated. 312 * Allocate domain name which gets freed when session struct is deallocated.
313 */ 313 */
314static int 314static int
315build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 315build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
316{ 316{
317 unsigned int dlen; 317 unsigned int dlen;
318 unsigned int wlen; 318 unsigned int wlen;
@@ -400,7 +400,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
400 * about target string i.e. for some, just user name might suffice. 400 * about target string i.e. for some, just user name might suffice.
401 */ 401 */
402static int 402static int
403find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 403find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
404{ 404{
405 unsigned int attrsize; 405 unsigned int attrsize;
406 unsigned int type; 406 unsigned int type;
@@ -445,7 +445,7 @@ find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
445 return 0; 445 return 0;
446} 446}
447 447
448static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash, 448static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
449 const struct nls_table *nls_cp) 449 const struct nls_table *nls_cp)
450{ 450{
451 int rc = 0; 451 int rc = 0;
@@ -527,7 +527,7 @@ calc_exit_2:
527} 527}
528 528
529static int 529static int
530CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash) 530CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
531{ 531{
532 int rc; 532 int rc;
533 unsigned int offset = CIFS_SESS_KEY_SIZE + 8; 533 unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
@@ -563,7 +563,7 @@ CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
563 563
564 564
565int 565int
566setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 566setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
567{ 567{
568 int rc; 568 int rc;
569 int baselen; 569 int baselen;
@@ -649,7 +649,7 @@ setup_ntlmv2_rsp_ret:
649} 649}
650 650
651int 651int
652calc_seckey(struct cifsSesInfo *ses) 652calc_seckey(struct cifs_ses *ses)
653{ 653{
654 int rc; 654 int rc;
655 struct crypto_blkcipher *tfm_arc4; 655 struct crypto_blkcipher *tfm_arc4;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 493b74ca5648..989442dcfb45 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -104,46 +104,25 @@ cifs_sb_deactive(struct super_block *sb)
104} 104}
105 105
106static int 106static int
107cifs_read_super(struct super_block *sb, void *data, 107cifs_read_super(struct super_block *sb, struct smb_vol *volume_info,
108 const char *devname, int silent) 108 const char *devname, int silent)
109{ 109{
110 struct inode *inode; 110 struct inode *inode;
111 struct cifs_sb_info *cifs_sb; 111 struct cifs_sb_info *cifs_sb;
112 int rc = 0; 112 int rc = 0;
113 113
114 /* BB should we make this contingent on mount parm? */
115 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
116 sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
117 cifs_sb = CIFS_SB(sb); 114 cifs_sb = CIFS_SB(sb);
118 if (cifs_sb == NULL)
119 return -ENOMEM;
120 115
121 spin_lock_init(&cifs_sb->tlink_tree_lock); 116 spin_lock_init(&cifs_sb->tlink_tree_lock);
122 cifs_sb->tlink_tree = RB_ROOT; 117 cifs_sb->tlink_tree = RB_ROOT;
123 118
124 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 119 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
125 if (rc) { 120 if (rc)
126 kfree(cifs_sb);
127 return rc; 121 return rc;
128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 122
131 /* 123 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
132 * Copy mount params to sb for use in submounts. Better to do
133 * the copy here and deal with the error before cleanup gets
134 * complicated post-mount.
135 */
136 if (data) {
137 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
138 if (cifs_sb->mountdata == NULL) {
139 bdi_destroy(&cifs_sb->bdi);
140 kfree(sb->s_fs_info);
141 sb->s_fs_info = NULL;
142 return -ENOMEM;
143 }
144 }
145 124
146 rc = cifs_mount(sb, cifs_sb, devname); 125 rc = cifs_mount(sb, cifs_sb, volume_info, devname);
147 126
148 if (rc) { 127 if (rc) {
149 if (!silent) 128 if (!silent)
@@ -194,15 +173,7 @@ out_no_root:
194 cifs_umount(sb, cifs_sb); 173 cifs_umount(sb, cifs_sb);
195 174
196out_mount_failed: 175out_mount_failed:
197 if (cifs_sb) { 176 bdi_destroy(&cifs_sb->bdi);
198 if (cifs_sb->mountdata) {
199 kfree(cifs_sb->mountdata);
200 cifs_sb->mountdata = NULL;
201 }
202 unload_nls(cifs_sb->local_nls);
203 bdi_destroy(&cifs_sb->bdi);
204 kfree(cifs_sb);
205 }
206 return rc; 177 return rc;
207} 178}
208 179
@@ -237,7 +208,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
237{ 208{
238 struct super_block *sb = dentry->d_sb; 209 struct super_block *sb = dentry->d_sb;
239 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 210 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
240 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 211 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
241 int rc = -EOPNOTSUPP; 212 int rc = -EOPNOTSUPP;
242 int xid; 213 int xid;
243 214
@@ -390,7 +361,7 @@ static int
390cifs_show_options(struct seq_file *s, struct vfsmount *m) 361cifs_show_options(struct seq_file *s, struct vfsmount *m)
391{ 362{
392 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); 363 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
393 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 364 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
394 struct sockaddr *srcaddr; 365 struct sockaddr *srcaddr;
395 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 366 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
396 367
@@ -444,14 +415,20 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
444 seq_printf(s, ",nocase"); 415 seq_printf(s, ",nocase");
445 if (tcon->retry) 416 if (tcon->retry)
446 seq_printf(s, ",hard"); 417 seq_printf(s, ",hard");
447 if (cifs_sb->prepath) 418 if (tcon->unix_ext)
448 seq_printf(s, ",prepath=%s", cifs_sb->prepath); 419 seq_printf(s, ",unix");
420 else
421 seq_printf(s, ",nounix");
449 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
450 seq_printf(s, ",posixpaths"); 423 seq_printf(s, ",posixpaths");
451 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) 424 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
452 seq_printf(s, ",setuids"); 425 seq_printf(s, ",setuids");
453 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 426 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
454 seq_printf(s, ",serverino"); 427 seq_printf(s, ",serverino");
428 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
429 seq_printf(s, ",rwpidforward");
430 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL)
431 seq_printf(s, ",forcemand");
455 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) 432 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
456 seq_printf(s, ",directio"); 433 seq_printf(s, ",directio");
457 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 434 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
@@ -484,7 +461,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
484static void cifs_umount_begin(struct super_block *sb) 461static void cifs_umount_begin(struct super_block *sb)
485{ 462{
486 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 463 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
487 struct cifsTconInfo *tcon; 464 struct cifs_tcon *tcon;
488 465
489 if (cifs_sb == NULL) 466 if (cifs_sb == NULL)
490 return; 467 return;
@@ -559,29 +536,189 @@ static const struct super_operations cifs_super_ops = {
559#endif 536#endif
560}; 537};
561 538
539/*
540 * Get root dentry from superblock according to prefix path mount option.
541 * Return dentry with refcount + 1 on success and NULL otherwise.
542 */
543static struct dentry *
544cifs_get_root(struct smb_vol *vol, struct super_block *sb)
545{
546 int xid, rc;
547 struct inode *inode;
548 struct qstr name;
549 struct dentry *dparent = NULL, *dchild = NULL, *alias;
550 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
551 unsigned int i, full_len, len;
552 char *full_path = NULL, *pstart;
553 char sep;
554
555 full_path = cifs_build_path_to_root(vol, cifs_sb,
556 cifs_sb_master_tcon(cifs_sb));
557 if (full_path == NULL)
558 return NULL;
559
560 cFYI(1, "Get root dentry for %s", full_path);
561
562 xid = GetXid();
563 sep = CIFS_DIR_SEP(cifs_sb);
564 dparent = dget(sb->s_root);
565 full_len = strlen(full_path);
566 full_path[full_len] = sep;
567 pstart = full_path + 1;
568
569 for (i = 1, len = 0; i <= full_len; i++) {
570 if (full_path[i] != sep || !len) {
571 len++;
572 continue;
573 }
574
575 full_path[i] = 0;
576 cFYI(1, "get dentry for %s", pstart);
577
578 name.name = pstart;
579 name.len = len;
580 name.hash = full_name_hash(pstart, len);
581 dchild = d_lookup(dparent, &name);
582 if (dchild == NULL) {
583 cFYI(1, "not exists");
584 dchild = d_alloc(dparent, &name);
585 if (dchild == NULL) {
586 dput(dparent);
587 dparent = NULL;
588 goto out;
589 }
590 }
591
592 cFYI(1, "get inode");
593 if (dchild->d_inode == NULL) {
594 cFYI(1, "not exists");
595 inode = NULL;
596 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
597 rc = cifs_get_inode_info_unix(&inode, full_path,
598 sb, xid);
599 else
600 rc = cifs_get_inode_info(&inode, full_path,
601 NULL, sb, xid, NULL);
602 if (rc) {
603 dput(dchild);
604 dput(dparent);
605 dparent = NULL;
606 goto out;
607 }
608 alias = d_materialise_unique(dchild, inode);
609 if (alias != NULL) {
610 dput(dchild);
611 if (IS_ERR(alias)) {
612 dput(dparent);
613 dparent = NULL;
614 goto out;
615 }
616 dchild = alias;
617 }
618 }
619 cFYI(1, "parent %p, child %p", dparent, dchild);
620
621 dput(dparent);
622 dparent = dchild;
623 len = 0;
624 pstart = full_path + i + 1;
625 full_path[i] = sep;
626 }
627out:
628 _FreeXid(xid);
629 kfree(full_path);
630 return dparent;
631}
632
562static struct dentry * 633static struct dentry *
563cifs_do_mount(struct file_system_type *fs_type, 634cifs_do_mount(struct file_system_type *fs_type,
564 int flags, const char *dev_name, void *data) 635 int flags, const char *dev_name, void *data)
565{ 636{
566 int rc; 637 int rc;
567 struct super_block *sb; 638 struct super_block *sb;
568 639 struct cifs_sb_info *cifs_sb;
569 sb = sget(fs_type, NULL, set_anon_super, NULL); 640 struct smb_vol *volume_info;
641 struct cifs_mnt_data mnt_data;
642 struct dentry *root;
570 643
571 cFYI(1, "Devname: %s flags: %d ", dev_name, flags); 644 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
572 645
573 if (IS_ERR(sb)) 646 rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name);
574 return ERR_CAST(sb); 647 if (rc)
648 return ERR_PTR(rc);
649
650 cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
651 if (cifs_sb == NULL) {
652 root = ERR_PTR(-ENOMEM);
653 goto out;
654 }
655
656 cifs_setup_cifs_sb(volume_info, cifs_sb);
657
658 mnt_data.vol = volume_info;
659 mnt_data.cifs_sb = cifs_sb;
660 mnt_data.flags = flags;
661
662 sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data);
663 if (IS_ERR(sb)) {
664 root = ERR_CAST(sb);
665 goto out_cifs_sb;
666 }
667
668 if (sb->s_fs_info) {
669 cFYI(1, "Use existing superblock");
670 goto out_shared;
671 }
672
673 /*
674 * Copy mount params for use in submounts. Better to do
675 * the copy here and deal with the error before cleanup gets
676 * complicated post-mount.
677 */
678 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
679 if (cifs_sb->mountdata == NULL) {
680 root = ERR_PTR(-ENOMEM);
681 goto out_super;
682 }
575 683
576 sb->s_flags = flags; 684 sb->s_flags = flags;
685 /* BB should we make this contingent on mount parm? */
686 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
687 sb->s_fs_info = cifs_sb;
577 688
578 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 689 rc = cifs_read_super(sb, volume_info, dev_name,
690 flags & MS_SILENT ? 1 : 0);
579 if (rc) { 691 if (rc) {
580 deactivate_locked_super(sb); 692 root = ERR_PTR(rc);
581 return ERR_PTR(rc); 693 goto out_super;
582 } 694 }
695
583 sb->s_flags |= MS_ACTIVE; 696 sb->s_flags |= MS_ACTIVE;
584 return dget(sb->s_root); 697
698 root = cifs_get_root(volume_info, sb);
699 if (root == NULL)
700 goto out_super;
701
702 cFYI(1, "dentry root is: %p", root);
703 goto out;
704
705out_shared:
706 root = cifs_get_root(volume_info, sb);
707 if (root)
708 cFYI(1, "dentry root is: %p", root);
709 goto out;
710
711out_super:
712 kfree(cifs_sb->mountdata);
713 deactivate_locked_super(sb);
714
715out_cifs_sb:
716 unload_nls(cifs_sb->local_nls);
717 kfree(cifs_sb);
718
719out:
720 cifs_cleanup_volume_info(&volume_info);
721 return root;
585} 722}
586 723
587static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 724static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 76b4517e74b0..6255fa812c7a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -155,6 +155,81 @@ struct cifs_cred {
155 ***************************************************************** 155 *****************************************************************
156 */ 156 */
157 157
158struct smb_vol {
159 char *username;
160 char *password;
161 char *domainname;
162 char *UNC;
163 char *UNCip;
164 char *iocharset; /* local code page for mapping to and from Unicode */
165 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
166 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
167 uid_t cred_uid;
168 uid_t linux_uid;
169 gid_t linux_gid;
170 mode_t file_mode;
171 mode_t dir_mode;
172 unsigned secFlg;
173 bool retry:1;
174 bool intr:1;
175 bool setuids:1;
176 bool override_uid:1;
177 bool override_gid:1;
178 bool dynperm:1;
179 bool noperm:1;
180 bool no_psx_acl:1; /* set if posix acl support should be disabled */
181 bool cifs_acl:1;
182 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
183 bool server_ino:1; /* use inode numbers from server ie UniqueId */
184 bool direct_io:1;
185 bool strict_io:1; /* strict cache behavior */
186 bool remap:1; /* set to remap seven reserved chars in filenames */
187 bool posix_paths:1; /* unset to not ask for posix pathnames. */
188 bool no_linux_ext:1;
189 bool sfu_emul:1;
190 bool nullauth:1; /* attempt to authenticate with null user */
191 bool nocase:1; /* request case insensitive filenames */
192 bool nobrl:1; /* disable sending byte range locks to srv */
193 bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
194 bool seal:1; /* request transport encryption on share */
195 bool nodfs:1; /* Do not request DFS, even if available */
196 bool local_lease:1; /* check leases only on local system, not remote */
197 bool noblocksnd:1;
198 bool noautotune:1;
199 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
200 bool fsc:1; /* enable fscache */
201 bool mfsymlinks:1; /* use Minshall+French Symlinks */
202 bool multiuser:1;
203 bool rwpidforward:1; /* pid forward for read/write operations */
204 unsigned int rsize;
205 unsigned int wsize;
206 bool sockopt_tcp_nodelay:1;
207 unsigned short int port;
208 unsigned long actimeo; /* attribute cache timeout (jiffies) */
209 char *prepath;
210 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
211 struct nls_table *local_nls;
212};
213
214#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
215 CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \
216 CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \
217 CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \
218 CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \
219 CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \
220 CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \
221 CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \
222 CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO)
223
224#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \
225 MS_NODEV | MS_SYNCHRONOUS)
226
227struct cifs_mnt_data {
228 struct cifs_sb_info *cifs_sb;
229 struct smb_vol *vol;
230 int flags;
231};
232
158struct TCP_Server_Info { 233struct TCP_Server_Info {
159 struct list_head tcp_ses_list; 234 struct list_head tcp_ses_list;
160 struct list_head smb_ses_list; 235 struct list_head smb_ses_list;
@@ -179,7 +254,7 @@ struct TCP_Server_Info {
179 struct mutex srv_mutex; 254 struct mutex srv_mutex;
180 struct task_struct *tsk; 255 struct task_struct *tsk;
181 char server_GUID[16]; 256 char server_GUID[16];
182 char secMode; 257 char sec_mode;
183 bool session_estab; /* mark when very first sess is established */ 258 bool session_estab; /* mark when very first sess is established */
184 u16 dialect; /* dialect index that server chose */ 259 u16 dialect; /* dialect index that server chose */
185 enum securityEnum secType; 260 enum securityEnum secType;
@@ -254,7 +329,7 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
254/* 329/*
255 * Session structure. One of these for each uid session with a particular host 330 * Session structure. One of these for each uid session with a particular host
256 */ 331 */
257struct cifsSesInfo { 332struct cifs_ses {
258 struct list_head smb_ses_list; 333 struct list_head smb_ses_list;
259 struct list_head tcon_list; 334 struct list_head tcon_list;
260 struct mutex session_mutex; 335 struct mutex session_mutex;
@@ -294,11 +369,11 @@ struct cifsSesInfo {
294 * there is one of these for each connection to a resource on a particular 369 * there is one of these for each connection to a resource on a particular
295 * session 370 * session
296 */ 371 */
297struct cifsTconInfo { 372struct cifs_tcon {
298 struct list_head tcon_list; 373 struct list_head tcon_list;
299 int tc_count; 374 int tc_count;
300 struct list_head openFileList; 375 struct list_head openFileList;
301 struct cifsSesInfo *ses; /* pointer to session associated with */ 376 struct cifs_ses *ses; /* pointer to session associated with */
302 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ 377 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
303 char *nativeFileSystem; 378 char *nativeFileSystem;
304 char *password; /* for share-level security */ 379 char *password; /* for share-level security */
@@ -380,12 +455,12 @@ struct tcon_link {
380#define TCON_LINK_IN_TREE 2 455#define TCON_LINK_IN_TREE 2
381 unsigned long tl_time; 456 unsigned long tl_time;
382 atomic_t tl_count; 457 atomic_t tl_count;
383 struct cifsTconInfo *tl_tcon; 458 struct cifs_tcon *tl_tcon;
384}; 459};
385 460
386extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); 461extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb);
387 462
388static inline struct cifsTconInfo * 463static inline struct cifs_tcon *
389tlink_tcon(struct tcon_link *tlink) 464tlink_tcon(struct tcon_link *tlink)
390{ 465{
391 return tlink->tl_tcon; 466 return tlink->tl_tcon;
@@ -402,7 +477,7 @@ cifs_get_tlink(struct tcon_link *tlink)
402} 477}
403 478
404/* This function is always expected to succeed */ 479/* This function is always expected to succeed */
405extern struct cifsTconInfo *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); 480extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb);
406 481
407/* 482/*
408 * This info hangs off the cifsFileInfo structure, pointed to by llist. 483 * This info hangs off the cifsFileInfo structure, pointed to by llist.
@@ -455,6 +530,14 @@ struct cifsFileInfo {
455 struct work_struct oplock_break; /* work for oplock breaks */ 530 struct work_struct oplock_break; /* work for oplock breaks */
456}; 531};
457 532
533struct cifs_io_parms {
534 __u16 netfid;
535 __u32 pid;
536 __u64 offset;
537 unsigned int length;
538 struct cifs_tcon *tcon;
539};
540
458/* 541/*
459 * Take a reference on the file private data. Must be called with 542 * Take a reference on the file private data. Must be called with
460 * cifs_file_list_lock held. 543 * cifs_file_list_lock held.
@@ -509,10 +592,30 @@ static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
509 return '\\'; 592 return '\\';
510} 593}
511 594
595static inline void
596convert_delimiter(char *path, char delim)
597{
598 int i;
599 char old_delim;
600
601 if (path == NULL)
602 return;
603
604 if (delim == '/')
605 old_delim = '\\';
606 else
607 old_delim = '/';
608
609 for (i = 0; path[i] != '\0'; i++) {
610 if (path[i] == old_delim)
611 path[i] = delim;
612 }
613}
614
512#ifdef CONFIG_CIFS_STATS 615#ifdef CONFIG_CIFS_STATS
513#define cifs_stats_inc atomic_inc 616#define cifs_stats_inc atomic_inc
514 617
515static inline void cifs_stats_bytes_written(struct cifsTconInfo *tcon, 618static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon,
516 unsigned int bytes) 619 unsigned int bytes)
517{ 620{
518 if (bytes) { 621 if (bytes) {
@@ -522,7 +625,7 @@ static inline void cifs_stats_bytes_written(struct cifsTconInfo *tcon,
522 } 625 }
523} 626}
524 627
525static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon, 628static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon,
526 unsigned int bytes) 629 unsigned int bytes)
527{ 630{
528 spin_lock(&tcon->stat_lock); 631 spin_lock(&tcon->stat_lock);
@@ -543,9 +646,8 @@ struct mid_q_entry;
543 * This is the prototype for the mid callback function. When creating one, 646 * This is the prototype for the mid callback function. When creating one,
544 * take special care to avoid deadlocks. Things to bear in mind: 647 * take special care to avoid deadlocks. Things to bear in mind:
545 * 648 *
546 * - it will be called by cifsd 649 * - it will be called by cifsd, with no locks held
547 * - the GlobalMid_Lock will be held 650 * - the mid will be removed from any lists
548 * - the mid will be removed from the pending_mid_q list
549 */ 651 */
550typedef void (mid_callback_t)(struct mid_q_entry *mid); 652typedef void (mid_callback_t)(struct mid_q_entry *mid);
551 653
@@ -573,7 +675,7 @@ struct mid_q_entry {
573struct oplock_q_entry { 675struct oplock_q_entry {
574 struct list_head qhead; 676 struct list_head qhead;
575 struct inode *pinode; 677 struct inode *pinode;
576 struct cifsTconInfo *tcon; 678 struct cifs_tcon *tcon;
577 __u16 netfid; 679 __u16 netfid;
578}; 680};
579 681
@@ -656,6 +758,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
656#define MID_RESPONSE_RECEIVED 4 758#define MID_RESPONSE_RECEIVED 4
657#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 759#define MID_RETRY_NEEDED 8 /* session closed while this request out */
658#define MID_RESPONSE_MALFORMED 0x10 760#define MID_RESPONSE_MALFORMED 0x10
761#define MID_SHUTDOWN 0x20
659 762
660/* Types of response buffer returned from SendReceive2 */ 763/* Types of response buffer returned from SendReceive2 */
661#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 764#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6e69e06a30b3..953f84413c77 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -57,8 +57,9 @@ extern int init_cifs_idmap(void);
57extern void exit_cifs_idmap(void); 57extern void exit_cifs_idmap(void);
58extern void cifs_destroy_idmaptrees(void); 58extern void cifs_destroy_idmaptrees(void);
59extern char *build_path_from_dentry(struct dentry *); 59extern char *build_path_from_dentry(struct dentry *);
60extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 60extern char *cifs_build_path_to_root(struct smb_vol *vol,
61 struct cifsTconInfo *tcon); 61 struct cifs_sb_info *cifs_sb,
62 struct cifs_tcon *tcon);
62extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 63extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
63extern char *cifs_compose_mount_options(const char *sb_mountdata, 64extern char *cifs_compose_mount_options(const char *sb_mountdata,
64 const char *fullpath, const struct dfs_info3_param *ref, 65 const char *fullpath, const struct dfs_info3_param *ref,
@@ -67,20 +68,22 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata,
67extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, 68extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
68 struct TCP_Server_Info *server); 69 struct TCP_Server_Info *server);
69extern void DeleteMidQEntry(struct mid_q_entry *midEntry); 70extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
70extern int cifs_call_async(struct TCP_Server_Info *server, 71extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
71 struct smb_hdr *in_buf, mid_callback_t *callback, 72 unsigned int nvec, mid_callback_t *callback,
72 void *cbdata); 73 void *cbdata, bool ignore_pend);
73extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, 74extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
74 struct smb_hdr * /* input */ , 75 struct smb_hdr * /* input */ ,
75 struct smb_hdr * /* out */ , 76 struct smb_hdr * /* out */ ,
76 int * /* bytes returned */ , const int long_op); 77 int * /* bytes returned */ , const int long_op);
77extern int SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, 78extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
78 struct smb_hdr *in_buf, int flags); 79 struct smb_hdr *in_buf, int flags);
79extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 80extern int cifs_check_receive(struct mid_q_entry *mid,
81 struct TCP_Server_Info *server, bool log_error);
82extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
80 struct kvec *, int /* nvec to send */, 83 struct kvec *, int /* nvec to send */,
81 int * /* type of buf returned */ , const int flags); 84 int * /* type of buf returned */ , const int flags);
82extern int SendReceiveBlockingLock(const unsigned int xid, 85extern int SendReceiveBlockingLock(const unsigned int xid,
83 struct cifsTconInfo *ptcon, 86 struct cifs_tcon *ptcon,
84 struct smb_hdr *in_buf , 87 struct smb_hdr *in_buf ,
85 struct smb_hdr *out_buf, 88 struct smb_hdr *out_buf,
86 int *bytes_returned); 89 int *bytes_returned);
@@ -99,14 +102,14 @@ extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
99extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); 102extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
100extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 103extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
101 const unsigned short int port); 104 const unsigned short int port);
102extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 105extern int map_smb_to_linux_error(struct smb_hdr *smb, bool logErr);
103extern void header_assemble(struct smb_hdr *, char /* command */ , 106extern void header_assemble(struct smb_hdr *, char /* command */ ,
104 const struct cifsTconInfo *, int /* length of 107 const struct cifs_tcon *, int /* length of
105 fixed section (word count) in two byte units */); 108 fixed section (word count) in two byte units */);
106extern int small_smb_init_no_tc(const int smb_cmd, const int wct, 109extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
107 struct cifsSesInfo *ses, 110 struct cifs_ses *ses,
108 void **request_buf); 111 void **request_buf);
109extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 112extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
110 const struct nls_table *nls_cp); 113 const struct nls_table *nls_cp);
111extern __u16 GetNextMid(struct TCP_Server_Info *server); 114extern __u16 GetNextMid(struct TCP_Server_Info *server);
112extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 115extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
@@ -148,102 +151,108 @@ extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
148extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, 151extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
149 const char *); 152 const char *);
150 153
154extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
155 struct cifs_sb_info *cifs_sb);
156extern int cifs_match_super(struct super_block *, void *);
157extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info);
158extern int cifs_setup_volume_info(struct smb_vol **pvolume_info,
159 char *mount_data, const char *devname);
151extern int cifs_mount(struct super_block *, struct cifs_sb_info *, 160extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
152 const char *); 161 struct smb_vol *, const char *);
153extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 162extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
154extern void cifs_dfs_release_automount_timer(void); 163extern void cifs_dfs_release_automount_timer(void);
155void cifs_proc_init(void); 164void cifs_proc_init(void);
156void cifs_proc_clean(void); 165void cifs_proc_clean(void);
157 166
158extern int cifs_negotiate_protocol(unsigned int xid, 167extern int cifs_negotiate_protocol(unsigned int xid,
159 struct cifsSesInfo *ses); 168 struct cifs_ses *ses);
160extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, 169extern int cifs_setup_session(unsigned int xid, struct cifs_ses *ses,
161 struct nls_table *nls_info); 170 struct nls_table *nls_info);
162extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); 171extern int CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses);
163 172
164extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, 173extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses,
165 const char *tree, struct cifsTconInfo *tcon, 174 const char *tree, struct cifs_tcon *tcon,
166 const struct nls_table *); 175 const struct nls_table *);
167 176
168extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, 177extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
169 const char *searchName, const struct nls_table *nls_codepage, 178 const char *searchName, const struct nls_table *nls_codepage,
170 __u16 *searchHandle, struct cifs_search_info *psrch_inf, 179 __u16 *searchHandle, struct cifs_search_info *psrch_inf,
171 int map, const char dirsep); 180 int map, const char dirsep);
172 181
173extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, 182extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
174 __u16 searchHandle, struct cifs_search_info *psrch_inf); 183 __u16 searchHandle, struct cifs_search_info *psrch_inf);
175 184
176extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, 185extern int CIFSFindClose(const int, struct cifs_tcon *tcon,
177 const __u16 search_handle); 186 const __u16 search_handle);
178 187
179extern int CIFSSMBQFileInfo(const int xid, struct cifsTconInfo *tcon, 188extern int CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon,
180 u16 netfid, FILE_ALL_INFO *pFindData); 189 u16 netfid, FILE_ALL_INFO *pFindData);
181extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, 190extern int CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon,
182 const unsigned char *searchName, 191 const unsigned char *searchName,
183 FILE_ALL_INFO *findData, 192 FILE_ALL_INFO *findData,
184 int legacy /* whether to use old info level */, 193 int legacy /* whether to use old info level */,
185 const struct nls_table *nls_codepage, int remap); 194 const struct nls_table *nls_codepage, int remap);
186extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, 195extern int SMBQueryInformation(const int xid, struct cifs_tcon *tcon,
187 const unsigned char *searchName, 196 const unsigned char *searchName,
188 FILE_ALL_INFO *findData, 197 FILE_ALL_INFO *findData,
189 const struct nls_table *nls_codepage, int remap); 198 const struct nls_table *nls_codepage, int remap);
190 199
191extern int CIFSSMBUnixQFileInfo(const int xid, struct cifsTconInfo *tcon, 200extern int CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon,
192 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData); 201 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData);
193extern int CIFSSMBUnixQPathInfo(const int xid, 202extern int CIFSSMBUnixQPathInfo(const int xid,
194 struct cifsTconInfo *tcon, 203 struct cifs_tcon *tcon,
195 const unsigned char *searchName, 204 const unsigned char *searchName,
196 FILE_UNIX_BASIC_INFO *pFindData, 205 FILE_UNIX_BASIC_INFO *pFindData,
197 const struct nls_table *nls_codepage, int remap); 206 const struct nls_table *nls_codepage, int remap);
198 207
199extern int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, 208extern int CIFSGetDFSRefer(const int xid, struct cifs_ses *ses,
200 const unsigned char *searchName, 209 const unsigned char *searchName,
201 struct dfs_info3_param **target_nodes, 210 struct dfs_info3_param **target_nodes,
202 unsigned int *number_of_nodes_in_array, 211 unsigned int *number_of_nodes_in_array,
203 const struct nls_table *nls_codepage, int remap); 212 const struct nls_table *nls_codepage, int remap);
204 213
205extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, 214extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo,
206 const char *old_path, 215 const char *old_path,
207 const struct nls_table *nls_codepage, 216 const struct nls_table *nls_codepage,
208 unsigned int *pnum_referrals, 217 unsigned int *pnum_referrals,
209 struct dfs_info3_param **preferrals, 218 struct dfs_info3_param **preferrals,
210 int remap); 219 int remap);
211extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, 220extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
212 struct super_block *sb, struct smb_vol *vol); 221 struct super_block *sb, struct smb_vol *vol);
213extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, 222extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon,
214 struct kstatfs *FSData); 223 struct kstatfs *FSData);
215extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, 224extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon,
216 struct kstatfs *FSData); 225 struct kstatfs *FSData);
217extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, 226extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon,
218 __u64 cap); 227 __u64 cap);
219 228
220extern int CIFSSMBQFSAttributeInfo(const int xid, 229extern int CIFSSMBQFSAttributeInfo(const int xid,
221 struct cifsTconInfo *tcon); 230 struct cifs_tcon *tcon);
222extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon); 231extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon);
223extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon); 232extern int CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon);
224extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 233extern int CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon,
225 struct kstatfs *FSData); 234 struct kstatfs *FSData);
226 235
227extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, 236extern int CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon,
228 const char *fileName, const FILE_BASIC_INFO *data, 237 const char *fileName, const FILE_BASIC_INFO *data,
229 const struct nls_table *nls_codepage, 238 const struct nls_table *nls_codepage,
230 int remap_special_chars); 239 int remap_special_chars);
231extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, 240extern int CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
232 const FILE_BASIC_INFO *data, __u16 fid, 241 const FILE_BASIC_INFO *data, __u16 fid,
233 __u32 pid_of_opener); 242 __u32 pid_of_opener);
234extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, 243extern int CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon,
235 bool delete_file, __u16 fid, __u32 pid_of_opener); 244 bool delete_file, __u16 fid, __u32 pid_of_opener);
236#if 0 245#if 0
237extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 246extern int CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon,
238 char *fileName, __u16 dos_attributes, 247 char *fileName, __u16 dos_attributes,
239 const struct nls_table *nls_codepage); 248 const struct nls_table *nls_codepage);
240#endif /* possibly unneeded function */ 249#endif /* possibly unneeded function */
241extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, 250extern int CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon,
242 const char *fileName, __u64 size, 251 const char *fileName, __u64 size,
243 bool setAllocationSizeFlag, 252 bool setAllocationSizeFlag,
244 const struct nls_table *nls_codepage, 253 const struct nls_table *nls_codepage,
245 int remap_special_chars); 254 int remap_special_chars);
246extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 255extern int CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon,
247 __u64 size, __u16 fileHandle, __u32 opener_pid, 256 __u64 size, __u16 fileHandle, __u32 opener_pid,
248 bool AllocSizeFlag); 257 bool AllocSizeFlag);
249 258
@@ -257,120 +266,116 @@ struct cifs_unix_set_info_args {
257 dev_t device; 266 dev_t device;
258}; 267};
259 268
260extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, 269extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
261 const struct cifs_unix_set_info_args *args, 270 const struct cifs_unix_set_info_args *args,
262 u16 fid, u32 pid_of_opener); 271 u16 fid, u32 pid_of_opener);
263 272
264extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon, 273extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *pTcon,
265 char *fileName, 274 char *fileName,
266 const struct cifs_unix_set_info_args *args, 275 const struct cifs_unix_set_info_args *args,
267 const struct nls_table *nls_codepage, 276 const struct nls_table *nls_codepage,
268 int remap_special_chars); 277 int remap_special_chars);
269 278
270extern int CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon, 279extern int CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon,
271 const char *newName, 280 const char *newName,
272 const struct nls_table *nls_codepage, 281 const struct nls_table *nls_codepage,
273 int remap_special_chars); 282 int remap_special_chars);
274extern int CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, 283extern int CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon,
275 const char *name, const struct nls_table *nls_codepage, 284 const char *name, const struct nls_table *nls_codepage,
276 int remap_special_chars); 285 int remap_special_chars);
277extern int CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, 286extern int CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon,
278 const char *name, __u16 type, 287 const char *name, __u16 type,
279 const struct nls_table *nls_codepage, 288 const struct nls_table *nls_codepage,
280 int remap_special_chars); 289 int remap_special_chars);
281extern int CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, 290extern int CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon,
282 const char *name, 291 const char *name,
283 const struct nls_table *nls_codepage, 292 const struct nls_table *nls_codepage,
284 int remap_special_chars); 293 int remap_special_chars);
285extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, 294extern int CIFSSMBRename(const int xid, struct cifs_tcon *tcon,
286 const char *fromName, const char *toName, 295 const char *fromName, const char *toName,
287 const struct nls_table *nls_codepage, 296 const struct nls_table *nls_codepage,
288 int remap_special_chars); 297 int remap_special_chars);
289extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, 298extern int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon,
290 int netfid, const char *target_name, 299 int netfid, const char *target_name,
291 const struct nls_table *nls_codepage, 300 const struct nls_table *nls_codepage,
292 int remap_special_chars); 301 int remap_special_chars);
293extern int CIFSCreateHardLink(const int xid, 302extern int CIFSCreateHardLink(const int xid,
294 struct cifsTconInfo *tcon, 303 struct cifs_tcon *tcon,
295 const char *fromName, const char *toName, 304 const char *fromName, const char *toName,
296 const struct nls_table *nls_codepage, 305 const struct nls_table *nls_codepage,
297 int remap_special_chars); 306 int remap_special_chars);
298extern int CIFSUnixCreateHardLink(const int xid, 307extern int CIFSUnixCreateHardLink(const int xid,
299 struct cifsTconInfo *tcon, 308 struct cifs_tcon *tcon,
300 const char *fromName, const char *toName, 309 const char *fromName, const char *toName,
301 const struct nls_table *nls_codepage, 310 const struct nls_table *nls_codepage,
302 int remap_special_chars); 311 int remap_special_chars);
303extern int CIFSUnixCreateSymLink(const int xid, 312extern int CIFSUnixCreateSymLink(const int xid,
304 struct cifsTconInfo *tcon, 313 struct cifs_tcon *tcon,
305 const char *fromName, const char *toName, 314 const char *fromName, const char *toName,
306 const struct nls_table *nls_codepage); 315 const struct nls_table *nls_codepage);
307extern int CIFSSMBUnixQuerySymLink(const int xid, 316extern int CIFSSMBUnixQuerySymLink(const int xid,
308 struct cifsTconInfo *tcon, 317 struct cifs_tcon *tcon,
309 const unsigned char *searchName, char **syminfo, 318 const unsigned char *searchName, char **syminfo,
310 const struct nls_table *nls_codepage); 319 const struct nls_table *nls_codepage);
311#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL 320#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
312extern int CIFSSMBQueryReparseLinkInfo(const int xid, 321extern int CIFSSMBQueryReparseLinkInfo(const int xid,
313 struct cifsTconInfo *tcon, 322 struct cifs_tcon *tcon,
314 const unsigned char *searchName, 323 const unsigned char *searchName,
315 char *symlinkinfo, const int buflen, __u16 fid, 324 char *symlinkinfo, const int buflen, __u16 fid,
316 const struct nls_table *nls_codepage); 325 const struct nls_table *nls_codepage);
317#endif /* temporarily unused until cifs_symlink fixed */ 326#endif /* temporarily unused until cifs_symlink fixed */
318extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 327extern int CIFSSMBOpen(const int xid, struct cifs_tcon *tcon,
319 const char *fileName, const int disposition, 328 const char *fileName, const int disposition,
320 const int access_flags, const int omode, 329 const int access_flags, const int omode,
321 __u16 *netfid, int *pOplock, FILE_ALL_INFO *, 330 __u16 *netfid, int *pOplock, FILE_ALL_INFO *,
322 const struct nls_table *nls_codepage, int remap); 331 const struct nls_table *nls_codepage, int remap);
323extern int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, 332extern int SMBLegacyOpen(const int xid, struct cifs_tcon *tcon,
324 const char *fileName, const int disposition, 333 const char *fileName, const int disposition,
325 const int access_flags, const int omode, 334 const int access_flags, const int omode,
326 __u16 *netfid, int *pOplock, FILE_ALL_INFO *, 335 __u16 *netfid, int *pOplock, FILE_ALL_INFO *,
327 const struct nls_table *nls_codepage, int remap); 336 const struct nls_table *nls_codepage, int remap);
328extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, 337extern int CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon,
329 u32 posix_flags, __u64 mode, __u16 *netfid, 338 u32 posix_flags, __u64 mode, __u16 *netfid,
330 FILE_UNIX_BASIC_INFO *pRetData, 339 FILE_UNIX_BASIC_INFO *pRetData,
331 __u32 *pOplock, const char *name, 340 __u32 *pOplock, const char *name,
332 const struct nls_table *nls_codepage, int remap); 341 const struct nls_table *nls_codepage, int remap);
333extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, 342extern int CIFSSMBClose(const int xid, struct cifs_tcon *tcon,
334 const int smb_file_id); 343 const int smb_file_id);
335 344
336extern int CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, 345extern int CIFSSMBFlush(const int xid, struct cifs_tcon *tcon,
337 const int smb_file_id); 346 const int smb_file_id);
338 347
339extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, 348extern int CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms,
340 const int netfid, unsigned int count, 349 unsigned int *nbytes, char **buf,
341 const __u64 lseek, unsigned int *nbytes, char **buf,
342 int *return_buf_type); 350 int *return_buf_type);
343extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, 351extern int CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms,
344 const int netfid, const unsigned int count, 352 unsigned int *nbytes, const char *buf,
345 const __u64 lseek, unsigned int *nbytes, 353 const char __user *ubuf, const int long_op);
346 const char *buf, const char __user *ubuf, 354extern int CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms,
355 unsigned int *nbytes, struct kvec *iov, const int nvec,
347 const int long_op); 356 const int long_op);
348extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, 357extern int CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon,
349 const int netfid, const unsigned int count,
350 const __u64 offset, unsigned int *nbytes,
351 struct kvec *iov, const int nvec, const int long_op);
352extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
353 const unsigned char *searchName, __u64 *inode_number, 358 const unsigned char *searchName, __u64 *inode_number,
354 const struct nls_table *nls_codepage, 359 const struct nls_table *nls_codepage,
355 int remap_special_chars); 360 int remap_special_chars);
356 361
357extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 362extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon,
358 const __u16 netfid, const __u64 len, 363 const __u16 netfid, const __u64 len,
359 const __u64 offset, const __u32 numUnlock, 364 const __u64 offset, const __u32 numUnlock,
360 const __u32 numLock, const __u8 lockType, 365 const __u32 numLock, const __u8 lockType,
361 const bool waitFlag, const __u8 oplock_level); 366 const bool waitFlag, const __u8 oplock_level);
362extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 367extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon,
363 const __u16 smb_file_id, const int get_flag, 368 const __u16 smb_file_id, const int get_flag,
364 const __u64 len, struct file_lock *, 369 const __u64 len, struct file_lock *,
365 const __u16 lock_type, const bool waitFlag); 370 const __u16 lock_type, const bool waitFlag);
366extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 371extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon);
367extern int CIFSSMBEcho(struct TCP_Server_Info *server); 372extern int CIFSSMBEcho(struct TCP_Server_Info *server);
368extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 373extern int CIFSSMBLogoff(const int xid, struct cifs_ses *ses);
369 374
370extern struct cifsSesInfo *sesInfoAlloc(void); 375extern struct cifs_ses *sesInfoAlloc(void);
371extern void sesInfoFree(struct cifsSesInfo *); 376extern void sesInfoFree(struct cifs_ses *);
372extern struct cifsTconInfo *tconInfoAlloc(void); 377extern struct cifs_tcon *tconInfoAlloc(void);
373extern void tconInfoFree(struct cifsTconInfo *); 378extern void tconInfoFree(struct cifs_tcon *);
374 379
375extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); 380extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
376extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 381extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
@@ -379,51 +384,51 @@ extern int cifs_verify_signature(struct smb_hdr *,
379 struct TCP_Server_Info *server, 384 struct TCP_Server_Info *server,
380 __u32 expected_sequence_number); 385 __u32 expected_sequence_number);
381extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); 386extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
382extern int setup_ntlm_response(struct cifsSesInfo *); 387extern int setup_ntlm_response(struct cifs_ses *);
383extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); 388extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
384extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); 389extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
385extern void cifs_crypto_shash_release(struct TCP_Server_Info *); 390extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
386extern int calc_seckey(struct cifsSesInfo *); 391extern int calc_seckey(struct cifs_ses *);
387 392
388#ifdef CONFIG_CIFS_WEAK_PW_HASH 393#ifdef CONFIG_CIFS_WEAK_PW_HASH
389extern int calc_lanman_hash(const char *password, const char *cryptkey, 394extern int calc_lanman_hash(const char *password, const char *cryptkey,
390 bool encrypt, char *lnm_session_key); 395 bool encrypt, char *lnm_session_key);
391#endif /* CIFS_WEAK_PW_HASH */ 396#endif /* CIFS_WEAK_PW_HASH */
392#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */ 397#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
393extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, 398extern int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon,
394 const int notify_subdirs, const __u16 netfid, 399 const int notify_subdirs, const __u16 netfid,
395 __u32 filter, struct file *file, int multishot, 400 __u32 filter, struct file *file, int multishot,
396 const struct nls_table *nls_codepage); 401 const struct nls_table *nls_codepage);
397#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */ 402#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
398extern int CIFSSMBCopy(int xid, 403extern int CIFSSMBCopy(int xid,
399 struct cifsTconInfo *source_tcon, 404 struct cifs_tcon *source_tcon,
400 const char *fromName, 405 const char *fromName,
401 const __u16 target_tid, 406 const __u16 target_tid,
402 const char *toName, const int flags, 407 const char *toName, const int flags,
403 const struct nls_table *nls_codepage, 408 const struct nls_table *nls_codepage,
404 int remap_special_chars); 409 int remap_special_chars);
405extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 410extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon,
406 const unsigned char *searchName, 411 const unsigned char *searchName,
407 const unsigned char *ea_name, char *EAData, 412 const unsigned char *ea_name, char *EAData,
408 size_t bufsize, const struct nls_table *nls_codepage, 413 size_t bufsize, const struct nls_table *nls_codepage,
409 int remap_special_chars); 414 int remap_special_chars);
410extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, 415extern int CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon,
411 const char *fileName, const char *ea_name, 416 const char *fileName, const char *ea_name,
412 const void *ea_value, const __u16 ea_value_len, 417 const void *ea_value, const __u16 ea_value_len,
413 const struct nls_table *nls_codepage, int remap_special_chars); 418 const struct nls_table *nls_codepage, int remap_special_chars);
414extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, 419extern int CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon,
415 __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); 420 __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen);
416extern int CIFSSMBSetCIFSACL(const int, struct cifsTconInfo *, __u16, 421extern int CIFSSMBSetCIFSACL(const int, struct cifs_tcon *, __u16,
417 struct cifs_ntsd *, __u32); 422 struct cifs_ntsd *, __u32);
418extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, 423extern int CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon,
419 const unsigned char *searchName, 424 const unsigned char *searchName,
420 char *acl_inf, const int buflen, const int acl_type, 425 char *acl_inf, const int buflen, const int acl_type,
421 const struct nls_table *nls_codepage, int remap_special_chars); 426 const struct nls_table *nls_codepage, int remap_special_chars);
422extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, 427extern int CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon,
423 const unsigned char *fileName, 428 const unsigned char *fileName,
424 const char *local_acl, const int buflen, const int acl_type, 429 const char *local_acl, const int buflen, const int acl_type,
425 const struct nls_table *nls_codepage, int remap_special_chars); 430 const struct nls_table *nls_codepage, int remap_special_chars);
426extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, 431extern int CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon,
427 const int netfid, __u64 *pExtAttrBits, __u64 *pMask); 432 const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
428extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); 433extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
429extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); 434extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
@@ -434,4 +439,22 @@ extern int mdfour(unsigned char *, unsigned char *, int);
434extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 439extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
435extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, 440extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
436 unsigned char *p24); 441 unsigned char *p24);
442
443/* asynchronous write support */
444struct cifs_writedata {
445 struct kref refcount;
446 enum writeback_sync_modes sync_mode;
447 struct work_struct work;
448 struct cifsFileInfo *cfile;
449 __u64 offset;
450 unsigned int bytes;
451 int result;
452 unsigned int nr_pages;
453 struct page *pages[1];
454};
455
456int cifs_async_writev(struct cifs_writedata *wdata);
457struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages);
458void cifs_writedata_release(struct kref *refcount);
459
437#endif /* _CIFSPROTO_H */ 460#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 83df937b814e..1a9fe7f816d1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -32,6 +32,7 @@
32#include <linux/vfs.h> 32#include <linux/vfs.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/posix_acl_xattr.h> 34#include <linux/posix_acl_xattr.h>
35#include <linux/pagemap.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include "cifspdu.h" 37#include "cifspdu.h"
37#include "cifsglob.h" 38#include "cifsglob.h"
@@ -84,7 +85,7 @@ static struct {
84 85
85/* Mark as invalid, all open files on tree connections since they 86/* Mark as invalid, all open files on tree connections since they
86 were closed when session to server was lost */ 87 were closed when session to server was lost */
87static void mark_open_files_invalid(struct cifsTconInfo *pTcon) 88static void mark_open_files_invalid(struct cifs_tcon *pTcon)
88{ 89{
89 struct cifsFileInfo *open_file = NULL; 90 struct cifsFileInfo *open_file = NULL;
90 struct list_head *tmp; 91 struct list_head *tmp;
@@ -104,10 +105,10 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
104 105
105/* reconnect the socket, tcon, and smb session if needed */ 106/* reconnect the socket, tcon, and smb session if needed */
106static int 107static int
107cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) 108cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
108{ 109{
109 int rc = 0; 110 int rc = 0;
110 struct cifsSesInfo *ses; 111 struct cifs_ses *ses;
111 struct TCP_Server_Info *server; 112 struct TCP_Server_Info *server;
112 struct nls_table *nls_codepage; 113 struct nls_table *nls_codepage;
113 114
@@ -226,7 +227,7 @@ out:
226 SMB information in the SMB header. If the return code is zero, this 227 SMB information in the SMB header. If the return code is zero, this
227 function must have filled in request_buf pointer */ 228 function must have filled in request_buf pointer */
228static int 229static int
229small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 230small_smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
230 void **request_buf) 231 void **request_buf)
231{ 232{
232 int rc; 233 int rc;
@@ -252,7 +253,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
252 253
253int 254int
254small_smb_init_no_tc(const int smb_command, const int wct, 255small_smb_init_no_tc(const int smb_command, const int wct,
255 struct cifsSesInfo *ses, void **request_buf) 256 struct cifs_ses *ses, void **request_buf)
256{ 257{
257 int rc; 258 int rc;
258 struct smb_hdr *buffer; 259 struct smb_hdr *buffer;
@@ -278,7 +279,7 @@ small_smb_init_no_tc(const int smb_command, const int wct,
278 279
279/* If the return code is zero, this function must fill in request_buf pointer */ 280/* If the return code is zero, this function must fill in request_buf pointer */
280static int 281static int
281__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 282__smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
282 void **request_buf, void **response_buf) 283 void **request_buf, void **response_buf)
283{ 284{
284 *request_buf = cifs_buf_get(); 285 *request_buf = cifs_buf_get();
@@ -304,7 +305,7 @@ __smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
304 305
305/* If the return code is zero, this function must fill in request_buf pointer */ 306/* If the return code is zero, this function must fill in request_buf pointer */
306static int 307static int
307smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 308smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
308 void **request_buf, void **response_buf) 309 void **request_buf, void **response_buf)
309{ 310{
310 int rc; 311 int rc;
@@ -317,7 +318,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
317} 318}
318 319
319static int 320static int
320smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, 321smb_init_no_reconnect(int smb_command, int wct, struct cifs_tcon *tcon,
321 void **request_buf, void **response_buf) 322 void **request_buf, void **response_buf)
322{ 323{
323 if (tcon->ses->need_reconnect || tcon->need_reconnect) 324 if (tcon->ses->need_reconnect || tcon->need_reconnect)
@@ -366,7 +367,7 @@ static inline void inc_rfc1001_len(void *pSMB, int count)
366} 367}
367 368
368int 369int
369CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 370CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
370{ 371{
371 NEGOTIATE_REQ *pSMB; 372 NEGOTIATE_REQ *pSMB;
372 NEGOTIATE_RSP *pSMBr; 373 NEGOTIATE_RSP *pSMBr;
@@ -450,7 +451,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
450 rc = -EOPNOTSUPP; 451 rc = -EOPNOTSUPP;
451 goto neg_err_exit; 452 goto neg_err_exit;
452 } 453 }
453 server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode); 454 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode);
454 server->maxReq = le16_to_cpu(rsp->MaxMpxCount); 455 server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
455 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), 456 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
456 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 457 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
@@ -504,7 +505,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
504 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { 505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
505 memcpy(ses->server->cryptkey, rsp->EncryptionKey, 506 memcpy(ses->server->cryptkey, rsp->EncryptionKey,
506 CIFS_CRYPTO_KEY_SIZE); 507 CIFS_CRYPTO_KEY_SIZE);
507 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 508 } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
508 rc = -EIO; /* need cryptkey unless plain text */ 509 rc = -EIO; /* need cryptkey unless plain text */
509 goto neg_err_exit; 510 goto neg_err_exit;
510 } 511 }
@@ -526,11 +527,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
526 goto neg_err_exit; 527 goto neg_err_exit;
527 } 528 }
528 /* else wct == 17 NTLM */ 529 /* else wct == 17 NTLM */
529 server->secMode = pSMBr->SecurityMode; 530 server->sec_mode = pSMBr->SecurityMode;
530 if ((server->secMode & SECMODE_USER) == 0) 531 if ((server->sec_mode & SECMODE_USER) == 0)
531 cFYI(1, "share mode security"); 532 cFYI(1, "share mode security");
532 533
533 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0) 534 if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0)
534#ifdef CONFIG_CIFS_WEAK_PW_HASH 535#ifdef CONFIG_CIFS_WEAK_PW_HASH
535 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) 536 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
536#endif /* CIFS_WEAK_PW_HASH */ 537#endif /* CIFS_WEAK_PW_HASH */
@@ -570,18 +571,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
570 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 571 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
571 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, 572 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
572 CIFS_CRYPTO_KEY_SIZE); 573 CIFS_CRYPTO_KEY_SIZE);
573 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 574 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
574 && (pSMBr->EncryptionKeyLength == 0)) { 575 server->capabilities & CAP_EXTENDED_SECURITY) &&
576 (pSMBr->EncryptionKeyLength == 0)) {
575 /* decode security blob */ 577 /* decode security blob */
576 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
577 rc = -EIO; /* no crypt key only if plain text pwd */
578 goto neg_err_exit;
579 }
580
581 /* BB might be helpful to save off the domain of server here */
582
583 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
584 (server->capabilities & CAP_EXTENDED_SECURITY)) {
585 count = get_bcc(&pSMBr->hdr); 578 count = get_bcc(&pSMBr->hdr);
586 if (count < 16) { 579 if (count < 16) {
587 rc = -EIO; 580 rc = -EIO;
@@ -624,6 +617,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
624 } else 617 } else
625 rc = -EOPNOTSUPP; 618 rc = -EOPNOTSUPP;
626 } 619 }
620 } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
621 rc = -EIO; /* no crypt key only if plain text pwd */
622 goto neg_err_exit;
627 } else 623 } else
628 server->capabilities &= ~CAP_EXTENDED_SECURITY; 624 server->capabilities &= ~CAP_EXTENDED_SECURITY;
629 625
@@ -634,27 +630,27 @@ signing_check:
634 /* MUST_SIGN already includes the MAY_SIGN FLAG 630 /* MUST_SIGN already includes the MAY_SIGN FLAG
635 so if this is zero it means that signing is disabled */ 631 so if this is zero it means that signing is disabled */
636 cFYI(1, "Signing disabled"); 632 cFYI(1, "Signing disabled");
637 if (server->secMode & SECMODE_SIGN_REQUIRED) { 633 if (server->sec_mode & SECMODE_SIGN_REQUIRED) {
638 cERROR(1, "Server requires " 634 cERROR(1, "Server requires "
639 "packet signing to be enabled in " 635 "packet signing to be enabled in "
640 "/proc/fs/cifs/SecurityFlags."); 636 "/proc/fs/cifs/SecurityFlags.");
641 rc = -EOPNOTSUPP; 637 rc = -EOPNOTSUPP;
642 } 638 }
643 server->secMode &= 639 server->sec_mode &=
644 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 640 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
645 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { 641 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
646 /* signing required */ 642 /* signing required */
647 cFYI(1, "Must sign - secFlags 0x%x", secFlags); 643 cFYI(1, "Must sign - secFlags 0x%x", secFlags);
648 if ((server->secMode & 644 if ((server->sec_mode &
649 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { 645 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
650 cERROR(1, "signing required but server lacks support"); 646 cERROR(1, "signing required but server lacks support");
651 rc = -EOPNOTSUPP; 647 rc = -EOPNOTSUPP;
652 } else 648 } else
653 server->secMode |= SECMODE_SIGN_REQUIRED; 649 server->sec_mode |= SECMODE_SIGN_REQUIRED;
654 } else { 650 } else {
655 /* signing optional ie CIFSSEC_MAY_SIGN */ 651 /* signing optional ie CIFSSEC_MAY_SIGN */
656 if ((server->secMode & SECMODE_SIGN_REQUIRED) == 0) 652 if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0)
657 server->secMode &= 653 server->sec_mode &=
658 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 654 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
659 } 655 }
660 656
@@ -666,7 +662,7 @@ neg_err_exit:
666} 662}
667 663
668int 664int
669CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) 665CIFSSMBTDis(const int xid, struct cifs_tcon *tcon)
670{ 666{
671 struct smb_hdr *smb_buffer; 667 struct smb_hdr *smb_buffer;
672 int rc = 0; 668 int rc = 0;
@@ -725,6 +721,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
725{ 721{
726 ECHO_REQ *smb; 722 ECHO_REQ *smb;
727 int rc = 0; 723 int rc = 0;
724 struct kvec iov;
728 725
729 cFYI(1, "In echo request"); 726 cFYI(1, "In echo request");
730 727
@@ -739,9 +736,10 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
739 put_bcc(1, &smb->hdr); 736 put_bcc(1, &smb->hdr);
740 smb->Data[0] = 'a'; 737 smb->Data[0] = 'a';
741 inc_rfc1001_len(smb, 3); 738 inc_rfc1001_len(smb, 3);
739 iov.iov_base = smb;
740 iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
742 741
743 rc = cifs_call_async(server, (struct smb_hdr *)smb, 742 rc = cifs_call_async(server, &iov, 1, cifs_echo_callback, server, true);
744 cifs_echo_callback, server);
745 if (rc) 743 if (rc)
746 cFYI(1, "Echo request failed: %d", rc); 744 cFYI(1, "Echo request failed: %d", rc);
747 745
@@ -751,7 +749,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
751} 749}
752 750
753int 751int
754CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) 752CIFSSMBLogoff(const int xid, struct cifs_ses *ses)
755{ 753{
756 LOGOFF_ANDX_REQ *pSMB; 754 LOGOFF_ANDX_REQ *pSMB;
757 int rc = 0; 755 int rc = 0;
@@ -778,7 +776,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
778 776
779 pSMB->hdr.Mid = GetNextMid(ses->server); 777 pSMB->hdr.Mid = GetNextMid(ses->server);
780 778
781 if (ses->server->secMode & 779 if (ses->server->sec_mode &
782 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 780 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
783 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 781 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
784 782
@@ -798,7 +796,7 @@ session_already_dead:
798} 796}
799 797
800int 798int
801CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName, 799CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName,
802 __u16 type, const struct nls_table *nls_codepage, int remap) 800 __u16 type, const struct nls_table *nls_codepage, int remap)
803{ 801{
804 TRANSACTION2_SPI_REQ *pSMB = NULL; 802 TRANSACTION2_SPI_REQ *pSMB = NULL;
@@ -873,7 +871,7 @@ PsxDelete:
873} 871}
874 872
875int 873int
876CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName, 874CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName,
877 const struct nls_table *nls_codepage, int remap) 875 const struct nls_table *nls_codepage, int remap)
878{ 876{
879 DELETE_FILE_REQ *pSMB = NULL; 877 DELETE_FILE_REQ *pSMB = NULL;
@@ -918,7 +916,7 @@ DelFileRetry:
918} 916}
919 917
920int 918int
921CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName, 919CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon, const char *dirName,
922 const struct nls_table *nls_codepage, int remap) 920 const struct nls_table *nls_codepage, int remap)
923{ 921{
924 DELETE_DIRECTORY_REQ *pSMB = NULL; 922 DELETE_DIRECTORY_REQ *pSMB = NULL;
@@ -961,7 +959,7 @@ RmDirRetry:
961} 959}
962 960
963int 961int
964CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon, 962CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon,
965 const char *name, const struct nls_table *nls_codepage, int remap) 963 const char *name, const struct nls_table *nls_codepage, int remap)
966{ 964{
967 int rc = 0; 965 int rc = 0;
@@ -1004,7 +1002,7 @@ MkDirRetry:
1004} 1002}
1005 1003
1006int 1004int
1007CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags, 1005CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon, __u32 posix_flags,
1008 __u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData, 1006 __u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData,
1009 __u32 *pOplock, const char *name, 1007 __u32 *pOplock, const char *name,
1010 const struct nls_table *nls_codepage, int remap) 1008 const struct nls_table *nls_codepage, int remap)
@@ -1170,7 +1168,7 @@ access_flags_to_smbopen_mode(const int access_flags)
1170} 1168}
1171 1169
1172int 1170int
1173SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, 1171SMBLegacyOpen(const int xid, struct cifs_tcon *tcon,
1174 const char *fileName, const int openDisposition, 1172 const char *fileName, const int openDisposition,
1175 const int access_flags, const int create_options, __u16 *netfid, 1173 const int access_flags, const int create_options, __u16 *netfid,
1176 int *pOplock, FILE_ALL_INFO *pfile_info, 1174 int *pOplock, FILE_ALL_INFO *pfile_info,
@@ -1277,7 +1275,7 @@ OldOpenRetry:
1277} 1275}
1278 1276
1279int 1277int
1280CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 1278CIFSSMBOpen(const int xid, struct cifs_tcon *tcon,
1281 const char *fileName, const int openDisposition, 1279 const char *fileName, const int openDisposition,
1282 const int access_flags, const int create_options, __u16 *netfid, 1280 const int access_flags, const int create_options, __u16 *netfid,
1283 int *pOplock, FILE_ALL_INFO *pfile_info, 1281 int *pOplock, FILE_ALL_INFO *pfile_info,
@@ -1379,8 +1377,7 @@ openRetry:
1379} 1377}
1380 1378
1381int 1379int
1382CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid, 1380CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes,
1383 const unsigned int count, const __u64 lseek, unsigned int *nbytes,
1384 char **buf, int *pbuf_type) 1381 char **buf, int *pbuf_type)
1385{ 1382{
1386 int rc = -EACCES; 1383 int rc = -EACCES;
@@ -1390,13 +1387,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1390 int wct; 1387 int wct;
1391 int resp_buf_type = 0; 1388 int resp_buf_type = 0;
1392 struct kvec iov[1]; 1389 struct kvec iov[1];
1390 __u32 pid = io_parms->pid;
1391 __u16 netfid = io_parms->netfid;
1392 __u64 offset = io_parms->offset;
1393 struct cifs_tcon *tcon = io_parms->tcon;
1394 unsigned int count = io_parms->length;
1393 1395
1394 cFYI(1, "Reading %d bytes on fid %d", count, netfid); 1396 cFYI(1, "Reading %d bytes on fid %d", count, netfid);
1395 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1397 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1396 wct = 12; 1398 wct = 12;
1397 else { 1399 else {
1398 wct = 10; /* old style read */ 1400 wct = 10; /* old style read */
1399 if ((lseek >> 32) > 0) { 1401 if ((offset >> 32) > 0) {
1400 /* can not handle this big offset for old */ 1402 /* can not handle this big offset for old */
1401 return -EIO; 1403 return -EIO;
1402 } 1404 }
@@ -1407,15 +1409,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1407 if (rc) 1409 if (rc)
1408 return rc; 1410 return rc;
1409 1411
1412 pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
1413 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
1414
1410 /* tcon and ses pointer are checked in smb_init */ 1415 /* tcon and ses pointer are checked in smb_init */
1411 if (tcon->ses->server == NULL) 1416 if (tcon->ses->server == NULL)
1412 return -ECONNABORTED; 1417 return -ECONNABORTED;
1413 1418
1414 pSMB->AndXCommand = 0xFF; /* none */ 1419 pSMB->AndXCommand = 0xFF; /* none */
1415 pSMB->Fid = netfid; 1420 pSMB->Fid = netfid;
1416 pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF); 1421 pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
1417 if (wct == 12) 1422 if (wct == 12)
1418 pSMB->OffsetHigh = cpu_to_le32(lseek >> 32); 1423 pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
1419 1424
1420 pSMB->Remaining = 0; 1425 pSMB->Remaining = 0;
1421 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF); 1426 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
@@ -1484,9 +1489,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1484 1489
1485 1490
1486int 1491int
1487CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, 1492CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms,
1488 const int netfid, const unsigned int count, 1493 unsigned int *nbytes, const char *buf,
1489 const __u64 offset, unsigned int *nbytes, const char *buf,
1490 const char __user *ubuf, const int long_op) 1494 const char __user *ubuf, const int long_op)
1491{ 1495{
1492 int rc = -EACCES; 1496 int rc = -EACCES;
@@ -1495,6 +1499,11 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1495 int bytes_returned, wct; 1499 int bytes_returned, wct;
1496 __u32 bytes_sent; 1500 __u32 bytes_sent;
1497 __u16 byte_count; 1501 __u16 byte_count;
1502 __u32 pid = io_parms->pid;
1503 __u16 netfid = io_parms->netfid;
1504 __u64 offset = io_parms->offset;
1505 struct cifs_tcon *tcon = io_parms->tcon;
1506 unsigned int count = io_parms->length;
1498 1507
1499 *nbytes = 0; 1508 *nbytes = 0;
1500 1509
@@ -1516,6 +1525,10 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1516 (void **) &pSMBr); 1525 (void **) &pSMBr);
1517 if (rc) 1526 if (rc)
1518 return rc; 1527 return rc;
1528
1529 pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
1530 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
1531
1519 /* tcon and ses pointer are checked in smb_init */ 1532 /* tcon and ses pointer are checked in smb_init */
1520 if (tcon->ses->server == NULL) 1533 if (tcon->ses->server == NULL)
1521 return -ECONNABORTED; 1534 return -ECONNABORTED;
@@ -1602,17 +1615,259 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1602 return rc; 1615 return rc;
1603} 1616}
1604 1617
1618void
1619cifs_writedata_release(struct kref *refcount)
1620{
1621 struct cifs_writedata *wdata = container_of(refcount,
1622 struct cifs_writedata, refcount);
1623
1624 if (wdata->cfile)
1625 cifsFileInfo_put(wdata->cfile);
1626
1627 kfree(wdata);
1628}
1629
1630/*
1631 * Write failed with a retryable error. Resend the write request. It's also
1632 * possible that the page was redirtied so re-clean the page.
1633 */
1634static void
1635cifs_writev_requeue(struct cifs_writedata *wdata)
1636{
1637 int i, rc;
1638 struct inode *inode = wdata->cfile->dentry->d_inode;
1639
1640 for (i = 0; i < wdata->nr_pages; i++) {
1641 lock_page(wdata->pages[i]);
1642 clear_page_dirty_for_io(wdata->pages[i]);
1643 }
1644
1645 do {
1646 rc = cifs_async_writev(wdata);
1647 } while (rc == -EAGAIN);
1648
1649 for (i = 0; i < wdata->nr_pages; i++) {
1650 if (rc != 0)
1651 SetPageError(wdata->pages[i]);
1652 unlock_page(wdata->pages[i]);
1653 }
1654
1655 mapping_set_error(inode->i_mapping, rc);
1656 kref_put(&wdata->refcount, cifs_writedata_release);
1657}
1658
1659static void
1660cifs_writev_complete(struct work_struct *work)
1661{
1662 struct cifs_writedata *wdata = container_of(work,
1663 struct cifs_writedata, work);
1664 struct inode *inode = wdata->cfile->dentry->d_inode;
1665 int i = 0;
1666
1667 if (wdata->result == 0) {
1668 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
1669 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
1670 wdata->bytes);
1671 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
1672 return cifs_writev_requeue(wdata);
1673
1674 for (i = 0; i < wdata->nr_pages; i++) {
1675 struct page *page = wdata->pages[i];
1676 if (wdata->result == -EAGAIN)
1677 __set_page_dirty_nobuffers(page);
1678 else if (wdata->result < 0)
1679 SetPageError(page);
1680 end_page_writeback(page);
1681 page_cache_release(page);
1682 }
1683 if (wdata->result != -EAGAIN)
1684 mapping_set_error(inode->i_mapping, wdata->result);
1685 kref_put(&wdata->refcount, cifs_writedata_release);
1686}
1687
1688struct cifs_writedata *
1689cifs_writedata_alloc(unsigned int nr_pages)
1690{
1691 struct cifs_writedata *wdata;
1692
1693 /* this would overflow */
1694 if (nr_pages == 0) {
1695 cERROR(1, "%s: called with nr_pages == 0!", __func__);
1696 return NULL;
1697 }
1698
1699 /* writedata + number of page pointers */
1700 wdata = kzalloc(sizeof(*wdata) +
1701 sizeof(struct page *) * (nr_pages - 1), GFP_NOFS);
1702 if (wdata != NULL) {
1703 INIT_WORK(&wdata->work, cifs_writev_complete);
1704 kref_init(&wdata->refcount);
1705 }
1706 return wdata;
1707}
1708
1709/*
1710 * Check the midState and signature on received buffer (if any), and queue the
1711 * workqueue completion task.
1712 */
1713static void
1714cifs_writev_callback(struct mid_q_entry *mid)
1715{
1716 struct cifs_writedata *wdata = mid->callback_data;
1717 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1718 unsigned int written;
1719 WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
1720
1721 switch (mid->midState) {
1722 case MID_RESPONSE_RECEIVED:
1723 wdata->result = cifs_check_receive(mid, tcon->ses->server, 0);
1724 if (wdata->result != 0)
1725 break;
1726
1727 written = le16_to_cpu(smb->CountHigh);
1728 written <<= 16;
1729 written += le16_to_cpu(smb->Count);
1730 /*
1731 * Mask off high 16 bits when bytes written as returned
1732 * by the server is greater than bytes requested by the
1733 * client. OS/2 servers are known to set incorrect
1734 * CountHigh values.
1735 */
1736 if (written > wdata->bytes)
1737 written &= 0xFFFF;
1738
1739 if (written < wdata->bytes)
1740 wdata->result = -ENOSPC;
1741 else
1742 wdata->bytes = written;
1743 break;
1744 case MID_REQUEST_SUBMITTED:
1745 case MID_RETRY_NEEDED:
1746 wdata->result = -EAGAIN;
1747 break;
1748 default:
1749 wdata->result = -EIO;
1750 break;
1751 }
1752
1753 queue_work(system_nrt_wq, &wdata->work);
1754 DeleteMidQEntry(mid);
1755 atomic_dec(&tcon->ses->server->inFlight);
1756 wake_up(&tcon->ses->server->request_q);
1757}
1758
1759/* cifs_async_writev - send an async write, and set up mid to handle result */
1760int
1761cifs_async_writev(struct cifs_writedata *wdata)
1762{
1763 int i, rc = -EACCES;
1764 WRITE_REQ *smb = NULL;
1765 int wct;
1766 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1767 struct inode *inode = wdata->cfile->dentry->d_inode;
1768 struct kvec *iov = NULL;
1769
1770 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
1771 wct = 14;
1772 } else {
1773 wct = 12;
1774 if (wdata->offset >> 32 > 0) {
1775 /* can not handle big offset for old srv */
1776 return -EIO;
1777 }
1778 }
1779
1780 rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&smb);
1781 if (rc)
1782 goto async_writev_out;
1783
1784 /* 1 iov per page + 1 for header */
1785 iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
1786 if (iov == NULL) {
1787 rc = -ENOMEM;
1788 goto async_writev_out;
1789 }
1790
1791 smb->hdr.Pid = cpu_to_le16((__u16)wdata->cfile->pid);
1792 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->cfile->pid >> 16));
1793
1794 smb->AndXCommand = 0xFF; /* none */
1795 smb->Fid = wdata->cfile->netfid;
1796 smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF);
1797 if (wct == 14)
1798 smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32);
1799 smb->Reserved = 0xFFFFFFFF;
1800 smb->WriteMode = 0;
1801 smb->Remaining = 0;
1802
1803 smb->DataOffset =
1804 cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
1805
1806 /* 4 for RFC1001 length + 1 for BCC */
1807 iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
1808 iov[0].iov_base = smb;
1809
1810 /* marshal up the pages into iov array */
1811 wdata->bytes = 0;
1812 for (i = 0; i < wdata->nr_pages; i++) {
1813 iov[i + 1].iov_len = min(inode->i_size -
1814 page_offset(wdata->pages[i]),
1815 (loff_t)PAGE_CACHE_SIZE);
1816 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1817 wdata->bytes += iov[i + 1].iov_len;
1818 }
1819
1820 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
1821
1822 smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF);
1823 smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16);
1824
1825 if (wct == 14) {
1826 inc_rfc1001_len(&smb->hdr, wdata->bytes + 1);
1827 put_bcc(wdata->bytes + 1, &smb->hdr);
1828 } else {
1829 /* wct == 12 */
1830 struct smb_com_writex_req *smbw =
1831 (struct smb_com_writex_req *)smb;
1832 inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5);
1833 put_bcc(wdata->bytes + 5, &smbw->hdr);
1834 iov[0].iov_len += 4; /* pad bigger by four bytes */
1835 }
1836
1837 kref_get(&wdata->refcount);
1838 rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1,
1839 cifs_writev_callback, wdata, false);
1840
1841 if (rc == 0)
1842 cifs_stats_inc(&tcon->num_writes);
1843 else
1844 kref_put(&wdata->refcount, cifs_writedata_release);
1845
1846 /* send is done, unmap pages */
1847 for (i = 0; i < wdata->nr_pages; i++)
1848 kunmap(wdata->pages[i]);
1849
1850async_writev_out:
1851 cifs_small_buf_release(smb);
1852 kfree(iov);
1853 return rc;
1854}
1855
1605int 1856int
1606CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, 1857CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms,
1607 const int netfid, const unsigned int count, 1858 unsigned int *nbytes, struct kvec *iov, int n_vec,
1608 const __u64 offset, unsigned int *nbytes, struct kvec *iov, 1859 const int long_op)
1609 int n_vec, const int long_op)
1610{ 1860{
1611 int rc = -EACCES; 1861 int rc = -EACCES;
1612 WRITE_REQ *pSMB = NULL; 1862 WRITE_REQ *pSMB = NULL;
1613 int wct; 1863 int wct;
1614 int smb_hdr_len; 1864 int smb_hdr_len;
1615 int resp_buf_type = 0; 1865 int resp_buf_type = 0;
1866 __u32 pid = io_parms->pid;
1867 __u16 netfid = io_parms->netfid;
1868 __u64 offset = io_parms->offset;
1869 struct cifs_tcon *tcon = io_parms->tcon;
1870 unsigned int count = io_parms->length;
1616 1871
1617 *nbytes = 0; 1872 *nbytes = 0;
1618 1873
@@ -1630,6 +1885,10 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1630 rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB); 1885 rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB);
1631 if (rc) 1886 if (rc)
1632 return rc; 1887 return rc;
1888
1889 pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
1890 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
1891
1633 /* tcon and ses pointer are checked in smb_init */ 1892 /* tcon and ses pointer are checked in smb_init */
1634 if (tcon->ses->server == NULL) 1893 if (tcon->ses->server == NULL)
1635 return -ECONNABORTED; 1894 return -ECONNABORTED;
@@ -1705,7 +1964,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1705 1964
1706 1965
1707int 1966int
1708CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 1967CIFSSMBLock(const int xid, struct cifs_tcon *tcon,
1709 const __u16 smb_file_id, const __u64 len, 1968 const __u16 smb_file_id, const __u64 len,
1710 const __u64 offset, const __u32 numUnlock, 1969 const __u64 offset, const __u32 numUnlock,
1711 const __u32 numLock, const __u8 lockType, 1970 const __u32 numLock, const __u8 lockType,
@@ -1775,7 +2034,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1775} 2034}
1776 2035
1777int 2036int
1778CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 2037CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon,
1779 const __u16 smb_file_id, const int get_flag, const __u64 len, 2038 const __u16 smb_file_id, const int get_flag, const __u64 len,
1780 struct file_lock *pLockData, const __u16 lock_type, 2039 struct file_lock *pLockData, const __u16 lock_type,
1781 const bool waitFlag) 2040 const bool waitFlag)
@@ -1913,7 +2172,7 @@ plk_err_exit:
1913 2172
1914 2173
1915int 2174int
1916CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) 2175CIFSSMBClose(const int xid, struct cifs_tcon *tcon, int smb_file_id)
1917{ 2176{
1918 int rc = 0; 2177 int rc = 0;
1919 CLOSE_REQ *pSMB = NULL; 2178 CLOSE_REQ *pSMB = NULL;
@@ -1946,7 +2205,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1946} 2205}
1947 2206
1948int 2207int
1949CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id) 2208CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, int smb_file_id)
1950{ 2209{
1951 int rc = 0; 2210 int rc = 0;
1952 FLUSH_REQ *pSMB = NULL; 2211 FLUSH_REQ *pSMB = NULL;
@@ -1967,7 +2226,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1967} 2226}
1968 2227
1969int 2228int
1970CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, 2229CIFSSMBRename(const int xid, struct cifs_tcon *tcon,
1971 const char *fromName, const char *toName, 2230 const char *fromName, const char *toName,
1972 const struct nls_table *nls_codepage, int remap) 2231 const struct nls_table *nls_codepage, int remap)
1973{ 2232{
@@ -2034,7 +2293,7 @@ renameRetry:
2034 return rc; 2293 return rc;
2035} 2294}
2036 2295
2037int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, 2296int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon,
2038 int netfid, const char *target_name, 2297 int netfid, const char *target_name,
2039 const struct nls_table *nls_codepage, int remap) 2298 const struct nls_table *nls_codepage, int remap)
2040{ 2299{
@@ -2114,7 +2373,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2114} 2373}
2115 2374
2116int 2375int
2117CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char *fromName, 2376CIFSSMBCopy(const int xid, struct cifs_tcon *tcon, const char *fromName,
2118 const __u16 target_tid, const char *toName, const int flags, 2377 const __u16 target_tid, const char *toName, const int flags,
2119 const struct nls_table *nls_codepage, int remap) 2378 const struct nls_table *nls_codepage, int remap)
2120{ 2379{
@@ -2182,7 +2441,7 @@ copyRetry:
2182} 2441}
2183 2442
2184int 2443int
2185CIFSUnixCreateSymLink(const int xid, struct cifsTconInfo *tcon, 2444CIFSUnixCreateSymLink(const int xid, struct cifs_tcon *tcon,
2186 const char *fromName, const char *toName, 2445 const char *fromName, const char *toName,
2187 const struct nls_table *nls_codepage) 2446 const struct nls_table *nls_codepage)
2188{ 2447{
@@ -2271,7 +2530,7 @@ createSymLinkRetry:
2271} 2530}
2272 2531
2273int 2532int
2274CIFSUnixCreateHardLink(const int xid, struct cifsTconInfo *tcon, 2533CIFSUnixCreateHardLink(const int xid, struct cifs_tcon *tcon,
2275 const char *fromName, const char *toName, 2534 const char *fromName, const char *toName,
2276 const struct nls_table *nls_codepage, int remap) 2535 const struct nls_table *nls_codepage, int remap)
2277{ 2536{
@@ -2356,7 +2615,7 @@ createHardLinkRetry:
2356} 2615}
2357 2616
2358int 2617int
2359CIFSCreateHardLink(const int xid, struct cifsTconInfo *tcon, 2618CIFSCreateHardLink(const int xid, struct cifs_tcon *tcon,
2360 const char *fromName, const char *toName, 2619 const char *fromName, const char *toName,
2361 const struct nls_table *nls_codepage, int remap) 2620 const struct nls_table *nls_codepage, int remap)
2362{ 2621{
@@ -2428,7 +2687,7 @@ winCreateHardLinkRetry:
2428} 2687}
2429 2688
2430int 2689int
2431CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, 2690CIFSSMBUnixQuerySymLink(const int xid, struct cifs_tcon *tcon,
2432 const unsigned char *searchName, char **symlinkinfo, 2691 const unsigned char *searchName, char **symlinkinfo,
2433 const struct nls_table *nls_codepage) 2692 const struct nls_table *nls_codepage)
2434{ 2693{
@@ -2533,7 +2792,7 @@ querySymLinkRetry:
2533 * it is not compiled in by default until callers fixed up and more tested. 2792 * it is not compiled in by default until callers fixed up and more tested.
2534 */ 2793 */
2535int 2794int
2536CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2795CIFSSMBQueryReparseLinkInfo(const int xid, struct cifs_tcon *tcon,
2537 const unsigned char *searchName, 2796 const unsigned char *searchName,
2538 char *symlinkinfo, const int buflen, __u16 fid, 2797 char *symlinkinfo, const int buflen, __u16 fid,
2539 const struct nls_table *nls_codepage) 2798 const struct nls_table *nls_codepage)
@@ -2771,7 +3030,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
2771} 3030}
2772 3031
2773int 3032int
2774CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, 3033CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon,
2775 const unsigned char *searchName, 3034 const unsigned char *searchName,
2776 char *acl_inf, const int buflen, const int acl_type, 3035 char *acl_inf, const int buflen, const int acl_type,
2777 const struct nls_table *nls_codepage, int remap) 3036 const struct nls_table *nls_codepage, int remap)
@@ -2859,7 +3118,7 @@ queryAclRetry:
2859} 3118}
2860 3119
2861int 3120int
2862CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, 3121CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon,
2863 const unsigned char *fileName, 3122 const unsigned char *fileName,
2864 const char *local_acl, const int buflen, 3123 const char *local_acl, const int buflen,
2865 const int acl_type, 3124 const int acl_type,
@@ -2939,7 +3198,7 @@ setACLerrorExit:
2939 3198
2940/* BB fix tabs in this function FIXME BB */ 3199/* BB fix tabs in this function FIXME BB */
2941int 3200int
2942CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, 3201CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon,
2943 const int netfid, __u64 *pExtAttrBits, __u64 *pMask) 3202 const int netfid, __u64 *pExtAttrBits, __u64 *pMask)
2944{ 3203{
2945 int rc = 0; 3204 int rc = 0;
@@ -3032,7 +3291,7 @@ GetExtAttrOut:
3032 */ 3291 */
3033static int 3292static int
3034smb_init_nttransact(const __u16 sub_command, const int setup_count, 3293smb_init_nttransact(const __u16 sub_command, const int setup_count,
3035 const int parm_len, struct cifsTconInfo *tcon, 3294 const int parm_len, struct cifs_tcon *tcon,
3036 void **ret_buf) 3295 void **ret_buf)
3037{ 3296{
3038 int rc; 3297 int rc;
@@ -3115,7 +3374,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3115 3374
3116/* Get Security Descriptor (by handle) from remote server for a file or dir */ 3375/* Get Security Descriptor (by handle) from remote server for a file or dir */
3117int 3376int
3118CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, 3377CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid,
3119 struct cifs_ntsd **acl_inf, __u32 *pbuflen) 3378 struct cifs_ntsd **acl_inf, __u32 *pbuflen)
3120{ 3379{
3121 int rc = 0; 3380 int rc = 0;
@@ -3207,7 +3466,7 @@ qsec_out:
3207} 3466}
3208 3467
3209int 3468int
3210CIFSSMBSetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, 3469CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid,
3211 struct cifs_ntsd *pntsd, __u32 acllen) 3470 struct cifs_ntsd *pntsd, __u32 acllen)
3212{ 3471{
3213 __u16 byte_count, param_count, data_count, param_offset, data_offset; 3472 __u16 byte_count, param_count, data_count, param_offset, data_offset;
@@ -3273,7 +3532,7 @@ setCifsAclRetry:
3273 3532
3274/* Legacy Query Path Information call for lookup to old servers such 3533/* Legacy Query Path Information call for lookup to old servers such
3275 as Win9x/WinME */ 3534 as Win9x/WinME */
3276int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, 3535int SMBQueryInformation(const int xid, struct cifs_tcon *tcon,
3277 const unsigned char *searchName, 3536 const unsigned char *searchName,
3278 FILE_ALL_INFO *pFinfo, 3537 FILE_ALL_INFO *pFinfo,
3279 const struct nls_table *nls_codepage, int remap) 3538 const struct nls_table *nls_codepage, int remap)
@@ -3341,7 +3600,7 @@ QInfRetry:
3341} 3600}
3342 3601
3343int 3602int
3344CIFSSMBQFileInfo(const int xid, struct cifsTconInfo *tcon, 3603CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon,
3345 u16 netfid, FILE_ALL_INFO *pFindData) 3604 u16 netfid, FILE_ALL_INFO *pFindData)
3346{ 3605{
3347 struct smb_t2_qfi_req *pSMB = NULL; 3606 struct smb_t2_qfi_req *pSMB = NULL;
@@ -3408,7 +3667,7 @@ QFileInfoRetry:
3408} 3667}
3409 3668
3410int 3669int
3411CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, 3670CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon,
3412 const unsigned char *searchName, 3671 const unsigned char *searchName,
3413 FILE_ALL_INFO *pFindData, 3672 FILE_ALL_INFO *pFindData,
3414 int legacy /* old style infolevel */, 3673 int legacy /* old style infolevel */,
@@ -3509,7 +3768,7 @@ QPathInfoRetry:
3509} 3768}
3510 3769
3511int 3770int
3512CIFSSMBUnixQFileInfo(const int xid, struct cifsTconInfo *tcon, 3771CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon,
3513 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData) 3772 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData)
3514{ 3773{
3515 struct smb_t2_qfi_req *pSMB = NULL; 3774 struct smb_t2_qfi_req *pSMB = NULL;
@@ -3578,7 +3837,7 @@ UnixQFileInfoRetry:
3578} 3837}
3579 3838
3580int 3839int
3581CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon, 3840CIFSSMBUnixQPathInfo(const int xid, struct cifs_tcon *tcon,
3582 const unsigned char *searchName, 3841 const unsigned char *searchName,
3583 FILE_UNIX_BASIC_INFO *pFindData, 3842 FILE_UNIX_BASIC_INFO *pFindData,
3584 const struct nls_table *nls_codepage, int remap) 3843 const struct nls_table *nls_codepage, int remap)
@@ -3664,7 +3923,7 @@ UnixQPathInfoRetry:
3664 3923
3665/* xid, tcon, searchName and codepage are input parms, rest are returned */ 3924/* xid, tcon, searchName and codepage are input parms, rest are returned */
3666int 3925int
3667CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, 3926CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
3668 const char *searchName, 3927 const char *searchName,
3669 const struct nls_table *nls_codepage, 3928 const struct nls_table *nls_codepage,
3670 __u16 *pnetfid, 3929 __u16 *pnetfid,
@@ -3812,7 +4071,7 @@ findFirstRetry:
3812 return rc; 4071 return rc;
3813} 4072}
3814 4073
3815int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, 4074int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
3816 __u16 searchHandle, struct cifs_search_info *psrch_inf) 4075 __u16 searchHandle, struct cifs_search_info *psrch_inf)
3817{ 4076{
3818 TRANSACTION2_FNEXT_REQ *pSMB = NULL; 4077 TRANSACTION2_FNEXT_REQ *pSMB = NULL;
@@ -3950,7 +4209,7 @@ FNext2_err_exit:
3950} 4209}
3951 4210
3952int 4211int
3953CIFSFindClose(const int xid, struct cifsTconInfo *tcon, 4212CIFSFindClose(const int xid, struct cifs_tcon *tcon,
3954 const __u16 searchHandle) 4213 const __u16 searchHandle)
3955{ 4214{
3956 int rc = 0; 4215 int rc = 0;
@@ -3982,7 +4241,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3982} 4241}
3983 4242
3984int 4243int
3985CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, 4244CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon,
3986 const unsigned char *searchName, 4245 const unsigned char *searchName,
3987 __u64 *inode_number, 4246 __u64 *inode_number,
3988 const struct nls_table *nls_codepage, int remap) 4247 const struct nls_table *nls_codepage, int remap)
@@ -4184,7 +4443,7 @@ parse_DFS_referrals_exit:
4184} 4443}
4185 4444
4186int 4445int
4187CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, 4446CIFSGetDFSRefer(const int xid, struct cifs_ses *ses,
4188 const unsigned char *searchName, 4447 const unsigned char *searchName,
4189 struct dfs_info3_param **target_nodes, 4448 struct dfs_info3_param **target_nodes,
4190 unsigned int *num_of_nodes, 4449 unsigned int *num_of_nodes,
@@ -4233,7 +4492,7 @@ getDFSRetry:
4233 } 4492 }
4234 4493
4235 if (ses->server) { 4494 if (ses->server) {
4236 if (ses->server->secMode & 4495 if (ses->server->sec_mode &
4237 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 4496 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
4238 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 4497 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
4239 } 4498 }
@@ -4298,7 +4557,7 @@ GetDFSRefExit:
4298 4557
4299/* Query File System Info such as free space to old servers such as Win 9x */ 4558/* Query File System Info such as free space to old servers such as Win 9x */
4300int 4559int
4301SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData) 4560SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData)
4302{ 4561{
4303/* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */ 4562/* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */
4304 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4563 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4377,7 +4636,7 @@ oldQFSInfoRetry:
4377} 4636}
4378 4637
4379int 4638int
4380CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData) 4639CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData)
4381{ 4640{
4382/* level 0x103 SMB_QUERY_FILE_SYSTEM_INFO */ 4641/* level 0x103 SMB_QUERY_FILE_SYSTEM_INFO */
4383 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4642 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4456,7 +4715,7 @@ QFSInfoRetry:
4456} 4715}
4457 4716
4458int 4717int
4459CIFSSMBQFSAttributeInfo(const int xid, struct cifsTconInfo *tcon) 4718CIFSSMBQFSAttributeInfo(const int xid, struct cifs_tcon *tcon)
4460{ 4719{
4461/* level 0x105 SMB_QUERY_FILE_SYSTEM_INFO */ 4720/* level 0x105 SMB_QUERY_FILE_SYSTEM_INFO */
4462 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4721 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4526,7 +4785,7 @@ QFSAttributeRetry:
4526} 4785}
4527 4786
4528int 4787int
4529CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon) 4788CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon)
4530{ 4789{
4531/* level 0x104 SMB_QUERY_FILE_SYSTEM_INFO */ 4790/* level 0x104 SMB_QUERY_FILE_SYSTEM_INFO */
4532 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4791 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4597,7 +4856,7 @@ QFSDeviceRetry:
4597} 4856}
4598 4857
4599int 4858int
4600CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon) 4859CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon)
4601{ 4860{
4602/* level 0x200 SMB_QUERY_CIFS_UNIX_INFO */ 4861/* level 0x200 SMB_QUERY_CIFS_UNIX_INFO */
4603 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4862 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4667,7 +4926,7 @@ QFSUnixRetry:
4667} 4926}
4668 4927
4669int 4928int
4670CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap) 4929CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon, __u64 cap)
4671{ 4930{
4672/* level 0x200 SMB_SET_CIFS_UNIX_INFO */ 4931/* level 0x200 SMB_SET_CIFS_UNIX_INFO */
4673 TRANSACTION2_SETFSI_REQ *pSMB = NULL; 4932 TRANSACTION2_SETFSI_REQ *pSMB = NULL;
@@ -4741,7 +5000,7 @@ SETFSUnixRetry:
4741 5000
4742 5001
4743int 5002int
4744CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 5003CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon,
4745 struct kstatfs *FSData) 5004 struct kstatfs *FSData)
4746{ 5005{
4747/* level 0x201 SMB_QUERY_CIFS_POSIX_INFO */ 5006/* level 0x201 SMB_QUERY_CIFS_POSIX_INFO */
@@ -4834,7 +5093,7 @@ QFSPosixRetry:
4834 in Samba which this routine can run into */ 5093 in Samba which this routine can run into */
4835 5094
4836int 5095int
4837CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, 5096CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon, const char *fileName,
4838 __u64 size, bool SetAllocation, 5097 __u64 size, bool SetAllocation,
4839 const struct nls_table *nls_codepage, int remap) 5098 const struct nls_table *nls_codepage, int remap)
4840{ 5099{
@@ -4923,7 +5182,7 @@ SetEOFRetry:
4923} 5182}
4924 5183
4925int 5184int
4926CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, 5185CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, __u64 size,
4927 __u16 fid, __u32 pid_of_opener, bool SetAllocation) 5186 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4928{ 5187{
4929 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5188 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -5005,7 +5264,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
5005 time and resort to the original setpathinfo level which takes the ancient 5264 time and resort to the original setpathinfo level which takes the ancient
5006 DOS time format with 2 second granularity */ 5265 DOS time format with 2 second granularity */
5007int 5266int
5008CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, 5267CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
5009 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) 5268 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
5010{ 5269{
5011 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5270 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -5067,7 +5326,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5067} 5326}
5068 5327
5069int 5328int
5070CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, 5329CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon,
5071 bool delete_file, __u16 fid, __u32 pid_of_opener) 5330 bool delete_file, __u16 fid, __u32 pid_of_opener)
5072{ 5331{
5073 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5332 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -5123,7 +5382,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5123} 5382}
5124 5383
5125int 5384int
5126CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, 5385CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon,
5127 const char *fileName, const FILE_BASIC_INFO *data, 5386 const char *fileName, const FILE_BASIC_INFO *data,
5128 const struct nls_table *nls_codepage, int remap) 5387 const struct nls_table *nls_codepage, int remap)
5129{ 5388{
@@ -5207,7 +5466,7 @@ SetTimesRetry:
5207 handling it anyway and NT4 was what we thought it would be needed for 5466 handling it anyway and NT4 was what we thought it would be needed for
5208 Do not delete it until we prove whether needed for Win9x though */ 5467 Do not delete it until we prove whether needed for Win9x though */
5209int 5468int
5210CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName, 5469CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon, char *fileName,
5211 __u16 dos_attrs, const struct nls_table *nls_codepage) 5470 __u16 dos_attrs, const struct nls_table *nls_codepage)
5212{ 5471{
5213 SETATTR_REQ *pSMB = NULL; 5472 SETATTR_REQ *pSMB = NULL;
@@ -5295,7 +5554,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5295} 5554}
5296 5555
5297int 5556int
5298CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, 5557CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
5299 const struct cifs_unix_set_info_args *args, 5558 const struct cifs_unix_set_info_args *args,
5300 u16 fid, u32 pid_of_opener) 5559 u16 fid, u32 pid_of_opener)
5301{ 5560{
@@ -5358,7 +5617,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5358} 5617}
5359 5618
5360int 5619int
5361CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, 5620CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *tcon, char *fileName,
5362 const struct cifs_unix_set_info_args *args, 5621 const struct cifs_unix_set_info_args *args,
5363 const struct nls_table *nls_codepage, int remap) 5622 const struct nls_table *nls_codepage, int remap)
5364{ 5623{
@@ -5445,7 +5704,7 @@ setPermsRetry:
5445 * the data isn't copied to it, but the length is returned. 5704 * the data isn't copied to it, but the length is returned.
5446 */ 5705 */
5447ssize_t 5706ssize_t
5448CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 5707CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon,
5449 const unsigned char *searchName, const unsigned char *ea_name, 5708 const unsigned char *searchName, const unsigned char *ea_name,
5450 char *EAData, size_t buf_size, 5709 char *EAData, size_t buf_size,
5451 const struct nls_table *nls_codepage, int remap) 5710 const struct nls_table *nls_codepage, int remap)
@@ -5626,7 +5885,7 @@ QAllEAsOut:
5626} 5885}
5627 5886
5628int 5887int
5629CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, const char *fileName, 5888CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon, const char *fileName,
5630 const char *ea_name, const void *ea_value, 5889 const char *ea_name, const void *ea_value,
5631 const __u16 ea_value_len, const struct nls_table *nls_codepage, 5890 const __u16 ea_value_len, const struct nls_table *nls_codepage,
5632 int remap) 5891 int remap)
@@ -5753,7 +6012,7 @@ SetEARetry:
5753 * incompatible for network fs clients, we could instead simply 6012 * incompatible for network fs clients, we could instead simply
5754 * expose this config flag by adding a future cifs (and smb2) notify ioctl. 6013 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
5755 */ 6014 */
5756int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, 6015int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon,
5757 const int notify_subdirs, const __u16 netfid, 6016 const int notify_subdirs, const __u16 netfid,
5758 __u32 filter, struct file *pfile, int multishot, 6017 __u32 filter, struct file *pfile, int multishot,
5759 const struct nls_table *nls_codepage) 6018 const struct nls_table *nls_codepage)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index da284e3cb653..6d88b82537c3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -57,62 +57,6 @@
57 57
58extern mempool_t *cifs_req_poolp; 58extern mempool_t *cifs_req_poolp;
59 59
60struct smb_vol {
61 char *username;
62 char *password;
63 char *domainname;
64 char *UNC;
65 char *UNCip;
66 char *iocharset; /* local code page for mapping to and from Unicode */
67 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
68 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
69 uid_t cred_uid;
70 uid_t linux_uid;
71 gid_t linux_gid;
72 mode_t file_mode;
73 mode_t dir_mode;
74 unsigned secFlg;
75 bool retry:1;
76 bool intr:1;
77 bool setuids:1;
78 bool override_uid:1;
79 bool override_gid:1;
80 bool dynperm:1;
81 bool noperm:1;
82 bool no_psx_acl:1; /* set if posix acl support should be disabled */
83 bool cifs_acl:1;
84 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
85 bool server_ino:1; /* use inode numbers from server ie UniqueId */
86 bool direct_io:1;
87 bool strict_io:1; /* strict cache behavior */
88 bool remap:1; /* set to remap seven reserved chars in filenames */
89 bool posix_paths:1; /* unset to not ask for posix pathnames. */
90 bool no_linux_ext:1;
91 bool sfu_emul:1;
92 bool nullauth:1; /* attempt to authenticate with null user */
93 bool nocase:1; /* request case insensitive filenames */
94 bool nobrl:1; /* disable sending byte range locks to srv */
95 bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
96 bool seal:1; /* request transport encryption on share */
97 bool nodfs:1; /* Do not request DFS, even if available */
98 bool local_lease:1; /* check leases only on local system, not remote */
99 bool noblocksnd:1;
100 bool noautotune:1;
101 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1;
105 bool use_smb2:1; /* force smb2 use on mount instead of cifs */
106 unsigned int rsize;
107 unsigned int wsize;
108 bool sockopt_tcp_nodelay:1;
109 unsigned short int port;
110 unsigned long actimeo; /* attribute cache timeout (jiffies) */
111 char *prepath;
112 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
113 struct nls_table *local_nls;
114};
115
116/* FIXME: should these be tunable? */ 60/* FIXME: should these be tunable? */
117#define TLINK_ERROR_EXPIRE (1 * HZ) 61#define TLINK_ERROR_EXPIRE (1 * HZ)
118#define TLINK_IDLE_EXPIRE (600 * HZ) 62#define TLINK_IDLE_EXPIRE (600 * HZ)
@@ -135,9 +79,10 @@ cifs_reconnect(struct TCP_Server_Info *server)
135{ 79{
136 int rc = 0; 80 int rc = 0;
137 struct list_head *tmp, *tmp2; 81 struct list_head *tmp, *tmp2;
138 struct cifsSesInfo *ses; 82 struct cifs_ses *ses;
139 struct cifsTconInfo *tcon; 83 struct cifs_tcon *tcon;
140 struct mid_q_entry *mid_entry; 84 struct mid_q_entry *mid_entry;
85 struct list_head retry_list;
141 86
142 spin_lock(&GlobalMid_Lock); 87 spin_lock(&GlobalMid_Lock);
143 if (server->tcpStatus == CifsExiting) { 88 if (server->tcpStatus == CifsExiting) {
@@ -157,11 +102,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
157 cFYI(1, "%s: marking sessions and tcons for reconnect", __func__); 102 cFYI(1, "%s: marking sessions and tcons for reconnect", __func__);
158 spin_lock(&cifs_tcp_ses_lock); 103 spin_lock(&cifs_tcp_ses_lock);
159 list_for_each(tmp, &server->smb_ses_list) { 104 list_for_each(tmp, &server->smb_ses_list) {
160 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); 105 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
161 ses->need_reconnect = true; 106 ses->need_reconnect = true;
162 ses->ipc_tid = 0; 107 ses->ipc_tid = 0;
163 list_for_each(tmp2, &ses->tcon_list) { 108 list_for_each(tmp2, &ses->tcon_list) {
164 tcon = list_entry(tmp2, struct cifsTconInfo, tcon_list); 109 tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
165 tcon->need_reconnect = true; 110 tcon->need_reconnect = true;
166 } 111 }
167 } 112 }
@@ -189,16 +134,23 @@ cifs_reconnect(struct TCP_Server_Info *server)
189 mutex_unlock(&server->srv_mutex); 134 mutex_unlock(&server->srv_mutex);
190 135
191 /* mark submitted MIDs for retry and issue callback */ 136 /* mark submitted MIDs for retry and issue callback */
192 cFYI(1, "%s: issuing mid callbacks", __func__); 137 INIT_LIST_HEAD(&retry_list);
138 cFYI(1, "%s: moving mids to private list", __func__);
193 spin_lock(&GlobalMid_Lock); 139 spin_lock(&GlobalMid_Lock);
194 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 140 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
195 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 141 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
196 if (mid_entry->midState == MID_REQUEST_SUBMITTED) 142 if (mid_entry->midState == MID_REQUEST_SUBMITTED)
197 mid_entry->midState = MID_RETRY_NEEDED; 143 mid_entry->midState = MID_RETRY_NEEDED;
144 list_move(&mid_entry->qhead, &retry_list);
145 }
146 spin_unlock(&GlobalMid_Lock);
147
148 cFYI(1, "%s: issuing mid callbacks", __func__);
149 list_for_each_safe(tmp, tmp2, &retry_list) {
150 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
198 list_del_init(&mid_entry->qhead); 151 list_del_init(&mid_entry->qhead);
199 mid_entry->callback(mid_entry); 152 mid_entry->callback(mid_entry);
200 } 153 }
201 spin_unlock(&GlobalMid_Lock);
202 154
203 while (server->tcpStatus == CifsNeedReconnect) { 155 while (server->tcpStatus == CifsNeedReconnect) {
204 try_to_freeze(); 156 try_to_freeze();
@@ -672,12 +624,12 @@ multi_t2_fnd:
672 mid_entry->when_received = jiffies; 624 mid_entry->when_received = jiffies;
673#endif 625#endif
674 list_del_init(&mid_entry->qhead); 626 list_del_init(&mid_entry->qhead);
675 mid_entry->callback(mid_entry);
676 break; 627 break;
677 } 628 }
678 spin_unlock(&GlobalMid_Lock); 629 spin_unlock(&GlobalMid_Lock);
679 630
680 if (mid_entry != NULL) { 631 if (mid_entry != NULL) {
632 mid_entry->callback(mid_entry);
681 /* Was previous buf put in mpx struct for multi-rsp? */ 633 /* Was previous buf put in mpx struct for multi-rsp? */
682 if (!isMultiRsp) { 634 if (!isMultiRsp) {
683 /* smb buffer will be freed by user thread */ 635 /* smb buffer will be freed by user thread */
@@ -741,15 +693,25 @@ multi_t2_fnd:
741 cifs_small_buf_release(smallbuf); 693 cifs_small_buf_release(smallbuf);
742 694
743 if (!list_empty(&server->pending_mid_q)) { 695 if (!list_empty(&server->pending_mid_q)) {
696 struct list_head dispose_list;
697
698 INIT_LIST_HEAD(&dispose_list);
744 spin_lock(&GlobalMid_Lock); 699 spin_lock(&GlobalMid_Lock);
745 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 700 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
746 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 701 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
747 cFYI(1, "Clearing Mid 0x%x - issuing callback", 702 cFYI(1, "Clearing mid 0x%x", mid_entry->mid);
748 mid_entry->mid); 703 mid_entry->midState = MID_SHUTDOWN;
704 list_move(&mid_entry->qhead, &dispose_list);
705 }
706 spin_unlock(&GlobalMid_Lock);
707
708 /* now walk dispose list and issue callbacks */
709 list_for_each_safe(tmp, tmp2, &dispose_list) {
710 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
711 cFYI(1, "Callback mid 0x%x", mid_entry->mid);
749 list_del_init(&mid_entry->qhead); 712 list_del_init(&mid_entry->qhead);
750 mid_entry->callback(mid_entry); 713 mid_entry->callback(mid_entry);
751 } 714 }
752 spin_unlock(&GlobalMid_Lock);
753 /* 1/8th of sec is more than enough time for them to exit */ 715 /* 1/8th of sec is more than enough time for them to exit */
754 msleep(125); 716 msleep(125);
755 } 717 }
@@ -1062,13 +1024,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1062 (strnicmp(value, "1", 1) == 0)) { 1024 (strnicmp(value, "1", 1) == 0)) {
1063 /* this is the default */ 1025 /* this is the default */
1064 continue; 1026 continue;
1065 } else if ((strnicmp(value, "smb2", 4) == 0) ||
1066 (strnicmp(value, "2", 1) == 0)) {
1067#ifdef CONFIG_CIFS_SMB2
1068 vol->use_smb2 = true;
1069#else
1070 cERROR(1, "smb2 support not enabled");
1071#endif /* CONFIG_CIFS_SMB2 */
1072 } 1027 }
1073 } else if ((strnicmp(data, "unc", 3) == 0) 1028 } else if ((strnicmp(data, "unc", 3) == 0)
1074 || (strnicmp(data, "target", 6) == 0) 1029 || (strnicmp(data, "target", 6) == 0)
@@ -1404,6 +1359,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1404 vol->server_ino = 1; 1359 vol->server_ino = 1;
1405 } else if (strnicmp(data, "noserverino", 9) == 0) { 1360 } else if (strnicmp(data, "noserverino", 9) == 0) {
1406 vol->server_ino = 0; 1361 vol->server_ino = 0;
1362 } else if (strnicmp(data, "rwpidforward", 4) == 0) {
1363 vol->rwpidforward = 1;
1407 } else if (strnicmp(data, "cifsacl", 7) == 0) { 1364 } else if (strnicmp(data, "cifsacl", 7) == 0) {
1408 vol->cifs_acl = 1; 1365 vol->cifs_acl = 1;
1409 } else if (strnicmp(data, "nocifsacl", 9) == 0) { 1366 } else if (strnicmp(data, "nocifsacl", 9) == 0) {
@@ -1640,16 +1597,35 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
1640 1597
1641 /* now check if signing mode is acceptable */ 1598 /* now check if signing mode is acceptable */
1642 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && 1599 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
1643 (server->secMode & SECMODE_SIGN_REQUIRED)) 1600 (server->sec_mode & SECMODE_SIGN_REQUIRED))
1644 return false; 1601 return false;
1645 else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && 1602 else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
1646 (server->secMode & 1603 (server->sec_mode &
1647 (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) 1604 (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
1648 return false; 1605 return false;
1649 1606
1650 return true; 1607 return true;
1651} 1608}
1652 1609
1610static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr,
1611 struct smb_vol *vol)
1612{
1613 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
1614 return 0;
1615
1616 if (!match_address(server, addr,
1617 (struct sockaddr *)&vol->srcaddr))
1618 return 0;
1619
1620 if (!match_port(server, addr))
1621 return 0;
1622
1623 if (!match_security(server, vol))
1624 return 0;
1625
1626 return 1;
1627}
1628
1653static struct TCP_Server_Info * 1629static struct TCP_Server_Info *
1654cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) 1630cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1655{ 1631{
@@ -1657,17 +1633,7 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1657 1633
1658 spin_lock(&cifs_tcp_ses_lock); 1634 spin_lock(&cifs_tcp_ses_lock);
1659 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { 1635 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
1660 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) 1636 if (!match_server(server, addr, vol))
1661 continue;
1662
1663 if (!match_address(server, addr,
1664 (struct sockaddr *)&vol->srcaddr))
1665 continue;
1666
1667 if (!match_port(server, addr))
1668 continue;
1669
1670 if (!match_security(server, vol))
1671 continue; 1637 continue;
1672 1638
1673 ++server->srv_count; 1639 ++server->srv_count;
@@ -1861,32 +1827,39 @@ out_err:
1861 return ERR_PTR(rc); 1827 return ERR_PTR(rc);
1862} 1828}
1863 1829
1864static struct cifsSesInfo * 1830static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
1831{
1832 switch (ses->server->secType) {
1833 case Kerberos:
1834 if (vol->cred_uid != ses->cred_uid)
1835 return 0;
1836 break;
1837 default:
1838 /* anything else takes username/password */
1839 if (ses->user_name == NULL)
1840 return 0;
1841 if (strncmp(ses->user_name, vol->username,
1842 MAX_USERNAME_SIZE))
1843 return 0;
1844 if (strlen(vol->username) != 0 &&
1845 ses->password != NULL &&
1846 strncmp(ses->password,
1847 vol->password ? vol->password : "",
1848 MAX_PASSWORD_SIZE))
1849 return 0;
1850 }
1851 return 1;
1852}
1853
1854static struct cifs_ses *
1865cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) 1855cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1866{ 1856{
1867 struct cifsSesInfo *ses; 1857 struct cifs_ses *ses;
1868 1858
1869 spin_lock(&cifs_tcp_ses_lock); 1859 spin_lock(&cifs_tcp_ses_lock);
1870 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { 1860 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
1871 switch (server->secType) { 1861 if (!match_session(ses, vol))
1872 case Kerberos: 1862 continue;
1873 if (vol->cred_uid != ses->cred_uid)
1874 continue;
1875 break;
1876 default:
1877 /* anything else takes username/password */
1878 if (ses->user_name == NULL)
1879 continue;
1880 if (strncmp(ses->user_name, vol->username,
1881 MAX_USERNAME_SIZE))
1882 continue;
1883 if (strlen(vol->username) != 0 &&
1884 ses->password != NULL &&
1885 strncmp(ses->password,
1886 vol->password ? vol->password : "",
1887 MAX_PASSWORD_SIZE))
1888 continue;
1889 }
1890 ++ses->ses_count; 1863 ++ses->ses_count;
1891 spin_unlock(&cifs_tcp_ses_lock); 1864 spin_unlock(&cifs_tcp_ses_lock);
1892 return ses; 1865 return ses;
@@ -1896,7 +1869,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1896} 1869}
1897 1870
1898static void 1871static void
1899cifs_put_smb_ses(struct cifsSesInfo *ses) 1872cifs_put_smb_ses(struct cifs_ses *ses)
1900{ 1873{
1901 int xid; 1874 int xid;
1902 struct TCP_Server_Info *server = ses->server; 1875 struct TCP_Server_Info *server = ses->server;
@@ -1922,11 +1895,11 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1922 1895
1923static bool warned_on_ntlm; /* globals init to false automatically */ 1896static bool warned_on_ntlm; /* globals init to false automatically */
1924 1897
1925static struct cifsSesInfo * 1898static struct cifs_ses *
1926cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) 1899cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1927{ 1900{
1928 int rc = -ENOMEM, xid; 1901 int rc = -ENOMEM, xid;
1929 struct cifsSesInfo *ses; 1902 struct cifs_ses *ses;
1930 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; 1903 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
1931 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; 1904 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
1932 1905
@@ -2029,20 +2002,26 @@ get_ses_fail:
2029 return ERR_PTR(rc); 2002 return ERR_PTR(rc);
2030} 2003}
2031 2004
2032static struct cifsTconInfo * 2005static int match_tcon(struct cifs_tcon *tcon, const char *unc)
2033cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) 2006{
2007 if (tcon->tidStatus == CifsExiting)
2008 return 0;
2009 if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
2010 return 0;
2011 return 1;
2012}
2013
2014static struct cifs_tcon *
2015cifs_find_tcon(struct cifs_ses *ses, const char *unc)
2034{ 2016{
2035 struct list_head *tmp; 2017 struct list_head *tmp;
2036 struct cifsTconInfo *tcon; 2018 struct cifs_tcon *tcon;
2037 2019
2038 spin_lock(&cifs_tcp_ses_lock); 2020 spin_lock(&cifs_tcp_ses_lock);
2039 list_for_each(tmp, &ses->tcon_list) { 2021 list_for_each(tmp, &ses->tcon_list) {
2040 tcon = list_entry(tmp, struct cifsTconInfo, tcon_list); 2022 tcon = list_entry(tmp, struct cifs_tcon, tcon_list);
2041 if (tcon->tidStatus == CifsExiting) 2023 if (!match_tcon(tcon, unc))
2042 continue;
2043 if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
2044 continue; 2024 continue;
2045
2046 ++tcon->tc_count; 2025 ++tcon->tc_count;
2047 spin_unlock(&cifs_tcp_ses_lock); 2026 spin_unlock(&cifs_tcp_ses_lock);
2048 return tcon; 2027 return tcon;
@@ -2052,10 +2031,10 @@ cifs_find_tcon(struct cifsSesInfo *ses, const char *unc)
2052} 2031}
2053 2032
2054static void 2033static void
2055cifs_put_tcon(struct cifsTconInfo *tcon) 2034cifs_put_tcon(struct cifs_tcon *tcon)
2056{ 2035{
2057 int xid; 2036 int xid;
2058 struct cifsSesInfo *ses = tcon->ses; 2037 struct cifs_ses *ses = tcon->ses;
2059 2038
2060 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count); 2039 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count);
2061 spin_lock(&cifs_tcp_ses_lock); 2040 spin_lock(&cifs_tcp_ses_lock);
@@ -2076,11 +2055,11 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
2076 cifs_put_smb_ses(ses); 2055 cifs_put_smb_ses(ses);
2077} 2056}
2078 2057
2079static struct cifsTconInfo * 2058static struct cifs_tcon *
2080cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info) 2059cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
2081{ 2060{
2082 int rc, xid; 2061 int rc, xid;
2083 struct cifsTconInfo *tcon; 2062 struct cifs_tcon *tcon;
2084 2063
2085 tcon = cifs_find_tcon(ses, volume_info->UNC); 2064 tcon = cifs_find_tcon(ses, volume_info->UNC);
2086 if (tcon) { 2065 if (tcon) {
@@ -2169,8 +2148,102 @@ cifs_put_tlink(struct tcon_link *tlink)
2169 return; 2148 return;
2170} 2149}
2171 2150
2151static inline struct tcon_link *
2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb);
2153
2154static int
2155compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
2156{
2157 struct cifs_sb_info *old = CIFS_SB(sb);
2158 struct cifs_sb_info *new = mnt_data->cifs_sb;
2159
2160 if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK))
2161 return 0;
2162
2163 if ((old->mnt_cifs_flags & CIFS_MOUNT_MASK) !=
2164 (new->mnt_cifs_flags & CIFS_MOUNT_MASK))
2165 return 0;
2166
2167 if (old->rsize != new->rsize)
2168 return 0;
2169
2170 /*
2171 * We want to share sb only if we don't specify wsize or specified wsize
2172 * is greater or equal than existing one.
2173 */
2174 if (new->wsize && new->wsize < old->wsize)
2175 return 0;
2176
2177 if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid)
2178 return 0;
2179
2180 if (old->mnt_file_mode != new->mnt_file_mode ||
2181 old->mnt_dir_mode != new->mnt_dir_mode)
2182 return 0;
2183
2184 if (strcmp(old->local_nls->charset, new->local_nls->charset))
2185 return 0;
2186
2187 if (old->actimeo != new->actimeo)
2188 return 0;
2189
2190 return 1;
2191}
2192
2172int 2193int
2173get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, 2194cifs_match_super(struct super_block *sb, void *data)
2195{
2196 struct cifs_mnt_data *mnt_data = (struct cifs_mnt_data *)data;
2197 struct smb_vol *volume_info;
2198 struct cifs_sb_info *cifs_sb;
2199 struct TCP_Server_Info *tcp_srv;
2200 struct cifs_ses *ses;
2201 struct cifs_tcon *tcon;
2202 struct tcon_link *tlink;
2203 struct sockaddr_storage addr;
2204 int rc = 0;
2205
2206 memset(&addr, 0, sizeof(struct sockaddr_storage));
2207
2208 spin_lock(&cifs_tcp_ses_lock);
2209 cifs_sb = CIFS_SB(sb);
2210 tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
2211 if (IS_ERR(tlink)) {
2212 spin_unlock(&cifs_tcp_ses_lock);
2213 return rc;
2214 }
2215 tcon = tlink_tcon(tlink);
2216 ses = tcon->ses;
2217 tcp_srv = ses->server;
2218
2219 volume_info = mnt_data->vol;
2220
2221 if (!volume_info->UNCip || !volume_info->UNC)
2222 goto out;
2223
2224 rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
2225 volume_info->UNCip,
2226 strlen(volume_info->UNCip),
2227 volume_info->port);
2228 if (!rc)
2229 goto out;
2230
2231 if (!match_server(tcp_srv, (struct sockaddr *)&addr, volume_info) ||
2232 !match_session(ses, volume_info) ||
2233 !match_tcon(tcon, volume_info->UNC)) {
2234 rc = 0;
2235 goto out;
2236 }
2237
2238 rc = compare_mount_options(sb, mnt_data);
2239out:
2240 cifs_put_tlink(tlink);
2241 spin_unlock(&cifs_tcp_ses_lock);
2242 return rc;
2243}
2244
2245int
2246get_dfs_path(int xid, struct cifs_ses *pSesInfo, const char *old_path,
2174 const struct nls_table *nls_codepage, unsigned int *pnum_referrals, 2247 const struct nls_table *nls_codepage, unsigned int *pnum_referrals,
2175 struct dfs_info3_param **preferrals, int remap) 2248 struct dfs_info3_param **preferrals, int remap)
2176{ 2249{
@@ -2469,7 +2542,7 @@ ip_connect(struct TCP_Server_Info *server)
2469 return generic_ip_connect(server); 2542 return generic_ip_connect(server);
2470} 2543}
2471 2544
2472void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, 2545void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
2473 struct super_block *sb, struct smb_vol *vol_info) 2546 struct super_block *sb, struct smb_vol *vol_info)
2474{ 2547{
2475 /* if we are reconnecting then should we check to see if 2548 /* if we are reconnecting then should we check to see if
@@ -2498,7 +2571,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2498 2571
2499 if (!CIFSSMBQFSUnixInfo(xid, tcon)) { 2572 if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
2500 __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); 2573 __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
2501 2574 cFYI(1, "unix caps which server supports %lld", cap);
2502 /* check for reconnect case in which we do not 2575 /* check for reconnect case in which we do not
2503 want to change the mount behavior if we can avoid it */ 2576 want to change the mount behavior if we can avoid it */
2504 if (vol_info == NULL) { 2577 if (vol_info == NULL) {
@@ -2516,6 +2589,9 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2516 } 2589 }
2517 } 2590 }
2518 2591
2592 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
2593 cERROR(1, "per-share encryption not supported yet");
2594
2519 cap &= CIFS_UNIX_CAP_MASK; 2595 cap &= CIFS_UNIX_CAP_MASK;
2520 if (vol_info && vol_info->no_psx_acl) 2596 if (vol_info && vol_info->no_psx_acl)
2521 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2597 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
@@ -2534,12 +2610,6 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2534 CIFS_MOUNT_POSIX_PATHS; 2610 CIFS_MOUNT_POSIX_PATHS;
2535 } 2611 }
2536 2612
2537 /* We might be setting the path sep back to a different
2538 form if we are reconnecting and the server switched its
2539 posix path capability for this share */
2540 if (sb && (CIFS_SB(sb)->prepathlen > 0))
2541 CIFS_SB(sb)->prepath[0] = CIFS_DIR_SEP(CIFS_SB(sb));
2542
2543 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 2613 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
2544 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 2614 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
2545 CIFS_SB(sb)->rsize = 127 * 1024; 2615 CIFS_SB(sb)->rsize = 127 * 1024;
@@ -2564,6 +2634,10 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2564 cFYI(1, "very large read cap"); 2634 cFYI(1, "very large read cap");
2565 if (cap & CIFS_UNIX_LARGE_WRITE_CAP) 2635 if (cap & CIFS_UNIX_LARGE_WRITE_CAP)
2566 cFYI(1, "very large write cap"); 2636 cFYI(1, "very large write cap");
2637 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP)
2638 cFYI(1, "transport encryption cap");
2639 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
2640 cFYI(1, "mandatory transport encryption cap");
2567#endif /* CIFS_DEBUG2 */ 2641#endif /* CIFS_DEBUG2 */
2568 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 2642 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
2569 if (vol_info == NULL) { 2643 if (vol_info == NULL) {
@@ -2580,28 +2654,8 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2580 } 2654 }
2581} 2655}
2582 2656
2583static void 2657void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2584convert_delimiter(char *path, char delim) 2658 struct cifs_sb_info *cifs_sb)
2585{
2586 int i;
2587 char old_delim;
2588
2589 if (path == NULL)
2590 return;
2591
2592 if (delim == '/')
2593 old_delim = '\\';
2594 else
2595 old_delim = '/';
2596
2597 for (i = 0; path[i] != '\0'; i++) {
2598 if (path[i] == old_delim)
2599 path[i] = delim;
2600 }
2601}
2602
2603static void setup_cifs_sb(struct smb_vol *pvolume_info,
2604 struct cifs_sb_info *cifs_sb)
2605{ 2659{
2606 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); 2660 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
2607 2661
@@ -2615,40 +2669,19 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2615 else /* default */ 2669 else /* default */
2616 cifs_sb->rsize = CIFSMaxBufSize; 2670 cifs_sb->rsize = CIFSMaxBufSize;
2617 2671
2618 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
2619 cERROR(1, "wsize %d too large, using 4096 instead",
2620 pvolume_info->wsize);
2621 cifs_sb->wsize = 4096;
2622 } else if (pvolume_info->wsize)
2623 cifs_sb->wsize = pvolume_info->wsize;
2624 else
2625 cifs_sb->wsize = min_t(const int,
2626 PAGEVEC_SIZE * PAGE_CACHE_SIZE,
2627 127*1024);
2628 /* old default of CIFSMaxBufSize was too small now
2629 that SMB Write2 can send multiple pages in kvec.
2630 RFC1001 does not describe what happens when frame
2631 bigger than 128K is sent so use that as max in
2632 conjunction with 52K kvec constraint on arch with 4K
2633 page size */
2634
2635 if (cifs_sb->rsize < 2048) { 2672 if (cifs_sb->rsize < 2048) {
2636 cifs_sb->rsize = 2048; 2673 cifs_sb->rsize = 2048;
2637 /* Windows ME may prefer this */ 2674 /* Windows ME may prefer this */
2638 cFYI(1, "readsize set to minimum: 2048"); 2675 cFYI(1, "readsize set to minimum: 2048");
2639 } 2676 }
2640 /* calculate prepath */ 2677
2641 cifs_sb->prepath = pvolume_info->prepath; 2678 /*
2642 if (cifs_sb->prepath) { 2679 * Temporarily set wsize for matching superblock. If we end up using
2643 cifs_sb->prepathlen = strlen(cifs_sb->prepath); 2680 * new sb then cifs_negotiate_wsize will later negotiate it downward
2644 /* we can not convert the / to \ in the path 2681 * if needed.
2645 separators in the prefixpath yet because we do not 2682 */
2646 know (until reset_cifs_unix_caps is called later) 2683 cifs_sb->wsize = pvolume_info->wsize;
2647 whether POSIX PATH CAP is available. We normalize 2684
2648 the / to \ after reset_cifs_unix_caps is called */
2649 pvolume_info->prepath = NULL;
2650 } else
2651 cifs_sb->prepathlen = 0;
2652 cifs_sb->mnt_uid = pvolume_info->linux_uid; 2685 cifs_sb->mnt_uid = pvolume_info->linux_uid;
2653 cifs_sb->mnt_gid = pvolume_info->linux_gid; 2686 cifs_sb->mnt_gid = pvolume_info->linux_gid;
2654 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2687 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
@@ -2657,6 +2690,7 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2657 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2690 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2658 2691
2659 cifs_sb->actimeo = pvolume_info->actimeo; 2692 cifs_sb->actimeo = pvolume_info->actimeo;
2693 cifs_sb->local_nls = pvolume_info->local_nls;
2660 2694
2661 if (pvolume_info->noperm) 2695 if (pvolume_info->noperm)
2662 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2696 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -2676,6 +2710,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2676 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; 2710 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
2677 if (pvolume_info->mand_lock) 2711 if (pvolume_info->mand_lock)
2678 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; 2712 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
2713 if (pvolume_info->rwpidforward)
2714 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
2679 if (pvolume_info->cifs_acl) 2715 if (pvolume_info->cifs_acl)
2680 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; 2716 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
2681 if (pvolume_info->override_uid) 2717 if (pvolume_info->override_uid)
@@ -2709,8 +2745,55 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2709 "mount option supported"); 2745 "mount option supported");
2710} 2746}
2711 2747
2748/*
2749 * When the server supports very large writes via POSIX extensions, we can
2750 * allow up to 2^24 - PAGE_CACHE_SIZE.
2751 *
2752 * Note that this might make for "interesting" allocation problems during
2753 * writeback however (as we have to allocate an array of pointers for the
2754 * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
2755 */
2756#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE)
2757
2758/*
2759 * When the server doesn't allow large posix writes, default to a wsize of
2760 * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size
2761 * described in RFC1001. This allows space for the header without going over
2762 * that by default.
2763 */
2764#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE)
2765
2766/*
2767 * The default wsize is 1M. find_get_pages seems to return a maximum of 256
2768 * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
2769 * a single wsize request with a single call.
2770 */
2771#define CIFS_DEFAULT_WSIZE (1024 * 1024)
2772
2773static unsigned int
2774cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
2775{
2776 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
2777 struct TCP_Server_Info *server = tcon->ses->server;
2778 unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize :
2779 CIFS_DEFAULT_WSIZE;
2780
2781 /* can server support 24-bit write sizes? (via UNIX extensions) */
2782 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
2783 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE);
2784
2785 /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */
2786 if (!(server->capabilities & CAP_LARGE_WRITE_X))
2787 wsize = min_t(unsigned int, wsize, USHRT_MAX);
2788
2789 /* hard limit of CIFS_MAX_WSIZE */
2790 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
2791
2792 return wsize;
2793}
2794
2712static int 2795static int
2713is_path_accessible(int xid, struct cifsTconInfo *tcon, 2796is_path_accessible(int xid, struct cifs_tcon *tcon,
2714 struct cifs_sb_info *cifs_sb, const char *full_path) 2797 struct cifs_sb_info *cifs_sb, const char *full_path)
2715{ 2798{
2716 int rc; 2799 int rc;
@@ -2733,8 +2816,8 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2733 return rc; 2816 return rc;
2734} 2817}
2735 2818
2736static void 2819void
2737cleanup_volume_info(struct smb_vol **pvolume_info) 2820cifs_cleanup_volume_info(struct smb_vol **pvolume_info)
2738{ 2821{
2739 struct smb_vol *volume_info; 2822 struct smb_vol *volume_info;
2740 2823
@@ -2764,24 +2847,13 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2764 char *full_path; 2847 char *full_path;
2765 2848
2766 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); 2849 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1);
2767 full_path = kmalloc(unc_len + cifs_sb->prepathlen + 1, GFP_KERNEL); 2850 full_path = kmalloc(unc_len + 1, GFP_KERNEL);
2768 if (full_path == NULL) 2851 if (full_path == NULL)
2769 return ERR_PTR(-ENOMEM); 2852 return ERR_PTR(-ENOMEM);
2770 2853
2771 strncpy(full_path, volume_info->UNC, unc_len); 2854 strncpy(full_path, volume_info->UNC, unc_len);
2772 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { 2855 full_path[unc_len] = 0; /* add trailing null */
2773 int i; 2856 convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
2774 for (i = 0; i < unc_len; i++) {
2775 if (full_path[i] == '\\')
2776 full_path[i] = '/';
2777 }
2778 }
2779
2780 if (cifs_sb->prepathlen)
2781 strncpy(full_path + unc_len, cifs_sb->prepath,
2782 cifs_sb->prepathlen);
2783
2784 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2785 return full_path; 2857 return full_path;
2786} 2858}
2787 2859
@@ -2796,7 +2868,7 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2796 * determine whether there were referrals. 2868 * determine whether there were referrals.
2797 */ 2869 */
2798static int 2870static int
2799expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo, 2871expand_dfs_referral(int xid, struct cifs_ses *pSesInfo,
2800 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb, 2872 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2801 int check_prefix) 2873 int check_prefix)
2802{ 2874{
@@ -2840,40 +2912,13 @@ expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
2840} 2912}
2841#endif 2913#endif
2842 2914
2843int 2915int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data,
2844cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2916 const char *devname)
2845 const char *devname)
2846{ 2917{
2847 int rc;
2848 int xid;
2849 struct smb_vol *volume_info; 2918 struct smb_vol *volume_info;
2850 struct cifsSesInfo *pSesInfo; 2919 int rc = 0;
2851 struct cifsTconInfo *tcon;
2852 struct TCP_Server_Info *srvTcp;
2853 char *full_path;
2854 struct tcon_link *tlink;
2855#ifdef CONFIG_CIFS_DFS_UPCALL
2856 int referral_walks_count = 0;
2857try_mount_again:
2858 /* cleanup activities if we're chasing a referral */
2859 if (referral_walks_count) {
2860 if (tcon)
2861 cifs_put_tcon(tcon);
2862 else if (pSesInfo)
2863 cifs_put_smb_ses(pSesInfo);
2864
2865 cleanup_volume_info(&volume_info);
2866 FreeXid(xid);
2867 }
2868#endif
2869 rc = 0;
2870 tcon = NULL;
2871 pSesInfo = NULL;
2872 srvTcp = NULL;
2873 full_path = NULL;
2874 tlink = NULL;
2875 2920
2876 xid = GetXid(); 2921 *pvolume_info = NULL;
2877 2922
2878 volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); 2923 volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL);
2879 if (!volume_info) { 2924 if (!volume_info) {
@@ -2881,7 +2926,7 @@ try_mount_again:
2881 goto out; 2926 goto out;
2882 } 2927 }
2883 2928
2884 if (cifs_parse_mount_options(cifs_sb->mountdata, devname, 2929 if (cifs_parse_mount_options(mount_data, devname,
2885 volume_info)) { 2930 volume_info)) {
2886 rc = -EINVAL; 2931 rc = -EINVAL;
2887 goto out; 2932 goto out;
@@ -2914,7 +2959,46 @@ try_mount_again:
2914 goto out; 2959 goto out;
2915 } 2960 }
2916 } 2961 }
2917 cifs_sb->local_nls = volume_info->local_nls; 2962
2963 *pvolume_info = volume_info;
2964 return rc;
2965out:
2966 cifs_cleanup_volume_info(&volume_info);
2967 return rc;
2968}
2969
2970int
2971cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2972 struct smb_vol *volume_info, const char *devname)
2973{
2974 int rc = 0;
2975 int xid;
2976 struct cifs_ses *pSesInfo;
2977 struct cifs_tcon *tcon;
2978 struct TCP_Server_Info *srvTcp;
2979 char *full_path;
2980 struct tcon_link *tlink;
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 int referral_walks_count = 0;
2983try_mount_again:
2984 /* cleanup activities if we're chasing a referral */
2985 if (referral_walks_count) {
2986 if (tcon)
2987 cifs_put_tcon(tcon);
2988 else if (pSesInfo)
2989 cifs_put_smb_ses(pSesInfo);
2990
2991 cifs_cleanup_volume_info(&volume_info);
2992 FreeXid(xid);
2993 }
2994#endif
2995 tcon = NULL;
2996 pSesInfo = NULL;
2997 srvTcp = NULL;
2998 full_path = NULL;
2999 tlink = NULL;
3000
3001 xid = GetXid();
2918 3002
2919 /* get a reference to a tcp session */ 3003 /* get a reference to a tcp session */
2920 srvTcp = cifs_get_tcp_session(volume_info); 3004 srvTcp = cifs_get_tcp_session(volume_info);
@@ -2931,7 +3015,6 @@ try_mount_again:
2931 goto mount_fail_check; 3015 goto mount_fail_check;
2932 } 3016 }
2933 3017
2934 setup_cifs_sb(volume_info, cifs_sb);
2935 if (pSesInfo->capabilities & CAP_LARGE_FILES) 3018 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2936 sb->s_maxbytes = MAX_LFS_FILESIZE; 3019 sb->s_maxbytes = MAX_LFS_FILESIZE;
2937 else 3020 else
@@ -2948,35 +3031,36 @@ try_mount_again:
2948 goto remote_path_check; 3031 goto remote_path_check;
2949 } 3032 }
2950 3033
2951 /* do not care if following two calls succeed - informational */
2952 if (!tcon->ipc) {
2953 CIFSSMBQFSDeviceInfo(xid, tcon);
2954 CIFSSMBQFSAttributeInfo(xid, tcon);
2955 }
2956
2957 /* tell server which Unix caps we support */ 3034 /* tell server which Unix caps we support */
2958 if (tcon->ses->capabilities & CAP_UNIX) 3035 if (tcon->ses->capabilities & CAP_UNIX) {
2959 /* reset of caps checks mount to see if unix extensions 3036 /* reset of caps checks mount to see if unix extensions
2960 disabled for just this mount */ 3037 disabled for just this mount */
2961 reset_cifs_unix_caps(xid, tcon, sb, volume_info); 3038 reset_cifs_unix_caps(xid, tcon, sb, volume_info);
2962 else 3039 if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
3040 (le64_to_cpu(tcon->fsUnixInfo.Capability) &
3041 CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
3042 rc = -EACCES;
3043 goto mount_fail_check;
3044 }
3045 } else
2963 tcon->unix_ext = 0; /* server does not support them */ 3046 tcon->unix_ext = 0; /* server does not support them */
2964 3047
2965 /* convert forward to back slashes in prepath here if needed */ 3048 /* do not care if following two calls succeed - informational */
2966 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3049 if (!tcon->ipc) {
2967 convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); 3050 CIFSSMBQFSDeviceInfo(xid, tcon);
3051 CIFSSMBQFSAttributeInfo(xid, tcon);
3052 }
2968 3053
2969 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 3054 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2970 cifs_sb->rsize = 1024 * 127; 3055 cifs_sb->rsize = 1024 * 127;
2971 cFYI(DBG2, "no very large read support, rsize now 127K"); 3056 cFYI(DBG2, "no very large read support, rsize now 127K");
2972 } 3057 }
2973 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2974 cifs_sb->wsize = min(cifs_sb->wsize,
2975 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2976 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) 3058 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X))
2977 cifs_sb->rsize = min(cifs_sb->rsize, 3059 cifs_sb->rsize = min(cifs_sb->rsize,
2978 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 3060 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2979 3061
3062 cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info);
3063
2980remote_path_check: 3064remote_path_check:
2981#ifdef CONFIG_CIFS_DFS_UPCALL 3065#ifdef CONFIG_CIFS_DFS_UPCALL
2982 /* 3066 /*
@@ -2996,10 +3080,10 @@ remote_path_check:
2996 } 3080 }
2997#endif 3081#endif
2998 3082
2999 /* check if a whole path (including prepath) is not remote */ 3083 /* check if a whole path is not remote */
3000 if (!rc && tcon) { 3084 if (!rc && tcon) {
3001 /* build_path_to_root works only when we have a valid tcon */ 3085 /* build_path_to_root works only when we have a valid tcon */
3002 full_path = cifs_build_path_to_root(cifs_sb, tcon); 3086 full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon);
3003 if (full_path == NULL) { 3087 if (full_path == NULL) {
3004 rc = -ENOMEM; 3088 rc = -ENOMEM;
3005 goto mount_fail_check; 3089 goto mount_fail_check;
@@ -3025,10 +3109,6 @@ remote_path_check:
3025 rc = -ELOOP; 3109 rc = -ELOOP;
3026 goto mount_fail_check; 3110 goto mount_fail_check;
3027 } 3111 }
3028 /* convert forward to back slashes in prepath here if needed */
3029 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
3030 convert_delimiter(cifs_sb->prepath,
3031 CIFS_DIR_SEP(cifs_sb));
3032 3112
3033 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb, 3113 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
3034 true); 3114 true);
@@ -3087,14 +3167,13 @@ mount_fail_check:
3087 password will be freed at unmount time) */ 3167 password will be freed at unmount time) */
3088out: 3168out:
3089 /* zero out password before freeing */ 3169 /* zero out password before freeing */
3090 cleanup_volume_info(&volume_info);
3091 FreeXid(xid); 3170 FreeXid(xid);
3092 return rc; 3171 return rc;
3093} 3172}
3094 3173
3095int 3174int
3096CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, 3175CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3097 const char *tree, struct cifsTconInfo *tcon, 3176 const char *tree, struct cifs_tcon *tcon,
3098 const struct nls_table *nls_codepage) 3177 const struct nls_table *nls_codepage)
3099{ 3178{
3100 struct smb_hdr *smb_buffer; 3179 struct smb_hdr *smb_buffer;
@@ -3126,7 +3205,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3126 pSMB->AndXCommand = 0xFF; 3205 pSMB->AndXCommand = 0xFF;
3127 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); 3206 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
3128 bcc_ptr = &pSMB->Password[0]; 3207 bcc_ptr = &pSMB->Password[0];
3129 if ((ses->server->secMode) & SECMODE_USER) { 3208 if ((ses->server->sec_mode) & SECMODE_USER) {
3130 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3209 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3131 *bcc_ptr = 0; /* password is null byte */ 3210 *bcc_ptr = 0; /* password is null byte */
3132 bcc_ptr++; /* skip password */ 3211 bcc_ptr++; /* skip password */
@@ -3143,7 +3222,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3143 if ((global_secflags & CIFSSEC_MAY_LANMAN) && 3222 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
3144 (ses->server->secType == LANMAN)) 3223 (ses->server->secType == LANMAN))
3145 calc_lanman_hash(tcon->password, ses->server->cryptkey, 3224 calc_lanman_hash(tcon->password, ses->server->cryptkey,
3146 ses->server->secMode & 3225 ses->server->sec_mode &
3147 SECMODE_PW_ENCRYPT ? true : false, 3226 SECMODE_PW_ENCRYPT ? true : false,
3148 bcc_ptr); 3227 bcc_ptr);
3149 else 3228 else
@@ -3159,7 +3238,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3159 } 3238 }
3160 } 3239 }
3161 3240
3162 if (ses->server->secMode & 3241 if (ses->server->sec_mode &
3163 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 3242 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3164 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 3243 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
3165 3244
@@ -3255,7 +3334,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3255 struct rb_root *root = &cifs_sb->tlink_tree; 3334 struct rb_root *root = &cifs_sb->tlink_tree;
3256 struct rb_node *node; 3335 struct rb_node *node;
3257 struct tcon_link *tlink; 3336 struct tcon_link *tlink;
3258 char *tmp;
3259 3337
3260 cancel_delayed_work_sync(&cifs_sb->prune_tlinks); 3338 cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
3261 3339
@@ -3272,15 +3350,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3272 } 3350 }
3273 spin_unlock(&cifs_sb->tlink_tree_lock); 3351 spin_unlock(&cifs_sb->tlink_tree_lock);
3274 3352
3275 tmp = cifs_sb->prepath;
3276 cifs_sb->prepathlen = 0;
3277 cifs_sb->prepath = NULL;
3278 kfree(tmp);
3279
3280 return 0; 3353 return 0;
3281} 3354}
3282 3355
3283int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses) 3356int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
3284{ 3357{
3285 int rc = 0; 3358 int rc = 0;
3286 struct TCP_Server_Info *server = ses->server; 3359 struct TCP_Server_Info *server = ses->server;
@@ -3310,7 +3383,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses)
3310} 3383}
3311 3384
3312 3385
3313int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, 3386int cifs_setup_session(unsigned int xid, struct cifs_ses *ses,
3314 struct nls_table *nls_info) 3387 struct nls_table *nls_info)
3315{ 3388{
3316 int rc = 0; 3389 int rc = 0;
@@ -3322,7 +3395,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3322 ses->capabilities &= (~CAP_UNIX); 3395 ses->capabilities &= (~CAP_UNIX);
3323 3396
3324 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 3397 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3325 server->secMode, server->capabilities, server->timeAdj); 3398 server->sec_mode, server->capabilities, server->timeAdj);
3326 3399
3327 rc = CIFS_SessSetup(xid, ses, nls_info); 3400 rc = CIFS_SessSetup(xid, ses, nls_info);
3328 if (rc) { 3401 if (rc) {
@@ -3354,12 +3427,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3354 return rc; 3427 return rc;
3355} 3428}
3356 3429
3357static struct cifsTconInfo * 3430static struct cifs_tcon *
3358cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) 3431cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3359{ 3432{
3360 struct cifsTconInfo *master_tcon = cifs_sb_master_tcon(cifs_sb); 3433 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
3361 struct cifsSesInfo *ses; 3434 struct cifs_ses *ses;
3362 struct cifsTconInfo *tcon = NULL; 3435 struct cifs_tcon *tcon = NULL;
3363 struct smb_vol *vol_info; 3436 struct smb_vol *vol_info;
3364 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */ 3437 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
3365 /* We used to have this as MAX_USERNAME which is */ 3438 /* We used to have this as MAX_USERNAME which is */
@@ -3392,7 +3465,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3392 3465
3393 ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); 3466 ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
3394 if (IS_ERR(ses)) { 3467 if (IS_ERR(ses)) {
3395 tcon = (struct cifsTconInfo *)ses; 3468 tcon = (struct cifs_tcon *)ses;
3396 cifs_put_tcp_session(master_tcon->ses->server); 3469 cifs_put_tcp_session(master_tcon->ses->server);
3397 goto out; 3470 goto out;
3398 } 3471 }
@@ -3417,7 +3490,7 @@ cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3417 return cifs_sb->master_tlink; 3490 return cifs_sb->master_tlink;
3418} 3491}
3419 3492
3420struct cifsTconInfo * 3493struct cifs_tcon *
3421cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) 3494cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3422{ 3495{
3423 return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); 3496 return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 9ea65cf36714..81914df47ef1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -50,12 +50,11 @@ build_path_from_dentry(struct dentry *direntry)
50{ 50{
51 struct dentry *temp; 51 struct dentry *temp;
52 int namelen; 52 int namelen;
53 int pplen;
54 int dfsplen; 53 int dfsplen;
55 char *full_path; 54 char *full_path;
56 char dirsep; 55 char dirsep;
57 struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); 56 struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
58 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 57 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
59 58
60 if (direntry == NULL) 59 if (direntry == NULL)
61 return NULL; /* not much we can do if dentry is freed and 60 return NULL; /* not much we can do if dentry is freed and
@@ -63,13 +62,12 @@ build_path_from_dentry(struct dentry *direntry)
63 when the server crashed */ 62 when the server crashed */
64 63
65 dirsep = CIFS_DIR_SEP(cifs_sb); 64 dirsep = CIFS_DIR_SEP(cifs_sb);
66 pplen = cifs_sb->prepathlen;
67 if (tcon->Flags & SMB_SHARE_IS_IN_DFS) 65 if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
68 dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); 66 dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
69 else 67 else
70 dfsplen = 0; 68 dfsplen = 0;
71cifs_bp_rename_retry: 69cifs_bp_rename_retry:
72 namelen = pplen + dfsplen; 70 namelen = dfsplen;
73 for (temp = direntry; !IS_ROOT(temp);) { 71 for (temp = direntry; !IS_ROOT(temp);) {
74 namelen += (1 + temp->d_name.len); 72 namelen += (1 + temp->d_name.len);
75 temp = temp->d_parent; 73 temp = temp->d_parent;
@@ -100,7 +98,7 @@ cifs_bp_rename_retry:
100 return NULL; 98 return NULL;
101 } 99 }
102 } 100 }
103 if (namelen != pplen + dfsplen) { 101 if (namelen != dfsplen) {
104 cERROR(1, "did not end path lookup where expected namelen is %d", 102 cERROR(1, "did not end path lookup where expected namelen is %d",
105 namelen); 103 namelen);
106 /* presumably this is only possible if racing with a rename 104 /* presumably this is only possible if racing with a rename
@@ -126,7 +124,6 @@ cifs_bp_rename_retry:
126 } 124 }
127 } 125 }
128 } 126 }
129 strncpy(full_path + dfsplen, CIFS_SB(direntry->d_sb)->prepath, pplen);
130 return full_path; 127 return full_path;
131} 128}
132 129
@@ -152,7 +149,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
152 __u16 fileHandle; 149 __u16 fileHandle;
153 struct cifs_sb_info *cifs_sb; 150 struct cifs_sb_info *cifs_sb;
154 struct tcon_link *tlink; 151 struct tcon_link *tlink;
155 struct cifsTconInfo *tcon; 152 struct cifs_tcon *tcon;
156 char *full_path = NULL; 153 char *full_path = NULL;
157 FILE_ALL_INFO *buf = NULL; 154 FILE_ALL_INFO *buf = NULL;
158 struct inode *newinode = NULL; 155 struct inode *newinode = NULL;
@@ -356,7 +353,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
356 int xid; 353 int xid;
357 struct cifs_sb_info *cifs_sb; 354 struct cifs_sb_info *cifs_sb;
358 struct tcon_link *tlink; 355 struct tcon_link *tlink;
359 struct cifsTconInfo *pTcon; 356 struct cifs_tcon *pTcon;
357 struct cifs_io_parms io_parms;
360 char *full_path = NULL; 358 char *full_path = NULL;
361 struct inode *newinode = NULL; 359 struct inode *newinode = NULL;
362 int oplock = 0; 360 int oplock = 0;
@@ -439,16 +437,19 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
439 * timestamps in, but we can reuse it safely */ 437 * timestamps in, but we can reuse it safely */
440 438
441 pdev = (struct win_dev *)buf; 439 pdev = (struct win_dev *)buf;
440 io_parms.netfid = fileHandle;
441 io_parms.pid = current->tgid;
442 io_parms.tcon = pTcon;
443 io_parms.offset = 0;
444 io_parms.length = sizeof(struct win_dev);
442 if (S_ISCHR(mode)) { 445 if (S_ISCHR(mode)) {
443 memcpy(pdev->type, "IntxCHR", 8); 446 memcpy(pdev->type, "IntxCHR", 8);
444 pdev->major = 447 pdev->major =
445 cpu_to_le64(MAJOR(device_number)); 448 cpu_to_le64(MAJOR(device_number));
446 pdev->minor = 449 pdev->minor =
447 cpu_to_le64(MINOR(device_number)); 450 cpu_to_le64(MINOR(device_number));
448 rc = CIFSSMBWrite(xid, pTcon, 451 rc = CIFSSMBWrite(xid, &io_parms,
449 fileHandle, 452 &bytes_written, (char *)pdev,
450 sizeof(struct win_dev),
451 0, &bytes_written, (char *)pdev,
452 NULL, 0); 453 NULL, 0);
453 } else if (S_ISBLK(mode)) { 454 } else if (S_ISBLK(mode)) {
454 memcpy(pdev->type, "IntxBLK", 8); 455 memcpy(pdev->type, "IntxBLK", 8);
@@ -456,10 +457,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
456 cpu_to_le64(MAJOR(device_number)); 457 cpu_to_le64(MAJOR(device_number));
457 pdev->minor = 458 pdev->minor =
458 cpu_to_le64(MINOR(device_number)); 459 cpu_to_le64(MINOR(device_number));
459 rc = CIFSSMBWrite(xid, pTcon, 460 rc = CIFSSMBWrite(xid, &io_parms,
460 fileHandle, 461 &bytes_written, (char *)pdev,
461 sizeof(struct win_dev),
462 0, &bytes_written, (char *)pdev,
463 NULL, 0); 462 NULL, 0);
464 } /* else if (S_ISFIFO) */ 463 } /* else if (S_ISFIFO) */
465 CIFSSMBClose(xid, pTcon, fileHandle); 464 CIFSSMBClose(xid, pTcon, fileHandle);
@@ -486,7 +485,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
486 bool posix_open = false; 485 bool posix_open = false;
487 struct cifs_sb_info *cifs_sb; 486 struct cifs_sb_info *cifs_sb;
488 struct tcon_link *tlink; 487 struct tcon_link *tlink;
489 struct cifsTconInfo *pTcon; 488 struct cifs_tcon *pTcon;
490 struct cifsFileInfo *cfile; 489 struct cifsFileInfo *cfile;
491 struct inode *newInode = NULL; 490 struct inode *newInode = NULL;
492 char *full_path = NULL; 491 char *full_path = NULL;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c672afef0c09..bb71471a4d9d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -114,7 +114,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
115 struct cifs_fattr fattr; 115 struct cifs_fattr fattr;
116 struct tcon_link *tlink; 116 struct tcon_link *tlink;
117 struct cifsTconInfo *tcon; 117 struct cifs_tcon *tcon;
118 118
119 cFYI(1, "posix open %s", full_path); 119 cFYI(1, "posix open %s", full_path);
120 120
@@ -168,7 +168,7 @@ posix_open_ret:
168 168
169static int 169static int
170cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 170cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
171 struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock, 171 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
172 __u16 *pnetfid, int xid) 172 __u16 *pnetfid, int xid)
173{ 173{
174 int rc; 174 int rc;
@@ -285,7 +285,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
285void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 285void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
286{ 286{
287 struct inode *inode = cifs_file->dentry->d_inode; 287 struct inode *inode = cifs_file->dentry->d_inode;
288 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); 288 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
289 struct cifsInodeInfo *cifsi = CIFS_I(inode); 289 struct cifsInodeInfo *cifsi = CIFS_I(inode);
290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
291 struct cifsLockInfo *li, *tmp; 291 struct cifsLockInfo *li, *tmp;
@@ -343,7 +343,7 @@ int cifs_open(struct inode *inode, struct file *file)
343 int xid; 343 int xid;
344 __u32 oplock; 344 __u32 oplock;
345 struct cifs_sb_info *cifs_sb; 345 struct cifs_sb_info *cifs_sb;
346 struct cifsTconInfo *tcon; 346 struct cifs_tcon *tcon;
347 struct tcon_link *tlink; 347 struct tcon_link *tlink;
348 struct cifsFileInfo *pCifsFile = NULL; 348 struct cifsFileInfo *pCifsFile = NULL;
349 char *full_path = NULL; 349 char *full_path = NULL;
@@ -457,7 +457,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
457 int xid; 457 int xid;
458 __u32 oplock; 458 __u32 oplock;
459 struct cifs_sb_info *cifs_sb; 459 struct cifs_sb_info *cifs_sb;
460 struct cifsTconInfo *tcon; 460 struct cifs_tcon *tcon;
461 struct cifsInodeInfo *pCifsInode; 461 struct cifsInodeInfo *pCifsInode;
462 struct inode *inode; 462 struct inode *inode;
463 char *full_path = NULL; 463 char *full_path = NULL;
@@ -596,7 +596,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
596 xid = GetXid(); 596 xid = GetXid();
597 597
598 if (pCFileStruct) { 598 if (pCFileStruct) {
599 struct cifsTconInfo *pTcon = tlink_tcon(pCFileStruct->tlink); 599 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
600 600
601 cFYI(1, "Freeing private data in close dir"); 601 cFYI(1, "Freeing private data in close dir");
602 spin_lock(&cifs_file_list_lock); 602 spin_lock(&cifs_file_list_lock);
@@ -653,7 +653,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
653 __u64 length; 653 __u64 length;
654 bool wait_flag = false; 654 bool wait_flag = false;
655 struct cifs_sb_info *cifs_sb; 655 struct cifs_sb_info *cifs_sb;
656 struct cifsTconInfo *tcon; 656 struct cifs_tcon *tcon;
657 __u16 netfid; 657 __u16 netfid;
658 __u8 lockType = LOCKING_ANDX_LARGE_FILES; 658 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
659 bool posix_locking = 0; 659 bool posix_locking = 0;
@@ -725,8 +725,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
725 else 725 else
726 posix_lock_type = CIFS_WRLCK; 726 posix_lock_type = CIFS_WRLCK;
727 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */, 727 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
728 length, pfLock, 728 length, pfLock, posix_lock_type,
729 posix_lock_type, wait_flag); 729 wait_flag);
730 FreeXid(xid); 730 FreeXid(xid);
731 return rc; 731 return rc;
732 } 732 }
@@ -797,8 +797,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
797 posix_lock_type = CIFS_UNLCK; 797 posix_lock_type = CIFS_UNLCK;
798 798
799 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */, 799 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
800 length, pfLock, 800 length, pfLock, posix_lock_type,
801 posix_lock_type, wait_flag); 801 wait_flag);
802 } else { 802 } else {
803 struct cifsFileInfo *fid = file->private_data; 803 struct cifsFileInfo *fid = file->private_data;
804 804
@@ -857,7 +857,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
857 cifsi->server_eof = end_of_write; 857 cifsi->server_eof = end_of_write;
858} 858}
859 859
860static ssize_t cifs_write(struct cifsFileInfo *open_file, 860static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
861 const char *write_data, size_t write_size, 861 const char *write_data, size_t write_size,
862 loff_t *poffset) 862 loff_t *poffset)
863{ 863{
@@ -865,10 +865,11 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
865 unsigned int bytes_written = 0; 865 unsigned int bytes_written = 0;
866 unsigned int total_written; 866 unsigned int total_written;
867 struct cifs_sb_info *cifs_sb; 867 struct cifs_sb_info *cifs_sb;
868 struct cifsTconInfo *pTcon; 868 struct cifs_tcon *pTcon;
869 int xid; 869 int xid;
870 struct dentry *dentry = open_file->dentry; 870 struct dentry *dentry = open_file->dentry;
871 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); 871 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
872 struct cifs_io_parms io_parms;
872 873
873 cifs_sb = CIFS_SB(dentry->d_sb); 874 cifs_sb = CIFS_SB(dentry->d_sb);
874 875
@@ -901,8 +902,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
901 /* iov[0] is reserved for smb header */ 902 /* iov[0] is reserved for smb header */
902 iov[1].iov_base = (char *)write_data + total_written; 903 iov[1].iov_base = (char *)write_data + total_written;
903 iov[1].iov_len = len; 904 iov[1].iov_len = len;
904 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len, 905 io_parms.netfid = open_file->netfid;
905 *poffset, &bytes_written, iov, 1, 0); 906 io_parms.pid = pid;
907 io_parms.tcon = pTcon;
908 io_parms.offset = *poffset;
909 io_parms.length = len;
910 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
911 1, 0);
906 } 912 }
907 if (rc || (bytes_written == 0)) { 913 if (rc || (bytes_written == 0)) {
908 if (total_written) 914 if (total_written)
@@ -1071,8 +1077,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1071 1077
1072 open_file = find_writable_file(CIFS_I(mapping->host), false); 1078 open_file = find_writable_file(CIFS_I(mapping->host), false);
1073 if (open_file) { 1079 if (open_file) {
1074 bytes_written = cifs_write(open_file, write_data, 1080 bytes_written = cifs_write(open_file, open_file->pid,
1075 to - from, &offset); 1081 write_data, to - from, &offset);
1076 cifsFileInfo_put(open_file); 1082 cifsFileInfo_put(open_file);
1077 /* Does mm or vfs already set times? */ 1083 /* Does mm or vfs already set times? */
1078 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); 1084 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
@@ -1092,58 +1098,20 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1092static int cifs_writepages(struct address_space *mapping, 1098static int cifs_writepages(struct address_space *mapping,
1093 struct writeback_control *wbc) 1099 struct writeback_control *wbc)
1094{ 1100{
1095 unsigned int bytes_to_write; 1101 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1096 unsigned int bytes_written; 1102 bool done = false, scanned = false, range_whole = false;
1097 struct cifs_sb_info *cifs_sb; 1103 pgoff_t end, index;
1098 int done = 0; 1104 struct cifs_writedata *wdata;
1099 pgoff_t end;
1100 pgoff_t index;
1101 int range_whole = 0;
1102 struct kvec *iov;
1103 int len;
1104 int n_iov = 0;
1105 pgoff_t next;
1106 int nr_pages;
1107 __u64 offset = 0;
1108 struct cifsFileInfo *open_file;
1109 struct cifsTconInfo *tcon;
1110 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1111 struct page *page; 1105 struct page *page;
1112 struct pagevec pvec;
1113 int rc = 0; 1106 int rc = 0;
1114 int scanned = 0;
1115 int xid;
1116
1117 cifs_sb = CIFS_SB(mapping->host->i_sb);
1118 1107
1119 /* 1108 /*
1120 * If wsize is smaller that the page cache size, default to writing 1109 * If wsize is smaller than the page cache size, default to writing
1121 * one page at a time via cifs_writepage 1110 * one page at a time via cifs_writepage
1122 */ 1111 */
1123 if (cifs_sb->wsize < PAGE_CACHE_SIZE) 1112 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1124 return generic_writepages(mapping, wbc); 1113 return generic_writepages(mapping, wbc);
1125 1114
1126 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1127 if (iov == NULL)
1128 return generic_writepages(mapping, wbc);
1129
1130 /*
1131 * if there's no open file, then this is likely to fail too,
1132 * but it'll at least handle the return. Maybe it should be
1133 * a BUG() instead?
1134 */
1135 open_file = find_writable_file(CIFS_I(mapping->host), false);
1136 if (!open_file) {
1137 kfree(iov);
1138 return generic_writepages(mapping, wbc);
1139 }
1140
1141 tcon = tlink_tcon(open_file->tlink);
1142 cifsFileInfo_put(open_file);
1143
1144 xid = GetXid();
1145
1146 pagevec_init(&pvec, 0);
1147 if (wbc->range_cyclic) { 1115 if (wbc->range_cyclic) {
1148 index = mapping->writeback_index; /* Start from prev offset */ 1116 index = mapping->writeback_index; /* Start from prev offset */
1149 end = -1; 1117 end = -1;
@@ -1151,24 +1119,49 @@ static int cifs_writepages(struct address_space *mapping,
1151 index = wbc->range_start >> PAGE_CACHE_SHIFT; 1119 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1152 end = wbc->range_end >> PAGE_CACHE_SHIFT; 1120 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1153 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 1121 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1154 range_whole = 1; 1122 range_whole = true;
1155 scanned = 1; 1123 scanned = true;
1156 } 1124 }
1157retry: 1125retry:
1158 while (!done && (index <= end) && 1126 while (!done && index <= end) {
1159 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 1127 unsigned int i, nr_pages, found_pages;
1160 PAGECACHE_TAG_DIRTY, 1128 pgoff_t next = 0, tofind;
1161 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) { 1129 struct page **pages;
1162 int first; 1130
1163 unsigned int i; 1131 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1164 1132 end - index) + 1;
1165 first = -1; 1133
1166 next = 0; 1134 wdata = cifs_writedata_alloc((unsigned int)tofind);
1167 n_iov = 0; 1135 if (!wdata) {
1168 bytes_to_write = 0; 1136 rc = -ENOMEM;
1169 1137 break;
1170 for (i = 0; i < nr_pages; i++) { 1138 }
1171 page = pvec.pages[i]; 1139
1140 /*
1141 * find_get_pages_tag seems to return a max of 256 on each
1142 * iteration, so we must call it several times in order to
1143 * fill the array or the wsize is effectively limited to
1144 * 256 * PAGE_CACHE_SIZE.
1145 */
1146 found_pages = 0;
1147 pages = wdata->pages;
1148 do {
1149 nr_pages = find_get_pages_tag(mapping, &index,
1150 PAGECACHE_TAG_DIRTY,
1151 tofind, pages);
1152 found_pages += nr_pages;
1153 tofind -= nr_pages;
1154 pages += nr_pages;
1155 } while (nr_pages && tofind && index <= end);
1156
1157 if (found_pages == 0) {
1158 kref_put(&wdata->refcount, cifs_writedata_release);
1159 break;
1160 }
1161
1162 nr_pages = 0;
1163 for (i = 0; i < found_pages; i++) {
1164 page = wdata->pages[i];
1172 /* 1165 /*
1173 * At this point we hold neither mapping->tree_lock nor 1166 * At this point we hold neither mapping->tree_lock nor
1174 * lock on the page itself: the page may be truncated or 1167 * lock on the page itself: the page may be truncated or
@@ -1177,7 +1170,7 @@ retry:
1177 * mapping 1170 * mapping
1178 */ 1171 */
1179 1172
1180 if (first < 0) 1173 if (nr_pages == 0)
1181 lock_page(page); 1174 lock_page(page);
1182 else if (!trylock_page(page)) 1175 else if (!trylock_page(page))
1183 break; 1176 break;
@@ -1188,7 +1181,7 @@ retry:
1188 } 1181 }
1189 1182
1190 if (!wbc->range_cyclic && page->index > end) { 1183 if (!wbc->range_cyclic && page->index > end) {
1191 done = 1; 1184 done = true;
1192 unlock_page(page); 1185 unlock_page(page);
1193 break; 1186 break;
1194 } 1187 }
@@ -1215,119 +1208,89 @@ retry:
1215 set_page_writeback(page); 1208 set_page_writeback(page);
1216 1209
1217 if (page_offset(page) >= mapping->host->i_size) { 1210 if (page_offset(page) >= mapping->host->i_size) {
1218 done = 1; 1211 done = true;
1219 unlock_page(page); 1212 unlock_page(page);
1220 end_page_writeback(page); 1213 end_page_writeback(page);
1221 break; 1214 break;
1222 } 1215 }
1223 1216
1224 /* 1217 wdata->pages[i] = page;
1225 * BB can we get rid of this? pages are held by pvec 1218 next = page->index + 1;
1226 */ 1219 ++nr_pages;
1227 page_cache_get(page); 1220 }
1228 1221
1229 len = min(mapping->host->i_size - page_offset(page), 1222 /* reset index to refind any pages skipped */
1230 (loff_t)PAGE_CACHE_SIZE); 1223 if (nr_pages == 0)
1224 index = wdata->pages[0]->index + 1;
1231 1225
1232 /* reserve iov[0] for the smb header */ 1226 /* put any pages we aren't going to use */
1233 n_iov++; 1227 for (i = nr_pages; i < found_pages; i++) {
1234 iov[n_iov].iov_base = kmap(page); 1228 page_cache_release(wdata->pages[i]);
1235 iov[n_iov].iov_len = len; 1229 wdata->pages[i] = NULL;
1236 bytes_to_write += len; 1230 }
1237 1231
1238 if (first < 0) { 1232 /* nothing to write? */
1239 first = i; 1233 if (nr_pages == 0) {
1240 offset = page_offset(page); 1234 kref_put(&wdata->refcount, cifs_writedata_release);
1241 } 1235 continue;
1242 next = page->index + 1;
1243 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1244 break;
1245 } 1236 }
1246 if (n_iov) {
1247retry_write:
1248 open_file = find_writable_file(CIFS_I(mapping->host),
1249 false);
1250 if (!open_file) {
1251 cERROR(1, "No writable handles for inode");
1252 rc = -EBADF;
1253 } else {
1254 rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
1255 bytes_to_write, offset,
1256 &bytes_written, iov, n_iov,
1257 0);
1258 cifsFileInfo_put(open_file);
1259 }
1260 1237
1261 cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written); 1238 wdata->sync_mode = wbc->sync_mode;
1239 wdata->nr_pages = nr_pages;
1240 wdata->offset = page_offset(wdata->pages[0]);
1262 1241
1263 /* 1242 do {
1264 * For now, treat a short write as if nothing got 1243 if (wdata->cfile != NULL)
1265 * written. A zero length write however indicates 1244 cifsFileInfo_put(wdata->cfile);
1266 * ENOSPC or EFBIG. We have no way to know which 1245 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1267 * though, so call it ENOSPC for now. EFBIG would 1246 false);
1268 * get translated to AS_EIO anyway. 1247 if (!wdata->cfile) {
1269 * 1248 cERROR(1, "No writable handles for inode");
1270 * FIXME: make it take into account the data that did 1249 rc = -EBADF;
1271 * get written 1250 break;
1272 */
1273 if (rc == 0) {
1274 if (bytes_written == 0)
1275 rc = -ENOSPC;
1276 else if (bytes_written < bytes_to_write)
1277 rc = -EAGAIN;
1278 } 1251 }
1252 rc = cifs_async_writev(wdata);
1253 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1279 1254
1280 /* retry on data-integrity flush */ 1255 for (i = 0; i < nr_pages; ++i)
1281 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 1256 unlock_page(wdata->pages[i]);
1282 goto retry_write;
1283
1284 /* fix the stats and EOF */
1285 if (bytes_written > 0) {
1286 cifs_stats_bytes_written(tcon, bytes_written);
1287 cifs_update_eof(cifsi, offset, bytes_written);
1288 }
1289 1257
1290 for (i = 0; i < n_iov; i++) { 1258 /* send failure -- clean up the mess */
1291 page = pvec.pages[first + i]; 1259 if (rc != 0) {
1292 /* on retryable write error, redirty page */ 1260 for (i = 0; i < nr_pages; ++i) {
1293 if (rc == -EAGAIN) 1261 if (rc == -EAGAIN)
1294 redirty_page_for_writepage(wbc, page); 1262 redirty_page_for_writepage(wbc,
1295 else if (rc != 0) 1263 wdata->pages[i]);
1296 SetPageError(page); 1264 else
1297 kunmap(page); 1265 SetPageError(wdata->pages[i]);
1298 unlock_page(page); 1266 end_page_writeback(wdata->pages[i]);
1299 end_page_writeback(page); 1267 page_cache_release(wdata->pages[i]);
1300 page_cache_release(page);
1301 } 1268 }
1302
1303 if (rc != -EAGAIN) 1269 if (rc != -EAGAIN)
1304 mapping_set_error(mapping, rc); 1270 mapping_set_error(mapping, rc);
1305 else 1271 }
1306 rc = 0; 1272 kref_put(&wdata->refcount, cifs_writedata_release);
1307 1273
1308 if ((wbc->nr_to_write -= n_iov) <= 0) 1274 wbc->nr_to_write -= nr_pages;
1309 done = 1; 1275 if (wbc->nr_to_write <= 0)
1310 index = next; 1276 done = true;
1311 } else
1312 /* Need to re-find the pages we skipped */
1313 index = pvec.pages[0]->index + 1;
1314 1277
1315 pagevec_release(&pvec); 1278 index = next;
1316 } 1279 }
1280
1317 if (!scanned && !done) { 1281 if (!scanned && !done) {
1318 /* 1282 /*
1319 * We hit the last page and there is more work to be done: wrap 1283 * We hit the last page and there is more work to be done: wrap
1320 * back to the start of the file 1284 * back to the start of the file
1321 */ 1285 */
1322 scanned = 1; 1286 scanned = true;
1323 index = 0; 1287 index = 0;
1324 goto retry; 1288 goto retry;
1325 } 1289 }
1290
1326 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 1291 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1327 mapping->writeback_index = index; 1292 mapping->writeback_index = index;
1328 1293
1329 FreeXid(xid);
1330 kfree(iov);
1331 return rc; 1294 return rc;
1332} 1295}
1333 1296
@@ -1383,6 +1346,14 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1383{ 1346{
1384 int rc; 1347 int rc;
1385 struct inode *inode = mapping->host; 1348 struct inode *inode = mapping->host;
1349 struct cifsFileInfo *cfile = file->private_data;
1350 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1351 __u32 pid;
1352
1353 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1354 pid = cfile->pid;
1355 else
1356 pid = current->tgid;
1386 1357
1387 cFYI(1, "write_end for page %p from pos %lld with %d bytes", 1358 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1388 page, pos, copied); 1359 page, pos, copied);
@@ -1406,8 +1377,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1406 /* BB check if anything else missing out of ppw 1377 /* BB check if anything else missing out of ppw
1407 such as updating last write time */ 1378 such as updating last write time */
1408 page_data = kmap(page); 1379 page_data = kmap(page);
1409 rc = cifs_write(file->private_data, page_data + offset, 1380 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
1410 copied, &pos);
1411 /* if (rc < 0) should we set writebehind rc? */ 1381 /* if (rc < 0) should we set writebehind rc? */
1412 kunmap(page); 1382 kunmap(page);
1413 1383
@@ -1435,7 +1405,7 @@ int cifs_strict_fsync(struct file *file, int datasync)
1435{ 1405{
1436 int xid; 1406 int xid;
1437 int rc = 0; 1407 int rc = 0;
1438 struct cifsTconInfo *tcon; 1408 struct cifs_tcon *tcon;
1439 struct cifsFileInfo *smbfile = file->private_data; 1409 struct cifsFileInfo *smbfile = file->private_data;
1440 struct inode *inode = file->f_path.dentry->d_inode; 1410 struct inode *inode = file->f_path.dentry->d_inode;
1441 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1411 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1465,7 +1435,7 @@ int cifs_fsync(struct file *file, int datasync)
1465{ 1435{
1466 int xid; 1436 int xid;
1467 int rc = 0; 1437 int rc = 0;
1468 struct cifsTconInfo *tcon; 1438 struct cifs_tcon *tcon;
1469 struct cifsFileInfo *smbfile = file->private_data; 1439 struct cifsFileInfo *smbfile = file->private_data;
1470 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1440 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1471 1441
@@ -1556,9 +1526,11 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1556 struct iov_iter it; 1526 struct iov_iter it;
1557 struct inode *inode; 1527 struct inode *inode;
1558 struct cifsFileInfo *open_file; 1528 struct cifsFileInfo *open_file;
1559 struct cifsTconInfo *pTcon; 1529 struct cifs_tcon *pTcon;
1560 struct cifs_sb_info *cifs_sb; 1530 struct cifs_sb_info *cifs_sb;
1531 struct cifs_io_parms io_parms;
1561 int xid, rc; 1532 int xid, rc;
1533 __u32 pid;
1562 1534
1563 len = iov_length(iov, nr_segs); 1535 len = iov_length(iov, nr_segs);
1564 if (!len) 1536 if (!len)
@@ -1590,6 +1562,12 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1590 1562
1591 xid = GetXid(); 1563 xid = GetXid();
1592 open_file = file->private_data; 1564 open_file = file->private_data;
1565
1566 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1567 pid = open_file->pid;
1568 else
1569 pid = current->tgid;
1570
1593 pTcon = tlink_tcon(open_file->tlink); 1571 pTcon = tlink_tcon(open_file->tlink);
1594 inode = file->f_path.dentry->d_inode; 1572 inode = file->f_path.dentry->d_inode;
1595 1573
@@ -1616,9 +1594,13 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1616 if (rc != 0) 1594 if (rc != 0)
1617 break; 1595 break;
1618 } 1596 }
1619 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, 1597 io_parms.netfid = open_file->netfid;
1620 cur_len, *poffset, &written, 1598 io_parms.pid = pid;
1621 to_send, npages, 0); 1599 io_parms.tcon = pTcon;
1600 io_parms.offset = *poffset;
1601 io_parms.length = cur_len;
1602 rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
1603 npages, 0);
1622 } while (rc == -EAGAIN); 1604 } while (rc == -EAGAIN);
1623 1605
1624 for (i = 0; i < npages; i++) 1606 for (i = 0; i < npages; i++)
@@ -1711,10 +1693,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1711 size_t len, cur_len; 1693 size_t len, cur_len;
1712 int iov_offset = 0; 1694 int iov_offset = 0;
1713 struct cifs_sb_info *cifs_sb; 1695 struct cifs_sb_info *cifs_sb;
1714 struct cifsTconInfo *pTcon; 1696 struct cifs_tcon *pTcon;
1715 struct cifsFileInfo *open_file; 1697 struct cifsFileInfo *open_file;
1716 struct smb_com_read_rsp *pSMBr; 1698 struct smb_com_read_rsp *pSMBr;
1699 struct cifs_io_parms io_parms;
1717 char *read_data; 1700 char *read_data;
1701 __u32 pid;
1718 1702
1719 if (!nr_segs) 1703 if (!nr_segs)
1720 return 0; 1704 return 0;
@@ -1729,6 +1713,11 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1729 open_file = file->private_data; 1713 open_file = file->private_data;
1730 pTcon = tlink_tcon(open_file->tlink); 1714 pTcon = tlink_tcon(open_file->tlink);
1731 1715
1716 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1717 pid = open_file->pid;
1718 else
1719 pid = current->tgid;
1720
1732 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1721 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1733 cFYI(1, "attempting read on write only file instance"); 1722 cFYI(1, "attempting read on write only file instance");
1734 1723
@@ -1744,8 +1733,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1744 if (rc != 0) 1733 if (rc != 0)
1745 break; 1734 break;
1746 } 1735 }
1747 rc = CIFSSMBRead(xid, pTcon, open_file->netfid, 1736 io_parms.netfid = open_file->netfid;
1748 cur_len, *poffset, &bytes_read, 1737 io_parms.pid = pid;
1738 io_parms.tcon = pTcon;
1739 io_parms.offset = *poffset;
1740 io_parms.length = len;
1741 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1749 &read_data, &buf_type); 1742 &read_data, &buf_type);
1750 pSMBr = (struct smb_com_read_rsp *)read_data; 1743 pSMBr = (struct smb_com_read_rsp *)read_data;
1751 if (read_data) { 1744 if (read_data) {
@@ -1822,11 +1815,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1822 unsigned int total_read; 1815 unsigned int total_read;
1823 unsigned int current_read_size; 1816 unsigned int current_read_size;
1824 struct cifs_sb_info *cifs_sb; 1817 struct cifs_sb_info *cifs_sb;
1825 struct cifsTconInfo *pTcon; 1818 struct cifs_tcon *pTcon;
1826 int xid; 1819 int xid;
1827 char *current_offset; 1820 char *current_offset;
1828 struct cifsFileInfo *open_file; 1821 struct cifsFileInfo *open_file;
1822 struct cifs_io_parms io_parms;
1829 int buf_type = CIFS_NO_BUFFER; 1823 int buf_type = CIFS_NO_BUFFER;
1824 __u32 pid;
1830 1825
1831 xid = GetXid(); 1826 xid = GetXid();
1832 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1827 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1839,6 +1834,11 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1839 open_file = file->private_data; 1834 open_file = file->private_data;
1840 pTcon = tlink_tcon(open_file->tlink); 1835 pTcon = tlink_tcon(open_file->tlink);
1841 1836
1837 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1838 pid = open_file->pid;
1839 else
1840 pid = current->tgid;
1841
1842 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1842 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1843 cFYI(1, "attempting read on write only file instance"); 1843 cFYI(1, "attempting read on write only file instance");
1844 1844
@@ -1861,11 +1861,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1861 if (rc != 0) 1861 if (rc != 0)
1862 break; 1862 break;
1863 } 1863 }
1864 rc = CIFSSMBRead(xid, pTcon, 1864 io_parms.netfid = open_file->netfid;
1865 open_file->netfid, 1865 io_parms.pid = pid;
1866 current_read_size, *poffset, 1866 io_parms.tcon = pTcon;
1867 &bytes_read, &current_offset, 1867 io_parms.offset = *poffset;
1868 &buf_type); 1868 io_parms.length = current_read_size;
1869 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1870 &current_offset, &buf_type);
1869 } 1871 }
1870 if (rc || (bytes_read == 0)) { 1872 if (rc || (bytes_read == 0)) {
1871 if (total_read) { 1873 if (total_read) {
@@ -1996,13 +1998,15 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1996 loff_t offset; 1998 loff_t offset;
1997 struct page *page; 1999 struct page *page;
1998 struct cifs_sb_info *cifs_sb; 2000 struct cifs_sb_info *cifs_sb;
1999 struct cifsTconInfo *pTcon; 2001 struct cifs_tcon *pTcon;
2000 unsigned int bytes_read = 0; 2002 unsigned int bytes_read = 0;
2001 unsigned int read_size, i; 2003 unsigned int read_size, i;
2002 char *smb_read_data = NULL; 2004 char *smb_read_data = NULL;
2003 struct smb_com_read_rsp *pSMBr; 2005 struct smb_com_read_rsp *pSMBr;
2004 struct cifsFileInfo *open_file; 2006 struct cifsFileInfo *open_file;
2007 struct cifs_io_parms io_parms;
2005 int buf_type = CIFS_NO_BUFFER; 2008 int buf_type = CIFS_NO_BUFFER;
2009 __u32 pid;
2006 2010
2007 xid = GetXid(); 2011 xid = GetXid();
2008 if (file->private_data == NULL) { 2012 if (file->private_data == NULL) {
@@ -2024,6 +2028,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2024 goto read_complete; 2028 goto read_complete;
2025 2029
2026 cFYI(DBG2, "rpages: num pages %d", num_pages); 2030 cFYI(DBG2, "rpages: num pages %d", num_pages);
2031 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2032 pid = open_file->pid;
2033 else
2034 pid = current->tgid;
2035
2027 for (i = 0; i < num_pages; ) { 2036 for (i = 0; i < num_pages; ) {
2028 unsigned contig_pages; 2037 unsigned contig_pages;
2029 struct page *tmp_page; 2038 struct page *tmp_page;
@@ -2065,12 +2074,13 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2065 if (rc != 0) 2074 if (rc != 0)
2066 break; 2075 break;
2067 } 2076 }
2068 2077 io_parms.netfid = open_file->netfid;
2069 rc = CIFSSMBRead(xid, pTcon, 2078 io_parms.pid = pid;
2070 open_file->netfid, 2079 io_parms.tcon = pTcon;
2071 read_size, offset, 2080 io_parms.offset = offset;
2072 &bytes_read, &smb_read_data, 2081 io_parms.length = read_size;
2073 &buf_type); 2082 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2083 &smb_read_data, &buf_type);
2074 /* BB more RC checks ? */ 2084 /* BB more RC checks ? */
2075 if (rc == -EAGAIN) { 2085 if (rc == -EAGAIN) {
2076 if (smb_read_data) { 2086 if (smb_read_data) {
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 297a43d0ff7f..d368a47ba5eb 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -40,7 +40,7 @@ void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
40 server->fscache = NULL; 40 server->fscache = NULL;
41} 41}
42 42
43void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) 43void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
44{ 44{
45 struct TCP_Server_Info *server = tcon->ses->server; 45 struct TCP_Server_Info *server = tcon->ses->server;
46 46
@@ -51,7 +51,7 @@ void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
51 server->fscache, tcon->fscache); 51 server->fscache, tcon->fscache);
52} 52}
53 53
54void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) 54void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
55{ 55{
56 cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache); 56 cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
57 fscache_relinquish_cookie(tcon->fscache, 0); 57 fscache_relinquish_cookie(tcon->fscache, 0);
@@ -62,7 +62,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
62{ 62{
63 struct cifsInodeInfo *cifsi = CIFS_I(inode); 63 struct cifsInodeInfo *cifsi = CIFS_I(inode);
64 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 64 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
65 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 65 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
66 66
67 if (cifsi->fscache) 67 if (cifsi->fscache)
68 return; 68 return;
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 31b88ec2341e..63539323e0b9 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -40,8 +40,8 @@ extern void cifs_fscache_unregister(void);
40 */ 40 */
41extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *); 41extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
42extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *); 42extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
43extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *); 43extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
44extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *); 44extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
45 45
46extern void cifs_fscache_release_inode_cookie(struct inode *); 46extern void cifs_fscache_release_inode_cookie(struct inode *);
47extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *); 47extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
@@ -99,9 +99,9 @@ static inline void
99cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {} 99cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
100static inline void 100static inline void
101cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {} 101cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
102static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {} 102static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {}
103static inline void 103static inline void
104cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {} 104cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
105 105
106static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} 106static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
107static inline void cifs_fscache_set_inode_cookie(struct inode *inode, 107static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index de02ed5e25c2..9b018c8334fa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -295,7 +295,7 @@ int cifs_get_file_info_unix(struct file *filp)
295 struct inode *inode = filp->f_path.dentry->d_inode; 295 struct inode *inode = filp->f_path.dentry->d_inode;
296 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 296 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
297 struct cifsFileInfo *cfile = filp->private_data; 297 struct cifsFileInfo *cfile = filp->private_data;
298 struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink); 298 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
299 299
300 xid = GetXid(); 300 xid = GetXid();
301 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); 301 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -318,7 +318,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
318 int rc; 318 int rc;
319 FILE_UNIX_BASIC_INFO find_data; 319 FILE_UNIX_BASIC_INFO find_data;
320 struct cifs_fattr fattr; 320 struct cifs_fattr fattr;
321 struct cifsTconInfo *tcon; 321 struct cifs_tcon *tcon;
322 struct tcon_link *tlink; 322 struct tcon_link *tlink;
323 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 323 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
324 324
@@ -373,7 +373,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
373 int oplock = 0; 373 int oplock = 0;
374 __u16 netfid; 374 __u16 netfid;
375 struct tcon_link *tlink; 375 struct tcon_link *tlink;
376 struct cifsTconInfo *tcon; 376 struct cifs_tcon *tcon;
377 struct cifs_io_parms io_parms;
377 char buf[24]; 378 char buf[24];
378 unsigned int bytes_read; 379 unsigned int bytes_read;
379 char *pbuf; 380 char *pbuf;
@@ -405,9 +406,13 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
405 if (rc == 0) { 406 if (rc == 0) {
406 int buf_type = CIFS_NO_BUFFER; 407 int buf_type = CIFS_NO_BUFFER;
407 /* Read header */ 408 /* Read header */
408 rc = CIFSSMBRead(xid, tcon, netfid, 409 io_parms.netfid = netfid;
409 24 /* length */, 0 /* offset */, 410 io_parms.pid = current->tgid;
410 &bytes_read, &pbuf, &buf_type); 411 io_parms.tcon = tcon;
412 io_parms.offset = 0;
413 io_parms.length = 24;
414 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf,
415 &buf_type);
411 if ((rc == 0) && (bytes_read >= 8)) { 416 if ((rc == 0) && (bytes_read >= 8)) {
412 if (memcmp("IntxBLK", pbuf, 8) == 0) { 417 if (memcmp("IntxBLK", pbuf, 8) == 0) {
413 cFYI(1, "Block device"); 418 cFYI(1, "Block device");
@@ -468,7 +473,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
468 char ea_value[4]; 473 char ea_value[4];
469 __u32 mode; 474 __u32 mode;
470 struct tcon_link *tlink; 475 struct tcon_link *tlink;
471 struct cifsTconInfo *tcon; 476 struct cifs_tcon *tcon;
472 477
473 tlink = cifs_sb_tlink(cifs_sb); 478 tlink = cifs_sb_tlink(cifs_sb);
474 if (IS_ERR(tlink)) 479 if (IS_ERR(tlink))
@@ -502,7 +507,7 @@ static void
502cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, 507cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
503 struct cifs_sb_info *cifs_sb, bool adjust_tz) 508 struct cifs_sb_info *cifs_sb, bool adjust_tz)
504{ 509{
505 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 510 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
506 511
507 memset(fattr, 0, sizeof(*fattr)); 512 memset(fattr, 0, sizeof(*fattr));
508 fattr->cf_cifsattrs = le32_to_cpu(info->Attributes); 513 fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
@@ -553,7 +558,7 @@ int cifs_get_file_info(struct file *filp)
553 struct inode *inode = filp->f_path.dentry->d_inode; 558 struct inode *inode = filp->f_path.dentry->d_inode;
554 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 559 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
555 struct cifsFileInfo *cfile = filp->private_data; 560 struct cifsFileInfo *cfile = filp->private_data;
556 struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink); 561 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
557 562
558 xid = GetXid(); 563 xid = GetXid();
559 rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); 564 rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -590,7 +595,7 @@ int cifs_get_inode_info(struct inode **pinode,
590 struct super_block *sb, int xid, const __u16 *pfid) 595 struct super_block *sb, int xid, const __u16 *pfid)
591{ 596{
592 int rc = 0, tmprc; 597 int rc = 0, tmprc;
593 struct cifsTconInfo *pTcon; 598 struct cifs_tcon *pTcon;
594 struct tcon_link *tlink; 599 struct tcon_link *tlink;
595 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 600 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
596 char *buf = NULL; 601 char *buf = NULL;
@@ -735,10 +740,10 @@ static const struct inode_operations cifs_ipc_inode_ops = {
735 .lookup = cifs_lookup, 740 .lookup = cifs_lookup,
736}; 741};
737 742
738char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 743char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
739 struct cifsTconInfo *tcon) 744 struct cifs_tcon *tcon)
740{ 745{
741 int pplen = cifs_sb->prepathlen; 746 int pplen = vol->prepath ? strlen(vol->prepath) : 0;
742 int dfsplen; 747 int dfsplen;
743 char *full_path = NULL; 748 char *full_path = NULL;
744 749
@@ -772,7 +777,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
772 } 777 }
773 } 778 }
774 } 779 }
775 strncpy(full_path + dfsplen, cifs_sb->prepath, pplen); 780 strncpy(full_path + dfsplen, vol->prepath, pplen);
776 full_path[dfsplen + pplen] = 0; /* add trailing null */ 781 full_path[dfsplen + pplen] = 0; /* add trailing null */
777 return full_path; 782 return full_path;
778} 783}
@@ -884,19 +889,13 @@ struct inode *cifs_root_iget(struct super_block *sb)
884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 889 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
885 struct inode *inode = NULL; 890 struct inode *inode = NULL;
886 long rc; 891 long rc;
887 char *full_path; 892 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
888 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
889
890 full_path = cifs_build_path_to_root(cifs_sb, tcon);
891 if (full_path == NULL)
892 return ERR_PTR(-ENOMEM);
893 893
894 xid = GetXid(); 894 xid = GetXid();
895 if (tcon->unix_ext) 895 if (tcon->unix_ext)
896 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); 896 rc = cifs_get_inode_info_unix(&inode, "", sb, xid);
897 else 897 else
898 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 898 rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL);
899 xid, NULL);
900 899
901 if (!inode) { 900 if (!inode) {
902 inode = ERR_PTR(rc); 901 inode = ERR_PTR(rc);
@@ -922,7 +921,6 @@ struct inode *cifs_root_iget(struct super_block *sb)
922 } 921 }
923 922
924out: 923out:
925 kfree(full_path);
926 /* can not call macro FreeXid here since in a void func 924 /* can not call macro FreeXid here since in a void func
927 * TODO: This is no longer true 925 * TODO: This is no longer true
928 */ 926 */
@@ -943,7 +941,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
943 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 941 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
944 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 942 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
945 struct tcon_link *tlink = NULL; 943 struct tcon_link *tlink = NULL;
946 struct cifsTconInfo *pTcon; 944 struct cifs_tcon *pTcon;
947 FILE_BASIC_INFO info_buf; 945 FILE_BASIC_INFO info_buf;
948 946
949 if (attrs == NULL) 947 if (attrs == NULL)
@@ -1061,7 +1059,7 @@ cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid)
1061 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 1059 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1062 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1060 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1063 struct tcon_link *tlink; 1061 struct tcon_link *tlink;
1064 struct cifsTconInfo *tcon; 1062 struct cifs_tcon *tcon;
1065 __u32 dosattr, origattr; 1063 __u32 dosattr, origattr;
1066 FILE_BASIC_INFO *info_buf = NULL; 1064 FILE_BASIC_INFO *info_buf = NULL;
1067 1065
@@ -1179,7 +1177,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1179 struct super_block *sb = dir->i_sb; 1177 struct super_block *sb = dir->i_sb;
1180 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 1178 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
1181 struct tcon_link *tlink; 1179 struct tcon_link *tlink;
1182 struct cifsTconInfo *tcon; 1180 struct cifs_tcon *tcon;
1183 struct iattr *attrs = NULL; 1181 struct iattr *attrs = NULL;
1184 __u32 dosattr = 0, origattr = 0; 1182 __u32 dosattr = 0, origattr = 0;
1185 1183
@@ -1277,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1277 int xid; 1275 int xid;
1278 struct cifs_sb_info *cifs_sb; 1276 struct cifs_sb_info *cifs_sb;
1279 struct tcon_link *tlink; 1277 struct tcon_link *tlink;
1280 struct cifsTconInfo *pTcon; 1278 struct cifs_tcon *pTcon;
1281 char *full_path = NULL; 1279 char *full_path = NULL;
1282 struct inode *newinode = NULL; 1280 struct inode *newinode = NULL;
1283 struct cifs_fattr fattr; 1281 struct cifs_fattr fattr;
@@ -1455,7 +1453,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1455 int xid; 1453 int xid;
1456 struct cifs_sb_info *cifs_sb; 1454 struct cifs_sb_info *cifs_sb;
1457 struct tcon_link *tlink; 1455 struct tcon_link *tlink;
1458 struct cifsTconInfo *pTcon; 1456 struct cifs_tcon *pTcon;
1459 char *full_path = NULL; 1457 char *full_path = NULL;
1460 struct cifsInodeInfo *cifsInode; 1458 struct cifsInodeInfo *cifsInode;
1461 1459
@@ -1512,7 +1510,7 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
1512{ 1510{
1513 struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); 1511 struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb);
1514 struct tcon_link *tlink; 1512 struct tcon_link *tlink;
1515 struct cifsTconInfo *pTcon; 1513 struct cifs_tcon *pTcon;
1516 __u16 srcfid; 1514 __u16 srcfid;
1517 int oplock, rc; 1515 int oplock, rc;
1518 1516
@@ -1564,7 +1562,7 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1564 char *toName = NULL; 1562 char *toName = NULL;
1565 struct cifs_sb_info *cifs_sb; 1563 struct cifs_sb_info *cifs_sb;
1566 struct tcon_link *tlink; 1564 struct tcon_link *tlink;
1567 struct cifsTconInfo *tcon; 1565 struct cifs_tcon *tcon;
1568 FILE_UNIX_BASIC_INFO *info_buf_source = NULL; 1566 FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
1569 FILE_UNIX_BASIC_INFO *info_buf_target; 1567 FILE_UNIX_BASIC_INFO *info_buf_target;
1570 int xid, rc, tmprc; 1568 int xid, rc, tmprc;
@@ -1794,7 +1792,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1794 struct kstat *stat) 1792 struct kstat *stat)
1795{ 1793{
1796 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); 1794 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
1797 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 1795 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1798 struct inode *inode = dentry->d_inode; 1796 struct inode *inode = dentry->d_inode;
1799 int rc; 1797 int rc;
1800 1798
@@ -1872,7 +1870,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1872 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 1870 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1873 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1871 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1874 struct tcon_link *tlink = NULL; 1872 struct tcon_link *tlink = NULL;
1875 struct cifsTconInfo *pTcon = NULL; 1873 struct cifs_tcon *pTcon = NULL;
1874 struct cifs_io_parms io_parms;
1876 1875
1877 /* 1876 /*
1878 * To avoid spurious oplock breaks from server, in the case of 1877 * To avoid spurious oplock breaks from server, in the case of
@@ -1894,8 +1893,14 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1894 cFYI(1, "SetFSize for attrs rc = %d", rc); 1893 cFYI(1, "SetFSize for attrs rc = %d", rc);
1895 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1894 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1896 unsigned int bytes_written; 1895 unsigned int bytes_written;
1897 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size, 1896
1898 &bytes_written, NULL, NULL, 1); 1897 io_parms.netfid = nfid;
1898 io_parms.pid = npid;
1899 io_parms.tcon = pTcon;
1900 io_parms.offset = 0;
1901 io_parms.length = attrs->ia_size;
1902 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
1903 NULL, NULL, 1);
1899 cFYI(1, "Wrt seteof rc %d", rc); 1904 cFYI(1, "Wrt seteof rc %d", rc);
1900 } 1905 }
1901 } else 1906 } else
@@ -1930,10 +1935,15 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1930 CIFS_MOUNT_MAP_SPECIAL_CHR); 1935 CIFS_MOUNT_MAP_SPECIAL_CHR);
1931 if (rc == 0) { 1936 if (rc == 0) {
1932 unsigned int bytes_written; 1937 unsigned int bytes_written;
1933 rc = CIFSSMBWrite(xid, pTcon, netfid, 0, 1938
1934 attrs->ia_size, 1939 io_parms.netfid = netfid;
1935 &bytes_written, NULL, 1940 io_parms.pid = current->tgid;
1936 NULL, 1); 1941 io_parms.tcon = pTcon;
1942 io_parms.offset = 0;
1943 io_parms.length = attrs->ia_size;
1944 rc = CIFSSMBWrite(xid, &io_parms,
1945 &bytes_written,
1946 NULL, NULL, 1);
1937 cFYI(1, "wrt seteof rc %d", rc); 1947 cFYI(1, "wrt seteof rc %d", rc);
1938 CIFSSMBClose(xid, pTcon, netfid); 1948 CIFSSMBClose(xid, pTcon, netfid);
1939 } 1949 }
@@ -1961,7 +1971,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1961 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 1971 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1962 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1972 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1963 struct tcon_link *tlink; 1973 struct tcon_link *tlink;
1964 struct cifsTconInfo *pTcon; 1974 struct cifs_tcon *pTcon;
1965 struct cifs_unix_set_info_args *args = NULL; 1975 struct cifs_unix_set_info_args *args = NULL;
1966 struct cifsFileInfo *open_file; 1976 struct cifsFileInfo *open_file;
1967 1977
@@ -2247,7 +2257,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
2247{ 2257{
2248 struct inode *inode = direntry->d_inode; 2258 struct inode *inode = direntry->d_inode;
2249 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2259 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2250 struct cifsTconInfo *pTcon = cifs_sb_master_tcon(cifs_sb); 2260 struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
2251 2261
2252 if (pTcon->unix_ext) 2262 if (pTcon->unix_ext)
2253 return cifs_setattr_unix(direntry, attrs); 2263 return cifs_setattr_unix(direntry, attrs);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 0c98672d0122..4221b5e48a42 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,7 +38,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
38 struct cifs_sb_info *cifs_sb; 38 struct cifs_sb_info *cifs_sb;
39#ifdef CONFIG_CIFS_POSIX 39#ifdef CONFIG_CIFS_POSIX
40 struct cifsFileInfo *pSMBFile = filep->private_data; 40 struct cifsFileInfo *pSMBFile = filep->private_data;
41 struct cifsTconInfo *tcon; 41 struct cifs_tcon *tcon;
42 __u64 ExtAttrBits = 0; 42 __u64 ExtAttrBits = 0;
43 __u64 ExtAttrMask = 0; 43 __u64 ExtAttrMask = 0;
44 __u64 caps; 44 __u64 caps;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index ce417a9764a3..556b1a0b54de 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -175,7 +175,7 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
175} 175}
176 176
177static int 177static int
178CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon, 178CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon,
179 const char *fromName, const char *toName, 179 const char *fromName, const char *toName,
180 const struct nls_table *nls_codepage, int remap) 180 const struct nls_table *nls_codepage, int remap)
181{ 181{
@@ -184,6 +184,7 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
184 __u16 netfid = 0; 184 __u16 netfid = 0;
185 u8 *buf; 185 u8 *buf;
186 unsigned int bytes_written = 0; 186 unsigned int bytes_written = 0;
187 struct cifs_io_parms io_parms;
187 188
188 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); 189 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
189 if (!buf) 190 if (!buf)
@@ -203,10 +204,13 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
203 return rc; 204 return rc;
204 } 205 }
205 206
206 rc = CIFSSMBWrite(xid, tcon, netfid, 207 io_parms.netfid = netfid;
207 CIFS_MF_SYMLINK_FILE_SIZE /* length */, 208 io_parms.pid = current->tgid;
208 0 /* offset */, 209 io_parms.tcon = tcon;
209 &bytes_written, buf, NULL, 0); 210 io_parms.offset = 0;
211 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
212
213 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, buf, NULL, 0);
210 CIFSSMBClose(xid, tcon, netfid); 214 CIFSSMBClose(xid, tcon, netfid);
211 kfree(buf); 215 kfree(buf);
212 if (rc != 0) 216 if (rc != 0)
@@ -219,7 +223,7 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
219} 223}
220 224
221static int 225static int
222CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon, 226CIFSQueryMFSymLink(const int xid, struct cifs_tcon *tcon,
223 const unsigned char *searchName, char **symlinkinfo, 227 const unsigned char *searchName, char **symlinkinfo,
224 const struct nls_table *nls_codepage, int remap) 228 const struct nls_table *nls_codepage, int remap)
225{ 229{
@@ -231,6 +235,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
231 unsigned int bytes_read = 0; 235 unsigned int bytes_read = 0;
232 int buf_type = CIFS_NO_BUFFER; 236 int buf_type = CIFS_NO_BUFFER;
233 unsigned int link_len = 0; 237 unsigned int link_len = 0;
238 struct cifs_io_parms io_parms;
234 FILE_ALL_INFO file_info; 239 FILE_ALL_INFO file_info;
235 240
236 rc = CIFSSMBOpen(xid, tcon, searchName, FILE_OPEN, GENERIC_READ, 241 rc = CIFSSMBOpen(xid, tcon, searchName, FILE_OPEN, GENERIC_READ,
@@ -249,11 +254,13 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
249 if (!buf) 254 if (!buf)
250 return -ENOMEM; 255 return -ENOMEM;
251 pbuf = buf; 256 pbuf = buf;
257 io_parms.netfid = netfid;
258 io_parms.pid = current->tgid;
259 io_parms.tcon = tcon;
260 io_parms.offset = 0;
261 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
252 262
253 rc = CIFSSMBRead(xid, tcon, netfid, 263 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
254 CIFS_MF_SYMLINK_FILE_SIZE /* length */,
255 0 /* offset */,
256 &bytes_read, &pbuf, &buf_type);
257 CIFSSMBClose(xid, tcon, netfid); 264 CIFSSMBClose(xid, tcon, netfid);
258 if (rc != 0) { 265 if (rc != 0) {
259 kfree(buf); 266 kfree(buf);
@@ -291,7 +298,8 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
291 int oplock = 0; 298 int oplock = 0;
292 __u16 netfid = 0; 299 __u16 netfid = 0;
293 struct tcon_link *tlink; 300 struct tcon_link *tlink;
294 struct cifsTconInfo *pTcon; 301 struct cifs_tcon *pTcon;
302 struct cifs_io_parms io_parms;
295 u8 *buf; 303 u8 *buf;
296 char *pbuf; 304 char *pbuf;
297 unsigned int bytes_read = 0; 305 unsigned int bytes_read = 0;
@@ -328,11 +336,13 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
328 goto out; 336 goto out;
329 } 337 }
330 pbuf = buf; 338 pbuf = buf;
339 io_parms.netfid = netfid;
340 io_parms.pid = current->tgid;
341 io_parms.tcon = pTcon;
342 io_parms.offset = 0;
343 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
331 344
332 rc = CIFSSMBRead(xid, pTcon, netfid, 345 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
333 CIFS_MF_SYMLINK_FILE_SIZE /* length */,
334 0 /* offset */,
335 &bytes_read, &pbuf, &buf_type);
336 CIFSSMBClose(xid, pTcon, netfid); 346 CIFSSMBClose(xid, pTcon, netfid);
337 if (rc != 0) { 347 if (rc != 0) {
338 kfree(buf); 348 kfree(buf);
@@ -370,7 +380,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
370 char *toName = NULL; 380 char *toName = NULL;
371 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 381 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
372 struct tcon_link *tlink; 382 struct tcon_link *tlink;
373 struct cifsTconInfo *pTcon; 383 struct cifs_tcon *pTcon;
374 struct cifsInodeInfo *cifsInode; 384 struct cifsInodeInfo *cifsInode;
375 385
376 tlink = cifs_sb_tlink(cifs_sb); 386 tlink = cifs_sb_tlink(cifs_sb);
@@ -445,7 +455,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
445 char *target_path = NULL; 455 char *target_path = NULL;
446 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 456 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
447 struct tcon_link *tlink = NULL; 457 struct tcon_link *tlink = NULL;
448 struct cifsTconInfo *tcon; 458 struct cifs_tcon *tcon;
449 459
450 xid = GetXid(); 460 xid = GetXid();
451 461
@@ -518,7 +528,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
518 int xid; 528 int xid;
519 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 529 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
520 struct tcon_link *tlink; 530 struct tcon_link *tlink;
521 struct cifsTconInfo *pTcon; 531 struct cifs_tcon *pTcon;
522 char *full_path = NULL; 532 char *full_path = NULL;
523 struct inode *newinode = NULL; 533 struct inode *newinode = NULL;
524 534
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 907531ac5888..03a1f491d39b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -67,12 +67,12 @@ _FreeXid(unsigned int xid)
67 spin_unlock(&GlobalMid_Lock); 67 spin_unlock(&GlobalMid_Lock);
68} 68}
69 69
70struct cifsSesInfo * 70struct cifs_ses *
71sesInfoAlloc(void) 71sesInfoAlloc(void)
72{ 72{
73 struct cifsSesInfo *ret_buf; 73 struct cifs_ses *ret_buf;
74 74
75 ret_buf = kzalloc(sizeof(struct cifsSesInfo), GFP_KERNEL); 75 ret_buf = kzalloc(sizeof(struct cifs_ses), GFP_KERNEL);
76 if (ret_buf) { 76 if (ret_buf) {
77 atomic_inc(&sesInfoAllocCount); 77 atomic_inc(&sesInfoAllocCount);
78 ret_buf->status = CifsNew; 78 ret_buf->status = CifsNew;
@@ -85,7 +85,7 @@ sesInfoAlloc(void)
85} 85}
86 86
87void 87void
88sesInfoFree(struct cifsSesInfo *buf_to_free) 88sesInfoFree(struct cifs_ses *buf_to_free)
89{ 89{
90 if (buf_to_free == NULL) { 90 if (buf_to_free == NULL) {
91 cFYI(1, "Null buffer passed to sesInfoFree"); 91 cFYI(1, "Null buffer passed to sesInfoFree");
@@ -105,11 +105,11 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
105 kfree(buf_to_free); 105 kfree(buf_to_free);
106} 106}
107 107
108struct cifsTconInfo * 108struct cifs_tcon *
109tconInfoAlloc(void) 109tconInfoAlloc(void)
110{ 110{
111 struct cifsTconInfo *ret_buf; 111 struct cifs_tcon *ret_buf;
112 ret_buf = kzalloc(sizeof(struct cifsTconInfo), GFP_KERNEL); 112 ret_buf = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
113 if (ret_buf) { 113 if (ret_buf) {
114 atomic_inc(&tconInfoAllocCount); 114 atomic_inc(&tconInfoAllocCount);
115 ret_buf->tidStatus = CifsNew; 115 ret_buf->tidStatus = CifsNew;
@@ -124,7 +124,7 @@ tconInfoAlloc(void)
124} 124}
125 125
126void 126void
127tconInfoFree(struct cifsTconInfo *buf_to_free) 127tconInfoFree(struct cifs_tcon *buf_to_free)
128{ 128{
129 if (buf_to_free == NULL) { 129 if (buf_to_free == NULL) {
130 cFYI(1, "Null buffer passed to tconInfoFree"); 130 cFYI(1, "Null buffer passed to tconInfoFree");
@@ -295,11 +295,11 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
295 case it is responsbility of caller to set the mid */ 295 case it is responsbility of caller to set the mid */
296void 296void
297header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , 297header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
298 const struct cifsTconInfo *treeCon, int word_count 298 const struct cifs_tcon *treeCon, int word_count
299 /* length of fixed section (word count) in two byte units */) 299 /* length of fixed section (word count) in two byte units */)
300{ 300{
301 struct list_head *temp_item; 301 struct list_head *temp_item;
302 struct cifsSesInfo *ses; 302 struct cifs_ses *ses;
303 char *temp = (char *) buffer; 303 char *temp = (char *) buffer;
304 304
305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
@@ -359,7 +359,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
359 "did not match tcon uid"); 359 "did not match tcon uid");
360 spin_lock(&cifs_tcp_ses_lock); 360 spin_lock(&cifs_tcp_ses_lock);
361 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { 361 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
362 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); 362 ses = list_entry(temp_item, struct cifs_ses, smb_ses_list);
363 if (ses->linux_uid == current_fsuid()) { 363 if (ses->linux_uid == current_fsuid()) {
364 if (ses->server == treeCon->ses->server) { 364 if (ses->server == treeCon->ses->server) {
365 cFYI(1, "found matching uid substitute right smb_uid"); 365 cFYI(1, "found matching uid substitute right smb_uid");
@@ -380,7 +380,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
380 if (treeCon->nocase) 380 if (treeCon->nocase)
381 buffer->Flags |= SMBFLG_CASELESS; 381 buffer->Flags |= SMBFLG_CASELESS;
382 if ((treeCon->ses) && (treeCon->ses->server)) 382 if ((treeCon->ses) && (treeCon->ses->server))
383 if (treeCon->ses->server->secMode & 383 if (treeCon->ses->server->sec_mode &
384 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 384 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
385 buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 385 buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
386 } 386 }
@@ -507,8 +507,8 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
507{ 507{
508 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; 508 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
509 struct list_head *tmp, *tmp1, *tmp2; 509 struct list_head *tmp, *tmp1, *tmp2;
510 struct cifsSesInfo *ses; 510 struct cifs_ses *ses;
511 struct cifsTconInfo *tcon; 511 struct cifs_tcon *tcon;
512 struct cifsInodeInfo *pCifsInode; 512 struct cifsInodeInfo *pCifsInode;
513 struct cifsFileInfo *netfile; 513 struct cifsFileInfo *netfile;
514 514
@@ -566,9 +566,9 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
566 /* look up tcon based on tid & uid */ 566 /* look up tcon based on tid & uid */
567 spin_lock(&cifs_tcp_ses_lock); 567 spin_lock(&cifs_tcp_ses_lock);
568 list_for_each(tmp, &srv->smb_ses_list) { 568 list_for_each(tmp, &srv->smb_ses_list) {
569 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); 569 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
570 list_for_each(tmp1, &ses->tcon_list) { 570 list_for_each(tmp1, &ses->tcon_list) {
571 tcon = list_entry(tmp1, struct cifsTconInfo, tcon_list); 571 tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
572 if (tcon->tid != buf->Tid) 572 if (tcon->tid != buf->Tid)
573 continue; 573 continue;
574 574
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79b71c2c7c9d..73e47e84b61a 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -836,7 +836,7 @@ ntstatus_to_dos(__u32 ntstatus, __u8 *eclass, __u16 *ecode)
836} 836}
837 837
838int 838int
839map_smb_to_linux_error(struct smb_hdr *smb, int logErr) 839map_smb_to_linux_error(struct smb_hdr *smb, bool logErr)
840{ 840{
841 unsigned int i; 841 unsigned int i;
842 int rc = -EIO; /* if transport error smb error may not be set */ 842 int rc = -EIO; /* if transport error smb error may not be set */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f8e4cd2a7912..6751e745bbc6 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -195,7 +195,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
195 int len; 195 int len;
196 int oplock = 0; 196 int oplock = 0;
197 int rc; 197 int rc;
198 struct cifsTconInfo *ptcon = cifs_sb_tcon(cifs_sb); 198 struct cifs_tcon *ptcon = cifs_sb_tcon(cifs_sb);
199 char *tmpbuffer; 199 char *tmpbuffer;
200 200
201 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, 201 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ,
@@ -223,7 +223,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
223 struct cifsFileInfo *cifsFile; 223 struct cifsFileInfo *cifsFile;
224 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 224 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
225 struct tcon_link *tlink = NULL; 225 struct tcon_link *tlink = NULL;
226 struct cifsTconInfo *pTcon; 226 struct cifs_tcon *pTcon;
227 227
228 if (file->private_data == NULL) { 228 if (file->private_data == NULL) {
229 tlink = cifs_sb_tlink(cifs_sb); 229 tlink = cifs_sb_tlink(cifs_sb);
@@ -496,7 +496,7 @@ static int cifs_save_resume_key(const char *current_entry,
496 assume that they are located in the findfirst return buffer.*/ 496 assume that they are located in the findfirst return buffer.*/
497/* We start counting in the buffer with entry 2 and increment for every 497/* We start counting in the buffer with entry 2 and increment for every
498 entry (do not increment for . or .. entry) */ 498 entry (do not increment for . or .. entry) */
499static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, 499static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
500 struct file *file, char **ppCurrentEntry, int *num_to_ret) 500 struct file *file, char **ppCurrentEntry, int *num_to_ret)
501{ 501{
502 int rc = 0; 502 int rc = 0;
@@ -764,7 +764,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
764{ 764{
765 int rc = 0; 765 int rc = 0;
766 int xid, i; 766 int xid, i;
767 struct cifsTconInfo *pTcon; 767 struct cifs_tcon *pTcon;
768 struct cifsFileInfo *cifsFile = NULL; 768 struct cifsFileInfo *cifsFile = NULL;
769 char *current_entry; 769 char *current_entry;
770 int num_to_fill = 0; 770 int num_to_fill = 0;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7dd462100378..3892ab817a36 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -37,13 +37,13 @@
37 * the socket has been reestablished (so we know whether to use vc 0). 37 * the socket has been reestablished (so we know whether to use vc 0).
38 * Called while holding the cifs_tcp_ses_lock, so do not block 38 * Called while holding the cifs_tcp_ses_lock, so do not block
39 */ 39 */
40static bool is_first_ses_reconnect(struct cifsSesInfo *ses) 40static bool is_first_ses_reconnect(struct cifs_ses *ses)
41{ 41{
42 struct list_head *tmp; 42 struct list_head *tmp;
43 struct cifsSesInfo *tmp_ses; 43 struct cifs_ses *tmp_ses;
44 44
45 list_for_each(tmp, &ses->server->smb_ses_list) { 45 list_for_each(tmp, &ses->server->smb_ses_list) {
46 tmp_ses = list_entry(tmp, struct cifsSesInfo, 46 tmp_ses = list_entry(tmp, struct cifs_ses,
47 smb_ses_list); 47 smb_ses_list);
48 if (tmp_ses->need_reconnect == false) 48 if (tmp_ses->need_reconnect == false)
49 return false; 49 return false;
@@ -61,11 +61,11 @@ static bool is_first_ses_reconnect(struct cifsSesInfo *ses)
61 * any vc but zero (some servers reset the connection on vcnum zero) 61 * any vc but zero (some servers reset the connection on vcnum zero)
62 * 62 *
63 */ 63 */
64static __le16 get_next_vcnum(struct cifsSesInfo *ses) 64static __le16 get_next_vcnum(struct cifs_ses *ses)
65{ 65{
66 __u16 vcnum = 0; 66 __u16 vcnum = 0;
67 struct list_head *tmp; 67 struct list_head *tmp;
68 struct cifsSesInfo *tmp_ses; 68 struct cifs_ses *tmp_ses;
69 __u16 max_vcs = ses->server->max_vcs; 69 __u16 max_vcs = ses->server->max_vcs;
70 __u16 i; 70 __u16 i;
71 int free_vc_found = 0; 71 int free_vc_found = 0;
@@ -87,7 +87,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses)
87 free_vc_found = 1; 87 free_vc_found = 1;
88 88
89 list_for_each(tmp, &ses->server->smb_ses_list) { 89 list_for_each(tmp, &ses->server->smb_ses_list) {
90 tmp_ses = list_entry(tmp, struct cifsSesInfo, 90 tmp_ses = list_entry(tmp, struct cifs_ses,
91 smb_ses_list); 91 smb_ses_list);
92 if (tmp_ses->vcnum == i) { 92 if (tmp_ses->vcnum == i) {
93 free_vc_found = 0; 93 free_vc_found = 0;
@@ -114,7 +114,7 @@ get_vc_num_exit:
114 return cpu_to_le16(vcnum); 114 return cpu_to_le16(vcnum);
115} 115}
116 116
117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) 117static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
118{ 118{
119 __u32 capabilities = 0; 119 __u32 capabilities = 0;
120 120
@@ -136,7 +136,7 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
136 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | 136 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
137 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; 137 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
138 138
139 if (ses->server->secMode & 139 if (ses->server->sec_mode &
140 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 140 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
141 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 141 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
142 142
@@ -181,7 +181,7 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
181 *pbcc_area = bcc_ptr; 181 *pbcc_area = bcc_ptr;
182} 182}
183 183
184static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses, 184static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
185 const struct nls_table *nls_cp) 185 const struct nls_table *nls_cp)
186{ 186{
187 char *bcc_ptr = *pbcc_area; 187 char *bcc_ptr = *pbcc_area;
@@ -204,7 +204,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
204} 204}
205 205
206 206
207static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, 207static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
208 const struct nls_table *nls_cp) 208 const struct nls_table *nls_cp)
209{ 209{
210 char *bcc_ptr = *pbcc_area; 210 char *bcc_ptr = *pbcc_area;
@@ -236,7 +236,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
236 *pbcc_area = bcc_ptr; 236 *pbcc_area = bcc_ptr;
237} 237}
238 238
239static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, 239static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
240 const struct nls_table *nls_cp) 240 const struct nls_table *nls_cp)
241{ 241{
242 char *bcc_ptr = *pbcc_area; 242 char *bcc_ptr = *pbcc_area;
@@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
276} 276}
277 277
278static void 278static void
279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, 279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses,
280 const struct nls_table *nls_cp) 280 const struct nls_table *nls_cp)
281{ 281{
282 int len; 282 int len;
@@ -310,7 +310,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
310} 310}
311 311
312static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, 312static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
313 struct cifsSesInfo *ses, 313 struct cifs_ses *ses,
314 const struct nls_table *nls_cp) 314 const struct nls_table *nls_cp)
315{ 315{
316 int rc = 0; 316 int rc = 0;
@@ -364,7 +364,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
364} 364}
365 365
366static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, 366static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
367 struct cifsSesInfo *ses) 367 struct cifs_ses *ses)
368{ 368{
369 unsigned int tioffset; /* challenge message target info area */ 369 unsigned int tioffset; /* challenge message target info area */
370 unsigned int tilen; /* challenge message target info area length */ 370 unsigned int tilen; /* challenge message target info area length */
@@ -411,7 +411,7 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
411/* We do not malloc the blob, it is passed in pbuffer, because 411/* We do not malloc the blob, it is passed in pbuffer, because
412 it is fixed size, and small, making this approach cleaner */ 412 it is fixed size, and small, making this approach cleaner */
413static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, 413static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
414 struct cifsSesInfo *ses) 414 struct cifs_ses *ses)
415{ 415{
416 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; 416 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
417 __u32 flags; 417 __u32 flags;
@@ -424,7 +424,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | 424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
426 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 426 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
427 if (ses->server->secMode & 427 if (ses->server->sec_mode &
428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
429 flags |= NTLMSSP_NEGOTIATE_SIGN; 429 flags |= NTLMSSP_NEGOTIATE_SIGN;
430 if (!ses->server->session_estab) 430 if (!ses->server->session_estab)
@@ -449,7 +449,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
449 This function returns the length of the data in the blob */ 449 This function returns the length of the data in the blob */
450static int build_ntlmssp_auth_blob(unsigned char *pbuffer, 450static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
451 u16 *buflen, 451 u16 *buflen,
452 struct cifsSesInfo *ses, 452 struct cifs_ses *ses,
453 const struct nls_table *nls_cp) 453 const struct nls_table *nls_cp)
454{ 454{
455 int rc; 455 int rc;
@@ -464,10 +464,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
464 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | 464 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
465 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 465 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
466 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 466 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
467 if (ses->server->secMode & 467 if (ses->server->sec_mode &
468 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 468 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
469 flags |= NTLMSSP_NEGOTIATE_SIGN; 469 flags |= NTLMSSP_NEGOTIATE_SIGN;
470 if (ses->server->secMode & SECMODE_SIGN_REQUIRED) 470 if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
471 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 471 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
472 472
473 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); 473 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
@@ -551,7 +551,7 @@ setup_ntlmv2_ret:
551} 551}
552 552
553int 553int
554CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 554CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
555 const struct nls_table *nls_cp) 555 const struct nls_table *nls_cp)
556{ 556{
557 int rc = 0; 557 int rc = 0;
@@ -657,7 +657,7 @@ ssetup_ntlmssp_authenticate:
657 */ 657 */
658 658
659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey, 659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
660 ses->server->secMode & SECMODE_PW_ENCRYPT ? 660 ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
661 true : false, lnm_session_key); 661 true : false, lnm_session_key);
662 662
663 ses->flags |= CIFS_SES_LANMAN; 663 ses->flags |= CIFS_SES_LANMAN;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index f2513fb8c391..147aa22c3c3a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -295,7 +295,7 @@ static int wait_for_free_request(struct TCP_Server_Info *server,
295 return 0; 295 return 0;
296} 296}
297 297
298static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, 298static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
299 struct mid_q_entry **ppmidQ) 299 struct mid_q_entry **ppmidQ)
300{ 300{
301 if (ses->server->tcpStatus == CifsExiting) { 301 if (ses->server->tcpStatus == CifsExiting) {
@@ -342,22 +342,24 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
342 * the result. Caller is responsible for dealing with timeouts. 342 * the result. Caller is responsible for dealing with timeouts.
343 */ 343 */
344int 344int
345cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, 345cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
346 mid_callback_t *callback, void *cbdata) 346 unsigned int nvec, mid_callback_t *callback, void *cbdata,
347 bool ignore_pend)
347{ 348{
348 int rc; 349 int rc;
349 struct mid_q_entry *mid; 350 struct mid_q_entry *mid;
351 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
350 352
351 rc = wait_for_free_request(server, CIFS_ASYNC_OP); 353 rc = wait_for_free_request(server, ignore_pend ? CIFS_ASYNC_OP : 0);
352 if (rc) 354 if (rc)
353 return rc; 355 return rc;
354 356
355 /* enable signing if server requires it */ 357 /* enable signing if server requires it */
356 if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 358 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
357 in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 359 hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
358 360
359 mutex_lock(&server->srv_mutex); 361 mutex_lock(&server->srv_mutex);
360 mid = AllocMidQEntry(in_buf, server); 362 mid = AllocMidQEntry(hdr, server);
361 if (mid == NULL) { 363 if (mid == NULL) {
362 mutex_unlock(&server->srv_mutex); 364 mutex_unlock(&server->srv_mutex);
363 return -ENOMEM; 365 return -ENOMEM;
@@ -368,7 +370,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
368 list_add_tail(&mid->qhead, &server->pending_mid_q); 370 list_add_tail(&mid->qhead, &server->pending_mid_q);
369 spin_unlock(&GlobalMid_Lock); 371 spin_unlock(&GlobalMid_Lock);
370 372
371 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 373 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
372 if (rc) { 374 if (rc) {
373 mutex_unlock(&server->srv_mutex); 375 mutex_unlock(&server->srv_mutex);
374 goto out_err; 376 goto out_err;
@@ -380,7 +382,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
380#ifdef CONFIG_CIFS_STATS2 382#ifdef CONFIG_CIFS_STATS2
381 atomic_inc(&server->inSend); 383 atomic_inc(&server->inSend);
382#endif 384#endif
383 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 385 rc = smb_sendv(server, iov, nvec);
384#ifdef CONFIG_CIFS_STATS2 386#ifdef CONFIG_CIFS_STATS2
385 atomic_dec(&server->inSend); 387 atomic_dec(&server->inSend);
386 mid->when_sent = jiffies; 388 mid->when_sent = jiffies;
@@ -407,7 +409,7 @@ out_err:
407 * 409 *
408 */ 410 */
409int 411int
410SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, 412SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
411 struct smb_hdr *in_buf, int flags) 413 struct smb_hdr *in_buf, int flags)
412{ 414{
413 int rc; 415 int rc;
@@ -424,7 +426,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
424} 426}
425 427
426static int 428static int
427sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) 429cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
428{ 430{
429 int rc = 0; 431 int rc = 0;
430 432
@@ -432,28 +434,21 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
432 mid->mid, mid->midState); 434 mid->mid, mid->midState);
433 435
434 spin_lock(&GlobalMid_Lock); 436 spin_lock(&GlobalMid_Lock);
435 /* ensure that it's no longer on the pending_mid_q */
436 list_del_init(&mid->qhead);
437
438 switch (mid->midState) { 437 switch (mid->midState) {
439 case MID_RESPONSE_RECEIVED: 438 case MID_RESPONSE_RECEIVED:
440 spin_unlock(&GlobalMid_Lock); 439 spin_unlock(&GlobalMid_Lock);
441 return rc; 440 return rc;
442 case MID_REQUEST_SUBMITTED:
443 /* socket is going down, reject all calls */
444 if (server->tcpStatus == CifsExiting) {
445 cERROR(1, "%s: canceling mid=%d cmd=0x%x state=%d",
446 __func__, mid->mid, mid->command, mid->midState);
447 rc = -EHOSTDOWN;
448 break;
449 }
450 case MID_RETRY_NEEDED: 441 case MID_RETRY_NEEDED:
451 rc = -EAGAIN; 442 rc = -EAGAIN;
452 break; 443 break;
453 case MID_RESPONSE_MALFORMED: 444 case MID_RESPONSE_MALFORMED:
454 rc = -EIO; 445 rc = -EIO;
455 break; 446 break;
447 case MID_SHUTDOWN:
448 rc = -EHOSTDOWN;
449 break;
456 default: 450 default:
451 list_del_init(&mid->qhead);
457 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 452 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
458 mid->mid, mid->midState); 453 mid->mid, mid->midState);
459 rc = -EIO; 454 rc = -EIO;
@@ -502,13 +497,31 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
502} 497}
503 498
504int 499int
505SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 500cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
501 bool log_error)
502{
503 dump_smb(mid->resp_buf,
504 min_t(u32, 92, be32_to_cpu(mid->resp_buf->smb_buf_length)));
505
506 /* convert the length into a more usable form */
507 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
508 /* FIXME: add code to kill session */
509 if (cifs_verify_signature(mid->resp_buf, server,
510 mid->sequence_number + 1) != 0)
511 cERROR(1, "Unexpected SMB signature");
512 }
513
514 /* BB special case reconnect tid and uid here? */
515 return map_smb_to_linux_error(mid->resp_buf, log_error);
516}
517
518int
519SendReceive2(const unsigned int xid, struct cifs_ses *ses,
506 struct kvec *iov, int n_vec, int *pRespBufType /* ret */, 520 struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
507 const int flags) 521 const int flags)
508{ 522{
509 int rc = 0; 523 int rc = 0;
510 int long_op; 524 int long_op;
511 unsigned int receive_len;
512 struct mid_q_entry *midQ; 525 struct mid_q_entry *midQ;
513 struct smb_hdr *in_buf = iov[0].iov_base; 526 struct smb_hdr *in_buf = iov[0].iov_base;
514 527
@@ -598,61 +611,31 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
598 611
599 cifs_small_buf_release(in_buf); 612 cifs_small_buf_release(in_buf);
600 613
601 rc = sync_mid_result(midQ, ses->server); 614 rc = cifs_sync_mid_result(midQ, ses->server);
602 if (rc != 0) { 615 if (rc != 0) {
603 atomic_dec(&ses->server->inFlight); 616 atomic_dec(&ses->server->inFlight);
604 wake_up(&ses->server->request_q); 617 wake_up(&ses->server->request_q);
605 return rc; 618 return rc;
606 } 619 }
607 620
608 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length); 621 if (!midQ->resp_buf || midQ->midState != MID_RESPONSE_RECEIVED) {
609
610 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
611 cERROR(1, "Frame too large received. Length: %d Xid: %d",
612 receive_len, xid);
613 rc = -EIO; 622 rc = -EIO;
623 cFYI(1, "Bad MID state?");
614 goto out; 624 goto out;
615 } 625 }
616 626
617 /* rcvd frame is ok */ 627 iov[0].iov_base = (char *)midQ->resp_buf;
618 628 iov[0].iov_len = be32_to_cpu(midQ->resp_buf->smb_buf_length) + 4;
619 if (midQ->resp_buf && 629 if (midQ->largeBuf)
620 (midQ->midState == MID_RESPONSE_RECEIVED)) { 630 *pRespBufType = CIFS_LARGE_BUFFER;
621 631 else
622 iov[0].iov_base = (char *)midQ->resp_buf; 632 *pRespBufType = CIFS_SMALL_BUFFER;
623 if (midQ->largeBuf)
624 *pRespBufType = CIFS_LARGE_BUFFER;
625 else
626 *pRespBufType = CIFS_SMALL_BUFFER;
627 iov[0].iov_len = receive_len + 4;
628
629 dump_smb(midQ->resp_buf, 80);
630 /* convert the length into a more usable form */
631 if ((receive_len > 24) &&
632 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
633 SECMODE_SIGN_ENABLED))) {
634 rc = cifs_verify_signature(midQ->resp_buf,
635 ses->server,
636 midQ->sequence_number+1);
637 if (rc) {
638 cERROR(1, "Unexpected SMB signature");
639 /* BB FIXME add code to kill session */
640 }
641 }
642
643 /* BB special case reconnect tid and uid here? */
644 rc = map_smb_to_linux_error(midQ->resp_buf,
645 flags & CIFS_LOG_ERROR);
646 633
647 if ((flags & CIFS_NO_RESP) == 0) 634 rc = cifs_check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR);
648 midQ->resp_buf = NULL; /* mark it so buf will
649 not be freed by
650 delete_mid */
651 } else {
652 rc = -EIO;
653 cFYI(1, "Bad MID state?");
654 }
655 635
636 /* mark it so buf will not be freed by delete_mid */
637 if ((flags & CIFS_NO_RESP) == 0)
638 midQ->resp_buf = NULL;
656out: 639out:
657 delete_mid(midQ); 640 delete_mid(midQ);
658 atomic_dec(&ses->server->inFlight); 641 atomic_dec(&ses->server->inFlight);
@@ -662,12 +645,11 @@ out:
662} 645}
663 646
664int 647int
665SendReceive(const unsigned int xid, struct cifsSesInfo *ses, 648SendReceive(const unsigned int xid, struct cifs_ses *ses,
666 struct smb_hdr *in_buf, struct smb_hdr *out_buf, 649 struct smb_hdr *in_buf, struct smb_hdr *out_buf,
667 int *pbytes_returned, const int long_op) 650 int *pbytes_returned, const int long_op)
668{ 651{
669 int rc = 0; 652 int rc = 0;
670 unsigned int receive_len;
671 struct mid_q_entry *midQ; 653 struct mid_q_entry *midQ;
672 654
673 if (ses == NULL) { 655 if (ses == NULL) {
@@ -750,54 +732,23 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
750 spin_unlock(&GlobalMid_Lock); 732 spin_unlock(&GlobalMid_Lock);
751 } 733 }
752 734
753 rc = sync_mid_result(midQ, ses->server); 735 rc = cifs_sync_mid_result(midQ, ses->server);
754 if (rc != 0) { 736 if (rc != 0) {
755 atomic_dec(&ses->server->inFlight); 737 atomic_dec(&ses->server->inFlight);
756 wake_up(&ses->server->request_q); 738 wake_up(&ses->server->request_q);
757 return rc; 739 return rc;
758 } 740 }
759 741
760 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length); 742 if (!midQ->resp_buf || !out_buf ||
761 743 midQ->midState != MID_RESPONSE_RECEIVED) {
762 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
763 cERROR(1, "Frame too large received. Length: %d Xid: %d",
764 receive_len, xid);
765 rc = -EIO;
766 goto out;
767 }
768
769 /* rcvd frame is ok */
770
771 if (midQ->resp_buf && out_buf
772 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
773 out_buf->smb_buf_length = cpu_to_be32(receive_len);
774 memcpy((char *)out_buf + 4,
775 (char *)midQ->resp_buf + 4,
776 receive_len);
777
778 dump_smb(out_buf, 92);
779 /* convert the length into a more usable form */
780 if ((receive_len > 24) &&
781 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
782 SECMODE_SIGN_ENABLED))) {
783 rc = cifs_verify_signature(out_buf,
784 ses->server,
785 midQ->sequence_number+1);
786 if (rc) {
787 cERROR(1, "Unexpected SMB signature");
788 /* BB FIXME add code to kill session */
789 }
790 }
791
792 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
793
794 /* BB special case reconnect tid and uid here? */
795 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
796 } else {
797 rc = -EIO; 744 rc = -EIO;
798 cERROR(1, "Bad MID state?"); 745 cERROR(1, "Bad MID state?");
746 goto out;
799 } 747 }
800 748
749 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length);
750 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
751 rc = cifs_check_receive(midQ, ses->server, 0);
801out: 752out:
802 delete_mid(midQ); 753 delete_mid(midQ);
803 atomic_dec(&ses->server->inFlight); 754 atomic_dec(&ses->server->inFlight);
@@ -810,12 +761,12 @@ out:
810 blocking lock to return. */ 761 blocking lock to return. */
811 762
812static int 763static int
813send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, 764send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
814 struct smb_hdr *in_buf, 765 struct smb_hdr *in_buf,
815 struct smb_hdr *out_buf) 766 struct smb_hdr *out_buf)
816{ 767{
817 int bytes_returned; 768 int bytes_returned;
818 struct cifsSesInfo *ses = tcon->ses; 769 struct cifs_ses *ses = tcon->ses;
819 LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; 770 LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
820 771
821 /* We just modify the current in_buf to change 772 /* We just modify the current in_buf to change
@@ -832,15 +783,14 @@ send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon,
832} 783}
833 784
834int 785int
835SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, 786SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
836 struct smb_hdr *in_buf, struct smb_hdr *out_buf, 787 struct smb_hdr *in_buf, struct smb_hdr *out_buf,
837 int *pbytes_returned) 788 int *pbytes_returned)
838{ 789{
839 int rc = 0; 790 int rc = 0;
840 int rstart = 0; 791 int rstart = 0;
841 unsigned int receive_len;
842 struct mid_q_entry *midQ; 792 struct mid_q_entry *midQ;
843 struct cifsSesInfo *ses; 793 struct cifs_ses *ses;
844 794
845 if (tcon == NULL || tcon->ses == NULL) { 795 if (tcon == NULL || tcon->ses == NULL) {
846 cERROR(1, "Null smb session"); 796 cERROR(1, "Null smb session");
@@ -957,50 +907,20 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
957 rstart = 1; 907 rstart = 1;
958 } 908 }
959 909
960 rc = sync_mid_result(midQ, ses->server); 910 rc = cifs_sync_mid_result(midQ, ses->server);
961 if (rc != 0) 911 if (rc != 0)
962 return rc; 912 return rc;
963 913
964 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
965 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
966 cERROR(1, "Frame too large received. Length: %d Xid: %d",
967 receive_len, xid);
968 rc = -EIO;
969 goto out;
970 }
971
972 /* rcvd frame is ok */ 914 /* rcvd frame is ok */
973 915 if (out_buf == NULL || midQ->midState != MID_RESPONSE_RECEIVED) {
974 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) {
975 rc = -EIO; 916 rc = -EIO;
976 cERROR(1, "Bad MID state?"); 917 cERROR(1, "Bad MID state?");
977 goto out; 918 goto out;
978 } 919 }
979 920
980 out_buf->smb_buf_length = cpu_to_be32(receive_len); 921 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length);
981 memcpy((char *)out_buf + 4, 922 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
982 (char *)midQ->resp_buf + 4, 923 rc = cifs_check_receive(midQ, ses->server, 0);
983 receive_len);
984
985 dump_smb(out_buf, 92);
986 /* convert the length into a more usable form */
987 if ((receive_len > 24) &&
988 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
989 SECMODE_SIGN_ENABLED))) {
990 rc = cifs_verify_signature(out_buf,
991 ses->server,
992 midQ->sequence_number+1);
993 if (rc) {
994 cERROR(1, "Unexpected SMB signature");
995 /* BB FIXME add code to kill session */
996 }
997 }
998
999 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
1000
1001 /* BB special case reconnect tid and uid here? */
1002 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1003
1004out: 924out:
1005 delete_mid(midQ); 925 delete_mid(midQ);
1006 if (rstart && rc == -EACCES) 926 if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 912995e013ec..2a22fb2989e4 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -49,7 +49,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
49 int xid; 49 int xid;
50 struct cifs_sb_info *cifs_sb; 50 struct cifs_sb_info *cifs_sb;
51 struct tcon_link *tlink; 51 struct tcon_link *tlink;
52 struct cifsTconInfo *pTcon; 52 struct cifs_tcon *pTcon;
53 struct super_block *sb; 53 struct super_block *sb;
54 char *full_path = NULL; 54 char *full_path = NULL;
55 55
@@ -109,7 +109,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
109 int xid; 109 int xid;
110 struct cifs_sb_info *cifs_sb; 110 struct cifs_sb_info *cifs_sb;
111 struct tcon_link *tlink; 111 struct tcon_link *tlink;
112 struct cifsTconInfo *pTcon; 112 struct cifs_tcon *pTcon;
113 struct super_block *sb; 113 struct super_block *sb;
114 char *full_path; 114 char *full_path;
115 struct cifs_ntsd *pacl; 115 struct cifs_ntsd *pacl;
@@ -240,7 +240,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
240 int xid; 240 int xid;
241 struct cifs_sb_info *cifs_sb; 241 struct cifs_sb_info *cifs_sb;
242 struct tcon_link *tlink; 242 struct tcon_link *tlink;
243 struct cifsTconInfo *pTcon; 243 struct cifs_tcon *pTcon;
244 struct super_block *sb; 244 struct super_block *sb;
245 char *full_path; 245 char *full_path;
246 246
@@ -372,7 +372,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
372 int xid; 372 int xid;
373 struct cifs_sb_info *cifs_sb; 373 struct cifs_sb_info *cifs_sb;
374 struct tcon_link *tlink; 374 struct tcon_link *tlink;
375 struct cifsTconInfo *pTcon; 375 struct cifs_tcon *pTcon;
376 struct super_block *sb; 376 struct super_block *sb;
377 char *full_path; 377 char *full_path;
378 378
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index b80e0aa3cfa5..5a59efa0bb46 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -50,7 +50,7 @@ static int __init init_dlm(void)
50 if (error) 50 if (error)
51 goto out_netlink; 51 goto out_netlink;
52 52
53 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); 53 printk("DLM installed\n");
54 54
55 return 0; 55 return 0;
56 56
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 227b409b8406..bc116b9ffcf2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -529,6 +529,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
529 dget(lower_dentry); 529 dget(lower_dentry);
530 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); 530 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
531 dput(lower_dentry); 531 dput(lower_dentry);
532 if (!rc && dentry->d_inode)
533 clear_nlink(dentry->d_inode);
532 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 534 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
533 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 535 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
534 unlock_dir(lower_dir_dentry); 536 unlock_dir(lower_dir_dentry);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 03e609c45012..27a7fefb83eb 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -599,8 +599,8 @@ struct ecryptfs_write_tag_70_packet_silly_stack {
599 struct mutex *tfm_mutex; 599 struct mutex *tfm_mutex;
600 char *block_aligned_filename; 600 char *block_aligned_filename;
601 struct ecryptfs_auth_tok *auth_tok; 601 struct ecryptfs_auth_tok *auth_tok;
602 struct scatterlist src_sg; 602 struct scatterlist src_sg[2];
603 struct scatterlist dst_sg; 603 struct scatterlist dst_sg[2];
604 struct blkcipher_desc desc; 604 struct blkcipher_desc desc;
605 char iv[ECRYPTFS_MAX_IV_BYTES]; 605 char iv[ECRYPTFS_MAX_IV_BYTES];
606 char hash[ECRYPTFS_TAG_70_DIGEST_SIZE]; 606 char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
@@ -816,23 +816,21 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
816 memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename, 816 memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename,
817 filename_size); 817 filename_size);
818 rc = virt_to_scatterlist(s->block_aligned_filename, 818 rc = virt_to_scatterlist(s->block_aligned_filename,
819 s->block_aligned_filename_size, &s->src_sg, 1); 819 s->block_aligned_filename_size, s->src_sg, 2);
820 if (rc != 1) { 820 if (rc < 1) {
821 printk(KERN_ERR "%s: Internal error whilst attempting to " 821 printk(KERN_ERR "%s: Internal error whilst attempting to "
822 "convert filename memory to scatterlist; " 822 "convert filename memory to scatterlist; rc = [%d]. "
823 "expected rc = 1; got rc = [%d]. "
824 "block_aligned_filename_size = [%zd]\n", __func__, rc, 823 "block_aligned_filename_size = [%zd]\n", __func__, rc,
825 s->block_aligned_filename_size); 824 s->block_aligned_filename_size);
826 goto out_release_free_unlock; 825 goto out_release_free_unlock;
827 } 826 }
828 rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size, 827 rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size,
829 &s->dst_sg, 1); 828 s->dst_sg, 2);
830 if (rc != 1) { 829 if (rc < 1) {
831 printk(KERN_ERR "%s: Internal error whilst attempting to " 830 printk(KERN_ERR "%s: Internal error whilst attempting to "
832 "convert encrypted filename memory to scatterlist; " 831 "convert encrypted filename memory to scatterlist; "
833 "expected rc = 1; got rc = [%d]. " 832 "rc = [%d]. block_aligned_filename_size = [%zd]\n",
834 "block_aligned_filename_size = [%zd]\n", __func__, rc, 833 __func__, rc, s->block_aligned_filename_size);
835 s->block_aligned_filename_size);
836 goto out_release_free_unlock; 834 goto out_release_free_unlock;
837 } 835 }
838 /* The characters in the first block effectively do the job 836 /* The characters in the first block effectively do the job
@@ -855,7 +853,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
855 mount_crypt_stat->global_default_fn_cipher_key_bytes); 853 mount_crypt_stat->global_default_fn_cipher_key_bytes);
856 goto out_release_free_unlock; 854 goto out_release_free_unlock;
857 } 855 }
858 rc = crypto_blkcipher_encrypt_iv(&s->desc, &s->dst_sg, &s->src_sg, 856 rc = crypto_blkcipher_encrypt_iv(&s->desc, s->dst_sg, s->src_sg,
859 s->block_aligned_filename_size); 857 s->block_aligned_filename_size);
860 if (rc) { 858 if (rc) {
861 printk(KERN_ERR "%s: Error attempting to encrypt filename; " 859 printk(KERN_ERR "%s: Error attempting to encrypt filename; "
@@ -891,8 +889,8 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
891 struct mutex *tfm_mutex; 889 struct mutex *tfm_mutex;
892 char *decrypted_filename; 890 char *decrypted_filename;
893 struct ecryptfs_auth_tok *auth_tok; 891 struct ecryptfs_auth_tok *auth_tok;
894 struct scatterlist src_sg; 892 struct scatterlist src_sg[2];
895 struct scatterlist dst_sg; 893 struct scatterlist dst_sg[2];
896 struct blkcipher_desc desc; 894 struct blkcipher_desc desc;
897 char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; 895 char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
898 char iv[ECRYPTFS_MAX_IV_BYTES]; 896 char iv[ECRYPTFS_MAX_IV_BYTES];
@@ -1008,13 +1006,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
1008 } 1006 }
1009 mutex_lock(s->tfm_mutex); 1007 mutex_lock(s->tfm_mutex);
1010 rc = virt_to_scatterlist(&data[(*packet_size)], 1008 rc = virt_to_scatterlist(&data[(*packet_size)],
1011 s->block_aligned_filename_size, &s->src_sg, 1); 1009 s->block_aligned_filename_size, s->src_sg, 2);
1012 if (rc != 1) { 1010 if (rc < 1) {
1013 printk(KERN_ERR "%s: Internal error whilst attempting to " 1011 printk(KERN_ERR "%s: Internal error whilst attempting to "
1014 "convert encrypted filename memory to scatterlist; " 1012 "convert encrypted filename memory to scatterlist; "
1015 "expected rc = 1; got rc = [%d]. " 1013 "rc = [%d]. block_aligned_filename_size = [%zd]\n",
1016 "block_aligned_filename_size = [%zd]\n", __func__, rc, 1014 __func__, rc, s->block_aligned_filename_size);
1017 s->block_aligned_filename_size);
1018 goto out_unlock; 1015 goto out_unlock;
1019 } 1016 }
1020 (*packet_size) += s->block_aligned_filename_size; 1017 (*packet_size) += s->block_aligned_filename_size;
@@ -1028,13 +1025,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
1028 goto out_unlock; 1025 goto out_unlock;
1029 } 1026 }
1030 rc = virt_to_scatterlist(s->decrypted_filename, 1027 rc = virt_to_scatterlist(s->decrypted_filename,
1031 s->block_aligned_filename_size, &s->dst_sg, 1); 1028 s->block_aligned_filename_size, s->dst_sg, 2);
1032 if (rc != 1) { 1029 if (rc < 1) {
1033 printk(KERN_ERR "%s: Internal error whilst attempting to " 1030 printk(KERN_ERR "%s: Internal error whilst attempting to "
1034 "convert decrypted filename memory to scatterlist; " 1031 "convert decrypted filename memory to scatterlist; "
1035 "expected rc = 1; got rc = [%d]. " 1032 "rc = [%d]. block_aligned_filename_size = [%zd]\n",
1036 "block_aligned_filename_size = [%zd]\n", __func__, rc, 1033 __func__, rc, s->block_aligned_filename_size);
1037 s->block_aligned_filename_size);
1038 goto out_free_unlock; 1034 goto out_free_unlock;
1039 } 1035 }
1040 /* The characters in the first block effectively do the job of 1036 /* The characters in the first block effectively do the job of
@@ -1065,7 +1061,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
1065 mount_crypt_stat->global_default_fn_cipher_key_bytes); 1061 mount_crypt_stat->global_default_fn_cipher_key_bytes);
1066 goto out_free_unlock; 1062 goto out_free_unlock;
1067 } 1063 }
1068 rc = crypto_blkcipher_decrypt_iv(&s->desc, &s->dst_sg, &s->src_sg, 1064 rc = crypto_blkcipher_decrypt_iv(&s->desc, s->dst_sg, s->src_sg,
1069 s->block_aligned_filename_size); 1065 s->block_aligned_filename_size);
1070 if (rc) { 1066 if (rc) {
1071 printk(KERN_ERR "%s: Error attempting to decrypt filename; " 1067 printk(KERN_ERR "%s: Error attempting to decrypt filename; "
diff --git a/fs/exec.c b/fs/exec.c
index 936f5776655c..ea5f748906a8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -42,7 +42,6 @@
42#include <linux/pid_namespace.h> 42#include <linux/pid_namespace.h>
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/namei.h> 44#include <linux/namei.h>
45#include <linux/proc_fs.h>
46#include <linux/mount.h> 45#include <linux/mount.h>
47#include <linux/security.h> 46#include <linux/security.h>
48#include <linux/syscalls.h> 47#include <linux/syscalls.h>
@@ -1624,6 +1623,41 @@ expand_fail:
1624 return ret; 1623 return ret;
1625} 1624}
1626 1625
1626static int cn_print_exe_file(struct core_name *cn)
1627{
1628 struct file *exe_file;
1629 char *pathbuf, *path, *p;
1630 int ret;
1631
1632 exe_file = get_mm_exe_file(current->mm);
1633 if (!exe_file)
1634 return cn_printf(cn, "(unknown)");
1635
1636 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
1637 if (!pathbuf) {
1638 ret = -ENOMEM;
1639 goto put_exe_file;
1640 }
1641
1642 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
1643 if (IS_ERR(path)) {
1644 ret = PTR_ERR(path);
1645 goto free_buf;
1646 }
1647
1648 for (p = path; *p; p++)
1649 if (*p == '/')
1650 *p = '!';
1651
1652 ret = cn_printf(cn, "%s", path);
1653
1654free_buf:
1655 kfree(pathbuf);
1656put_exe_file:
1657 fput(exe_file);
1658 return ret;
1659}
1660
1627/* format_corename will inspect the pattern parameter, and output a 1661/* format_corename will inspect the pattern parameter, and output a
1628 * name into corename, which must have space for at least 1662 * name into corename, which must have space for at least
1629 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1663 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
@@ -1695,6 +1729,9 @@ static int format_corename(struct core_name *cn, long signr)
1695 case 'e': 1729 case 'e':
1696 err = cn_printf(cn, "%s", current->comm); 1730 err = cn_printf(cn, "%s", current->comm);
1697 break; 1731 break;
1732 case 'E':
1733 err = cn_print_exe_file(cn);
1734 break;
1698 /* core limit size */ 1735 /* core limit size */
1699 case 'c': 1736 case 'c':
1700 err = cn_printf(cn, "%lu", 1737 err = cn_printf(cn, "%lu",
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index cfa327d33194..c2b34cd2abe0 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -146,7 +146,7 @@ static int __init init_gfs2_fs(void)
146 146
147 gfs2_register_debugfs(); 147 gfs2_register_debugfs();
148 148
149 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); 149 printk("GFS2 installed\n");
150 150
151 return 0; 151 return 0;
152 152
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 05f73328b28b..9a1e86fc1362 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -75,7 +75,6 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
75 struct nameidata *nd) 75 struct nameidata *nd)
76{ 76{
77 struct jffs2_inode_info *dir_f; 77 struct jffs2_inode_info *dir_f;
78 struct jffs2_sb_info *c;
79 struct jffs2_full_dirent *fd = NULL, *fd_list; 78 struct jffs2_full_dirent *fd = NULL, *fd_list;
80 uint32_t ino = 0; 79 uint32_t ino = 0;
81 struct inode *inode = NULL; 80 struct inode *inode = NULL;
@@ -86,7 +85,6 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
86 return ERR_PTR(-ENAMETOOLONG); 85 return ERR_PTR(-ENAMETOOLONG);
87 86
88 dir_f = JFFS2_INODE_INFO(dir_i); 87 dir_f = JFFS2_INODE_INFO(dir_i);
89 c = JFFS2_SB_INFO(dir_i->i_sb);
90 88
91 mutex_lock(&dir_f->sem); 89 mutex_lock(&dir_f->sem);
92 90
@@ -119,7 +117,6 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
119static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) 117static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
120{ 118{
121 struct jffs2_inode_info *f; 119 struct jffs2_inode_info *f;
122 struct jffs2_sb_info *c;
123 struct inode *inode = filp->f_path.dentry->d_inode; 120 struct inode *inode = filp->f_path.dentry->d_inode;
124 struct jffs2_full_dirent *fd; 121 struct jffs2_full_dirent *fd;
125 unsigned long offset, curofs; 122 unsigned long offset, curofs;
@@ -127,7 +124,6 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
127 D1(printk(KERN_DEBUG "jffs2_readdir() for dir_i #%lu\n", filp->f_path.dentry->d_inode->i_ino)); 124 D1(printk(KERN_DEBUG "jffs2_readdir() for dir_i #%lu\n", filp->f_path.dentry->d_inode->i_ino));
128 125
129 f = JFFS2_INODE_INFO(inode); 126 f = JFFS2_INODE_INFO(inode);
130 c = JFFS2_SB_INFO(inode->i_sb);
131 127
132 offset = filp->f_pos; 128 offset = filp->f_pos;
133 129
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index b632dddcb482..8d8cd3419d02 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -94,7 +94,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
94 uint32_t buf_size = 0; 94 uint32_t buf_size = 0;
95 struct jffs2_summary *s = NULL; /* summary info collected by the scan process */ 95 struct jffs2_summary *s = NULL; /* summary info collected by the scan process */
96#ifndef __ECOS 96#ifndef __ECOS
97 size_t pointlen; 97 size_t pointlen, try_size;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, 100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
@@ -113,18 +113,21 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
113 /* For NAND it's quicker to read a whole eraseblock at a time, 113 /* For NAND it's quicker to read a whole eraseblock at a time,
114 apparently */ 114 apparently */
115 if (jffs2_cleanmarker_oob(c)) 115 if (jffs2_cleanmarker_oob(c))
116 buf_size = c->sector_size; 116 try_size = c->sector_size;
117 else 117 else
118 buf_size = PAGE_SIZE; 118 try_size = PAGE_SIZE;
119 119
120 /* Respect kmalloc limitations */ 120 D1(printk(KERN_DEBUG "Trying to allocate readbuf of %zu "
121 if (buf_size > 128*1024) 121 "bytes\n", try_size));
122 buf_size = 128*1024;
123 122
124 D1(printk(KERN_DEBUG "Allocating readbuf of %d bytes\n", buf_size)); 123 flashbuf = mtd_kmalloc_up_to(c->mtd, &try_size);
125 flashbuf = kmalloc(buf_size, GFP_KERNEL);
126 if (!flashbuf) 124 if (!flashbuf)
127 return -ENOMEM; 125 return -ENOMEM;
126
127 D1(printk(KERN_DEBUG "Allocated readbuf of %zu bytes\n",
128 try_size));
129
130 buf_size = (uint32_t)try_size;
128 } 131 }
129 132
130 if (jffs2_sum_active()) { 133 if (jffs2_sum_active()) {
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index a7c07b44b100..e5d71b27a5b0 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -16,6 +16,7 @@
16#include <linux/mman.h> 16#include <linux/mman.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/fcntl.h> 18#include <linux/fcntl.h>
19#include <linux/memcontrol.h>
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/system.h> 22#include <asm/system.h>
@@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
92 * -- wli 93 * -- wli
93 */ 94 */
94 count_vm_event(PGMAJFAULT); 95 count_vm_event(PGMAJFAULT);
96 mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
95 return VM_FAULT_MAJOR; 97 return VM_FAULT_MAJOR;
96} 98}
97 99
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 4c5488468c14..cd9427023d2e 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -368,7 +368,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
368 int *vict_bit, 368 int *vict_bit,
369 struct buffer_head **ret_bh) 369 struct buffer_head **ret_bh)
370{ 370{
371 int ret, i, blocks_per_unit = 1; 371 int ret, i, bits_per_unit = 0;
372 u64 blkno; 372 u64 blkno;
373 char namebuf[40]; 373 char namebuf[40];
374 374
@@ -398,14 +398,14 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
398 rec = &(cl->cl_recs[0]); 398 rec = &(cl->cl_recs[0]);
399 399
400 if (type == GLOBAL_BITMAP_SYSTEM_INODE) 400 if (type == GLOBAL_BITMAP_SYSTEM_INODE)
401 blocks_per_unit <<= (osb->s_clustersize_bits - 401 bits_per_unit = osb->s_clustersize_bits -
402 inode->i_sb->s_blocksize_bits); 402 inode->i_sb->s_blocksize_bits;
403 /* 403 /*
404 * 'vict_blkno' was out of the valid range. 404 * 'vict_blkno' was out of the valid range.
405 */ 405 */
406 if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || 406 if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
407 (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) * 407 (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
408 blocks_per_unit))) { 408 bits_per_unit))) {
409 ret = -EINVAL; 409 ret = -EINVAL;
410 goto out; 410 goto out;
411 } 411 }
@@ -441,8 +441,8 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
441 le16_to_cpu(bg->bg_bits))) { 441 le16_to_cpu(bg->bg_bits))) {
442 442
443 *ret_bh = gd_bh; 443 *ret_bh = gd_bh;
444 *vict_bit = (vict_blkno - blkno) / 444 *vict_bit = (vict_blkno - blkno) >>
445 blocks_per_unit; 445 bits_per_unit;
446 mlog(0, "find the victim group: #%llu, " 446 mlog(0, "find the victim group: #%llu, "
447 "total_bits: %u, vict_bit: %u\n", 447 "total_bits: %u, vict_bit: %u\n",
448 blkno, le16_to_cpu(bg->bg_bits), 448 blkno, le16_to_cpu(bg->bg_bits),
@@ -472,12 +472,24 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
472 int ret, goal_bit = 0; 472 int ret, goal_bit = 0;
473 473
474 struct buffer_head *gd_bh = NULL; 474 struct buffer_head *gd_bh = NULL;
475 struct ocfs2_group_desc *bg; 475 struct ocfs2_group_desc *bg = NULL;
476 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 476 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
477 int c_to_b = 1 << (osb->s_clustersize_bits - 477 int c_to_b = 1 << (osb->s_clustersize_bits -
478 inode->i_sb->s_blocksize_bits); 478 inode->i_sb->s_blocksize_bits);
479 479
480 /* 480 /*
481 * make goal become cluster aligned.
482 */
483 range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
484 range->me_goal);
485 /*
486 * moving goal is not allowd to start with a group desc blok(#0 blk)
487 * let's compromise to the latter cluster.
488 */
489 if (range->me_goal == le64_to_cpu(bg->bg_blkno))
490 range->me_goal += c_to_b;
491
492 /*
481 * validate goal sits within global_bitmap, and return the victim 493 * validate goal sits within global_bitmap, and return the victim
482 * group desc 494 * group desc
483 */ 495 */
@@ -491,19 +503,6 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
491 bg = (struct ocfs2_group_desc *)gd_bh->b_data; 503 bg = (struct ocfs2_group_desc *)gd_bh->b_data;
492 504
493 /* 505 /*
494 * make goal become cluster aligned.
495 */
496 if (range->me_goal % c_to_b)
497 range->me_goal = range->me_goal / c_to_b * c_to_b;
498
499 /*
500 * moving goal is not allowd to start with a group desc blok(#0 blk)
501 * let's compromise to the latter cluster.
502 */
503 if (range->me_goal == le64_to_cpu(bg->bg_blkno))
504 range->me_goal += c_to_b;
505
506 /*
507 * movement is not gonna cross two groups. 506 * movement is not gonna cross two groups.
508 */ 507 */
509 if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < 508 if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize <
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 8ed4d3433199..f82e762eeca2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -256,10 +256,12 @@ ssize_t part_discard_alignment_show(struct device *dev,
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 struct gendisk *disk = dev_to_disk(dev); 258 struct gendisk *disk = dev_to_disk(dev);
259 unsigned int alignment = 0;
259 260
260 return sprintf(buf, "%u\n", 261 if (disk->queue)
261 queue_limit_discard_alignment(&disk->queue->limits, 262 alignment = queue_limit_discard_alignment(&disk->queue->limits,
262 p->start_sect)); 263 p->start_sect);
264 return sprintf(buf, "%u\n", alignment);
263} 265}
264 266
265ssize_t part_stat_show(struct device *dev, 267ssize_t part_stat_show(struct device *dev,
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 19d6750d1d6c..6296b403c67a 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -310,6 +310,15 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
310 goto fail; 310 goto fail;
311 } 311 }
312 312
313 /* Check the GUID Partition Table header size */
314 if (le32_to_cpu((*gpt)->header_size) >
315 bdev_logical_block_size(state->bdev)) {
316 pr_debug("GUID Partition Table Header size is wrong: %u > %u\n",
317 le32_to_cpu((*gpt)->header_size),
318 bdev_logical_block_size(state->bdev));
319 goto fail;
320 }
321
313 /* Check the GUID Partition Table CRC */ 322 /* Check the GUID Partition Table CRC */
314 origcrc = le32_to_cpu((*gpt)->header_crc32); 323 origcrc = le32_to_cpu((*gpt)->header_crc32);
315 (*gpt)->header_crc32 = 0; 324 (*gpt)->header_crc32 = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e4f776b0917..9b45ee84fbcc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -131,7 +131,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
131 * you can test for combinations of others with 131 * you can test for combinations of others with
132 * simple bit tests. 132 * simple bit tests.
133 */ 133 */
134static const char *task_state_array[] = { 134static const char * const task_state_array[] = {
135 "R (running)", /* 0 */ 135 "R (running)", /* 0 */
136 "S (sleeping)", /* 1 */ 136 "S (sleeping)", /* 1 */
137 "D (disk sleep)", /* 2 */ 137 "D (disk sleep)", /* 2 */
@@ -147,7 +147,7 @@ static const char *task_state_array[] = {
147static inline const char *get_task_state(struct task_struct *tsk) 147static inline const char *get_task_state(struct task_struct *tsk)
148{ 148{
149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
150 const char **p = &task_state_array[0]; 150 const char * const *p = &task_state_array[0];
151 151
152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); 152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
153 153
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc8bca72b002..4ede550517a6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -894,20 +894,20 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
894 if (!task) 894 if (!task)
895 goto out_no_task; 895 goto out_no_task;
896 896
897 copied = -ENOMEM;
898 page = (char *)__get_free_page(GFP_TEMPORARY);
899 if (!page)
900 goto out_task;
901
897 mm = check_mem_permission(task); 902 mm = check_mem_permission(task);
898 copied = PTR_ERR(mm); 903 copied = PTR_ERR(mm);
899 if (IS_ERR(mm)) 904 if (IS_ERR(mm))
900 goto out_task; 905 goto out_free;
901 906
902 copied = -EIO; 907 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id)) 908 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm; 909 goto out_mm;
905 910
906 copied = -ENOMEM;
907 page = (char *)__get_free_page(GFP_TEMPORARY);
908 if (!page)
909 goto out_mm;
910
911 copied = 0; 911 copied = 0;
912 while (count > 0) { 912 while (count > 0) {
913 int this_len, retval; 913 int this_len, retval;
@@ -929,9 +929,11 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
929 count -= retval; 929 count -= retval;
930 } 930 }
931 *ppos = dst; 931 *ppos = dst;
932 free_page((unsigned long) page); 932
933out_mm: 933out_mm:
934 mmput(mm); 934 mmput(mm);
935out_free:
936 free_page((unsigned long) page);
935out_task: 937out_task:
936 put_task_struct(task); 938 put_task_struct(task);
937out_no_task: 939out_no_task:
@@ -1059,7 +1061,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1059{ 1061{
1060 struct task_struct *task; 1062 struct task_struct *task;
1061 char buffer[PROC_NUMBUF]; 1063 char buffer[PROC_NUMBUF];
1062 long oom_adjust; 1064 int oom_adjust;
1063 unsigned long flags; 1065 unsigned long flags;
1064 int err; 1066 int err;
1065 1067
@@ -1071,7 +1073,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1071 goto out; 1073 goto out;
1072 } 1074 }
1073 1075
1074 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1076 err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
1075 if (err) 1077 if (err)
1076 goto out; 1078 goto out;
1077 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1079 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
@@ -1168,7 +1170,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1168 struct task_struct *task; 1170 struct task_struct *task;
1169 char buffer[PROC_NUMBUF]; 1171 char buffer[PROC_NUMBUF];
1170 unsigned long flags; 1172 unsigned long flags;
1171 long oom_score_adj; 1173 int oom_score_adj;
1172 int err; 1174 int err;
1173 1175
1174 memset(buffer, 0, sizeof(buffer)); 1176 memset(buffer, 0, sizeof(buffer));
@@ -1179,7 +1181,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1179 goto out; 1181 goto out;
1180 } 1182 }
1181 1183
1182 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1184 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1183 if (err) 1185 if (err)
1184 goto out; 1186 goto out;
1185 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1187 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
@@ -1468,7 +1470,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1468 struct inode *inode = file->f_path.dentry->d_inode; 1470 struct inode *inode = file->f_path.dentry->d_inode;
1469 struct task_struct *p; 1471 struct task_struct *p;
1470 char buffer[PROC_NUMBUF]; 1472 char buffer[PROC_NUMBUF];
1471 long nice; 1473 int nice;
1472 int err; 1474 int err;
1473 1475
1474 memset(buffer, 0, sizeof(buffer)); 1476 memset(buffer, 0, sizeof(buffer));
@@ -1477,9 +1479,9 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1477 if (copy_from_user(buffer, buf, count)) 1479 if (copy_from_user(buffer, buf, count))
1478 return -EFAULT; 1480 return -EFAULT;
1479 1481
1480 err = strict_strtol(strstrip(buffer), 0, &nice); 1482 err = kstrtoint(strstrip(buffer), 0, &nice);
1481 if (err) 1483 if (err < 0)
1482 return -EINVAL; 1484 return err;
1483 1485
1484 p = get_proc_task(inode); 1486 p = get_proc_task(inode);
1485 if (!p) 1487 if (!p)
@@ -1576,57 +1578,6 @@ static const struct file_operations proc_pid_set_comm_operations = {
1576 .release = single_release, 1578 .release = single_release,
1577}; 1579};
1578 1580
1579/*
1580 * We added or removed a vma mapping the executable. The vmas are only mapped
1581 * during exec and are not mapped with the mmap system call.
1582 * Callers must hold down_write() on the mm's mmap_sem for these
1583 */
1584void added_exe_file_vma(struct mm_struct *mm)
1585{
1586 mm->num_exe_file_vmas++;
1587}
1588
1589void removed_exe_file_vma(struct mm_struct *mm)
1590{
1591 mm->num_exe_file_vmas--;
1592 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1593 fput(mm->exe_file);
1594 mm->exe_file = NULL;
1595 }
1596
1597}
1598
1599void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1600{
1601 if (new_exe_file)
1602 get_file(new_exe_file);
1603 if (mm->exe_file)
1604 fput(mm->exe_file);
1605 mm->exe_file = new_exe_file;
1606 mm->num_exe_file_vmas = 0;
1607}
1608
1609struct file *get_mm_exe_file(struct mm_struct *mm)
1610{
1611 struct file *exe_file;
1612
1613 /* We need mmap_sem to protect against races with removal of
1614 * VM_EXECUTABLE vmas */
1615 down_read(&mm->mmap_sem);
1616 exe_file = mm->exe_file;
1617 if (exe_file)
1618 get_file(exe_file);
1619 up_read(&mm->mmap_sem);
1620 return exe_file;
1621}
1622
1623void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1624{
1625 /* It's safe to write the exe_file pointer without exe_file_lock because
1626 * this is called during fork when the task is not yet in /proc */
1627 newmm->exe_file = get_mm_exe_file(oldmm);
1628}
1629
1630static int proc_exe_link(struct inode *inode, struct path *exe_path) 1581static int proc_exe_link(struct inode *inode, struct path *exe_path)
1631{ 1582{
1632 struct task_struct *task; 1583 struct task_struct *task;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 1cffa2b8a2fc..9758b654a1bc 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -138,9 +138,9 @@ static int stat_open(struct inode *inode, struct file *file)
138 struct seq_file *m; 138 struct seq_file *m;
139 int res; 139 int res;
140 140
141 /* don't ask for more than the kmalloc() max size, currently 128 KB */ 141 /* don't ask for more than the kmalloc() max size */
142 if (size > 128 * 1024) 142 if (size > KMALLOC_MAX_SIZE)
143 size = 128 * 1024; 143 size = KMALLOC_MAX_SIZE;
144 buf = kmalloc(size, GFP_KERNEL); 144 buf = kmalloc(size, GFP_KERNEL);
145 if (!buf) 145 if (!buf)
146 return -ENOMEM; 146 return -ENOMEM;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index db15935fa757..25b6a887adb9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -536,15 +536,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
536 char buffer[PROC_NUMBUF]; 536 char buffer[PROC_NUMBUF];
537 struct mm_struct *mm; 537 struct mm_struct *mm;
538 struct vm_area_struct *vma; 538 struct vm_area_struct *vma;
539 long type; 539 int type;
540 int rv;
540 541
541 memset(buffer, 0, sizeof(buffer)); 542 memset(buffer, 0, sizeof(buffer));
542 if (count > sizeof(buffer) - 1) 543 if (count > sizeof(buffer) - 1)
543 count = sizeof(buffer) - 1; 544 count = sizeof(buffer) - 1;
544 if (copy_from_user(buffer, buf, count)) 545 if (copy_from_user(buffer, buf, count))
545 return -EFAULT; 546 return -EFAULT;
546 if (strict_strtol(strstrip(buffer), 10, &type)) 547 rv = kstrtoint(strstrip(buffer), 10, &type);
547 return -EINVAL; 548 if (rv < 0)
549 return rv;
548 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
549 return -EINVAL; 551 return -EINVAL;
550 task = get_proc_task(file->f_path.dentry->d_inode); 552 task = get_proc_task(file->f_path.dentry->d_inode);
@@ -769,18 +771,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
769 if (!task) 771 if (!task)
770 goto out; 772 goto out;
771 773
772 mm = mm_for_maps(task);
773 ret = PTR_ERR(mm);
774 if (!mm || IS_ERR(mm))
775 goto out_task;
776
777 ret = -EINVAL; 774 ret = -EINVAL;
778 /* file position must be aligned */ 775 /* file position must be aligned */
779 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 776 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
780 goto out_task; 777 goto out_task;
781 778
782 ret = 0; 779 ret = 0;
783
784 if (!count) 780 if (!count)
785 goto out_task; 781 goto out_task;
786 782
@@ -788,7 +784,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
788 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 784 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
789 ret = -ENOMEM; 785 ret = -ENOMEM;
790 if (!pm.buffer) 786 if (!pm.buffer)
791 goto out_mm; 787 goto out_task;
788
789 mm = mm_for_maps(task);
790 ret = PTR_ERR(mm);
791 if (!mm || IS_ERR(mm))
792 goto out_free;
792 793
793 pagemap_walk.pmd_entry = pagemap_pte_range; 794 pagemap_walk.pmd_entry = pagemap_pte_range;
794 pagemap_walk.pte_hole = pagemap_pte_hole; 795 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -831,7 +832,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
831 len = min(count, PM_ENTRY_BYTES * pm.pos); 832 len = min(count, PM_ENTRY_BYTES * pm.pos);
832 if (copy_to_user(buf, pm.buffer, len)) { 833 if (copy_to_user(buf, pm.buffer, len)) {
833 ret = -EFAULT; 834 ret = -EFAULT;
834 goto out_free; 835 goto out_mm;
835 } 836 }
836 copied += len; 837 copied += len;
837 buf += len; 838 buf += len;
@@ -841,10 +842,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
841 if (!ret || ret == PM_END_OF_BUFFER) 842 if (!ret || ret == PM_END_OF_BUFFER)
842 ret = copied; 843 ret = copied;
843 844
844out_free:
845 kfree(pm.buffer);
846out_mm: 845out_mm:
847 mmput(mm); 846 mmput(mm);
847out_free:
848 kfree(pm.buffer);
848out_task: 849out_task:
849 put_task_struct(task); 850 put_task_struct(task);
850out: 851out:
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 74802bc5ded9..cd99bf557650 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -35,6 +35,46 @@ static u64 vmcore_size;
35 35
36static struct proc_dir_entry *proc_vmcore = NULL; 36static struct proc_dir_entry *proc_vmcore = NULL;
37 37
38/*
39 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
40 * The called function has to take care of module refcounting.
41 */
42static int (*oldmem_pfn_is_ram)(unsigned long pfn);
43
44int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
45{
46 if (oldmem_pfn_is_ram)
47 return -EBUSY;
48 oldmem_pfn_is_ram = fn;
49 return 0;
50}
51EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
52
53void unregister_oldmem_pfn_is_ram(void)
54{
55 oldmem_pfn_is_ram = NULL;
56 wmb();
57}
58EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
59
60static int pfn_is_ram(unsigned long pfn)
61{
62 int (*fn)(unsigned long pfn);
63 /* pfn is ram unless fn() checks pagetype */
64 int ret = 1;
65
66 /*
67 * Ask hypervisor if the pfn is really ram.
68 * A ballooned page contains no data and reading from such a page
69 * will cause high load in the hypervisor.
70 */
71 fn = oldmem_pfn_is_ram;
72 if (fn)
73 ret = fn(pfn);
74
75 return ret;
76}
77
38/* Reads a page from the oldmem device from given offset. */ 78/* Reads a page from the oldmem device from given offset. */
39static ssize_t read_from_oldmem(char *buf, size_t count, 79static ssize_t read_from_oldmem(char *buf, size_t count,
40 u64 *ppos, int userbuf) 80 u64 *ppos, int userbuf)
@@ -55,9 +95,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
55 else 95 else
56 nr_bytes = count; 96 nr_bytes = count;
57 97
58 tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); 98 /* If pfn is not ram, return zeros for sparse dump files */
59 if (tmp < 0) 99 if (pfn_is_ram(pfn) == 0)
60 return tmp; 100 memset(buf, 0, nr_bytes);
101 else {
102 tmp = copy_oldmem_page(pfn, buf, nr_bytes,
103 offset, userbuf);
104 if (tmp < 0)
105 return tmp;
106 }
61 *ppos += nr_bytes; 107 *ppos += nr_bytes;
62 count -= nr_bytes; 108 count -= nr_bytes;
63 buf += nr_bytes; 109 buf += nr_bytes;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 8ab48bc2fa7d..ed0eb2a921f4 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 4b5a3fbb1f1f..f744be98cd5a 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -393,19 +393,36 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
393/* 393/*
394 * Read a filesystem table (uncompressed sequence of bytes) from disk 394 * Read a filesystem table (uncompressed sequence of bytes) from disk
395 */ 395 */
396int squashfs_read_table(struct super_block *sb, void *buffer, u64 block, 396void *squashfs_read_table(struct super_block *sb, u64 block, int length)
397 int length)
398{ 397{
399 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 398 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
400 int i, res; 399 int i, res;
401 void **data = kcalloc(pages, sizeof(void *), GFP_KERNEL); 400 void *table, *buffer, **data;
402 if (data == NULL) 401
403 return -ENOMEM; 402 table = buffer = kmalloc(length, GFP_KERNEL);
403 if (table == NULL)
404 return ERR_PTR(-ENOMEM);
405
406 data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
407 if (data == NULL) {
408 res = -ENOMEM;
409 goto failed;
410 }
404 411
405 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) 412 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
406 data[i] = buffer; 413 data[i] = buffer;
414
407 res = squashfs_read_data(sb, data, block, length | 415 res = squashfs_read_data(sb, data, block, length |
408 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); 416 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
417
409 kfree(data); 418 kfree(data);
410 return res; 419
420 if (res < 0)
421 goto failed;
422
423 return table;
424
425failed:
426 kfree(table);
427 return ERR_PTR(res);
411} 428}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e921bd213738..9f1b0bb96f13 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 099745ad5691..8ba70cff09a6 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -4,7 +4,7 @@
4 * Squashfs - a compressed read only filesystem for Linux 4 * Squashfs - a compressed read only filesystem for Linux
5 * 5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
7 * Phillip Lougher <phillip@lougher.demon.co.uk> 7 * Phillip Lougher <phillip@squashfs.org.uk>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 3f79cd1d0c19..9dfe2ce0fb70 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 7f93d5a9ee05..730c56248c9b 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -121,30 +121,38 @@ static struct dentry *squashfs_get_parent(struct dentry *child)
121 * Read uncompressed inode lookup table indexes off disk into memory 121 * Read uncompressed inode lookup table indexes off disk into memory
122 */ 122 */
123__le64 *squashfs_read_inode_lookup_table(struct super_block *sb, 123__le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
124 u64 lookup_table_start, unsigned int inodes) 124 u64 lookup_table_start, u64 next_table, unsigned int inodes)
125{ 125{
126 unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); 126 unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
127 __le64 *inode_lookup_table; 127 __le64 *table;
128 int err;
129 128
130 TRACE("In read_inode_lookup_table, length %d\n", length); 129 TRACE("In read_inode_lookup_table, length %d\n", length);
131 130
132 /* Allocate inode lookup table indexes */ 131 /* Sanity check values */
133 inode_lookup_table = kmalloc(length, GFP_KERNEL); 132
134 if (inode_lookup_table == NULL) { 133 /* there should always be at least one inode */
135 ERROR("Failed to allocate inode lookup table\n"); 134 if (inodes == 0)
136 return ERR_PTR(-ENOMEM); 135 return ERR_PTR(-EINVAL);
137 } 136
137 /* length bytes should not extend into the next table - this check
138 * also traps instances where lookup_table_start is incorrectly larger
139 * than the next table start
140 */
141 if (lookup_table_start + length > next_table)
142 return ERR_PTR(-EINVAL);
143
144 table = squashfs_read_table(sb, lookup_table_start, length);
138 145
139 err = squashfs_read_table(sb, inode_lookup_table, lookup_table_start, 146 /*
140 length); 147 * table[0] points to the first inode lookup table metadata block,
141 if (err < 0) { 148 * this should be less than lookup_table_start
142 ERROR("unable to read inode lookup table\n"); 149 */
143 kfree(inode_lookup_table); 150 if (!IS_ERR(table) && table[0] >= lookup_table_start) {
144 return ERR_PTR(err); 151 kfree(table);
152 return ERR_PTR(-EINVAL);
145 } 153 }
146 154
147 return inode_lookup_table; 155 return table;
148} 156}
149 157
150 158
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index a25c5060bdcb..38bb1c640559 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 7eef571443c6..1516a6490bfb 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -71,26 +71,29 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
71 * Read the uncompressed fragment lookup table indexes off disk into memory 71 * Read the uncompressed fragment lookup table indexes off disk into memory
72 */ 72 */
73__le64 *squashfs_read_fragment_index_table(struct super_block *sb, 73__le64 *squashfs_read_fragment_index_table(struct super_block *sb,
74 u64 fragment_table_start, unsigned int fragments) 74 u64 fragment_table_start, u64 next_table, unsigned int fragments)
75{ 75{
76 unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments); 76 unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments);
77 __le64 *fragment_index; 77 __le64 *table;
78 int err;
79 78
80 /* Allocate fragment lookup table indexes */ 79 /*
81 fragment_index = kmalloc(length, GFP_KERNEL); 80 * Sanity check, length bytes should not extend into the next table -
82 if (fragment_index == NULL) { 81 * this check also traps instances where fragment_table_start is
83 ERROR("Failed to allocate fragment index table\n"); 82 * incorrectly larger than the next table start
84 return ERR_PTR(-ENOMEM); 83 */
85 } 84 if (fragment_table_start + length > next_table)
85 return ERR_PTR(-EINVAL);
86
87 table = squashfs_read_table(sb, fragment_table_start, length);
86 88
87 err = squashfs_read_table(sb, fragment_index, fragment_table_start, 89 /*
88 length); 90 * table[0] points to the first fragment table metadata block, this
89 if (err < 0) { 91 * should be less than fragment_table_start
90 ERROR("unable to read fragment index table\n"); 92 */
91 kfree(fragment_index); 93 if (!IS_ERR(table) && table[0] >= fragment_table_start) {
92 return ERR_PTR(err); 94 kfree(table);
95 return ERR_PTR(-EINVAL);
93 } 96 }
94 97
95 return fragment_index; 98 return table;
96} 99}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d8f32452638e..a70858e0fb44 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -66,27 +66,37 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
66 * Read uncompressed id lookup table indexes from disk into memory 66 * Read uncompressed id lookup table indexes from disk into memory
67 */ 67 */
68__le64 *squashfs_read_id_index_table(struct super_block *sb, 68__le64 *squashfs_read_id_index_table(struct super_block *sb,
69 u64 id_table_start, unsigned short no_ids) 69 u64 id_table_start, u64 next_table, unsigned short no_ids)
70{ 70{
71 unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); 71 unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
72 __le64 *id_table; 72 __le64 *table;
73 int err;
74 73
75 TRACE("In read_id_index_table, length %d\n", length); 74 TRACE("In read_id_index_table, length %d\n", length);
76 75
77 /* Allocate id lookup table indexes */ 76 /* Sanity check values */
78 id_table = kmalloc(length, GFP_KERNEL); 77
79 if (id_table == NULL) { 78 /* there should always be at least one id */
80 ERROR("Failed to allocate id index table\n"); 79 if (no_ids == 0)
81 return ERR_PTR(-ENOMEM); 80 return ERR_PTR(-EINVAL);
82 } 81
82 /*
83 * length bytes should not extend into the next table - this check
84 * also traps instances where id_table_start is incorrectly larger
85 * than the next table start
86 */
87 if (id_table_start + length > next_table)
88 return ERR_PTR(-EINVAL);
89
90 table = squashfs_read_table(sb, id_table_start, length);
83 91
84 err = squashfs_read_table(sb, id_table, id_table_start, length); 92 /*
85 if (err < 0) { 93 * table[0] points to the first id lookup table metadata block, this
86 ERROR("unable to read id index table\n"); 94 * should be less than id_table_start
87 kfree(id_table); 95 */
88 return ERR_PTR(err); 96 if (!IS_ERR(table) && table[0] >= id_table_start) {
97 kfree(table);
98 return ERR_PTR(-EINVAL);
89 } 99 }
90 100
91 return id_table; 101 return table;
92} 102}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 62e63ad25075..04bebcaa2373 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 5d922a6701ab..4bc63ac64bc0 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 1f2e608b8785..e3be6a71cfa7 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -44,24 +44,24 @@ extern struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *,
44 u64, int); 44 u64, int);
45extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *, 45extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *,
46 u64, int); 46 u64, int);
47extern int squashfs_read_table(struct super_block *, void *, u64, int); 47extern void *squashfs_read_table(struct super_block *, u64, int);
48 48
49/* decompressor.c */ 49/* decompressor.c */
50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); 50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
51extern void *squashfs_decompressor_init(struct super_block *, unsigned short); 51extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
52 52
53/* export.c */ 53/* export.c */
54extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, 54extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
55 unsigned int); 55 unsigned int);
56 56
57/* fragment.c */ 57/* fragment.c */
58extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); 58extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
59extern __le64 *squashfs_read_fragment_index_table(struct super_block *, 59extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
60 u64, unsigned int); 60 u64, u64, unsigned int);
61 61
62/* id.c */ 62/* id.c */
63extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); 63extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
64extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, 64extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
65 unsigned short); 65 unsigned short);
66 66
67/* inode.c */ 67/* inode.c */
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 4582c568ef4d..b4a4e539a08c 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -4,7 +4,7 @@
4 * Squashfs 4 * Squashfs
5 * 5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
7 * Phillip Lougher <phillip@lougher.demon.co.uk> 7 * Phillip Lougher <phillip@squashfs.org.uk>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index 359baefc01fc..73588e7700ed 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -4,7 +4,7 @@
4 * Squashfs 4 * Squashfs
5 * 5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
7 * Phillip Lougher <phillip@lougher.demon.co.uk> 7 * Phillip Lougher <phillip@squashfs.org.uk>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index d9037a5215f0..651f0b31d296 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -4,7 +4,7 @@
4 * Squashfs 4 * Squashfs
5 * 5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
7 * Phillip Lougher <phillip@lougher.demon.co.uk> 7 * Phillip Lougher <phillip@squashfs.org.uk>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5c8184c061a4..6f26abee3597 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -83,7 +83,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
83 long long root_inode; 83 long long root_inode;
84 unsigned short flags; 84 unsigned short flags;
85 unsigned int fragments; 85 unsigned int fragments;
86 u64 lookup_table_start, xattr_id_table_start; 86 u64 lookup_table_start, xattr_id_table_start, next_table;
87 int err; 87 int err;
88 88
89 TRACE("Entered squashfs_fill_superblock\n"); 89 TRACE("Entered squashfs_fill_superblock\n");
@@ -95,12 +95,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
95 } 95 }
96 msblk = sb->s_fs_info; 96 msblk = sb->s_fs_info;
97 97
98 sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
99 if (sblk == NULL) {
100 ERROR("Failed to allocate squashfs_super_block\n");
101 goto failure;
102 }
103
104 msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE); 98 msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
105 msblk->devblksize_log2 = ffz(~msblk->devblksize); 99 msblk->devblksize_log2 = ffz(~msblk->devblksize);
106 100
@@ -114,10 +108,12 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
114 * of bytes_used) we need to set it to an initial sensible dummy value 108 * of bytes_used) we need to set it to an initial sensible dummy value
115 */ 109 */
116 msblk->bytes_used = sizeof(*sblk); 110 msblk->bytes_used = sizeof(*sblk);
117 err = squashfs_read_table(sb, sblk, SQUASHFS_START, sizeof(*sblk)); 111 sblk = squashfs_read_table(sb, SQUASHFS_START, sizeof(*sblk));
118 112
119 if (err < 0) { 113 if (IS_ERR(sblk)) {
120 ERROR("unable to read squashfs_super_block\n"); 114 ERROR("unable to read squashfs_super_block\n");
115 err = PTR_ERR(sblk);
116 sblk = NULL;
121 goto failed_mount; 117 goto failed_mount;
122 } 118 }
123 119
@@ -218,18 +214,61 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
218 goto failed_mount; 214 goto failed_mount;
219 } 215 }
220 216
217 /* Handle xattrs */
218 sb->s_xattr = squashfs_xattr_handlers;
219 xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start);
220 if (xattr_id_table_start == SQUASHFS_INVALID_BLK) {
221 next_table = msblk->bytes_used;
222 goto allocate_id_index_table;
223 }
224
225 /* Allocate and read xattr id lookup table */
226 msblk->xattr_id_table = squashfs_read_xattr_id_table(sb,
227 xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids);
228 if (IS_ERR(msblk->xattr_id_table)) {
229 ERROR("unable to read xattr id index table\n");
230 err = PTR_ERR(msblk->xattr_id_table);
231 msblk->xattr_id_table = NULL;
232 if (err != -ENOTSUPP)
233 goto failed_mount;
234 }
235 next_table = msblk->xattr_table;
236
237allocate_id_index_table:
221 /* Allocate and read id index table */ 238 /* Allocate and read id index table */
222 msblk->id_table = squashfs_read_id_index_table(sb, 239 msblk->id_table = squashfs_read_id_index_table(sb,
223 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); 240 le64_to_cpu(sblk->id_table_start), next_table,
241 le16_to_cpu(sblk->no_ids));
224 if (IS_ERR(msblk->id_table)) { 242 if (IS_ERR(msblk->id_table)) {
243 ERROR("unable to read id index table\n");
225 err = PTR_ERR(msblk->id_table); 244 err = PTR_ERR(msblk->id_table);
226 msblk->id_table = NULL; 245 msblk->id_table = NULL;
227 goto failed_mount; 246 goto failed_mount;
228 } 247 }
248 next_table = msblk->id_table[0];
249
250 /* Handle inode lookup table */
251 lookup_table_start = le64_to_cpu(sblk->lookup_table_start);
252 if (lookup_table_start == SQUASHFS_INVALID_BLK)
253 goto handle_fragments;
254
255 /* Allocate and read inode lookup table */
256 msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb,
257 lookup_table_start, next_table, msblk->inodes);
258 if (IS_ERR(msblk->inode_lookup_table)) {
259 ERROR("unable to read inode lookup table\n");
260 err = PTR_ERR(msblk->inode_lookup_table);
261 msblk->inode_lookup_table = NULL;
262 goto failed_mount;
263 }
264 next_table = msblk->inode_lookup_table[0];
229 265
266 sb->s_export_op = &squashfs_export_ops;
267
268handle_fragments:
230 fragments = le32_to_cpu(sblk->fragments); 269 fragments = le32_to_cpu(sblk->fragments);
231 if (fragments == 0) 270 if (fragments == 0)
232 goto allocate_lookup_table; 271 goto check_directory_table;
233 272
234 msblk->fragment_cache = squashfs_cache_init("fragment", 273 msblk->fragment_cache = squashfs_cache_init("fragment",
235 SQUASHFS_CACHED_FRAGMENTS, msblk->block_size); 274 SQUASHFS_CACHED_FRAGMENTS, msblk->block_size);
@@ -240,45 +279,29 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
240 279
241 /* Allocate and read fragment index table */ 280 /* Allocate and read fragment index table */
242 msblk->fragment_index = squashfs_read_fragment_index_table(sb, 281 msblk->fragment_index = squashfs_read_fragment_index_table(sb,
243 le64_to_cpu(sblk->fragment_table_start), fragments); 282 le64_to_cpu(sblk->fragment_table_start), next_table, fragments);
244 if (IS_ERR(msblk->fragment_index)) { 283 if (IS_ERR(msblk->fragment_index)) {
284 ERROR("unable to read fragment index table\n");
245 err = PTR_ERR(msblk->fragment_index); 285 err = PTR_ERR(msblk->fragment_index);
246 msblk->fragment_index = NULL; 286 msblk->fragment_index = NULL;
247 goto failed_mount; 287 goto failed_mount;
248 } 288 }
289 next_table = msblk->fragment_index[0];
249 290
250allocate_lookup_table: 291check_directory_table:
251 lookup_table_start = le64_to_cpu(sblk->lookup_table_start); 292 /* Sanity check directory_table */
252 if (lookup_table_start == SQUASHFS_INVALID_BLK) 293 if (msblk->directory_table >= next_table) {
253 goto allocate_xattr_table; 294 err = -EINVAL;
254
255 /* Allocate and read inode lookup table */
256 msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb,
257 lookup_table_start, msblk->inodes);
258 if (IS_ERR(msblk->inode_lookup_table)) {
259 err = PTR_ERR(msblk->inode_lookup_table);
260 msblk->inode_lookup_table = NULL;
261 goto failed_mount; 295 goto failed_mount;
262 } 296 }
263 297
264 sb->s_export_op = &squashfs_export_ops; 298 /* Sanity check inode_table */
265 299 if (msblk->inode_table >= msblk->directory_table) {
266allocate_xattr_table: 300 err = -EINVAL;
267 sb->s_xattr = squashfs_xattr_handlers; 301 goto failed_mount;
268 xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start);
269 if (xattr_id_table_start == SQUASHFS_INVALID_BLK)
270 goto allocate_root;
271
272 /* Allocate and read xattr id lookup table */
273 msblk->xattr_id_table = squashfs_read_xattr_id_table(sb,
274 xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids);
275 if (IS_ERR(msblk->xattr_id_table)) {
276 err = PTR_ERR(msblk->xattr_id_table);
277 msblk->xattr_id_table = NULL;
278 if (err != -ENOTSUPP)
279 goto failed_mount;
280 } 302 }
281allocate_root: 303
304 /* allocate root */
282 root = new_inode(sb); 305 root = new_inode(sb);
283 if (!root) { 306 if (!root) {
284 err = -ENOMEM; 307 err = -ENOMEM;
@@ -318,11 +341,6 @@ failed_mount:
318 sb->s_fs_info = NULL; 341 sb->s_fs_info = NULL;
319 kfree(sblk); 342 kfree(sblk);
320 return err; 343 return err;
321
322failure:
323 kfree(sb->s_fs_info);
324 sb->s_fs_info = NULL;
325 return -ENOMEM;
326} 344}
327 345
328 346
@@ -475,5 +493,5 @@ static const struct super_operations squashfs_super_ops = {
475module_init(init_squashfs_fs); 493module_init(init_squashfs_fs);
476module_exit(exit_squashfs_fs); 494module_exit(exit_squashfs_fs);
477MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem"); 495MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem");
478MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>"); 496MODULE_AUTHOR("Phillip Lougher <phillip@squashfs.org.uk>");
479MODULE_LICENSE("GPL"); 497MODULE_LICENSE("GPL");
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index ec86434921e1..1191817264cc 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 3876c36699a1..92fcde7b4d61 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2010 4 * Copyright (c) 2010
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index b634efce4bde..c83f5d9ec125 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2010 4 * Copyright (c) 2010
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -31,6 +31,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
31 u64 start, u64 *xattr_table_start, int *xattr_ids) 31 u64 start, u64 *xattr_table_start, int *xattr_ids)
32{ 32{
33 ERROR("Xattrs in filesystem, these will be ignored\n"); 33 ERROR("Xattrs in filesystem, these will be ignored\n");
34 *xattr_table_start = start;
34 return ERR_PTR(-ENOTSUPP); 35 return ERR_PTR(-ENOTSUPP);
35} 36}
36 37
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 05385dbe1465..c89607d690c4 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2010 4 * Copyright (c) 2010
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -67,34 +67,29 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
67 u64 *xattr_table_start, int *xattr_ids) 67 u64 *xattr_table_start, int *xattr_ids)
68{ 68{
69 unsigned int len; 69 unsigned int len;
70 __le64 *xid_table; 70 struct squashfs_xattr_id_table *id_table;
71 struct squashfs_xattr_id_table id_table; 71
72 int err; 72 id_table = squashfs_read_table(sb, start, sizeof(*id_table));
73 if (IS_ERR(id_table))
74 return (__le64 *) id_table;
75
76 *xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
77 *xattr_ids = le32_to_cpu(id_table->xattr_ids);
78 kfree(id_table);
79
80 /* Sanity check values */
81
82 /* there is always at least one xattr id */
83 if (*xattr_ids == 0)
84 return ERR_PTR(-EINVAL);
85
86 /* xattr_table should be less than start */
87 if (*xattr_table_start >= start)
88 return ERR_PTR(-EINVAL);
73 89
74 err = squashfs_read_table(sb, &id_table, start, sizeof(id_table));
75 if (err < 0) {
76 ERROR("unable to read xattr id table\n");
77 return ERR_PTR(err);
78 }
79 *xattr_table_start = le64_to_cpu(id_table.xattr_table_start);
80 *xattr_ids = le32_to_cpu(id_table.xattr_ids);
81 len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); 90 len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
82 91
83 TRACE("In read_xattr_index_table, length %d\n", len); 92 TRACE("In read_xattr_index_table, length %d\n", len);
84 93
85 /* Allocate xattr id lookup table indexes */ 94 return squashfs_read_table(sb, start + sizeof(*id_table), len);
86 xid_table = kmalloc(len, GFP_KERNEL);
87 if (xid_table == NULL) {
88 ERROR("Failed to allocate xattr id index table\n");
89 return ERR_PTR(-ENOMEM);
90 }
91
92 err = squashfs_read_table(sb, xid_table, start + sizeof(id_table), len);
93 if (err < 0) {
94 ERROR("unable to read xattr id index table\n");
95 kfree(xid_table);
96 return ERR_PTR(err);
97 }
98
99 return xid_table;
100} 95}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index aa47a286d1f8..1760b7d108f6 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 517688b32ffa..55d918fd2d86 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 46f7a807bbc1..42694e11c23d 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -424,8 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
424 ufs_cpu_to_data_ptr(sb, p, result); 424 ufs_cpu_to_data_ptr(sb, p, result);
425 *err = 0; 425 *err = 0;
426 UFS_I(inode)->i_lastfrag = 426 UFS_I(inode)->i_lastfrag =
427 max_t(u32, UFS_I(inode)->i_lastfrag, 427 max(UFS_I(inode)->i_lastfrag, fragment + count);
428 fragment + count);
429 ufs_clear_frags(inode, result + oldcount, 428 ufs_clear_frags(inode, result + oldcount,
430 newcount - oldcount, locked_page != NULL); 429 newcount - oldcount, locked_page != NULL);
431 } 430 }
@@ -440,7 +439,8 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
440 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err); 439 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err);
441 if (result) { 440 if (result) {
442 *err = 0; 441 *err = 0;
443 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 442 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
443 fragment + count);
444 ufs_clear_frags(inode, result + oldcount, newcount - oldcount, 444 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
445 locked_page != NULL); 445 locked_page != NULL);
446 unlock_super(sb); 446 unlock_super(sb);
@@ -479,7 +479,8 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
479 uspi->s_sbbase + result, locked_page); 479 uspi->s_sbbase + result, locked_page);
480 ufs_cpu_to_data_ptr(sb, p, result); 480 ufs_cpu_to_data_ptr(sb, p, result);
481 *err = 0; 481 *err = 0;
482 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 482 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
483 fragment + count);
483 unlock_super(sb); 484 unlock_super(sb);
484 if (newcount < request) 485 if (newcount < request)
485 ufs_free_fragments (inode, result + newcount, request - newcount); 486 ufs_free_fragments (inode, result + newcount, request - newcount);
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 5f821dbc0579..f04f89fbd4d9 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -84,7 +84,7 @@ static int ufs_trunc_direct(struct inode *inode)
84 retry = 0; 84 retry = 0;
85 85
86 frag1 = DIRECT_FRAGMENT; 86 frag1 = DIRECT_FRAGMENT;
87 frag4 = min_t(u32, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); 87 frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
88 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1); 88 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
89 frag3 = frag4 & ~uspi->s_fpbmask; 89 frag3 = frag4 & ~uspi->s_fpbmask;
90 block1 = block2 = 0; 90 block1 = block2 = 0;