Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_file.c  4
-rw-r--r--  fs/9p/vfs_inode.c  4
-rw-r--r--  fs/affs/file.c  2
-rw-r--r--  fs/bio-integrity.c  9
-rw-r--r--  fs/btrfs/Kconfig  9
-rw-r--r--  fs/btrfs/Makefile  5
-rw-r--r--  fs/btrfs/backref.c  93
-rw-r--r--  fs/btrfs/backref.h  2
-rw-r--r--  fs/btrfs/btrfs_inode.h  21
-rw-r--r--  fs/btrfs/check-integrity.c  422
-rw-r--r--  fs/btrfs/compression.c  11
-rw-r--r--  fs/btrfs/ctree.c  289
-rw-r--r--  fs/btrfs/ctree.h  161
-rw-r--r--  fs/btrfs/delayed-inode.c  46
-rw-r--r--  fs/btrfs/delayed-ref.c  8
-rw-r--r--  fs/btrfs/dev-replace.c  4
-rw-r--r--  fs/btrfs/disk-io.c  174
-rw-r--r--  fs/btrfs/extent-tree.c  184
-rw-r--r--  fs/btrfs/extent_io.c  664
-rw-r--r--  fs/btrfs/extent_io.h  35
-rw-r--r--  fs/btrfs/file-item.c  85
-rw-r--r--  fs/btrfs/file.c  11
-rw-r--r--  fs/btrfs/free-space-cache.c  525
-rw-r--r--  fs/btrfs/free-space-cache.h  11
-rw-r--r--  fs/btrfs/inode.c  615
-rw-r--r--  fs/btrfs/ioctl.c  745
-rw-r--r--  fs/btrfs/lzo.c  4
-rw-r--r--  fs/btrfs/ordered-data.c  28
-rw-r--r--  fs/btrfs/ordered-data.h  7
-rw-r--r--  fs/btrfs/print-tree.c  107
-rw-r--r--  fs/btrfs/qgroup.c  69
-rw-r--r--  fs/btrfs/raid56.c  14
-rw-r--r--  fs/btrfs/relocation.c  43
-rw-r--r--  fs/btrfs/root-tree.c  21
-rw-r--r--  fs/btrfs/scrub.c  42
-rw-r--r--  fs/btrfs/send.c  240
-rw-r--r--  fs/btrfs/super.c  145
-rw-r--r--  fs/btrfs/tests/btrfs-tests.h  34
-rw-r--r--  fs/btrfs/tests/free-space-tests.c  395
-rw-r--r--  fs/btrfs/transaction.c  34
-rw-r--r--  fs/btrfs/transaction.h  2
-rw-r--r--  fs/btrfs/tree-log.c  19
-rw-r--r--  fs/btrfs/uuid-tree.c  358
-rw-r--r--  fs/btrfs/volumes.c  613
-rw-r--r--  fs/btrfs/volumes.h  12
-rw-r--r--  fs/cifs/AUTHORS  55
-rw-r--r--  fs/cifs/CHANGES  1065
-rw-r--r--  fs/cifs/Makefile  2
-rw-r--r--  fs/cifs/README  753
-rw-r--r--  fs/cifs/TODO  129
-rw-r--r--  fs/cifs/cifs_unicode.h  2
-rw-r--r--  fs/cifs/cifsfs.c  49
-rw-r--r--  fs/cifs/cifsglob.h  61
-rw-r--r--  fs/cifs/cifspdu.h  11
-rw-r--r--  fs/cifs/cifsproto.h  12
-rw-r--r--  fs/cifs/cifssmb.c  110
-rw-r--r--  fs/cifs/connect.c  43
-rw-r--r--  fs/cifs/dir.c  58
-rw-r--r--  fs/cifs/file.c  35
-rw-r--r--  fs/cifs/inode.c  12
-rw-r--r--  fs/cifs/link.c  24
-rw-r--r--  fs/cifs/misc.c  13
-rw-r--r--  fs/cifs/readdir.c  3
-rw-r--r--  fs/cifs/sess.c  58
-rw-r--r--  fs/cifs/smb1ops.c  44
-rw-r--r--  fs/cifs/smb2file.c  25
-rw-r--r--  fs/cifs/smb2inode.c  9
-rw-r--r--  fs/cifs/smb2misc.c  186
-rw-r--r--  fs/cifs/smb2ops.c  344
-rw-r--r--  fs/cifs/smb2pdu.c  135
-rw-r--r--  fs/cifs/smb2pdu.h  37
-rw-r--r--  fs/cifs/smb2proto.h  5
-rw-r--r--  fs/cifs/smb2transport.c  70
-rw-r--r--  fs/cifs/winucase.c  663
-rw-r--r--  fs/coredump.c  5
-rw-r--r--  fs/dcache.c  76
-rw-r--r--  fs/ecryptfs/crypto.c  16
-rw-r--r--  fs/eventpoll.c  2
-rw-r--r--  fs/exec.c  122
-rw-r--r--  fs/file_table.c  3
-rw-r--r--  fs/fs-writeback.c  12
-rw-r--r--  fs/fscache/page.c  2
-rw-r--r--  fs/fuse/inode.c  2
-rw-r--r--  fs/hfsplus/Kconfig  18
-rw-r--r--  fs/hfsplus/Makefile  2
-rw-r--r--  fs/hfsplus/acl.h  30
-rw-r--r--  fs/hfsplus/dir.c  4
-rw-r--r--  fs/hfsplus/hfsplus_fs.h  1
-rw-r--r--  fs/hfsplus/inode.c  11
-rw-r--r--  fs/hfsplus/posix_acl.c  274
-rw-r--r--  fs/hfsplus/xattr.c  62
-rw-r--r--  fs/hfsplus/xattr.h  33
-rw-r--r--  fs/hfsplus/xattr_security.c  13
-rw-r--r--  fs/namespace.c  2
-rw-r--r--  fs/nfs/nfs4_fs.h  10
-rw-r--r--  fs/nfs/nfs4proc.c  22
-rw-r--r--  fs/nfs/nfs4xdr.c  17
-rw-r--r--  fs/nfsd/nfs4recover.c  2
-rw-r--r--  fs/nfsd/nfs4state.c  33
-rw-r--r--  fs/ocfs2/acl.c  4
-rw-r--r--  fs/ocfs2/aops.c  2
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c  32
-rw-r--r--  fs/ocfs2/cluster/tcp.c  60
-rw-r--r--  fs/ocfs2/dlm/dlmast.c  8
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h  4
-rw-r--r--  fs/ocfs2/dlm/dlmconvert.c  18
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c  15
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c  35
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c  9
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c  18
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c  13
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c  19
-rw-r--r--  fs/ocfs2/dlm/dlmunlock.c  4
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c  3
-rw-r--r--  fs/ocfs2/extent_map.c  11
-rw-r--r--  fs/ocfs2/file.c  7
-rw-r--r--  fs/ocfs2/ioctl.c  2
-rw-r--r--  fs/ocfs2/journal.c  43
-rw-r--r--  fs/ocfs2/journal.h  11
-rw-r--r--  fs/ocfs2/localalloc.c  4
-rw-r--r--  fs/ocfs2/move_extents.c  3
-rw-r--r--  fs/ocfs2/ocfs2_trace.h  2
-rw-r--r--  fs/ocfs2/quota_global.c  6
-rw-r--r--  fs/ocfs2/quota_local.c  12
-rw-r--r--  fs/ocfs2/refcounttree.c  10
-rw-r--r--  fs/ocfs2/xattr.c  11
-rw-r--r--  fs/proc/fd.c  2
-rw-r--r--  fs/proc/task_mmu.c  50
-rw-r--r--  fs/proc/vmcore.c  154
-rw-r--r--  fs/ramfs/inode.c  26
-rw-r--r--  fs/squashfs/block.c  11
-rw-r--r--  fs/squashfs/dir.c  17
-rw-r--r--  fs/squashfs/namei.c  8
-rw-r--r--  fs/squashfs/squashfs_fs.h  5
134 files changed, 6550 insertions, 5202 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index d384a8b77ee8..aa5ecf479a57 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -183,7 +183,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
183 else 183 else
184 flock.length = fl->fl_end - fl->fl_start + 1; 184 flock.length = fl->fl_end - fl->fl_start + 1;
185 flock.proc_id = fl->fl_pid; 185 flock.proc_id = fl->fl_pid;
186 flock.client_id = utsname()->nodename; 186 flock.client_id = fid->clnt->name;
187 if (IS_SETLKW(cmd)) 187 if (IS_SETLKW(cmd))
188 flock.flags = P9_LOCK_FLAGS_BLOCK; 188 flock.flags = P9_LOCK_FLAGS_BLOCK;
189 189
@@ -260,7 +260,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
260 else 260 else
261 glock.length = fl->fl_end - fl->fl_start + 1; 261 glock.length = fl->fl_end - fl->fl_start + 1;
262 glock.proc_id = fl->fl_pid; 262 glock.proc_id = fl->fl_pid;
263 glock.client_id = utsname()->nodename; 263 glock.client_id = fid->clnt->name;
264 264
265 res = p9_client_getlock_dotl(fid, &glock); 265 res = p9_client_getlock_dotl(fid, &glock);
266 if (res < 0) 266 if (res < 0)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 25b018efb8ab..94de6d1482e2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -146,7 +146,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
146 char type = 0, ext[32]; 146 char type = 0, ext[32];
147 int major = -1, minor = -1; 147 int major = -1, minor = -1;
148 148
149 strncpy(ext, stat->extension, sizeof(ext)); 149 strlcpy(ext, stat->extension, sizeof(ext));
150 sscanf(ext, "%c %u %u", &type, &major, &minor); 150 sscanf(ext, "%c %u %u", &type, &major, &minor);
151 switch (type) { 151 switch (type) {
152 case 'c': 152 case 'c':
@@ -1186,7 +1186,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1186 * this even with .u extension. So check 1186 * this even with .u extension. So check
1187 * for non NULL stat->extension 1187 * for non NULL stat->extension
1188 */ 1188 */
1189 strncpy(ext, stat->extension, sizeof(ext)); 1189 strlcpy(ext, stat->extension, sizeof(ext));
1190 /* HARDLINKCOUNT %u */ 1190 /* HARDLINKCOUNT %u */
1191 sscanf(ext, "%13s %u", tag_name, &i_nlink); 1191 sscanf(ext, "%13s %u", tag_name, &i_nlink);
1192 if (!strncmp(tag_name, "HARDLINKCOUNT", 13)) 1192 if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
diff --git a/fs/affs/file.c b/fs/affs/file.c
index af3261b78102..776e3935a758 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -836,7 +836,7 @@ affs_truncate(struct inode *inode)
836 struct address_space *mapping = inode->i_mapping; 836 struct address_space *mapping = inode->i_mapping;
837 struct page *page; 837 struct page *page;
838 void *fsdata; 838 void *fsdata;
839 u32 size = inode->i_size; 839 loff_t size = inode->i_size;
840 int res; 840 int res;
841 841
842 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); 842 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 8fb42916d8a2..60250847929f 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -716,13 +716,14 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
716 return 0; 716 return 0;
717 717
718 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); 718 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
719 719 if (!bs->bio_integrity_pool)
720 bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
721 if (!bs->bvec_integrity_pool)
722 return -1; 720 return -1;
723 721
724 if (!bs->bio_integrity_pool) 722 bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
723 if (!bs->bvec_integrity_pool) {
724 mempool_destroy(bs->bio_integrity_pool);
725 return -1; 725 return -1;
726 }
726 727
727 return 0; 728 return 0;
728} 729}
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 2b3b83296977..398cbd517be2 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -72,3 +72,12 @@ config BTRFS_DEBUG
72 performance, or export extra information via sysfs. 72 performance, or export extra information via sysfs.
73 73
74 If unsure, say N. 74 If unsure, say N.
75
76config BTRFS_ASSERT
77 bool "Btrfs assert support"
78 depends on BTRFS_FS
79 help
80 Enable run-time assertion checking. This will result in panics if
81 any of the assertions trip. This is meant for btrfs developers only.
82
83 If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 3932224f99e9..a91a6a355cc5 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o 11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
12 uuid-tree.o
12 13
13btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o 14btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
14btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o 15btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
16
17btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8bc5e8ccb091..0552a599b28f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -119,6 +119,26 @@ struct __prelim_ref {
119 u64 wanted_disk_byte; 119 u64 wanted_disk_byte;
120}; 120};
121 121
122static struct kmem_cache *btrfs_prelim_ref_cache;
123
124int __init btrfs_prelim_ref_init(void)
125{
126 btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
127 sizeof(struct __prelim_ref),
128 0,
129 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
130 NULL);
131 if (!btrfs_prelim_ref_cache)
132 return -ENOMEM;
133 return 0;
134}
135
136void btrfs_prelim_ref_exit(void)
137{
138 if (btrfs_prelim_ref_cache)
139 kmem_cache_destroy(btrfs_prelim_ref_cache);
140}
141
122/* 142/*
123 * the rules for all callers of this function are: 143 * the rules for all callers of this function are:
124 * - obtaining the parent is the goal 144 * - obtaining the parent is the goal
@@ -160,12 +180,12 @@ struct __prelim_ref {
160 180
161static int __add_prelim_ref(struct list_head *head, u64 root_id, 181static int __add_prelim_ref(struct list_head *head, u64 root_id,
162 struct btrfs_key *key, int level, 182 struct btrfs_key *key, int level,
163 u64 parent, u64 wanted_disk_byte, int count) 183 u64 parent, u64 wanted_disk_byte, int count,
184 gfp_t gfp_mask)
164{ 185{
165 struct __prelim_ref *ref; 186 struct __prelim_ref *ref;
166 187
167 /* in case we're adding delayed refs, we're holding the refs spinlock */ 188 ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
168 ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
169 if (!ref) 189 if (!ref)
170 return -ENOMEM; 190 return -ENOMEM;
171 191
@@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
295 ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); 315 ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
296 pr_debug("search slot in root %llu (level %d, ref count %d) returned " 316 pr_debug("search slot in root %llu (level %d, ref count %d) returned "
297 "%d for key (%llu %u %llu)\n", 317 "%d for key (%llu %u %llu)\n",
298 (unsigned long long)ref->root_id, level, ref->count, ret, 318 ref->root_id, level, ref->count, ret,
299 (unsigned long long)ref->key_for_search.objectid, 319 ref->key_for_search.objectid, ref->key_for_search.type,
300 ref->key_for_search.type, 320 ref->key_for_search.offset);
301 (unsigned long long)ref->key_for_search.offset);
302 if (ret < 0) 321 if (ret < 0)
303 goto out; 322 goto out;
304 323
@@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
365 node = ulist_next(parents, &uiter); 384 node = ulist_next(parents, &uiter);
366 ref->parent = node ? node->val : 0; 385 ref->parent = node ? node->val : 0;
367 ref->inode_list = node ? 386 ref->inode_list = node ?
368 (struct extent_inode_elem *)(uintptr_t)node->aux : 0; 387 (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
369 388
370 /* additional parents require new refs being added here */ 389 /* additional parents require new refs being added here */
371 while ((node = ulist_next(parents, &uiter))) { 390 while ((node = ulist_next(parents, &uiter))) {
372 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); 391 new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
392 GFP_NOFS);
373 if (!new_ref) { 393 if (!new_ref) {
374 ret = -ENOMEM; 394 ret = -ENOMEM;
375 goto out; 395 goto out;
@@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode)
493 ref1->count += ref2->count; 513 ref1->count += ref2->count;
494 514
495 list_del(&ref2->list); 515 list_del(&ref2->list);
496 kfree(ref2); 516 kmem_cache_free(btrfs_prelim_ref_cache, ref2);
497 } 517 }
498 518
499 } 519 }
@@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
548 ref = btrfs_delayed_node_to_tree_ref(node); 568 ref = btrfs_delayed_node_to_tree_ref(node);
549 ret = __add_prelim_ref(prefs, ref->root, &op_key, 569 ret = __add_prelim_ref(prefs, ref->root, &op_key,
550 ref->level + 1, 0, node->bytenr, 570 ref->level + 1, 0, node->bytenr,
551 node->ref_mod * sgn); 571 node->ref_mod * sgn, GFP_ATOMIC);
552 break; 572 break;
553 } 573 }
554 case BTRFS_SHARED_BLOCK_REF_KEY: { 574 case BTRFS_SHARED_BLOCK_REF_KEY: {
@@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
558 ret = __add_prelim_ref(prefs, ref->root, NULL, 578 ret = __add_prelim_ref(prefs, ref->root, NULL,
559 ref->level + 1, ref->parent, 579 ref->level + 1, ref->parent,
560 node->bytenr, 580 node->bytenr,
561 node->ref_mod * sgn); 581 node->ref_mod * sgn, GFP_ATOMIC);
562 break; 582 break;
563 } 583 }
564 case BTRFS_EXTENT_DATA_REF_KEY: { 584 case BTRFS_EXTENT_DATA_REF_KEY: {
@@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
570 key.offset = ref->offset; 590 key.offset = ref->offset;
571 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, 591 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
572 node->bytenr, 592 node->bytenr,
573 node->ref_mod * sgn); 593 node->ref_mod * sgn, GFP_ATOMIC);
574 break; 594 break;
575 } 595 }
576 case BTRFS_SHARED_DATA_REF_KEY: { 596 case BTRFS_SHARED_DATA_REF_KEY: {
@@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
583 key.offset = ref->offset; 603 key.offset = ref->offset;
584 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 604 ret = __add_prelim_ref(prefs, ref->root, &key, 0,
585 ref->parent, node->bytenr, 605 ref->parent, node->bytenr,
586 node->ref_mod * sgn); 606 node->ref_mod * sgn, GFP_ATOMIC);
587 break; 607 break;
588 } 608 }
589 default: 609 default:
@@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
657 case BTRFS_SHARED_BLOCK_REF_KEY: 677 case BTRFS_SHARED_BLOCK_REF_KEY:
658 ret = __add_prelim_ref(prefs, 0, NULL, 678 ret = __add_prelim_ref(prefs, 0, NULL,
659 *info_level + 1, offset, 679 *info_level + 1, offset,
660 bytenr, 1); 680 bytenr, 1, GFP_NOFS);
661 break; 681 break;
662 case BTRFS_SHARED_DATA_REF_KEY: { 682 case BTRFS_SHARED_DATA_REF_KEY: {
663 struct btrfs_shared_data_ref *sdref; 683 struct btrfs_shared_data_ref *sdref;
@@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
666 sdref = (struct btrfs_shared_data_ref *)(iref + 1); 686 sdref = (struct btrfs_shared_data_ref *)(iref + 1);
667 count = btrfs_shared_data_ref_count(leaf, sdref); 687 count = btrfs_shared_data_ref_count(leaf, sdref);
668 ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, 688 ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
669 bytenr, count); 689 bytenr, count, GFP_NOFS);
670 break; 690 break;
671 } 691 }
672 case BTRFS_TREE_BLOCK_REF_KEY: 692 case BTRFS_TREE_BLOCK_REF_KEY:
673 ret = __add_prelim_ref(prefs, offset, NULL, 693 ret = __add_prelim_ref(prefs, offset, NULL,
674 *info_level + 1, 0, 694 *info_level + 1, 0,
675 bytenr, 1); 695 bytenr, 1, GFP_NOFS);
676 break; 696 break;
677 case BTRFS_EXTENT_DATA_REF_KEY: { 697 case BTRFS_EXTENT_DATA_REF_KEY: {
678 struct btrfs_extent_data_ref *dref; 698 struct btrfs_extent_data_ref *dref;
@@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
687 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 707 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
688 root = btrfs_extent_data_ref_root(leaf, dref); 708 root = btrfs_extent_data_ref_root(leaf, dref);
689 ret = __add_prelim_ref(prefs, root, &key, 0, 0, 709 ret = __add_prelim_ref(prefs, root, &key, 0, 0,
690 bytenr, count); 710 bytenr, count, GFP_NOFS);
691 break; 711 break;
692 } 712 }
693 default: 713 default:
@@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
738 case BTRFS_SHARED_BLOCK_REF_KEY: 758 case BTRFS_SHARED_BLOCK_REF_KEY:
739 ret = __add_prelim_ref(prefs, 0, NULL, 759 ret = __add_prelim_ref(prefs, 0, NULL,
740 info_level + 1, key.offset, 760 info_level + 1, key.offset,
741 bytenr, 1); 761 bytenr, 1, GFP_NOFS);
742 break; 762 break;
743 case BTRFS_SHARED_DATA_REF_KEY: { 763 case BTRFS_SHARED_DATA_REF_KEY: {
744 struct btrfs_shared_data_ref *sdref; 764 struct btrfs_shared_data_ref *sdref;
@@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
748 struct btrfs_shared_data_ref); 768 struct btrfs_shared_data_ref);
749 count = btrfs_shared_data_ref_count(leaf, sdref); 769 count = btrfs_shared_data_ref_count(leaf, sdref);
750 ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, 770 ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
751 bytenr, count); 771 bytenr, count, GFP_NOFS);
752 break; 772 break;
753 } 773 }
754 case BTRFS_TREE_BLOCK_REF_KEY: 774 case BTRFS_TREE_BLOCK_REF_KEY:
755 ret = __add_prelim_ref(prefs, key.offset, NULL, 775 ret = __add_prelim_ref(prefs, key.offset, NULL,
756 info_level + 1, 0, 776 info_level + 1, 0,
757 bytenr, 1); 777 bytenr, 1, GFP_NOFS);
758 break; 778 break;
759 case BTRFS_EXTENT_DATA_REF_KEY: { 779 case BTRFS_EXTENT_DATA_REF_KEY: {
760 struct btrfs_extent_data_ref *dref; 780 struct btrfs_extent_data_ref *dref;
@@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
770 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 790 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
771 root = btrfs_extent_data_ref_root(leaf, dref); 791 root = btrfs_extent_data_ref_root(leaf, dref);
772 ret = __add_prelim_ref(prefs, root, &key, 0, 0, 792 ret = __add_prelim_ref(prefs, root, &key, 0, 0,
773 bytenr, count); 793 bytenr, count, GFP_NOFS);
774 break; 794 break;
775 } 795 }
776 default: 796 default:
@@ -911,7 +931,6 @@ again:
911 931
912 while (!list_empty(&prefs)) { 932 while (!list_empty(&prefs)) {
913 ref = list_first_entry(&prefs, struct __prelim_ref, list); 933 ref = list_first_entry(&prefs, struct __prelim_ref, list);
914 list_del(&ref->list);
915 WARN_ON(ref->count < 0); 934 WARN_ON(ref->count < 0);
916 if (ref->count && ref->root_id && ref->parent == 0) { 935 if (ref->count && ref->root_id && ref->parent == 0) {
917 /* no parent == root of tree */ 936 /* no parent == root of tree */
@@ -935,8 +954,10 @@ again:
935 } 954 }
936 ret = find_extent_in_eb(eb, bytenr, 955 ret = find_extent_in_eb(eb, bytenr,
937 *extent_item_pos, &eie); 956 *extent_item_pos, &eie);
938 ref->inode_list = eie;
939 free_extent_buffer(eb); 957 free_extent_buffer(eb);
958 if (ret < 0)
959 goto out;
960 ref->inode_list = eie;
940 } 961 }
941 ret = ulist_add_merge(refs, ref->parent, 962 ret = ulist_add_merge(refs, ref->parent,
942 (uintptr_t)ref->inode_list, 963 (uintptr_t)ref->inode_list,
@@ -954,7 +975,8 @@ again:
954 eie->next = ref->inode_list; 975 eie->next = ref->inode_list;
955 } 976 }
956 } 977 }
957 kfree(ref); 978 list_del(&ref->list);
979 kmem_cache_free(btrfs_prelim_ref_cache, ref);
958 } 980 }
959 981
960out: 982out:
@@ -962,13 +984,13 @@ out:
962 while (!list_empty(&prefs)) { 984 while (!list_empty(&prefs)) {
963 ref = list_first_entry(&prefs, struct __prelim_ref, list); 985 ref = list_first_entry(&prefs, struct __prelim_ref, list);
964 list_del(&ref->list); 986 list_del(&ref->list);
965 kfree(ref); 987 kmem_cache_free(btrfs_prelim_ref_cache, ref);
966 } 988 }
967 while (!list_empty(&prefs_delayed)) { 989 while (!list_empty(&prefs_delayed)) {
968 ref = list_first_entry(&prefs_delayed, struct __prelim_ref, 990 ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
969 list); 991 list);
970 list_del(&ref->list); 992 list_del(&ref->list);
971 kfree(ref); 993 kmem_cache_free(btrfs_prelim_ref_cache, ref);
972 } 994 }
973 995
974 return ret; 996 return ret;
@@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1326 found_key->type != BTRFS_METADATA_ITEM_KEY) || 1348 found_key->type != BTRFS_METADATA_ITEM_KEY) ||
1327 found_key->objectid > logical || 1349 found_key->objectid > logical ||
1328 found_key->objectid + size <= logical) { 1350 found_key->objectid + size <= logical) {
1329 pr_debug("logical %llu is not within any extent\n", 1351 pr_debug("logical %llu is not within any extent\n", logical);
1330 (unsigned long long)logical);
1331 return -ENOENT; 1352 return -ENOENT;
1332 } 1353 }
1333 1354
@@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1340 1361
1341 pr_debug("logical %llu is at position %llu within the extent (%llu " 1362 pr_debug("logical %llu is at position %llu within the extent (%llu "
1342 "EXTENT_ITEM %llu) flags %#llx size %u\n", 1363 "EXTENT_ITEM %llu) flags %#llx size %u\n",
1343 (unsigned long long)logical, 1364 logical, logical - found_key->objectid, found_key->objectid,
1344 (unsigned long long)(logical - found_key->objectid), 1365 found_key->offset, flags, item_size);
1345 (unsigned long long)found_key->objectid,
1346 (unsigned long long)found_key->offset,
1347 (unsigned long long)flags, item_size);
1348 1366
1349 WARN_ON(!flags_ret); 1367 WARN_ON(!flags_ret);
1350 if (flags_ret) { 1368 if (flags_ret) {
@@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1516 while (!ret && (root_node = ulist_next(roots, &root_uiter))) { 1534 while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
1517 pr_debug("root %llu references leaf %llu, data list " 1535 pr_debug("root %llu references leaf %llu, data list "
1518 "%#llx\n", root_node->val, ref_node->val, 1536 "%#llx\n", root_node->val, ref_node->val,
1519 (long long)ref_node->aux); 1537 ref_node->aux);
1520 ret = iterate_leaf_refs((struct extent_inode_elem *) 1538 ret = iterate_leaf_refs((struct extent_inode_elem *)
1521 (uintptr_t)ref_node->aux, 1539 (uintptr_t)ref_node->aux,
1522 root_node->val, 1540 root_node->val,
@@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
1608 name_len = btrfs_inode_ref_name_len(eb, iref); 1626 name_len = btrfs_inode_ref_name_len(eb, iref);
1609 /* path must be released before calling iterate()! */ 1627 /* path must be released before calling iterate()! */
1610 pr_debug("following ref at offset %u for inode %llu in " 1628 pr_debug("following ref at offset %u for inode %llu in "
1611 "tree %llu\n", cur, 1629 "tree %llu\n", cur, found_key.objectid,
1612 (unsigned long long)found_key.objectid, 1630 fs_root->objectid);
1613 (unsigned long long)fs_root->objectid);
1614 ret = iterate(parent, name_len, 1631 ret = iterate(parent, name_len,
1615 (unsigned long)(iref + 1), eb, ctx); 1632 (unsigned long)(iref + 1), eb, ctx);
1616 if (ret) 1633 if (ret)
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 8f2e76702932..a910b27a8ad9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
72 struct btrfs_inode_extref **ret_extref, 72 struct btrfs_inode_extref **ret_extref,
73 u64 *found_off); 73 u64 *found_off);
74 74
75int __init btrfs_prelim_ref_init(void);
76void btrfs_prelim_ref_exit(void);
75#endif 77#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 08b286b2a2c5..d0ae226926ee 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
218 return 0; 218 return 0;
219} 219}
220 220
221struct btrfs_dio_private {
222 struct inode *inode;
223 u64 logical_offset;
224 u64 disk_bytenr;
225 u64 bytes;
226 void *private;
227
228 /* number of bios pending for this dio */
229 atomic_t pending_bios;
230
231 /* IO errors */
232 int errors;
233
234 /* orig_bio is our btrfs_io_bio */
235 struct bio *orig_bio;
236
237 /* dio_bio came from fs/direct-io.c */
238 struct bio *dio_bio;
239 u8 csum[0];
240};
241
221/* 242/*
222 * Disable DIO read nolock optimization, so new dio readers will be forced 243 * Disable DIO read nolock optimization, so new dio readers will be forced
223 * to grab i_mutex. It is used to avoid the endless truncate due to 244 * to grab i_mutex. It is used to avoid the endless truncate due to
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 1431a6965017..1c47be187240 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
701 next_bytenr = btrfs_super_root(selected_super); 701 next_bytenr = btrfs_super_root(selected_super);
702 if (state->print_mask & 702 if (state->print_mask &
703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
704 printk(KERN_INFO "root@%llu\n", 704 printk(KERN_INFO "root@%llu\n", next_bytenr);
705 (unsigned long long)next_bytenr);
706 break; 705 break;
707 case 1: 706 case 1:
708 next_bytenr = btrfs_super_chunk_root(selected_super); 707 next_bytenr = btrfs_super_chunk_root(selected_super);
709 if (state->print_mask & 708 if (state->print_mask &
710 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 709 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
711 printk(KERN_INFO "chunk@%llu\n", 710 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
712 (unsigned long long)next_bytenr);
713 break; 711 break;
714 case 2: 712 case 2:
715 next_bytenr = btrfs_super_log_root(selected_super); 713 next_bytenr = btrfs_super_log_root(selected_super);
@@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
717 continue; 715 continue;
718 if (state->print_mask & 716 if (state->print_mask &
719 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 717 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
720 printk(KERN_INFO "log@%llu\n", 718 printk(KERN_INFO "log@%llu\n", next_bytenr);
721 (unsigned long long)next_bytenr);
722 break; 719 break;
723 } 720 }
724 721
@@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
727 next_bytenr, state->metablock_size); 724 next_bytenr, state->metablock_size);
728 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 725 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
729 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 726 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
730 (unsigned long long)next_bytenr, num_copies); 727 next_bytenr, num_copies);
731 728
732 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 729 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
733 struct btrfsic_block *next_block; 730 struct btrfsic_block *next_block;
@@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
742 printk(KERN_INFO "btrfsic:" 739 printk(KERN_INFO "btrfsic:"
743 " btrfsic_map_block(root @%llu," 740 " btrfsic_map_block(root @%llu,"
744 " mirror %d) failed!\n", 741 " mirror %d) failed!\n",
745 (unsigned long long)next_bytenr, 742 next_bytenr, mirror_num);
746 mirror_num);
747 kfree(selected_super); 743 kfree(selected_super);
748 return -1; 744 return -1;
749 } 745 }
@@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
767 if (ret < (int)PAGE_CACHE_SIZE) { 763 if (ret < (int)PAGE_CACHE_SIZE) {
768 printk(KERN_INFO 764 printk(KERN_INFO
769 "btrfsic: read @logical %llu failed!\n", 765 "btrfsic: read @logical %llu failed!\n",
770 (unsigned long long)
771 tmp_next_block_ctx.start); 766 tmp_next_block_ctx.start);
772 btrfsic_release_block_ctx(&tmp_next_block_ctx); 767 btrfsic_release_block_ctx(&tmp_next_block_ctx);
773 kfree(selected_super); 768 kfree(selected_super);
@@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror(
813 (bh->b_data + (dev_bytenr & 4095)); 808 (bh->b_data + (dev_bytenr & 4095));
814 809
815 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 810 if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
816 super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || 811 btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
817 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 812 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
818 btrfs_super_nodesize(super_tmp) != state->metablock_size || 813 btrfs_super_nodesize(super_tmp) != state->metablock_size ||
819 btrfs_super_leafsize(super_tmp) != state->metablock_size || 814 btrfs_super_leafsize(super_tmp) != state->metablock_size ||
@@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror(
847 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 842 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
848 " @%llu (%s/%llu/%d)\n", 843 " @%llu (%s/%llu/%d)\n",
849 superblock_bdev, 844 superblock_bdev,
850 rcu_str_deref(device->name), 845 rcu_str_deref(device->name), dev_bytenr,
851 (unsigned long long)dev_bytenr, 846 dev_state->name, dev_bytenr,
852 dev_state->name,
853 (unsigned long long)dev_bytenr,
854 superblock_mirror_num); 847 superblock_mirror_num);
855 list_add(&superblock_tmp->all_blocks_node, 848 list_add(&superblock_tmp->all_blocks_node,
856 &state->all_blocks_list); 849 &state->all_blocks_list);
@@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror(
880 tmp_disk_key.offset = 0; 873 tmp_disk_key.offset = 0;
881 switch (pass) { 874 switch (pass) {
882 case 0: 875 case 0:
883 tmp_disk_key.objectid = 876 btrfs_set_disk_key_objectid(&tmp_disk_key,
884 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 877 BTRFS_ROOT_TREE_OBJECTID);
885 additional_string = "initial root "; 878 additional_string = "initial root ";
886 next_bytenr = btrfs_super_root(super_tmp); 879 next_bytenr = btrfs_super_root(super_tmp);
887 break; 880 break;
888 case 1: 881 case 1:
889 tmp_disk_key.objectid = 882 btrfs_set_disk_key_objectid(&tmp_disk_key,
890 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 883 BTRFS_CHUNK_TREE_OBJECTID);
891 additional_string = "initial chunk "; 884 additional_string = "initial chunk ";
892 next_bytenr = btrfs_super_chunk_root(super_tmp); 885 next_bytenr = btrfs_super_chunk_root(super_tmp);
893 break; 886 break;
894 case 2: 887 case 2:
895 tmp_disk_key.objectid = 888 btrfs_set_disk_key_objectid(&tmp_disk_key,
896 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 889 BTRFS_TREE_LOG_OBJECTID);
897 additional_string = "initial log "; 890 additional_string = "initial log ";
898 next_bytenr = btrfs_super_log_root(super_tmp); 891 next_bytenr = btrfs_super_log_root(super_tmp);
899 if (0 == next_bytenr) 892 if (0 == next_bytenr)
@@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror(
906 next_bytenr, state->metablock_size); 899 next_bytenr, state->metablock_size);
907 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 900 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
908 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 901 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
909 (unsigned long long)next_bytenr, num_copies); 902 next_bytenr, num_copies);
910 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 903 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
911 struct btrfsic_block *next_block; 904 struct btrfsic_block *next_block;
912 struct btrfsic_block_data_ctx tmp_next_block_ctx; 905 struct btrfsic_block_data_ctx tmp_next_block_ctx;
@@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror(
918 mirror_num)) { 911 mirror_num)) {
919 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 912 printk(KERN_INFO "btrfsic: btrfsic_map_block("
920 "bytenr @%llu, mirror %d) failed!\n", 913 "bytenr @%llu, mirror %d) failed!\n",
921 (unsigned long long)next_bytenr, 914 next_bytenr, mirror_num);
922 mirror_num);
923 brelse(bh); 915 brelse(bh);
924 return -1; 916 return -1;
925 } 917 }
@@ -1003,19 +995,17 @@ continue_with_new_stack_frame:
1003 (struct btrfs_leaf *)sf->hdr; 995 (struct btrfs_leaf *)sf->hdr;
1004 996
1005 if (-1 == sf->i) { 997 if (-1 == sf->i) {
1006 sf->nr = le32_to_cpu(leafhdr->header.nritems); 998 sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
1007 999
1008 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1000 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1009 printk(KERN_INFO 1001 printk(KERN_INFO
1010 "leaf %llu items %d generation %llu" 1002 "leaf %llu items %d generation %llu"
1011 " owner %llu\n", 1003 " owner %llu\n",
1012 (unsigned long long) 1004 sf->block_ctx->start, sf->nr,
1013 sf->block_ctx->start, 1005 btrfs_stack_header_generation(
1014 sf->nr, 1006 &leafhdr->header),
1015 (unsigned long long) 1007 btrfs_stack_header_owner(
1016 le64_to_cpu(leafhdr->header.generation), 1008 &leafhdr->header));
1017 (unsigned long long)
1018 le64_to_cpu(leafhdr->header.owner));
1019 } 1009 }
1020 1010
1021continue_with_current_leaf_stack_frame: 1011continue_with_current_leaf_stack_frame:
@@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error:
1047 &disk_item, 1037 &disk_item,
1048 disk_item_offset, 1038 disk_item_offset,
1049 sizeof(struct btrfs_item)); 1039 sizeof(struct btrfs_item));
1050 item_offset = le32_to_cpu(disk_item.offset); 1040 item_offset = btrfs_stack_item_offset(&disk_item);
1051 item_size = le32_to_cpu(disk_item.size); 1041 item_size = btrfs_stack_item_offset(&disk_item);
1052 disk_key = &disk_item.key; 1042 disk_key = &disk_item.key;
1053 type = disk_key->type; 1043 type = btrfs_disk_key_type(disk_key);
1054 1044
1055 if (BTRFS_ROOT_ITEM_KEY == type) { 1045 if (BTRFS_ROOT_ITEM_KEY == type) {
1056 struct btrfs_root_item root_item; 1046 struct btrfs_root_item root_item;
@@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error:
1066 sf->block_ctx, &root_item, 1056 sf->block_ctx, &root_item,
1067 root_item_offset, 1057 root_item_offset,
1068 item_size); 1058 item_size);
1069 next_bytenr = le64_to_cpu(root_item.bytenr); 1059 next_bytenr = btrfs_root_bytenr(&root_item);
1070 1060
1071 sf->error = 1061 sf->error =
1072 btrfsic_create_link_to_next_block( 1062 btrfsic_create_link_to_next_block(
@@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error:
1081 &sf->num_copies, 1071 &sf->num_copies,
1082 &sf->mirror_num, 1072 &sf->mirror_num,
1083 disk_key, 1073 disk_key,
1084 le64_to_cpu(root_item. 1074 btrfs_root_generation(
1085 generation)); 1075 &root_item));
1086 if (sf->error) 1076 if (sf->error)
1087 goto one_stack_frame_backwards; 1077 goto one_stack_frame_backwards;
1088 1078
@@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error:
1130 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1120 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1131 1121
1132 if (-1 == sf->i) { 1122 if (-1 == sf->i) {
1133 sf->nr = le32_to_cpu(nodehdr->header.nritems); 1123 sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1134 1124
1135 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1125 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1136 printk(KERN_INFO "node %llu level %d items %d" 1126 printk(KERN_INFO "node %llu level %d items %d"
1137 " generation %llu owner %llu\n", 1127 " generation %llu owner %llu\n",
1138 (unsigned long long)
1139 sf->block_ctx->start, 1128 sf->block_ctx->start,
1140 nodehdr->header.level, sf->nr, 1129 nodehdr->header.level, sf->nr,
1141 (unsigned long long) 1130 btrfs_stack_header_generation(
1142 le64_to_cpu(nodehdr->header.generation), 1131 &nodehdr->header),
1143 (unsigned long long) 1132 btrfs_stack_header_owner(
1144 le64_to_cpu(nodehdr->header.owner)); 1133 &nodehdr->header));
1145 } 1134 }
1146 1135
1147continue_with_current_node_stack_frame: 1136continue_with_current_node_stack_frame:
@@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame:
1168 btrfsic_read_from_block_data( 1157 btrfsic_read_from_block_data(
1169 sf->block_ctx, &key_ptr, key_ptr_offset, 1158 sf->block_ctx, &key_ptr, key_ptr_offset,
1170 sizeof(struct btrfs_key_ptr)); 1159 sizeof(struct btrfs_key_ptr));
1171 next_bytenr = le64_to_cpu(key_ptr.blockptr); 1160 next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1172 1161
1173 sf->error = btrfsic_create_link_to_next_block( 1162 sf->error = btrfsic_create_link_to_next_block(
1174 state, 1163 state,
@@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame:
1182 &sf->num_copies, 1171 &sf->num_copies,
1183 &sf->mirror_num, 1172 &sf->mirror_num,
1184 &key_ptr.key, 1173 &key_ptr.key,
1185 le64_to_cpu(key_ptr.generation)); 1174 btrfs_stack_key_generation(&key_ptr));
1186 if (sf->error) 1175 if (sf->error)
1187 goto one_stack_frame_backwards; 1176 goto one_stack_frame_backwards;
1188 1177
@@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data(
1247 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; 1236 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1248 1237
1249 WARN_ON(offset + len > block_ctx->len); 1238 WARN_ON(offset + len > block_ctx->len);
1250 offset_in_page = (start_offset + offset) & 1239 offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
1251 ((unsigned long)PAGE_CACHE_SIZE - 1);
1252 1240
1253 while (len > 0) { 1241 while (len > 0) {
1254 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); 1242 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
@@ -1290,7 +1278,7 @@ static int btrfsic_create_link_to_next_block(
1290 next_bytenr, state->metablock_size); 1278 next_bytenr, state->metablock_size);
1291 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1279 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1292 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1280 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1293 (unsigned long long)next_bytenr, *num_copiesp); 1281 next_bytenr, *num_copiesp);
1294 *mirror_nump = 1; 1282 *mirror_nump = 1;
1295 } 1283 }
1296 1284
@@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block(
1307 if (ret) { 1295 if (ret) {
1308 printk(KERN_INFO 1296 printk(KERN_INFO
1309 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1297 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1310 (unsigned long long)next_bytenr, *mirror_nump); 1298 next_bytenr, *mirror_nump);
1311 btrfsic_release_block_ctx(next_block_ctx); 1299 btrfsic_release_block_ctx(next_block_ctx);
1312 *next_blockp = NULL; 1300 *next_blockp = NULL;
1313 return -1; 1301 return -1;
@@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block(
1335 "Referenced block @%llu (%s/%llu/%d)" 1323 "Referenced block @%llu (%s/%llu/%d)"
1336 " found in hash table, %c," 1324 " found in hash table, %c,"
1337 " bytenr mismatch (!= stored %llu).\n", 1325 " bytenr mismatch (!= stored %llu).\n",
1338 (unsigned long long)next_bytenr, 1326 next_bytenr, next_block_ctx->dev->name,
1339 next_block_ctx->dev->name, 1327 next_block_ctx->dev_bytenr, *mirror_nump,
1340 (unsigned long long)next_block_ctx->dev_bytenr,
1341 *mirror_nump,
1342 btrfsic_get_block_type(state, next_block), 1328 btrfsic_get_block_type(state, next_block),
1343 (unsigned long long)next_block->logical_bytenr); 1329 next_block->logical_bytenr);
1344 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1330 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1345 printk(KERN_INFO 1331 printk(KERN_INFO
1346 "Referenced block @%llu (%s/%llu/%d)" 1332 "Referenced block @%llu (%s/%llu/%d)"
1347 " found in hash table, %c.\n", 1333 " found in hash table, %c.\n",
1348 (unsigned long long)next_bytenr, 1334 next_bytenr, next_block_ctx->dev->name,
1349 next_block_ctx->dev->name, 1335 next_block_ctx->dev_bytenr, *mirror_nump,
1350 (unsigned long long)next_block_ctx->dev_bytenr,
1351 *mirror_nump,
1352 btrfsic_get_block_type(state, next_block)); 1336 btrfsic_get_block_type(state, next_block));
1353 next_block->logical_bytenr = next_bytenr; 1337 next_block->logical_bytenr = next_bytenr;
1354 1338
@@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block(
1400 if (ret < (int)next_block_ctx->len) { 1384 if (ret < (int)next_block_ctx->len) {
1401 printk(KERN_INFO 1385 printk(KERN_INFO
1402 "btrfsic: read block @logical %llu failed!\n", 1386 "btrfsic: read block @logical %llu failed!\n",
1403 (unsigned long long)next_bytenr); 1387 next_bytenr);
1404 btrfsic_release_block_ctx(next_block_ctx); 1388 btrfsic_release_block_ctx(next_block_ctx);
1405 *next_blockp = NULL; 1389 *next_blockp = NULL;
1406 return -1; 1390 return -1;
@@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data(
1444 file_extent_item_offset, 1428 file_extent_item_offset,
1445 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1429 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1446 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1430 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1447 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { 1431 btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1448 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1432 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1449 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", 1433 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1450 file_extent_item.type, 1434 file_extent_item.type,
1451 (unsigned long long) 1435 btrfs_stack_file_extent_disk_bytenr(
1452 le64_to_cpu(file_extent_item.disk_bytenr)); 1436 &file_extent_item));
1453 return 0; 1437 return 0;
1454 } 1438 }
1455 1439
@@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data(
1463 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1447 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1464 file_extent_item_offset, 1448 file_extent_item_offset,
1465 sizeof(struct btrfs_file_extent_item)); 1449 sizeof(struct btrfs_file_extent_item));
1466 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + 1450 next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
1467 le64_to_cpu(file_extent_item.offset); 1451 btrfs_stack_file_extent_offset(&file_extent_item);
1468 generation = le64_to_cpu(file_extent_item.generation); 1452 generation = btrfs_stack_file_extent_generation(&file_extent_item);
1469 num_bytes = le64_to_cpu(file_extent_item.num_bytes); 1453 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1470 generation = le64_to_cpu(file_extent_item.generation); 1454 generation = btrfs_stack_file_extent_generation(&file_extent_item);
1471 1455
1472 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1456 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1473 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1457 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1474 " offset = %llu, num_bytes = %llu\n", 1458 " offset = %llu, num_bytes = %llu\n",
1475 file_extent_item.type, 1459 file_extent_item.type,
1476 (unsigned long long) 1460 btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1477 le64_to_cpu(file_extent_item.disk_bytenr), 1461 btrfs_stack_file_extent_offset(&file_extent_item),
1478 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1462 num_bytes);
1479 (unsigned long long)num_bytes);
1480 while (num_bytes > 0) { 1463 while (num_bytes > 0) {
1481 u32 chunk_len; 1464 u32 chunk_len;
1482 int num_copies; 1465 int num_copies;
@@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data(
1492 next_bytenr, state->datablock_size); 1475 next_bytenr, state->datablock_size);
1493 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1476 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1494 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1477 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1495 (unsigned long long)next_bytenr, num_copies); 1478 next_bytenr, num_copies);
1496 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1479 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1497 struct btrfsic_block_data_ctx next_block_ctx; 1480 struct btrfsic_block_data_ctx next_block_ctx;
1498 struct btrfsic_block *next_block; 1481 struct btrfsic_block *next_block;
@@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data(
1504 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1487 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1505 printk(KERN_INFO 1488 printk(KERN_INFO
1506 "\tdisk_bytenr = %llu, num_bytes %u\n", 1489 "\tdisk_bytenr = %llu, num_bytes %u\n",
1507 (unsigned long long)next_bytenr, 1490 next_bytenr, chunk_len);
1508 chunk_len);
1509 ret = btrfsic_map_block(state, next_bytenr, 1491 ret = btrfsic_map_block(state, next_bytenr,
1510 chunk_len, &next_block_ctx, 1492 chunk_len, &next_block_ctx,
1511 mirror_num); 1493 mirror_num);
@@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data(
1513 printk(KERN_INFO 1495 printk(KERN_INFO
1514 "btrfsic: btrfsic_map_block(@%llu," 1496 "btrfsic: btrfsic_map_block(@%llu,"
1515 " mirror=%d) failed!\n", 1497 " mirror=%d) failed!\n",
1516 (unsigned long long)next_bytenr, 1498 next_bytenr, mirror_num);
1517 mirror_num);
1518 return -1; 1499 return -1;
1519 } 1500 }
1520 1501
@@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data(
1543 " found in hash table, D," 1524 " found in hash table, D,"
1544 " bytenr mismatch" 1525 " bytenr mismatch"
1545 " (!= stored %llu).\n", 1526 " (!= stored %llu).\n",
1546 (unsigned long long)next_bytenr, 1527 next_bytenr,
1547 next_block_ctx.dev->name, 1528 next_block_ctx.dev->name,
1548 (unsigned long long)
1549 next_block_ctx.dev_bytenr, 1529 next_block_ctx.dev_bytenr,
1550 mirror_num, 1530 mirror_num,
1551 (unsigned long long)
1552 next_block->logical_bytenr); 1531 next_block->logical_bytenr);
1553 } 1532 }
1554 next_block->logical_bytenr = next_bytenr; 1533 next_block->logical_bytenr = next_bytenr;
@@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1675 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { 1654 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1676 printk(KERN_INFO 1655 printk(KERN_INFO
1677 "btrfsic: read_block() with unaligned bytenr %llu\n", 1656 "btrfsic: read_block() with unaligned bytenr %llu\n",
1678 (unsigned long long)block_ctx->dev_bytenr); 1657 block_ctx->dev_bytenr);
1679 return -1; 1658 return -1;
1680 } 1659 }
1681 1660
@@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1772 1751
1773 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", 1752 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1774 btrfsic_get_block_type(state, b_all), 1753 btrfsic_get_block_type(state, b_all),
1775 (unsigned long long)b_all->logical_bytenr, 1754 b_all->logical_bytenr, b_all->dev_state->name,
1776 b_all->dev_state->name, 1755 b_all->dev_bytenr, b_all->mirror_num);
1777 (unsigned long long)b_all->dev_bytenr,
1778 b_all->mirror_num);
1779 1756
1780 list_for_each(elem_ref_to, &b_all->ref_to_list) { 1757 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1781 const struct btrfsic_block_link *const l = 1758 const struct btrfsic_block_link *const l =
@@ -1787,16 +1764,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1787 " refers %u* to" 1764 " refers %u* to"
1788 " %c @%llu (%s/%llu/%d)\n", 1765 " %c @%llu (%s/%llu/%d)\n",
1789 btrfsic_get_block_type(state, b_all), 1766 btrfsic_get_block_type(state, b_all),
1790 (unsigned long long)b_all->logical_bytenr, 1767 b_all->logical_bytenr, b_all->dev_state->name,
1791 b_all->dev_state->name, 1768 b_all->dev_bytenr, b_all->mirror_num,
1792 (unsigned long long)b_all->dev_bytenr,
1793 b_all->mirror_num,
1794 l->ref_cnt, 1769 l->ref_cnt,
1795 btrfsic_get_block_type(state, l->block_ref_to), 1770 btrfsic_get_block_type(state, l->block_ref_to),
1796 (unsigned long long)
1797 l->block_ref_to->logical_bytenr, 1771 l->block_ref_to->logical_bytenr,
1798 l->block_ref_to->dev_state->name, 1772 l->block_ref_to->dev_state->name,
1799 (unsigned long long)l->block_ref_to->dev_bytenr, 1773 l->block_ref_to->dev_bytenr,
1800 l->block_ref_to->mirror_num); 1774 l->block_ref_to->mirror_num);
1801 } 1775 }
1802 1776
@@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1810 " is ref %u* from" 1784 " is ref %u* from"
1811 " %c @%llu (%s/%llu/%d)\n", 1785 " %c @%llu (%s/%llu/%d)\n",
1812 btrfsic_get_block_type(state, b_all), 1786 btrfsic_get_block_type(state, b_all),
1813 (unsigned long long)b_all->logical_bytenr, 1787 b_all->logical_bytenr, b_all->dev_state->name,
1814 b_all->dev_state->name, 1788 b_all->dev_bytenr, b_all->mirror_num,
1815 (unsigned long long)b_all->dev_bytenr,
1816 b_all->mirror_num,
1817 l->ref_cnt, 1789 l->ref_cnt,
1818 btrfsic_get_block_type(state, l->block_ref_from), 1790 btrfsic_get_block_type(state, l->block_ref_from),
1819 (unsigned long long)
1820 l->block_ref_from->logical_bytenr, 1791 l->block_ref_from->logical_bytenr,
1821 l->block_ref_from->dev_state->name, 1792 l->block_ref_from->dev_state->name,
1822 (unsigned long long)
1823 l->block_ref_from->dev_bytenr, 1793 l->block_ref_from->dev_bytenr,
1824 l->block_ref_from->mirror_num); 1794 l->block_ref_from->mirror_num);
1825 } 1795 }
@@ -1896,8 +1866,8 @@ again:
1896 struct list_head *tmp_ref_to; 1866 struct list_head *tmp_ref_to;
1897 1867
1898 if (block->is_superblock) { 1868 if (block->is_superblock) {
1899 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1869 bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1900 mapped_datav[0])->bytenr); 1870 mapped_datav[0]);
1901 if (num_pages * PAGE_CACHE_SIZE < 1871 if (num_pages * PAGE_CACHE_SIZE <
1902 BTRFS_SUPER_INFO_SIZE) { 1872 BTRFS_SUPER_INFO_SIZE) {
1903 printk(KERN_INFO 1873 printk(KERN_INFO
@@ -1923,8 +1893,9 @@ again:
1923 return; 1893 return;
1924 } 1894 }
1925 processed_len = state->metablock_size; 1895 processed_len = state->metablock_size;
1926 bytenr = le64_to_cpu(((struct btrfs_header *) 1896 bytenr = btrfs_stack_header_bytenr(
1927 mapped_datav[0])->bytenr); 1897 (struct btrfs_header *)
1898 mapped_datav[0]);
1928 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1899 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1929 dev_state, 1900 dev_state,
1930 dev_bytenr); 1901 dev_bytenr);
@@ -1935,12 +1906,9 @@ again:
1935 " found in hash table, %c," 1906 " found in hash table, %c,"
1936 " bytenr mismatch" 1907 " bytenr mismatch"
1937 " (!= stored %llu).\n", 1908 " (!= stored %llu).\n",
1938 (unsigned long long)bytenr, 1909 bytenr, dev_state->name, dev_bytenr,
1939 dev_state->name,
1940 (unsigned long long)dev_bytenr,
1941 block->mirror_num, 1910 block->mirror_num,
1942 btrfsic_get_block_type(state, block), 1911 btrfsic_get_block_type(state, block),
1943 (unsigned long long)
1944 block->logical_bytenr); 1912 block->logical_bytenr);
1945 block->logical_bytenr = bytenr; 1913 block->logical_bytenr = bytenr;
1946 } else if (state->print_mask & 1914 } else if (state->print_mask &
@@ -1948,9 +1916,7 @@ again:
1948 printk(KERN_INFO 1916 printk(KERN_INFO
1949 "Written block @%llu (%s/%llu/%d)" 1917 "Written block @%llu (%s/%llu/%d)"
1950 " found in hash table, %c.\n", 1918 " found in hash table, %c.\n",
1951 (unsigned long long)bytenr, 1919 bytenr, dev_state->name, dev_bytenr,
1952 dev_state->name,
1953 (unsigned long long)dev_bytenr,
1954 block->mirror_num, 1920 block->mirror_num,
1955 btrfsic_get_block_type(state, block)); 1921 btrfsic_get_block_type(state, block));
1956 } else { 1922 } else {
@@ -1966,9 +1932,7 @@ again:
1966 printk(KERN_INFO 1932 printk(KERN_INFO
1967 "Written block @%llu (%s/%llu/%d)" 1933 "Written block @%llu (%s/%llu/%d)"
1968 " found in hash table, %c.\n", 1934 " found in hash table, %c.\n",
1969 (unsigned long long)bytenr, 1935 bytenr, dev_state->name, dev_bytenr,
1970 dev_state->name,
1971 (unsigned long long)dev_bytenr,
1972 block->mirror_num, 1936 block->mirror_num,
1973 btrfsic_get_block_type(state, block)); 1937 btrfsic_get_block_type(state, block));
1974 } 1938 }
@@ -1985,21 +1949,14 @@ again:
1985 " new(gen=%llu)," 1949 " new(gen=%llu),"
1986 " which is referenced by most recent superblock" 1950 " which is referenced by most recent superblock"
1987 " (superblockgen=%llu)!\n", 1951 " (superblockgen=%llu)!\n",
1988 btrfsic_get_block_type(state, block), 1952 btrfsic_get_block_type(state, block), bytenr,
1989 (unsigned long long)bytenr, 1953 dev_state->name, dev_bytenr, block->mirror_num,
1990 dev_state->name, 1954 block->generation,
1991 (unsigned long long)dev_bytenr, 1955 btrfs_disk_key_objectid(&block->disk_key),
1992 block->mirror_num,
1993 (unsigned long long)block->generation,
1994 (unsigned long long)
1995 le64_to_cpu(block->disk_key.objectid),
1996 block->disk_key.type, 1956 block->disk_key.type,
1997 (unsigned long long) 1957 btrfs_disk_key_offset(&block->disk_key),
1998 le64_to_cpu(block->disk_key.offset), 1958 btrfs_stack_header_generation(
1999 (unsigned long long) 1959 (struct btrfs_header *) mapped_datav[0]),
2000 le64_to_cpu(((struct btrfs_header *)
2001 mapped_datav[0])->generation),
2002 (unsigned long long)
2003 state->max_superblock_generation); 1960 state->max_superblock_generation);
2004 btrfsic_dump_tree(state); 1961 btrfsic_dump_tree(state);
2005 } 1962 }
@@ -2008,15 +1965,12 @@ again:
2008 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1965 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
2009 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 1966 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
2010 " which is not yet iodone!\n", 1967 " which is not yet iodone!\n",
2011 btrfsic_get_block_type(state, block), 1968 btrfsic_get_block_type(state, block), bytenr,
2012 (unsigned long long)bytenr, 1969 dev_state->name, dev_bytenr, block->mirror_num,
2013 dev_state->name, 1970 block->generation,
2014 (unsigned long long)dev_bytenr, 1971 btrfs_stack_header_generation(
2015 block->mirror_num, 1972 (struct btrfs_header *)
2016 (unsigned long long)block->generation, 1973 mapped_datav[0]));
2017 (unsigned long long)
2018 le64_to_cpu(((struct btrfs_header *)
2019 mapped_datav[0])->generation));
2020 /* it would not be safe to go on */ 1974 /* it would not be safe to go on */
2021 btrfsic_dump_tree(state); 1975 btrfsic_dump_tree(state);
2022 goto continue_loop; 1976 goto continue_loop;
@@ -2056,7 +2010,7 @@ again:
2056 if (ret) { 2010 if (ret) {
2057 printk(KERN_INFO 2011 printk(KERN_INFO
2058 "btrfsic: btrfsic_map_block(root @%llu)" 2012 "btrfsic: btrfsic_map_block(root @%llu)"
2059 " failed!\n", (unsigned long long)bytenr); 2013 " failed!\n", bytenr);
2060 goto continue_loop; 2014 goto continue_loop;
2061 } 2015 }
2062 block_ctx.datav = mapped_datav; 2016 block_ctx.datav = mapped_datav;
@@ -2140,7 +2094,7 @@ again:
2140 printk(KERN_INFO 2094 printk(KERN_INFO
2141 "btrfsic: btrfsic_process_metablock" 2095 "btrfsic: btrfsic_process_metablock"
2142 "(root @%llu) failed!\n", 2096 "(root @%llu) failed!\n",
2143 (unsigned long long)dev_bytenr); 2097 dev_bytenr);
2144 } else { 2098 } else {
2145 block->is_metadata = 0; 2099 block->is_metadata = 0;
2146 block->mirror_num = 0; /* unknown */ 2100 block->mirror_num = 0; /* unknown */
@@ -2168,8 +2122,7 @@ again:
2168 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2122 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2169 printk(KERN_INFO "Written block (%s/%llu/?)" 2123 printk(KERN_INFO "Written block (%s/%llu/?)"
2170 " !found in hash table, D.\n", 2124 " !found in hash table, D.\n",
2171 dev_state->name, 2125 dev_state->name, dev_bytenr);
2172 (unsigned long long)dev_bytenr);
2173 if (!state->include_extent_data) { 2126 if (!state->include_extent_data) {
2174 /* ignore that written D block */ 2127 /* ignore that written D block */
2175 goto continue_loop; 2128 goto continue_loop;
@@ -2184,17 +2137,16 @@ again:
2184 block_ctx.pagev = NULL; 2137 block_ctx.pagev = NULL;
2185 } else { 2138 } else {
2186 processed_len = state->metablock_size; 2139 processed_len = state->metablock_size;
2187 bytenr = le64_to_cpu(((struct btrfs_header *) 2140 bytenr = btrfs_stack_header_bytenr(
2188 mapped_datav[0])->bytenr); 2141 (struct btrfs_header *)
2142 mapped_datav[0]);
2189 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2143 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2190 dev_bytenr); 2144 dev_bytenr);
2191 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2145 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2192 printk(KERN_INFO 2146 printk(KERN_INFO
2193 "Written block @%llu (%s/%llu/?)" 2147 "Written block @%llu (%s/%llu/?)"
2194 " !found in hash table, M.\n", 2148 " !found in hash table, M.\n",
2195 (unsigned long long)bytenr, 2149 bytenr, dev_state->name, dev_bytenr);
2196 dev_state->name,
2197 (unsigned long long)dev_bytenr);
2198 2150
2199 ret = btrfsic_map_block(state, bytenr, processed_len, 2151 ret = btrfsic_map_block(state, bytenr, processed_len,
2200 &block_ctx, 0); 2152 &block_ctx, 0);
@@ -2202,7 +2154,7 @@ again:
2202 printk(KERN_INFO 2154 printk(KERN_INFO
2203 "btrfsic: btrfsic_map_block(root @%llu)" 2155 "btrfsic: btrfsic_map_block(root @%llu)"
2204 " failed!\n", 2156 " failed!\n",
2205 (unsigned long long)dev_bytenr); 2157 dev_bytenr);
2206 goto continue_loop; 2158 goto continue_loop;
2207 } 2159 }
2208 } 2160 }
@@ -2267,10 +2219,8 @@ again:
2267 printk(KERN_INFO 2219 printk(KERN_INFO
2268 "New written %c-block @%llu (%s/%llu/%d)\n", 2220 "New written %c-block @%llu (%s/%llu/%d)\n",
2269 is_metadata ? 'M' : 'D', 2221 is_metadata ? 'M' : 'D',
2270 (unsigned long long)block->logical_bytenr, 2222 block->logical_bytenr, block->dev_state->name,
2271 block->dev_state->name, 2223 block->dev_bytenr, block->mirror_num);
2272 (unsigned long long)block->dev_bytenr,
2273 block->mirror_num);
2274 list_add(&block->all_blocks_node, &state->all_blocks_list); 2224 list_add(&block->all_blocks_node, &state->all_blocks_list);
2275 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2225 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2276 2226
@@ -2281,7 +2231,7 @@ again:
2281 printk(KERN_INFO 2231 printk(KERN_INFO
2282 "btrfsic: process_metablock(root @%llu)" 2232 "btrfsic: process_metablock(root @%llu)"
2283 " failed!\n", 2233 " failed!\n",
2284 (unsigned long long)dev_bytenr); 2234 dev_bytenr);
2285 } 2235 }
2286 btrfsic_release_block_ctx(&block_ctx); 2236 btrfsic_release_block_ctx(&block_ctx);
2287 } 2237 }
@@ -2319,10 +2269,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2319 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2269 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2320 bio_error_status, 2270 bio_error_status,
2321 btrfsic_get_block_type(dev_state->state, block), 2271 btrfsic_get_block_type(dev_state->state, block),
2322 (unsigned long long)block->logical_bytenr, 2272 block->logical_bytenr, dev_state->name,
2323 dev_state->name, 2273 block->dev_bytenr, block->mirror_num);
2324 (unsigned long long)block->dev_bytenr,
2325 block->mirror_num);
2326 next_block = block->next_in_same_bio; 2274 next_block = block->next_in_same_bio;
2327 block->iodone_w_error = iodone_w_error; 2275 block->iodone_w_error = iodone_w_error;
2328 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2276 if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2332,7 +2280,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2332 printk(KERN_INFO 2280 printk(KERN_INFO
2333 "bio_end_io() new %s flush_gen=%llu\n", 2281 "bio_end_io() new %s flush_gen=%llu\n",
2334 dev_state->name, 2282 dev_state->name,
2335 (unsigned long long)
2336 dev_state->last_flush_gen); 2283 dev_state->last_flush_gen);
2337 } 2284 }
2338 if (block->submit_bio_bh_rw & REQ_FUA) 2285 if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2358,10 +2305,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2358 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2305 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2359 iodone_w_error, 2306 iodone_w_error,
2360 btrfsic_get_block_type(dev_state->state, block), 2307 btrfsic_get_block_type(dev_state->state, block),
2361 (unsigned long long)block->logical_bytenr, 2308 block->logical_bytenr, block->dev_state->name,
2362 block->dev_state->name, 2309 block->dev_bytenr, block->mirror_num);
2363 (unsigned long long)block->dev_bytenr,
2364 block->mirror_num);
2365 2310
2366 block->iodone_w_error = iodone_w_error; 2311 block->iodone_w_error = iodone_w_error;
2367 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2312 if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2370,8 +2315,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2370 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2315 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2371 printk(KERN_INFO 2316 printk(KERN_INFO
2372 "bh_end_io() new %s flush_gen=%llu\n", 2317 "bh_end_io() new %s flush_gen=%llu\n",
2373 dev_state->name, 2318 dev_state->name, dev_state->last_flush_gen);
2374 (unsigned long long)dev_state->last_flush_gen);
2375 } 2319 }
2376 if (block->submit_bio_bh_rw & REQ_FUA) 2320 if (block->submit_bio_bh_rw & REQ_FUA)
2377 block->flush_gen = 0; /* FUA completed means block is on disk */ 2321 block->flush_gen = 0; /* FUA completed means block is on disk */
@@ -2396,26 +2340,20 @@ static int btrfsic_process_written_superblock(
2396 printk(KERN_INFO 2340 printk(KERN_INFO
2397 "btrfsic: superblock @%llu (%s/%llu/%d)" 2341 "btrfsic: superblock @%llu (%s/%llu/%d)"
2398 " with old gen %llu <= %llu\n", 2342 " with old gen %llu <= %llu\n",
2399 (unsigned long long)superblock->logical_bytenr, 2343 superblock->logical_bytenr,
2400 superblock->dev_state->name, 2344 superblock->dev_state->name,
2401 (unsigned long long)superblock->dev_bytenr, 2345 superblock->dev_bytenr, superblock->mirror_num,
2402 superblock->mirror_num,
2403 (unsigned long long)
2404 btrfs_super_generation(super_hdr), 2346 btrfs_super_generation(super_hdr),
2405 (unsigned long long)
2406 state->max_superblock_generation); 2347 state->max_superblock_generation);
2407 } else { 2348 } else {
2408 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2349 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2409 printk(KERN_INFO 2350 printk(KERN_INFO
2410 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2351 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2411 " with new gen %llu > %llu\n", 2352 " with new gen %llu > %llu\n",
2412 (unsigned long long)superblock->logical_bytenr, 2353 superblock->logical_bytenr,
2413 superblock->dev_state->name, 2354 superblock->dev_state->name,
2414 (unsigned long long)superblock->dev_bytenr, 2355 superblock->dev_bytenr, superblock->mirror_num,
2415 superblock->mirror_num,
2416 (unsigned long long)
2417 btrfs_super_generation(super_hdr), 2356 btrfs_super_generation(super_hdr),
2418 (unsigned long long)
2419 state->max_superblock_generation); 2357 state->max_superblock_generation);
2420 2358
2421 state->max_superblock_generation = 2359 state->max_superblock_generation =
@@ -2432,43 +2370,41 @@ static int btrfsic_process_written_superblock(
2432 int num_copies; 2370 int num_copies;
2433 int mirror_num; 2371 int mirror_num;
2434 const char *additional_string = NULL; 2372 const char *additional_string = NULL;
2435 struct btrfs_disk_key tmp_disk_key; 2373 struct btrfs_disk_key tmp_disk_key = {0};
2436 2374
2437 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2375 btrfs_set_disk_key_type(&tmp_disk_key,
2438 tmp_disk_key.offset = 0; 2376 BTRFS_ROOT_ITEM_KEY);
 2377 btrfs_set_disk_key_offset(&tmp_disk_key, 0);
2439 2378
2440 switch (pass) { 2379 switch (pass) {
2441 case 0: 2380 case 0:
2442 tmp_disk_key.objectid = 2381 btrfs_set_disk_key_objectid(&tmp_disk_key,
2443 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2382 BTRFS_ROOT_TREE_OBJECTID);
2444 additional_string = "root "; 2383 additional_string = "root ";
2445 next_bytenr = btrfs_super_root(super_hdr); 2384 next_bytenr = btrfs_super_root(super_hdr);
2446 if (state->print_mask & 2385 if (state->print_mask &
2447 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2386 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2448 printk(KERN_INFO "root@%llu\n", 2387 printk(KERN_INFO "root@%llu\n", next_bytenr);
2449 (unsigned long long)next_bytenr);
2450 break; 2388 break;
2451 case 1: 2389 case 1:
2452 tmp_disk_key.objectid = 2390 btrfs_set_disk_key_objectid(&tmp_disk_key,
2453 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2391 BTRFS_CHUNK_TREE_OBJECTID);
2454 additional_string = "chunk "; 2392 additional_string = "chunk ";
2455 next_bytenr = btrfs_super_chunk_root(super_hdr); 2393 next_bytenr = btrfs_super_chunk_root(super_hdr);
2456 if (state->print_mask & 2394 if (state->print_mask &
2457 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2395 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2458 printk(KERN_INFO "chunk@%llu\n", 2396 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2459 (unsigned long long)next_bytenr);
2460 break; 2397 break;
2461 case 2: 2398 case 2:
2462 tmp_disk_key.objectid = 2399 btrfs_set_disk_key_objectid(&tmp_disk_key,
2463 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2400 BTRFS_TREE_LOG_OBJECTID);
2464 additional_string = "log "; 2401 additional_string = "log ";
2465 next_bytenr = btrfs_super_log_root(super_hdr); 2402 next_bytenr = btrfs_super_log_root(super_hdr);
2466 if (0 == next_bytenr) 2403 if (0 == next_bytenr)
2467 continue; 2404 continue;
2468 if (state->print_mask & 2405 if (state->print_mask &
2469 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2406 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2470 printk(KERN_INFO "log@%llu\n", 2407 printk(KERN_INFO "log@%llu\n", next_bytenr);
2471 (unsigned long long)next_bytenr);
2472 break; 2408 break;
2473 } 2409 }
2474 2410
@@ -2477,7 +2413,7 @@ static int btrfsic_process_written_superblock(
2477 next_bytenr, BTRFS_SUPER_INFO_SIZE); 2413 next_bytenr, BTRFS_SUPER_INFO_SIZE);
2478 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2414 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2479 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2415 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2480 (unsigned long long)next_bytenr, num_copies); 2416 next_bytenr, num_copies);
2481 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2417 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2482 int was_created; 2418 int was_created;
2483 2419
@@ -2493,8 +2429,7 @@ static int btrfsic_process_written_superblock(
2493 printk(KERN_INFO 2429 printk(KERN_INFO
2494 "btrfsic: btrfsic_map_block(@%llu," 2430 "btrfsic: btrfsic_map_block(@%llu,"
2495 " mirror=%d) failed!\n", 2431 " mirror=%d) failed!\n",
2496 (unsigned long long)next_bytenr, 2432 next_bytenr, mirror_num);
2497 mirror_num);
2498 return -1; 2433 return -1;
2499 } 2434 }
2500 2435
@@ -2579,26 +2514,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2579 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2514 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2580 recursion_level, 2515 recursion_level,
2581 btrfsic_get_block_type(state, block), 2516 btrfsic_get_block_type(state, block),
2582 (unsigned long long)block->logical_bytenr, 2517 block->logical_bytenr, block->dev_state->name,
2583 block->dev_state->name, 2518 block->dev_bytenr, block->mirror_num,
2584 (unsigned long long)block->dev_bytenr,
2585 block->mirror_num,
2586 l->ref_cnt, 2519 l->ref_cnt,
2587 btrfsic_get_block_type(state, l->block_ref_to), 2520 btrfsic_get_block_type(state, l->block_ref_to),
2588 (unsigned long long)
2589 l->block_ref_to->logical_bytenr, 2521 l->block_ref_to->logical_bytenr,
2590 l->block_ref_to->dev_state->name, 2522 l->block_ref_to->dev_state->name,
2591 (unsigned long long)l->block_ref_to->dev_bytenr, 2523 l->block_ref_to->dev_bytenr,
2592 l->block_ref_to->mirror_num); 2524 l->block_ref_to->mirror_num);
2593 if (l->block_ref_to->never_written) { 2525 if (l->block_ref_to->never_written) {
2594 printk(KERN_INFO "btrfs: attempt to write superblock" 2526 printk(KERN_INFO "btrfs: attempt to write superblock"
2595 " which references block %c @%llu (%s/%llu/%d)" 2527 " which references block %c @%llu (%s/%llu/%d)"
2596 " which is never written!\n", 2528 " which is never written!\n",
2597 btrfsic_get_block_type(state, l->block_ref_to), 2529 btrfsic_get_block_type(state, l->block_ref_to),
2598 (unsigned long long)
2599 l->block_ref_to->logical_bytenr, 2530 l->block_ref_to->logical_bytenr,
2600 l->block_ref_to->dev_state->name, 2531 l->block_ref_to->dev_state->name,
2601 (unsigned long long)l->block_ref_to->dev_bytenr, 2532 l->block_ref_to->dev_bytenr,
2602 l->block_ref_to->mirror_num); 2533 l->block_ref_to->mirror_num);
2603 ret = -1; 2534 ret = -1;
2604 } else if (!l->block_ref_to->is_iodone) { 2535 } else if (!l->block_ref_to->is_iodone) {
@@ -2606,10 +2537,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2606 " which references block %c @%llu (%s/%llu/%d)" 2537 " which references block %c @%llu (%s/%llu/%d)"
2607 " which is not yet iodone!\n", 2538 " which is not yet iodone!\n",
2608 btrfsic_get_block_type(state, l->block_ref_to), 2539 btrfsic_get_block_type(state, l->block_ref_to),
2609 (unsigned long long)
2610 l->block_ref_to->logical_bytenr, 2540 l->block_ref_to->logical_bytenr,
2611 l->block_ref_to->dev_state->name, 2541 l->block_ref_to->dev_state->name,
2612 (unsigned long long)l->block_ref_to->dev_bytenr, 2542 l->block_ref_to->dev_bytenr,
2613 l->block_ref_to->mirror_num); 2543 l->block_ref_to->mirror_num);
2614 ret = -1; 2544 ret = -1;
2615 } else if (l->block_ref_to->iodone_w_error) { 2545 } else if (l->block_ref_to->iodone_w_error) {
@@ -2617,10 +2547,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2617 " which references block %c @%llu (%s/%llu/%d)" 2547 " which references block %c @%llu (%s/%llu/%d)"
2618 " which has write error!\n", 2548 " which has write error!\n",
2619 btrfsic_get_block_type(state, l->block_ref_to), 2549 btrfsic_get_block_type(state, l->block_ref_to),
2620 (unsigned long long)
2621 l->block_ref_to->logical_bytenr, 2550 l->block_ref_to->logical_bytenr,
2622 l->block_ref_to->dev_state->name, 2551 l->block_ref_to->dev_state->name,
2623 (unsigned long long)l->block_ref_to->dev_bytenr, 2552 l->block_ref_to->dev_bytenr,
2624 l->block_ref_to->mirror_num); 2553 l->block_ref_to->mirror_num);
2625 ret = -1; 2554 ret = -1;
2626 } else if (l->parent_generation != 2555 } else if (l->parent_generation !=
@@ -2634,13 +2563,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2634 " with generation %llu !=" 2563 " with generation %llu !="
2635 " parent generation %llu!\n", 2564 " parent generation %llu!\n",
2636 btrfsic_get_block_type(state, l->block_ref_to), 2565 btrfsic_get_block_type(state, l->block_ref_to),
2637 (unsigned long long)
2638 l->block_ref_to->logical_bytenr, 2566 l->block_ref_to->logical_bytenr,
2639 l->block_ref_to->dev_state->name, 2567 l->block_ref_to->dev_state->name,
2640 (unsigned long long)l->block_ref_to->dev_bytenr, 2568 l->block_ref_to->dev_bytenr,
2641 l->block_ref_to->mirror_num, 2569 l->block_ref_to->mirror_num,
2642 (unsigned long long)l->block_ref_to->generation, 2570 l->block_ref_to->generation,
2643 (unsigned long long)l->parent_generation); 2571 l->parent_generation);
2644 ret = -1; 2572 ret = -1;
2645 } else if (l->block_ref_to->flush_gen > 2573 } else if (l->block_ref_to->flush_gen >
2646 l->block_ref_to->dev_state->last_flush_gen) { 2574 l->block_ref_to->dev_state->last_flush_gen) {
@@ -2650,13 +2578,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2650 " (block flush_gen=%llu," 2578 " (block flush_gen=%llu,"
2651 " dev->flush_gen=%llu)!\n", 2579 " dev->flush_gen=%llu)!\n",
2652 btrfsic_get_block_type(state, l->block_ref_to), 2580 btrfsic_get_block_type(state, l->block_ref_to),
2653 (unsigned long long)
2654 l->block_ref_to->logical_bytenr, 2581 l->block_ref_to->logical_bytenr,
2655 l->block_ref_to->dev_state->name, 2582 l->block_ref_to->dev_state->name,
2656 (unsigned long long)l->block_ref_to->dev_bytenr, 2583 l->block_ref_to->dev_bytenr,
2657 l->block_ref_to->mirror_num, 2584 l->block_ref_to->mirror_num, block->flush_gen,
2658 (unsigned long long)block->flush_gen,
2659 (unsigned long long)
2660 l->block_ref_to->dev_state->last_flush_gen); 2585 l->block_ref_to->dev_state->last_flush_gen);
2661 ret = -1; 2586 ret = -1;
2662 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2587 } else if (-1 == btrfsic_check_all_ref_blocks(state,
@@ -2701,16 +2626,12 @@ static int btrfsic_is_block_ref_by_superblock(
2701 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2626 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2702 recursion_level, 2627 recursion_level,
2703 btrfsic_get_block_type(state, block), 2628 btrfsic_get_block_type(state, block),
2704 (unsigned long long)block->logical_bytenr, 2629 block->logical_bytenr, block->dev_state->name,
2705 block->dev_state->name, 2630 block->dev_bytenr, block->mirror_num,
2706 (unsigned long long)block->dev_bytenr,
2707 block->mirror_num,
2708 l->ref_cnt, 2631 l->ref_cnt,
2709 btrfsic_get_block_type(state, l->block_ref_from), 2632 btrfsic_get_block_type(state, l->block_ref_from),
2710 (unsigned long long)
2711 l->block_ref_from->logical_bytenr, 2633 l->block_ref_from->logical_bytenr,
2712 l->block_ref_from->dev_state->name, 2634 l->block_ref_from->dev_state->name,
2713 (unsigned long long)
2714 l->block_ref_from->dev_bytenr, 2635 l->block_ref_from->dev_bytenr,
2715 l->block_ref_from->mirror_num); 2636 l->block_ref_from->mirror_num);
2716 if (l->block_ref_from->is_superblock && 2637 if (l->block_ref_from->is_superblock &&
@@ -2737,14 +2658,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state,
2737 " to %c @%llu (%s/%llu/%d).\n", 2658 " to %c @%llu (%s/%llu/%d).\n",
2738 l->ref_cnt, 2659 l->ref_cnt,
2739 btrfsic_get_block_type(state, l->block_ref_from), 2660 btrfsic_get_block_type(state, l->block_ref_from),
2740 (unsigned long long)l->block_ref_from->logical_bytenr, 2661 l->block_ref_from->logical_bytenr,
2741 l->block_ref_from->dev_state->name, 2662 l->block_ref_from->dev_state->name,
2742 (unsigned long long)l->block_ref_from->dev_bytenr, 2663 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2743 l->block_ref_from->mirror_num,
2744 btrfsic_get_block_type(state, l->block_ref_to), 2664 btrfsic_get_block_type(state, l->block_ref_to),
2745 (unsigned long long)l->block_ref_to->logical_bytenr, 2665 l->block_ref_to->logical_bytenr,
2746 l->block_ref_to->dev_state->name, 2666 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2747 (unsigned long long)l->block_ref_to->dev_bytenr,
2748 l->block_ref_to->mirror_num); 2667 l->block_ref_to->mirror_num);
2749} 2668}
2750 2669
@@ -2756,14 +2675,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2756 " to %c @%llu (%s/%llu/%d).\n", 2675 " to %c @%llu (%s/%llu/%d).\n",
2757 l->ref_cnt, 2676 l->ref_cnt,
2758 btrfsic_get_block_type(state, l->block_ref_from), 2677 btrfsic_get_block_type(state, l->block_ref_from),
2759 (unsigned long long)l->block_ref_from->logical_bytenr, 2678 l->block_ref_from->logical_bytenr,
2760 l->block_ref_from->dev_state->name, 2679 l->block_ref_from->dev_state->name,
2761 (unsigned long long)l->block_ref_from->dev_bytenr, 2680 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2762 l->block_ref_from->mirror_num,
2763 btrfsic_get_block_type(state, l->block_ref_to), 2681 btrfsic_get_block_type(state, l->block_ref_to),
2764 (unsigned long long)l->block_ref_to->logical_bytenr, 2682 l->block_ref_to->logical_bytenr,
2765 l->block_ref_to->dev_state->name, 2683 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2766 (unsigned long long)l->block_ref_to->dev_bytenr,
2767 l->block_ref_to->mirror_num); 2684 l->block_ref_to->mirror_num);
2768} 2685}
2769 2686
@@ -2807,10 +2724,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2807 */ 2724 */
2808 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2725 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2809 btrfsic_get_block_type(state, block), 2726 btrfsic_get_block_type(state, block),
2810 (unsigned long long)block->logical_bytenr, 2727 block->logical_bytenr, block->dev_state->name,
2811 block->dev_state->name, 2728 block->dev_bytenr, block->mirror_num);
2812 (unsigned long long)block->dev_bytenr,
2813 block->mirror_num);
2814 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2729 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2815 printk("[...]\n"); 2730 printk("[...]\n");
2816 return; 2731 return;
@@ -2943,10 +2858,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
2943 "New %s%c-block @%llu (%s/%llu/%d)\n", 2858 "New %s%c-block @%llu (%s/%llu/%d)\n",
2944 additional_string, 2859 additional_string,
2945 btrfsic_get_block_type(state, block), 2860 btrfsic_get_block_type(state, block),
2946 (unsigned long long)block->logical_bytenr, 2861 block->logical_bytenr, dev_state->name,
2947 dev_state->name, 2862 block->dev_bytenr, mirror_num);
2948 (unsigned long long)block->dev_bytenr,
2949 mirror_num);
2950 list_add(&block->all_blocks_node, &state->all_blocks_list); 2863 list_add(&block->all_blocks_node, &state->all_blocks_list);
2951 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2864 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2952 if (NULL != was_created) 2865 if (NULL != was_created)
@@ -2980,7 +2893,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2980 printk(KERN_INFO "btrfsic:" 2893 printk(KERN_INFO "btrfsic:"
2981 " btrfsic_map_block(logical @%llu," 2894 " btrfsic_map_block(logical @%llu,"
2982 " mirror %d) failed!\n", 2895 " mirror %d) failed!\n",
2983 (unsigned long long)bytenr, mirror_num); 2896 bytenr, mirror_num);
2984 continue; 2897 continue;
2985 } 2898 }
2986 2899
@@ -2997,8 +2910,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2997 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2910 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2998 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 2911 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2999 " phys_bytenr=%llu)!\n", 2912 " phys_bytenr=%llu)!\n",
3000 (unsigned long long)bytenr, dev_state->name, 2913 bytenr, dev_state->name, dev_bytenr);
3001 (unsigned long long)dev_bytenr);
3002 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2914 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
3003 ret = btrfsic_map_block(state, bytenr, 2915 ret = btrfsic_map_block(state, bytenr,
3004 state->metablock_size, 2916 state->metablock_size,
@@ -3008,10 +2920,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
3008 2920
3009 printk(KERN_INFO "Read logical bytenr @%llu maps to" 2921 printk(KERN_INFO "Read logical bytenr @%llu maps to"
3010 " (%s/%llu/%d)\n", 2922 " (%s/%llu/%d)\n",
3011 (unsigned long long)bytenr, 2923 bytenr, block_ctx.dev->name,
3012 block_ctx.dev->name, 2924 block_ctx.dev_bytenr, mirror_num);
3013 (unsigned long long)block_ctx.dev_bytenr,
3014 mirror_num);
3015 } 2925 }
3016 WARN_ON(1); 2926 WARN_ON(1);
3017 } 2927 }
@@ -3048,12 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3048 if (dev_state->state->print_mask & 2958 if (dev_state->state->print_mask &
3049 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2959 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3050 printk(KERN_INFO 2960 printk(KERN_INFO
3051 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," 2961 "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
3052 " size=%lu, data=%p, bdev=%p)\n", 2962 " size=%zu, data=%p, bdev=%p)\n",
3053 rw, (unsigned long)bh->b_blocknr, 2963 rw, (unsigned long long)bh->b_blocknr,
3054 (unsigned long long)dev_bytenr, 2964 dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
3055 (unsigned long)bh->b_size, bh->b_data,
3056 bh->b_bdev);
3057 btrfsic_process_written_block(dev_state, dev_bytenr, 2965 btrfsic_process_written_block(dev_state, dev_bytenr,
3058 &bh->b_data, 1, NULL, 2966 &bh->b_data, 1, NULL,
3059 NULL, bh, rw); 2967 NULL, bh, rw);
@@ -3118,9 +3026,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
3118 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3026 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3119 printk(KERN_INFO 3027 printk(KERN_INFO
3120 "submit_bio(rw=0x%x, bi_vcnt=%u," 3028 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3121 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3029 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3122 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3030 rw, bio->bi_vcnt,
3123 (unsigned long long)dev_bytenr, 3031 (unsigned long long)bio->bi_sector, dev_bytenr,
3124 bio->bi_bdev); 3032 bio->bi_bdev);
3125 3033
3126 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3034 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
@@ -3213,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root,
3213 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3121 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3214 printk(KERN_INFO 3122 printk(KERN_INFO
3215 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3123 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3216 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3124 root->nodesize, PAGE_CACHE_SIZE);
3217 return -1; 3125 return -1;
3218 } 3126 }
3219 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3127 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3220 printk(KERN_INFO 3128 printk(KERN_INFO
3221 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3129 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3222 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3130 root->leafsize, PAGE_CACHE_SIZE);
3223 return -1; 3131 return -1;
3224 } 3132 }
3225 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3133 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3226 printk(KERN_INFO 3134 printk(KERN_INFO
3227 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3135 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3228 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3136 root->sectorsize, PAGE_CACHE_SIZE);
3229 return -1; 3137 return -1;
3230 } 3138 }
3231 state = kzalloc(sizeof(*state), GFP_NOFS); 3139 state = kzalloc(sizeof(*state), GFP_NOFS);
@@ -3369,10 +3277,8 @@ void btrfsic_unmount(struct btrfs_root *root,
3369 " @%llu (%s/%llu/%d) on umount which is" 3277 " @%llu (%s/%llu/%d) on umount which is"
3370 " not yet iodone!\n", 3278 " not yet iodone!\n",
3371 btrfsic_get_block_type(state, b_all), 3279 btrfsic_get_block_type(state, b_all),
3372 (unsigned long long)b_all->logical_bytenr, 3280 b_all->logical_bytenr, b_all->dev_state->name,
3373 b_all->dev_state->name, 3281 b_all->dev_bytenr, b_all->mirror_num);
3374 (unsigned long long)b_all->dev_bytenr,
3375 b_all->mirror_num);
3376 } 3282 }
3377 3283
3378 mutex_unlock(&btrfsic_mutex); 3284 mutex_unlock(&btrfsic_mutex);
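Nearly every other check-integrity.c hunk in this diff follows one mechanical pattern: u64 values are handed to printk() directly and the "(unsigned long long)" casts are dropped. That is only valid if u64 is typedef'd to unsigned long long on every architecture the kernel builds for, which is the assumption this cleanup series leans on. A minimal before/after sketch (the function and values are made up for illustration):

static void example_report(u64 bytenr, u64 dev_bytenr, int mirror_num)
{
	/* before: every u64 argument carried an explicit cast */
	printk(KERN_INFO "block @%llu (dev %llu, mirror %d)\n",
	       (unsigned long long)bytenr,
	       (unsigned long long)dev_bytenr, mirror_num);

	/* after: u64 already matches %llu, so the casts are just noise */
	printk(KERN_INFO "block @%llu (dev %llu, mirror %d)\n",
	       bytenr, dev_bytenr, mirror_num);
}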
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b189bd1e7a3e..6aad98cb343f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode,
132 printk(KERN_INFO "btrfs csum failed ino %llu " 132 printk(KERN_INFO "btrfs csum failed ino %llu "
133 "extent %llu csum %u " 133 "extent %llu csum %u "
134 "wanted %u mirror %d\n", 134 "wanted %u mirror %d\n",
135 (unsigned long long)btrfs_ino(inode), 135 btrfs_ino(inode), disk_start, csum, *cb_sum,
136 (unsigned long long)disk_start, 136 cb->mirror_num);
137 csum, *cb_sum, cb->mirror_num);
138 ret = -EIO; 137 ret = -EIO;
139 goto fail; 138 goto fail;
140 } 139 }
@@ -639,7 +638,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
639 faili = nr_pages - 1; 638 faili = nr_pages - 1;
640 cb->nr_pages = nr_pages; 639 cb->nr_pages = nr_pages;
641 640
642 add_ra_bio_pages(inode, em_start + em_len, cb); 641 /* In the parent-locked case, we only locked the range we are
642 * interested in. In all other cases, we can opportunistically
643 * cache decompressed data that goes beyond the requested range. */
644 if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
645 add_ra_bio_pages(inode, em_start + em_len, cb);
643 646
 644 /* include any pages we added in add_ra_bio_pages */ 647 /* include any pages we added in add_ra_bio_pages */
645 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; 648 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
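The new check in btrfs_submit_compressed_read() above keeps the opportunistic readahead out of the parent-locked path: with EXTENT_BIO_PARENT_LOCKED only the requested range is locked, so add_ra_bio_pages() must not pull in pages beyond em_start + em_len. A small sketch of that decision as a predicate (the helper below is hypothetical, not part of the patch):

static bool may_cache_beyond_request(unsigned long bio_flags)
{
	/*
	 * EXTENT_BIO_PARENT_LOCKED means the caller locked exactly the range
	 * it asked for; touching pages past it would be done unlocked.
	 */
	return !(bio_flags & EXTENT_BIO_PARENT_LOCKED);
}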
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ed504607d8ec..64346721173f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
274 else 274 else
275 btrfs_set_header_owner(cow, new_root_objectid); 275 btrfs_set_header_owner(cow, new_root_objectid);
276 276
277 write_extent_buffer(cow, root->fs_info->fsid, 277 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
278 (unsigned long)btrfs_header_fsid(cow),
279 BTRFS_FSID_SIZE); 278 BTRFS_FSID_SIZE);
280 279
281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 280 WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
484 struct rb_node **new; 483 struct rb_node **new;
485 struct rb_node *parent = NULL; 484 struct rb_node *parent = NULL;
486 struct tree_mod_elem *cur; 485 struct tree_mod_elem *cur;
486 int ret = 0;
487
488 BUG_ON(!tm);
489
490 tree_mod_log_write_lock(fs_info);
491 if (list_empty(&fs_info->tree_mod_seq_list)) {
492 tree_mod_log_write_unlock(fs_info);
493 /*
494 * Ok we no longer care about logging modifications, free up tm
495 * and return 0. Any callers shouldn't be using tm after
496 * calling tree_mod_log_insert, but if they do we can just
497 * change this to return a special error code to let the callers
498 * do their own thing.
499 */
500 kfree(tm);
501 return 0;
502 }
487 503
488 BUG_ON(!tm || !tm->seq); 504 spin_lock(&fs_info->tree_mod_seq_lock);
505 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
506 spin_unlock(&fs_info->tree_mod_seq_lock);
489 507
490 tm_root = &fs_info->tree_mod_log; 508 tm_root = &fs_info->tree_mod_log;
491 new = &tm_root->rb_node; 509 new = &tm_root->rb_node;
@@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
501 else if (cur->seq > tm->seq) 519 else if (cur->seq > tm->seq)
502 new = &((*new)->rb_right); 520 new = &((*new)->rb_right);
503 else { 521 else {
522 ret = -EEXIST;
504 kfree(tm); 523 kfree(tm);
505 return -EEXIST; 524 goto out;
506 } 525 }
507 } 526 }
508 527
509 rb_link_node(&tm->node, parent, new); 528 rb_link_node(&tm->node, parent, new);
510 rb_insert_color(&tm->node, tm_root); 529 rb_insert_color(&tm->node, tm_root);
511 return 0; 530out:
531 tree_mod_log_write_unlock(fs_info);
532 return ret;
512} 533}
513 534
514/* 535/*
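From here on the tree_mod_log hunks restructure who takes the log lock: __tree_mod_log_insert() now acquires tree_mod_log_write_lock() itself, quietly frees the element when no tree_mod_seq holder is registered, assigns tm->seq under tree_mod_seq_lock, and only then links the element into the rb-tree. Callers shrink to "check, allocate, fill, insert", as the following hunks show; a condensed sketch of that caller shape, using the names from the diff (example_log_key itself is illustrative):

static int example_log_key(struct btrfs_fs_info *fs_info,
			   struct extent_buffer *eb, int slot,
			   enum mod_log_op op, gfp_t flags)
{
	struct tree_mod_elem *tm;

	if (tree_mod_dont_log(fs_info, eb))	/* cheap checks, no lock taken */
		return 0;

	tm = kzalloc(sizeof(*tm), flags);	/* GFP_NOFS, or GFP_ATOMIC in atomic context */
	if (!tm)
		return -ENOMEM;

	tm->index = eb->start >> PAGE_CACHE_SHIFT;
	tm->slot = slot;
	tm->op = op;

	/* locking, seq assignment and the empty-list shortcut live in here now */
	return __tree_mod_log_insert(fs_info, tm);
}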
@@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
524 return 1; 545 return 1;
525 if (eb && btrfs_header_level(eb) == 0) 546 if (eb && btrfs_header_level(eb) == 0)
526 return 1; 547 return 1;
527
528 tree_mod_log_write_lock(fs_info);
529 if (list_empty(&fs_info->tree_mod_seq_list)) {
530 /*
531 * someone emptied the list while we were waiting for the lock.
532 * we must not add to the list when no blocker exists.
533 */
534 tree_mod_log_write_unlock(fs_info);
535 return 1;
536 }
537
538 return 0; 548 return 0;
539} 549}
540 550
541/*
542 * This allocates memory and gets a tree modification sequence number.
543 *
544 * Returns <0 on error.
545 * Returns >0 (the added sequence number) on success.
546 */
547static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
548 struct tree_mod_elem **tm_ret)
549{
550 struct tree_mod_elem *tm;
551
552 /*
553 * once we switch from spin locks to something different, we should
554 * honor the flags parameter here.
555 */
556 tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
557 if (!tm)
558 return -ENOMEM;
559
560 spin_lock(&fs_info->tree_mod_seq_lock);
561 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
562 spin_unlock(&fs_info->tree_mod_seq_lock);
563
564 return tm->seq;
565}
566
567static inline int 551static inline int
568__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, 552__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
569 struct extent_buffer *eb, int slot, 553 struct extent_buffer *eb, int slot,
570 enum mod_log_op op, gfp_t flags) 554 enum mod_log_op op, gfp_t flags)
571{ 555{
572 int ret;
573 struct tree_mod_elem *tm; 556 struct tree_mod_elem *tm;
574 557
575 ret = tree_mod_alloc(fs_info, flags, &tm); 558 tm = kzalloc(sizeof(*tm), flags);
576 if (ret < 0) 559 if (!tm)
577 return ret; 560 return -ENOMEM;
578 561
579 tm->index = eb->start >> PAGE_CACHE_SHIFT; 562 tm->index = eb->start >> PAGE_CACHE_SHIFT;
580 if (op != MOD_LOG_KEY_ADD) { 563 if (op != MOD_LOG_KEY_ADD) {
@@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
589} 572}
590 573
591static noinline int 574static noinline int
592tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, 575tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
593 struct extent_buffer *eb, int slot, 576 struct extent_buffer *eb, int slot,
594 enum mod_log_op op, gfp_t flags) 577 enum mod_log_op op, gfp_t flags)
595{ 578{
596 int ret;
597
598 if (tree_mod_dont_log(fs_info, eb)) 579 if (tree_mod_dont_log(fs_info, eb))
599 return 0; 580 return 0;
600 581
601 ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); 582 return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
602
603 tree_mod_log_write_unlock(fs_info);
604 return ret;
605}
606
607static noinline int
608tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
609 int slot, enum mod_log_op op)
610{
611 return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
612}
613
614static noinline int
615tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
616 struct extent_buffer *eb, int slot,
617 enum mod_log_op op)
618{
619 return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
620} 583}
621 584
622static noinline int 585static noinline int
@@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
637 * buffer, i.e. dst_slot < src_slot. 600 * buffer, i.e. dst_slot < src_slot.
638 */ 601 */
639 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 602 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
640 ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, 603 ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
641 MOD_LOG_KEY_REMOVE_WHILE_MOVING); 604 MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
642 BUG_ON(ret < 0); 605 BUG_ON(ret < 0);
643 } 606 }
644 607
645 ret = tree_mod_alloc(fs_info, flags, &tm); 608 tm = kzalloc(sizeof(*tm), flags);
646 if (ret < 0) 609 if (!tm)
647 goto out; 610 return -ENOMEM;
648 611
649 tm->index = eb->start >> PAGE_CACHE_SHIFT; 612 tm->index = eb->start >> PAGE_CACHE_SHIFT;
650 tm->slot = src_slot; 613 tm->slot = src_slot;
@@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
652 tm->move.nr_items = nr_items; 615 tm->move.nr_items = nr_items;
653 tm->op = MOD_LOG_MOVE_KEYS; 616 tm->op = MOD_LOG_MOVE_KEYS;
654 617
655 ret = __tree_mod_log_insert(fs_info, tm); 618 return __tree_mod_log_insert(fs_info, tm);
656out:
657 tree_mod_log_write_unlock(fs_info);
658 return ret;
659} 619}
660 620
661static inline void 621static inline void
@@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
670 630
671 nritems = btrfs_header_nritems(eb); 631 nritems = btrfs_header_nritems(eb);
672 for (i = nritems - 1; i >= 0; i--) { 632 for (i = nritems - 1; i >= 0; i--) {
673 ret = tree_mod_log_insert_key_locked(fs_info, eb, i, 633 ret = __tree_mod_log_insert_key(fs_info, eb, i,
674 MOD_LOG_KEY_REMOVE_WHILE_FREEING); 634 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
675 BUG_ON(ret < 0); 635 BUG_ON(ret < 0);
676 } 636 }
677} 637}
@@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
683 int log_removal) 643 int log_removal)
684{ 644{
685 struct tree_mod_elem *tm; 645 struct tree_mod_elem *tm;
686 int ret;
687 646
688 if (tree_mod_dont_log(fs_info, NULL)) 647 if (tree_mod_dont_log(fs_info, NULL))
689 return 0; 648 return 0;
@@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
691 if (log_removal) 650 if (log_removal)
692 __tree_mod_log_free_eb(fs_info, old_root); 651 __tree_mod_log_free_eb(fs_info, old_root);
693 652
694 ret = tree_mod_alloc(fs_info, flags, &tm); 653 tm = kzalloc(sizeof(*tm), flags);
695 if (ret < 0) 654 if (!tm)
696 goto out; 655 return -ENOMEM;
697 656
698 tm->index = new_root->start >> PAGE_CACHE_SHIFT; 657 tm->index = new_root->start >> PAGE_CACHE_SHIFT;
699 tm->old_root.logical = old_root->start; 658 tm->old_root.logical = old_root->start;
@@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
701 tm->generation = btrfs_header_generation(old_root); 660 tm->generation = btrfs_header_generation(old_root);
702 tm->op = MOD_LOG_ROOT_REPLACE; 661 tm->op = MOD_LOG_ROOT_REPLACE;
703 662
704 ret = __tree_mod_log_insert(fs_info, tm); 663 return __tree_mod_log_insert(fs_info, tm);
705out:
706 tree_mod_log_write_unlock(fs_info);
707 return ret;
708} 664}
709 665
710static struct tree_mod_elem * 666static struct tree_mod_elem *
@@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
784 if (tree_mod_dont_log(fs_info, NULL)) 740 if (tree_mod_dont_log(fs_info, NULL))
785 return; 741 return;
786 742
787 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { 743 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
788 tree_mod_log_write_unlock(fs_info);
789 return; 744 return;
790 }
791 745
792 for (i = 0; i < nr_items; i++) { 746 for (i = 0; i < nr_items; i++) {
793 ret = tree_mod_log_insert_key_locked(fs_info, src, 747 ret = __tree_mod_log_insert_key(fs_info, src,
794 i + src_offset, 748 i + src_offset,
795 MOD_LOG_KEY_REMOVE); 749 MOD_LOG_KEY_REMOVE, GFP_NOFS);
796 BUG_ON(ret < 0); 750 BUG_ON(ret < 0);
797 ret = tree_mod_log_insert_key_locked(fs_info, dst, 751 ret = __tree_mod_log_insert_key(fs_info, dst,
798 i + dst_offset, 752 i + dst_offset,
799 MOD_LOG_KEY_ADD); 753 MOD_LOG_KEY_ADD,
754 GFP_NOFS);
800 BUG_ON(ret < 0); 755 BUG_ON(ret < 0);
801 } 756 }
802
803 tree_mod_log_write_unlock(fs_info);
804} 757}
805 758
806static inline void 759static inline void
@@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
819{ 772{
820 int ret; 773 int ret;
821 774
822 ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, 775 ret = __tree_mod_log_insert_key(fs_info, eb, slot,
823 MOD_LOG_KEY_REPLACE, 776 MOD_LOG_KEY_REPLACE,
824 atomic ? GFP_ATOMIC : GFP_NOFS); 777 atomic ? GFP_ATOMIC : GFP_NOFS);
825 BUG_ON(ret < 0); 778 BUG_ON(ret < 0);
826} 779}
827 780
@@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
830{ 783{
831 if (tree_mod_dont_log(fs_info, eb)) 784 if (tree_mod_dont_log(fs_info, eb))
832 return; 785 return;
833
834 __tree_mod_log_free_eb(fs_info, eb); 786 __tree_mod_log_free_eb(fs_info, eb);
835
836 tree_mod_log_write_unlock(fs_info);
837} 787}
838 788
839static noinline void 789static noinline void
@@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1046 else 996 else
1047 btrfs_set_header_owner(cow, root->root_key.objectid); 997 btrfs_set_header_owner(cow, root->root_key.objectid);
1048 998
1049 write_extent_buffer(cow, root->fs_info->fsid, 999 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
1050 (unsigned long)btrfs_header_fsid(cow),
1051 BTRFS_FSID_SIZE); 1000 BTRFS_FSID_SIZE);
1052 1001
1053 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 1002 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1083 1032
1084 WARN_ON(trans->transid != btrfs_header_generation(parent)); 1033 WARN_ON(trans->transid != btrfs_header_generation(parent));
1085 tree_mod_log_insert_key(root->fs_info, parent, parent_slot, 1034 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
1086 MOD_LOG_KEY_REPLACE); 1035 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1087 btrfs_set_node_blockptr(parent, parent_slot, 1036 btrfs_set_node_blockptr(parent, parent_slot,
1088 cow->start); 1037 cow->start);
1089 btrfs_set_node_ptr_generation(parent, parent_slot, 1038 btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1116 int looped = 0; 1065 int looped = 0;
1117 1066
1118 if (!time_seq) 1067 if (!time_seq)
1119 return 0; 1068 return NULL;
1120 1069
1121 /* 1070 /*
1122 * the very last operation that's logged for a root is the replacement 1071 * the very last operation that's logged for a root is the replacement
@@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1127 tm = tree_mod_log_search_oldest(fs_info, root_logical, 1076 tm = tree_mod_log_search_oldest(fs_info, root_logical,
1128 time_seq); 1077 time_seq);
1129 if (!looped && !tm) 1078 if (!looped && !tm)
1130 return 0; 1079 return NULL;
1131 /* 1080 /*
1132 * if there are no tree operation for the oldest root, we simply 1081 * if there are no tree operation for the oldest root, we simply
1133 * return it. this should only happen if that (old) root is at 1082 * return it. this should only happen if that (old) root is at
@@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1240 * is freed (its refcount is decremented). 1189 * is freed (its refcount is decremented).
1241 */ 1190 */
1242static struct extent_buffer * 1191static struct extent_buffer *
1243tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 1192tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1244 u64 time_seq) 1193 struct extent_buffer *eb, u64 time_seq)
1245{ 1194{
1246 struct extent_buffer *eb_rewin; 1195 struct extent_buffer *eb_rewin;
1247 struct tree_mod_elem *tm; 1196 struct tree_mod_elem *tm;
@@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1256 if (!tm) 1205 if (!tm)
1257 return eb; 1206 return eb;
1258 1207
1208 btrfs_set_path_blocking(path);
1209 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1210
1259 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 1211 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1260 BUG_ON(tm->slot != 0); 1212 BUG_ON(tm->slot != 0);
1261 eb_rewin = alloc_dummy_extent_buffer(eb->start, 1213 eb_rewin = alloc_dummy_extent_buffer(eb->start,
1262 fs_info->tree_root->nodesize); 1214 fs_info->tree_root->nodesize);
1263 BUG_ON(!eb_rewin); 1215 if (!eb_rewin) {
1216 btrfs_tree_read_unlock_blocking(eb);
1217 free_extent_buffer(eb);
1218 return NULL;
1219 }
1264 btrfs_set_header_bytenr(eb_rewin, eb->start); 1220 btrfs_set_header_bytenr(eb_rewin, eb->start);
1265 btrfs_set_header_backref_rev(eb_rewin, 1221 btrfs_set_header_backref_rev(eb_rewin,
1266 btrfs_header_backref_rev(eb)); 1222 btrfs_header_backref_rev(eb));
@@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1268 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); 1224 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
1269 } else { 1225 } else {
1270 eb_rewin = btrfs_clone_extent_buffer(eb); 1226 eb_rewin = btrfs_clone_extent_buffer(eb);
1271 BUG_ON(!eb_rewin); 1227 if (!eb_rewin) {
1228 btrfs_tree_read_unlock_blocking(eb);
1229 free_extent_buffer(eb);
1230 return NULL;
1231 }
1272 } 1232 }
1273 1233
1274 btrfs_tree_read_unlock(eb); 1234 btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
1235 btrfs_tree_read_unlock_blocking(eb);
1275 free_extent_buffer(eb); 1236 free_extent_buffer(eb);
1276 1237
1277 extent_buffer_get(eb_rewin); 1238 extent_buffer_get(eb_rewin);
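tree_mod_log_rewind() now takes the path so it can mark it blocking before converting eb's spinning read lock with btrfs_set_lock_blocking_rw(), and an allocation failure for the rewound copy is returned as NULL instead of hitting a BUG_ON(). The caller therefore has to check the result; condensed from the btrfs_search_old_slot hunk further down:

	b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
	if (!b) {			/* clone/dummy buffer allocation failed */
		ret = -ENOMEM;
		goto done;
	}
	p->locks[level] = BTRFS_READ_LOCK;
	p->nodes[level] = b;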
@@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1335 free_extent_buffer(eb_root); 1296 free_extent_buffer(eb_root);
1336 eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1297 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1337 } else { 1298 } else {
1299 btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
1338 eb = btrfs_clone_extent_buffer(eb_root); 1300 eb = btrfs_clone_extent_buffer(eb_root);
1339 btrfs_tree_read_unlock(eb_root); 1301 btrfs_tree_read_unlock_blocking(eb_root);
1340 free_extent_buffer(eb_root); 1302 free_extent_buffer(eb_root);
1341 } 1303 }
1342 1304
@@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
1419 1381
1420 if (trans->transaction != root->fs_info->running_transaction) 1382 if (trans->transaction != root->fs_info->running_transaction)
1421 WARN(1, KERN_CRIT "trans %llu running %llu\n", 1383 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1422 (unsigned long long)trans->transid, 1384 trans->transid,
1423 (unsigned long long)
1424 root->fs_info->running_transaction->transid); 1385 root->fs_info->running_transaction->transid);
1425 1386
1426 if (trans->transid != root->fs_info->generation) 1387 if (trans->transid != root->fs_info->generation)
1427 WARN(1, KERN_CRIT "trans %llu running %llu\n", 1388 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1428 (unsigned long long)trans->transid, 1389 trans->transid, root->fs_info->generation);
1429 (unsigned long long)root->fs_info->generation);
1430 1390
1431 if (!should_cow_block(trans, root, buf)) { 1391 if (!should_cow_block(trans, root, buf)) {
1432 *cow_ret = buf; 1392 *cow_ret = buf;
@@ -2466,6 +2426,40 @@ done:
2466 return ret; 2426 return ret;
2467} 2427}
2468 2428
2429static void key_search_validate(struct extent_buffer *b,
2430 struct btrfs_key *key,
2431 int level)
2432{
2433#ifdef CONFIG_BTRFS_ASSERT
2434 struct btrfs_disk_key disk_key;
2435
2436 btrfs_cpu_key_to_disk(&disk_key, key);
2437
2438 if (level == 0)
2439 ASSERT(!memcmp_extent_buffer(b, &disk_key,
2440 offsetof(struct btrfs_leaf, items[0].key),
2441 sizeof(disk_key)));
2442 else
2443 ASSERT(!memcmp_extent_buffer(b, &disk_key,
2444 offsetof(struct btrfs_node, ptrs[0].key),
2445 sizeof(disk_key)));
2446#endif
2447}
2448
2449static int key_search(struct extent_buffer *b, struct btrfs_key *key,
2450 int level, int *prev_cmp, int *slot)
2451{
2452 if (*prev_cmp != 0) {
2453 *prev_cmp = bin_search(b, key, level, slot);
2454 return *prev_cmp;
2455 }
2456
2457 key_search_validate(b, key, level);
2458 *slot = 0;
2459
2460 return 0;
2461}
2462
2469/* 2463/*
2470 * look for key in the tree. path is filled in with nodes along the way 2464 * look for key in the tree. path is filled in with nodes along the way
2471 * if key is found, we return zero and you can find the item in the leaf 2465 * if key is found, we return zero and you can find the item in the leaf
@@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2494 int write_lock_level = 0; 2488 int write_lock_level = 0;
2495 u8 lowest_level = 0; 2489 u8 lowest_level = 0;
2496 int min_write_lock_level; 2490 int min_write_lock_level;
2491 int prev_cmp;
2497 2492
2498 lowest_level = p->lowest_level; 2493 lowest_level = p->lowest_level;
2499 WARN_ON(lowest_level && ins_len > 0); 2494 WARN_ON(lowest_level && ins_len > 0);
@@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2524 min_write_lock_level = write_lock_level; 2519 min_write_lock_level = write_lock_level;
2525 2520
2526again: 2521again:
2522 prev_cmp = -1;
2527 /* 2523 /*
2528 * we try very hard to do read locks on the root 2524 * we try very hard to do read locks on the root
2529 */ 2525 */
@@ -2624,7 +2620,7 @@ cow_done:
2624 if (!cow) 2620 if (!cow)
2625 btrfs_unlock_up_safe(p, level + 1); 2621 btrfs_unlock_up_safe(p, level + 1);
2626 2622
2627 ret = bin_search(b, key, level, &slot); 2623 ret = key_search(b, key, level, &prev_cmp, &slot);
2628 2624
2629 if (level != 0) { 2625 if (level != 0) {
2630 int dec = 0; 2626 int dec = 0;
@@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2759 int level; 2755 int level;
2760 int lowest_unlock = 1; 2756 int lowest_unlock = 1;
2761 u8 lowest_level = 0; 2757 u8 lowest_level = 0;
2758 int prev_cmp;
2762 2759
2763 lowest_level = p->lowest_level; 2760 lowest_level = p->lowest_level;
2764 WARN_ON(p->nodes[0] != NULL); 2761 WARN_ON(p->nodes[0] != NULL);
@@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2769 } 2766 }
2770 2767
2771again: 2768again:
2769 prev_cmp = -1;
2772 b = get_old_root(root, time_seq); 2770 b = get_old_root(root, time_seq);
2773 level = btrfs_header_level(b); 2771 level = btrfs_header_level(b);
2774 p->locks[level] = BTRFS_READ_LOCK; 2772 p->locks[level] = BTRFS_READ_LOCK;
@@ -2786,7 +2784,7 @@ again:
2786 */ 2784 */
2787 btrfs_unlock_up_safe(p, level + 1); 2785 btrfs_unlock_up_safe(p, level + 1);
2788 2786
2789 ret = bin_search(b, key, level, &slot); 2787 ret = key_search(b, key, level, &prev_cmp, &slot);
2790 2788
2791 if (level != 0) { 2789 if (level != 0) {
2792 int dec = 0; 2790 int dec = 0;
@@ -2820,7 +2818,11 @@ again:
2820 btrfs_clear_path_blocking(p, b, 2818 btrfs_clear_path_blocking(p, b,
2821 BTRFS_READ_LOCK); 2819 BTRFS_READ_LOCK);
2822 } 2820 }
2823 b = tree_mod_log_rewind(root->fs_info, b, time_seq); 2821 b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
2822 if (!b) {
2823 ret = -ENOMEM;
2824 goto done;
2825 }
2824 p->locks[level] = BTRFS_READ_LOCK; 2826 p->locks[level] = BTRFS_READ_LOCK;
2825 p->nodes[level] = b; 2827 p->nodes[level] = b;
2826 } else { 2828 } else {
@@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3143 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 3145 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
3144 btrfs_set_header_owner(c, root->root_key.objectid); 3146 btrfs_set_header_owner(c, root->root_key.objectid);
3145 3147
3146 write_extent_buffer(c, root->fs_info->fsid, 3148 write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
3147 (unsigned long)btrfs_header_fsid(c),
3148 BTRFS_FSID_SIZE); 3149 BTRFS_FSID_SIZE);
3149 3150
3150 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 3151 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
3151 (unsigned long)btrfs_header_chunk_tree_uuid(c), 3152 btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
3152 BTRFS_UUID_SIZE);
3153 3153
3154 btrfs_set_node_key(c, &lower_key, 0); 3154 btrfs_set_node_key(c, &lower_key, 0);
3155 btrfs_set_node_blockptr(c, 0, lower->start); 3155 btrfs_set_node_blockptr(c, 0, lower->start);
@@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3208 } 3208 }
3209 if (level) { 3209 if (level) {
3210 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3210 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
3211 MOD_LOG_KEY_ADD); 3211 MOD_LOG_KEY_ADD, GFP_NOFS);
3212 BUG_ON(ret < 0); 3212 BUG_ON(ret < 0);
3213 } 3213 }
3214 btrfs_set_node_key(lower, key, slot); 3214 btrfs_set_node_key(lower, key, slot);
@@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3284 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 3284 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
3285 btrfs_set_header_owner(split, root->root_key.objectid); 3285 btrfs_set_header_owner(split, root->root_key.objectid);
3286 write_extent_buffer(split, root->fs_info->fsid, 3286 write_extent_buffer(split, root->fs_info->fsid,
3287 (unsigned long)btrfs_header_fsid(split), 3287 btrfs_header_fsid(split), BTRFS_FSID_SIZE);
3288 BTRFS_FSID_SIZE);
3289 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 3288 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
3290 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3289 btrfs_header_chunk_tree_uuid(split),
3291 BTRFS_UUID_SIZE); 3290 BTRFS_UUID_SIZE);
3292 3291
3293 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3292 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
@@ -4040,11 +4039,10 @@ again:
4040 btrfs_set_header_owner(right, root->root_key.objectid); 4039 btrfs_set_header_owner(right, root->root_key.objectid);
4041 btrfs_set_header_level(right, 0); 4040 btrfs_set_header_level(right, 0);
4042 write_extent_buffer(right, root->fs_info->fsid, 4041 write_extent_buffer(right, root->fs_info->fsid,
4043 (unsigned long)btrfs_header_fsid(right), 4042 btrfs_header_fsid(right), BTRFS_FSID_SIZE);
4044 BTRFS_FSID_SIZE);
4045 4043
4046 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 4044 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
4047 (unsigned long)btrfs_header_chunk_tree_uuid(right), 4045 btrfs_header_chunk_tree_uuid(right),
4048 BTRFS_UUID_SIZE); 4046 BTRFS_UUID_SIZE);
4049 4047
4050 if (split == 0) { 4048 if (split == 0) {
@@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4642 (nritems - slot - 1)); 4640 (nritems - slot - 1));
4643 } else if (level) { 4641 } else if (level) {
4644 ret = tree_mod_log_insert_key(root->fs_info, parent, slot, 4642 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
4645 MOD_LOG_KEY_REMOVE); 4643 MOD_LOG_KEY_REMOVE, GFP_NOFS);
4646 BUG_ON(ret < 0); 4644 BUG_ON(ret < 0);
4647 } 4645 }
4648 4646
@@ -4814,7 +4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4814 * This may release the path, and so you may lose any locks held at the 4812 * This may release the path, and so you may lose any locks held at the
4815 * time you call it. 4813 * time you call it.
4816 */ 4814 */
4817int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 4815static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4818{ 4816{
4819 struct btrfs_key key; 4817 struct btrfs_key key;
4820 struct btrfs_disk_key found_key; 4818 struct btrfs_disk_key found_key;
@@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5329 goto out; 5327 goto out;
5330 advance_right = ADVANCE; 5328 advance_right = ADVANCE;
5331 } else { 5329 } else {
5330 enum btrfs_compare_tree_result cmp;
5331
5332 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); 5332 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
5333 ret = tree_compare_item(left_root, left_path, 5333 ret = tree_compare_item(left_root, left_path,
5334 right_path, tmp_buf); 5334 right_path, tmp_buf);
5335 if (ret) { 5335 if (ret)
5336 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); 5336 cmp = BTRFS_COMPARE_TREE_CHANGED;
5337 ret = changed_cb(left_root, right_root, 5337 else
5338 left_path, right_path, 5338 cmp = BTRFS_COMPARE_TREE_SAME;
5339 &left_key, 5339 ret = changed_cb(left_root, right_root,
5340 BTRFS_COMPARE_TREE_CHANGED, 5340 left_path, right_path,
5341 ctx); 5341 &left_key, cmp, ctx);
5342 if (ret < 0) 5342 if (ret < 0)
5343 goto out; 5343 goto out;
5344 }
5345 advance_left = ADVANCE; 5344 advance_left = ADVANCE;
5346 advance_right = ADVANCE; 5345 advance_right = ADVANCE;
5347 } 5346 }
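The last ctree.c hunk changes btrfs_compare_trees() so the callback is invoked for every item present in both trees: when tree_compare_item() finds the payloads identical the call is no longer skipped, it is merely reported as BTRFS_COMPARE_TREE_SAME, presumably so a consumer such as send can react to unchanged items too. A callback written against the new interface has one extra case to handle; a hedged sketch (example_changed_cb is illustrative, the argument order follows the call site in the hunk):

static int example_changed_cb(struct btrfs_root *left_root,
			      struct btrfs_root *right_root,
			      struct btrfs_path *left_path,
			      struct btrfs_path *right_path,
			      struct btrfs_key *key,
			      enum btrfs_compare_tree_result result,
			      void *ctx)
{
	switch (result) {
	case BTRFS_COMPARE_TREE_NEW:
	case BTRFS_COMPARE_TREE_DELETED:
	case BTRFS_COMPARE_TREE_CHANGED:
		/* the pre-existing cases: the item differs between the trees */
		break;
	case BTRFS_COMPARE_TREE_SAME:
		/* new case: the item is byte-identical on both sides */
		break;
	}
	return 0;
}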
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e795bf135e80..3c1da6f98a4d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
23#include <linux/highmem.h> 23#include <linux/highmem.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/rwsem.h> 25#include <linux/rwsem.h>
26#include <linux/semaphore.h>
26#include <linux/completion.h> 27#include <linux/completion.h>
27#include <linux/backing-dev.h> 28#include <linux/backing-dev.h>
28#include <linux/wait.h> 29#include <linux/wait.h>
@@ -91,6 +92,9 @@ struct btrfs_ordered_sum;
91/* holds quota configuration and tracking */ 92/* holds quota configuration and tracking */
92#define BTRFS_QUOTA_TREE_OBJECTID 8ULL 93#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
93 94
95/* for storing items that use the BTRFS_UUID_KEY* types */
96#define BTRFS_UUID_TREE_OBJECTID 9ULL
97
94/* for storing balance parameters in the root tree */ 98/* for storing balance parameters in the root tree */
95#define BTRFS_BALANCE_OBJECTID -4ULL 99#define BTRFS_BALANCE_OBJECTID -4ULL
96 100
@@ -142,7 +146,7 @@ struct btrfs_ordered_sum;
142 146
143#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 147#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
144 148
145#define BTRFS_DEV_REPLACE_DEVID 0 149#define BTRFS_DEV_REPLACE_DEVID 0ULL
146 150
147/* 151/*
148 * the max metadata block size. This limit is somewhat artificial, 152 * the max metadata block size. This limit is somewhat artificial,
@@ -478,9 +482,10 @@ struct btrfs_super_block {
478 char label[BTRFS_LABEL_SIZE]; 482 char label[BTRFS_LABEL_SIZE];
479 483
480 __le64 cache_generation; 484 __le64 cache_generation;
485 __le64 uuid_tree_generation;
481 486
482 /* future expansion */ 487 /* future expansion */
483 __le64 reserved[31]; 488 __le64 reserved[30];
484 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 489 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
485 struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; 490 struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
486} __attribute__ ((__packed__)); 491} __attribute__ ((__packed__));
@@ -1188,6 +1193,7 @@ enum btrfs_caching_type {
1188 BTRFS_CACHE_STARTED = 1, 1193 BTRFS_CACHE_STARTED = 1,
1189 BTRFS_CACHE_FAST = 2, 1194 BTRFS_CACHE_FAST = 2,
1190 BTRFS_CACHE_FINISHED = 3, 1195 BTRFS_CACHE_FINISHED = 3,
1196 BTRFS_CACHE_ERROR = 4,
1191}; 1197};
1192 1198
1193enum btrfs_disk_cache_state { 1199enum btrfs_disk_cache_state {
@@ -1302,6 +1308,7 @@ struct btrfs_fs_info {
1302 struct btrfs_root *fs_root; 1308 struct btrfs_root *fs_root;
1303 struct btrfs_root *csum_root; 1309 struct btrfs_root *csum_root;
1304 struct btrfs_root *quota_root; 1310 struct btrfs_root *quota_root;
1311 struct btrfs_root *uuid_root;
1305 1312
1306 /* the log root tree is a directory of all the other log roots */ 1313 /* the log root tree is a directory of all the other log roots */
1307 struct btrfs_root *log_root_tree; 1314 struct btrfs_root *log_root_tree;
@@ -1350,6 +1357,7 @@ struct btrfs_fs_info {
1350 u64 last_trans_log_full_commit; 1357 u64 last_trans_log_full_commit;
1351 unsigned long mount_opt; 1358 unsigned long mount_opt;
1352 unsigned long compress_type:4; 1359 unsigned long compress_type:4;
1360 int commit_interval;
1353 /* 1361 /*
1354 * It is a suggestive number, the read side is safe even it gets a 1362 * It is a suggestive number, the read side is safe even it gets a
1355 * wrong number because we will write out the data into a regular 1363 * wrong number because we will write out the data into a regular
@@ -1411,6 +1419,13 @@ struct btrfs_fs_info {
1411 * before jumping into the main commit. 1419 * before jumping into the main commit.
1412 */ 1420 */
1413 struct mutex ordered_operations_mutex; 1421 struct mutex ordered_operations_mutex;
1422
1423 /*
1424 * Same as ordered_operations_mutex except this is for ordered extents
1425 * and not the operations.
1426 */
1427 struct mutex ordered_extent_flush_mutex;
1428
1414 struct rw_semaphore extent_commit_sem; 1429 struct rw_semaphore extent_commit_sem;
1415 1430
1416 struct rw_semaphore cleanup_work_sem; 1431 struct rw_semaphore cleanup_work_sem;
@@ -1641,6 +1656,9 @@ struct btrfs_fs_info {
1641 struct btrfs_dev_replace dev_replace; 1656 struct btrfs_dev_replace dev_replace;
1642 1657
1643 atomic_t mutually_exclusive_operation_running; 1658 atomic_t mutually_exclusive_operation_running;
1659
1660 struct semaphore uuid_tree_rescan_sem;
1661 unsigned int update_uuid_tree_gen:1;
1644}; 1662};
1645 1663
1646/* 1664/*
@@ -1934,6 +1952,19 @@ struct btrfs_ioctl_defrag_range_args {
1934#define BTRFS_DEV_REPLACE_KEY 250 1952#define BTRFS_DEV_REPLACE_KEY 250
1935 1953
1936/* 1954/*
1955 * Stores items that allow to quickly map UUIDs to something else.
1956 * These items are part of the filesystem UUID tree.
1957 * The key is built like this:
1958 * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
1959 */
1960#if BTRFS_UUID_SIZE != 16
1961#error "UUID items require BTRFS_UUID_SIZE == 16!"
1962#endif
1963#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
1964#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
1965 * received subvols */
1966
1967/*
1937 * string items are for debugging. They just store a short string of 1968 * string items are for debugging. They just store a short string of
1938 * data in the FS 1969 * data in the FS
1939 */ 1970 */
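The new UUID item keys pack a 16-byte UUID into the two 64-bit halves of a btrfs_key, with the item type in between, exactly as the comment above describes. A rough illustration follows, assuming a conversion helper of the kind uuid-tree.c provides; the byte handling here is a sketch and the in-tree helper is authoritative.

static void example_uuid_to_key(const u8 *uuid, u8 type, struct btrfs_key *key)
{
	/* first 8 bytes of the UUID ("upper 64 bits" in the comment above) */
	key->objectid = get_unaligned_le64(uuid);
	/* BTRFS_UUID_KEY_SUBVOL or BTRFS_UUID_KEY_RECEIVED_SUBVOL */
	key->type = type;
	/* remaining 8 bytes ("lower 64 bits") */
	key->offset = get_unaligned_le64(uuid + sizeof(u64));
}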
@@ -1967,6 +1998,9 @@ struct btrfs_ioctl_defrag_range_args {
1967#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) 1998#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
1968#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 1999#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1969#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) 2000#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
2001#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
2002
2003#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
1970 2004
1971#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 2005#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1972#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 2006#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
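Usage sketch for the new definitions, following the option-macro conventions already in place. The names match what later hunks in this diff use; the zero-check itself is illustrative: the rescan flag is tested like any other mount-option bit, and commit_interval falls back to the 30-second default when no sensible value was supplied.

	if (btrfs_test_opt(root, RESCAN_UUID_TREE))
		pr_info("btrfs: UUID tree rescan requested\n");
	if (!fs_info->commit_interval)
		fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;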
@@ -2130,14 +2164,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
2130BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, 2164BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
2131 generation, 64); 2165 generation, 64);
2132 2166
2133static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) 2167static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d)
2134{ 2168{
2135 return (char *)d + offsetof(struct btrfs_dev_item, uuid); 2169 return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid);
2136} 2170}
2137 2171
2138static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) 2172static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d)
2139{ 2173{
2140 return (char *)d + offsetof(struct btrfs_dev_item, fsid); 2174 return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid);
2141} 2175}
2142 2176
2143BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); 2177BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
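btrfs_device_uuid() and btrfs_device_fsid() now return byte offsets within the extent buffer rather than pointers that must never be dereferenced, which is the form read_extent_buffer()/write_extent_buffer() expect. A short usage sketch with illustrative variable names:

	u8 dev_uuid[BTRFS_UUID_SIZE];

	/* dev_item was obtained via btrfs_item_ptr() on this leaf */
	read_extent_buffer(leaf, dev_uuid,
			   btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);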
@@ -2240,6 +2274,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
2240BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); 2274BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
2241BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); 2275BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
2242BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); 2276BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
2277BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
2278 generation, 64);
2279BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
2280 sequence, 64);
2281BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
2282 transid, 64);
2283BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
2284BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
2285 nbytes, 64);
2286BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
2287 block_group, 64);
2288BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
2289BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
2290BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
2291BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
2292BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
2293BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
2243 2294
2244static inline struct btrfs_timespec * 2295static inline struct btrfs_timespec *
2245btrfs_inode_atime(struct btrfs_inode_item *inode_item) 2296btrfs_inode_atime(struct btrfs_inode_item *inode_item)
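The stack accessors added above operate on an ordinary in-memory copy of the structure and only perform endian conversion, unlike the extent-buffer accessors generated by BTRFS_SETGET_FUNCS. Roughly what one of these lines expands to, simplified from the macro defined earlier in ctree.h:

static inline u64 btrfs_stack_inode_size(struct btrfs_inode_item *s)
{
	return le64_to_cpu(s->size);
}

static inline void btrfs_set_stack_inode_size(struct btrfs_inode_item *s, u64 val)
{
	s->size = cpu_to_le64(val);
}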
@@ -2267,6 +2318,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
2267 2318
2268BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); 2319BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
2269BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); 2320BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
2321BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
2322BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
2270 2323
2271/* struct btrfs_dev_extent */ 2324/* struct btrfs_dev_extent */
2272BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, 2325BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
@@ -2277,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
2277 chunk_offset, 64); 2330 chunk_offset, 64);
2278BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); 2331BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
2279 2332
2280static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) 2333static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
2281{ 2334{
2282 unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); 2335 unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
2283 return (u8 *)((unsigned long)dev + ptr); 2336 return (unsigned long)dev + ptr;
2284} 2337}
2285 2338
2286BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); 2339BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
@@ -2348,6 +2401,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
2348/* struct btrfs_node */ 2401/* struct btrfs_node */
2349BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 2402BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
2350BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); 2403BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
2404BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
2405 blockptr, 64);
2406BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
2407 generation, 64);
2351 2408
2352static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) 2409static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
2353{ 2410{
@@ -2404,6 +2461,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb,
2404/* struct btrfs_item */ 2461/* struct btrfs_item */
2405BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); 2462BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
2406BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); 2463BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
2464BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32);
2465BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32);
2407 2466
2408static inline unsigned long btrfs_item_nr_offset(int nr) 2467static inline unsigned long btrfs_item_nr_offset(int nr)
2409{ 2468{
@@ -2466,6 +2525,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
2466BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); 2525BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
2467BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); 2526BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
2468BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); 2527BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
2528BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
2529BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item,
2530 data_len, 16);
2531BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
2532 name_len, 16);
2533BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
2534 transid, 64);
2469 2535
2470static inline void btrfs_dir_item_key(struct extent_buffer *eb, 2536static inline void btrfs_dir_item_key(struct extent_buffer *eb,
2471 struct btrfs_dir_item *item, 2537 struct btrfs_dir_item *item,
@@ -2568,6 +2634,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
2568BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); 2634BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
2569BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); 2635BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
2570BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); 2636BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
2637BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
2638 generation, 64);
2639BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
2640BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
2641 nritems, 32);
2642BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
2571 2643
2572static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) 2644static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
2573{ 2645{
@@ -2603,16 +2675,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
2603 btrfs_set_header_flags(eb, flags); 2675 btrfs_set_header_flags(eb, flags);
2604} 2676}
2605 2677
2606static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) 2678static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb)
2607{ 2679{
2608 unsigned long ptr = offsetof(struct btrfs_header, fsid); 2680 return offsetof(struct btrfs_header, fsid);
2609 return (u8 *)ptr;
2610} 2681}
2611 2682
2612static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) 2683static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
2613{ 2684{
2614 unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); 2685 return offsetof(struct btrfs_header, chunk_tree_uuid);
2615 return (u8 *)ptr;
2616} 2686}
2617 2687
2618static inline int btrfs_is_leaf(struct extent_buffer *eb) 2688static inline int btrfs_is_leaf(struct extent_buffer *eb)
@@ -2830,6 +2900,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
2830 csum_type, 16); 2900 csum_type, 16);
2831BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, 2901BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
2832 cache_generation, 64); 2902 cache_generation, 64);
2903BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
2904BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
2905 uuid_tree_generation, 64);
2833 2906
2834static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 2907static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
2835{ 2908{
@@ -2847,6 +2920,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
2847 2920
2848/* struct btrfs_file_extent_item */ 2921/* struct btrfs_file_extent_item */
2849BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); 2922BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
2923BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
2924 struct btrfs_file_extent_item, disk_bytenr, 64);
2925BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
2926 struct btrfs_file_extent_item, offset, 64);
2927BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
2928 struct btrfs_file_extent_item, generation, 64);
2929BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
2930 struct btrfs_file_extent_item, num_bytes, 64);
2850 2931
2851static inline unsigned long 2932static inline unsigned long
2852btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) 2933btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -3107,11 +3188,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3107 struct btrfs_root *root, 3188 struct btrfs_root *root,
3108 u64 root_objectid, u64 owner, u64 offset, 3189 u64 root_objectid, u64 owner, u64 offset,
3109 struct btrfs_key *ins); 3190 struct btrfs_key *ins);
3110int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3191int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3111 struct btrfs_root *root, 3192 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3112 u64 num_bytes, u64 min_alloc_size, 3193 struct btrfs_key *ins, int is_data);
3113 u64 empty_size, u64 hint_byte,
3114 struct btrfs_key *ins, int is_data);
3115int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3194int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3116 struct extent_buffer *buf, int full_backref, int for_cow); 3195 struct extent_buffer *buf, int full_backref, int for_cow);
3117int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3196int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3175,7 +3254,7 @@ void btrfs_orphan_release_metadata(struct inode *inode);
3175int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, 3254int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
3176 struct btrfs_block_rsv *rsv, 3255 struct btrfs_block_rsv *rsv,
3177 int nitems, 3256 int nitems,
3178 u64 *qgroup_reserved); 3257 u64 *qgroup_reserved, bool use_global_rsv);
3179void btrfs_subvolume_release_metadata(struct btrfs_root *root, 3258void btrfs_subvolume_release_metadata(struct btrfs_root *root,
3180 struct btrfs_block_rsv *rsv, 3259 struct btrfs_block_rsv *rsv,
3181 u64 qgroup_reserved); 3260 u64 qgroup_reserved);
@@ -3245,6 +3324,7 @@ enum btrfs_compare_tree_result {
3245 BTRFS_COMPARE_TREE_NEW, 3324 BTRFS_COMPARE_TREE_NEW,
3246 BTRFS_COMPARE_TREE_DELETED, 3325 BTRFS_COMPARE_TREE_DELETED,
3247 BTRFS_COMPARE_TREE_CHANGED, 3326 BTRFS_COMPARE_TREE_CHANGED,
3327 BTRFS_COMPARE_TREE_SAME,
3248}; 3328};
3249typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, 3329typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
3250 struct btrfs_root *right_root, 3330 struct btrfs_root *right_root,
@@ -3380,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
3380 kfree(fs_info->dev_root); 3460 kfree(fs_info->dev_root);
3381 kfree(fs_info->csum_root); 3461 kfree(fs_info->csum_root);
3382 kfree(fs_info->quota_root); 3462 kfree(fs_info->quota_root);
3463 kfree(fs_info->uuid_root);
3383 kfree(fs_info->super_copy); 3464 kfree(fs_info->super_copy);
3384 kfree(fs_info->super_for_commit); 3465 kfree(fs_info->super_for_commit);
3385 kfree(fs_info); 3466 kfree(fs_info);
@@ -3414,8 +3495,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
3414 struct btrfs_root *root, 3495 struct btrfs_root *root,
3415 struct btrfs_key *key, 3496 struct btrfs_key *key,
3416 struct btrfs_root_item *item); 3497 struct btrfs_root_item *item);
3417void btrfs_read_root_item(struct extent_buffer *eb, int slot,
3418 struct btrfs_root_item *item);
3419int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, 3498int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
3420 struct btrfs_path *path, struct btrfs_root_item *root_item, 3499 struct btrfs_path *path, struct btrfs_root_item *root_item,
3421 struct btrfs_key *root_key); 3500 struct btrfs_key *root_key);
@@ -3426,6 +3505,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
3426void btrfs_update_root_times(struct btrfs_trans_handle *trans, 3505void btrfs_update_root_times(struct btrfs_trans_handle *trans,
3427 struct btrfs_root *root); 3506 struct btrfs_root *root);
3428 3507
3508/* uuid-tree.c */
3509int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
3510 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
3511 u64 subid);
3512int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
3513 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
3514 u64 subid);
3515int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
3516 int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
3517 u64));
3518
3429/* dir-item.c */ 3519/* dir-item.c */
3430int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, 3520int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
3431 const char *name, int name_len); 3521 const char *name, int name_len);
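A hedged sketch of a typical caller of the new uuid-tree.c interface: after a subvolume is created, or a received UUID is recorded, the UUID is added to the UUID tree so it can later be mapped back to the subvolume id. Variable names and the error handling shown are illustrative only.

	ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
				  root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
				  objectid);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);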
@@ -3509,12 +3599,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
3509 struct btrfs_inode_extref **extref_ret); 3599 struct btrfs_inode_extref **extref_ret);
3510 3600
3511/* file-item.c */ 3601/* file-item.c */
3602struct btrfs_dio_private;
3512int btrfs_del_csums(struct btrfs_trans_handle *trans, 3603int btrfs_del_csums(struct btrfs_trans_handle *trans,
3513 struct btrfs_root *root, u64 bytenr, u64 len); 3604 struct btrfs_root *root, u64 bytenr, u64 len);
3514int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 3605int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
3515 struct bio *bio, u32 *dst); 3606 struct bio *bio, u32 *dst);
3516int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 3607int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
3517 struct bio *bio, u64 logical_offset); 3608 struct btrfs_dio_private *dip, struct bio *bio,
3609 u64 logical_offset);
3518int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3610int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3519 struct btrfs_root *root, 3611 struct btrfs_root *root,
3520 u64 objectid, u64 pos, 3612 u64 objectid, u64 pos,
@@ -3552,8 +3644,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
3552struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, 3644struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
3553 size_t pg_offset, u64 start, u64 len, 3645 size_t pg_offset, u64 start, u64 len,
3554 int create); 3646 int create);
3555noinline int can_nocow_extent(struct btrfs_trans_handle *trans, 3647noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
3556 struct inode *inode, u64 offset, u64 *len,
3557 u64 *orig_start, u64 *orig_block_len, 3648 u64 *orig_start, u64 *orig_block_len,
3558 u64 *ram_bytes); 3649 u64 *ram_bytes);
3559 3650
@@ -3643,11 +3734,15 @@ extern const struct dentry_operations btrfs_dentry_operations;
3643long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3734long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
3644void btrfs_update_iflags(struct inode *inode); 3735void btrfs_update_iflags(struct inode *inode);
3645void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); 3736void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
3737int btrfs_is_empty_uuid(u8 *uuid);
3646int btrfs_defrag_file(struct inode *inode, struct file *file, 3738int btrfs_defrag_file(struct inode *inode, struct file *file,
3647 struct btrfs_ioctl_defrag_range_args *range, 3739 struct btrfs_ioctl_defrag_range_args *range,
3648 u64 newer_than, unsigned long max_pages); 3740 u64 newer_than, unsigned long max_pages);
3649void btrfs_get_block_group_info(struct list_head *groups_list, 3741void btrfs_get_block_group_info(struct list_head *groups_list,
3650 struct btrfs_ioctl_space_info *space); 3742 struct btrfs_ioctl_space_info *space);
3743void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3744 struct btrfs_ioctl_balance_args *bargs);
3745
3651 3746
3652/* file.c */ 3747/* file.c */
3653int btrfs_auto_defrag_init(void); 3748int btrfs_auto_defrag_init(void);
@@ -3720,6 +3815,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3720#define btrfs_debug(fs_info, fmt, args...) \ 3815#define btrfs_debug(fs_info, fmt, args...) \
3721 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) 3816 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3722 3817
3818#ifdef CONFIG_BTRFS_ASSERT
3819
3820static inline void assfail(char *expr, char *file, int line)
3821{
3822 printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
3823 expr, file, line);
3824 BUG();
3825}
3826
3827#define ASSERT(expr) \
3828 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
3829#else
3830#define ASSERT(expr) ((void)0)
3831#endif
3832
3833#define btrfs_assert()
3723__printf(5, 6) 3834__printf(5, 6)
3724void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 3835void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
3725 unsigned int line, int errno, const char *fmt, ...); 3836 unsigned int line, int errno, const char *fmt, ...);
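Usage note for the new ASSERT() helper: with CONFIG_BTRFS_ASSERT enabled it prints the failed expression with file and line and then BUG()s; otherwise it compiles away entirely, so it is intended for developer-only invariants, e.g.:

	ASSERT(num_bytes > 0);	/* no-op unless CONFIG_BTRFS_ASSERT is set */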
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 375510913fe7..cbd9523ad09c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -21,6 +21,7 @@
21#include "delayed-inode.h" 21#include "delayed-inode.h"
22#include "disk-io.h" 22#include "disk-io.h"
23#include "transaction.h" 23#include "transaction.h"
24#include "ctree.h"
24 25
25#define BTRFS_DELAYED_WRITEBACK 512 26#define BTRFS_DELAYED_WRITEBACK 512
26#define BTRFS_DELAYED_BACKGROUND 128 27#define BTRFS_DELAYED_BACKGROUND 128
@@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1453 1454
1454 dir_item = (struct btrfs_dir_item *)delayed_item->data; 1455 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1455 dir_item->location = *disk_key; 1456 dir_item->location = *disk_key;
1456 dir_item->transid = cpu_to_le64(trans->transid); 1457 btrfs_set_stack_dir_transid(dir_item, trans->transid);
1457 dir_item->data_len = 0; 1458 btrfs_set_stack_dir_data_len(dir_item, 0);
1458 dir_item->name_len = cpu_to_le16(name_len); 1459 btrfs_set_stack_dir_name_len(dir_item, name_len);
1459 dir_item->type = type; 1460 btrfs_set_stack_dir_type(dir_item, type);
1460 memcpy((char *)(dir_item + 1), name, name_len); 1461 memcpy((char *)(dir_item + 1), name, name_len);
1461 1462
1462 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); 1463 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
@@ -1470,13 +1471,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1470 mutex_lock(&delayed_node->mutex); 1471 mutex_lock(&delayed_node->mutex);
1471 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1472 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1472 if (unlikely(ret)) { 1473 if (unlikely(ret)) {
1473 printk(KERN_ERR "err add delayed dir index item(name: %s) into " 1474 printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
1474 "the insertion tree of the delayed node" 1475 "into the insertion tree of the delayed node"
1475 "(root id: %llu, inode id: %llu, errno: %d)\n", 1476 "(root id: %llu, inode id: %llu, errno: %d)\n",
1476 name, 1477 name_len, name, delayed_node->root->objectid,
1477 (unsigned long long)delayed_node->root->objectid, 1478 delayed_node->inode_id, ret);
1478 (unsigned long long)delayed_node->inode_id,
1479 ret);
1480 BUG(); 1479 BUG();
1481 } 1480 }
1482 mutex_unlock(&delayed_node->mutex); 1481 mutex_unlock(&delayed_node->mutex);
@@ -1547,9 +1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1547 printk(KERN_ERR "err add delayed dir index item(index: %llu) " 1546 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1548 "into the deletion tree of the delayed node" 1547 "into the deletion tree of the delayed node"
1549 "(root id: %llu, inode id: %llu, errno: %d)\n", 1548 "(root id: %llu, inode id: %llu, errno: %d)\n",
1550 (unsigned long long)index, 1549 index, node->root->objectid, node->inode_id,
1551 (unsigned long long)node->root->objectid,
1552 (unsigned long long)node->inode_id,
1553 ret); 1550 ret);
1554 BUG(); 1551 BUG();
1555 } 1552 }
@@ -1699,7 +1696,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1699 1696
1700 di = (struct btrfs_dir_item *)curr->data; 1697 di = (struct btrfs_dir_item *)curr->data;
1701 name = (char *)(di + 1); 1698 name = (char *)(di + 1);
1702 name_len = le16_to_cpu(di->name_len); 1699 name_len = btrfs_stack_dir_name_len(di);
1703 1700
1704 d_type = btrfs_filetype_table[di->type]; 1701 d_type = btrfs_filetype_table[di->type];
1705 btrfs_disk_key_to_cpu(&location, &di->location); 1702 btrfs_disk_key_to_cpu(&location, &di->location);
@@ -1716,27 +1713,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1716 return 0; 1713 return 0;
1717} 1714}
1718 1715
1719BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1720 generation, 64);
1721BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1722 sequence, 64);
1723BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1724 transid, 64);
1725BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1726BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1727 nbytes, 64);
1728BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1729 block_group, 64);
1730BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1731BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1732BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1733BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1734BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1735BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1736
1737BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1738BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1739
1740static void fill_stack_inode_item(struct btrfs_trans_handle *trans, 1716static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1741 struct btrfs_inode_item *inode_item, 1717 struct btrfs_inode_item *inode_item,
1742 struct inode *inode) 1718 struct inode *inode)
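The stack accessors deleted above are the ones that moved into ctree.h earlier in this diff; fill_stack_inode_item() keeps using them unchanged. A trimmed illustration of that call pattern, not the full function body:

	btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
	btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);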
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index c219463fb1fd..e4d467be2dd4 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
241 return 0; 241 return 0;
242} 242}
243 243
244static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, 244static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
245 struct btrfs_delayed_ref_root *delayed_refs, 245 struct btrfs_delayed_ref_root *delayed_refs,
246 struct btrfs_delayed_ref_node *ref) 246 struct btrfs_delayed_ref_node *ref)
247{ 247{
@@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
600 INIT_LIST_HEAD(&head_ref->cluster); 600 INIT_LIST_HEAD(&head_ref->cluster);
601 mutex_init(&head_ref->mutex); 601 mutex_init(&head_ref->mutex);
602 602
603 trace_btrfs_delayed_ref_head(ref, head_ref, action); 603 trace_add_delayed_ref_head(ref, head_ref, action);
604 604
605 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 605 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
606 606
@@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
661 ref->type = BTRFS_TREE_BLOCK_REF_KEY; 661 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
662 full_ref->level = level; 662 full_ref->level = level;
663 663
664 trace_btrfs_delayed_tree_ref(ref, full_ref, action); 664 trace_add_delayed_tree_ref(ref, full_ref, action);
665 665
666 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 666 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
667 667
@@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
722 full_ref->objectid = owner; 722 full_ref->objectid = owner;
723 full_ref->offset = offset; 723 full_ref->offset = offset;
724 724
725 trace_btrfs_delayed_data_ref(ref, full_ref, action); 725 trace_add_delayed_data_ref(ref, full_ref, action);
726 726
727 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 727 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
728 728
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5f8f3341c099..a64435359385 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -148,13 +148,13 @@ no_valid_dev_replace_entry_found:
148 !btrfs_test_opt(dev_root, DEGRADED)) { 148 !btrfs_test_opt(dev_root, DEGRADED)) {
149 ret = -EIO; 149 ret = -EIO;
150 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", 150 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
151 (unsigned long long)src_devid); 151 src_devid);
152 } 152 }
153 if (!dev_replace->tgtdev && 153 if (!dev_replace->tgtdev &&
154 !btrfs_test_opt(dev_root, DEGRADED)) { 154 !btrfs_test_opt(dev_root, DEGRADED)) {
155 ret = -EIO; 155 ret = -EIO;
156 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", 156 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
157 (unsigned long long)BTRFS_DEV_REPLACE_DEVID); 157 BTRFS_DEV_REPLACE_DEVID);
158 } 158 }
159 if (dev_replace->tgtdev) { 159 if (dev_replace->tgtdev) {
160 if (dev_replace->srcdev) { 160 if (dev_replace->srcdev) {
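This hunk, and many similar ones below, simply drop the (unsigned long long) casts from printk arguments. That is safe because the kernel defines u64 as unsigned long long on every architecture btrfs builds for, so %llu matches a u64 directly; for example:

	u64 devid = 42;

	pr_warn("btrfs: example: devid %llu is missing\n", devid);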
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6b092a1c4e37..4cbb00af92ff 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,6 +31,7 @@
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <linux/uuid.h> 33#include <linux/uuid.h>
34#include <linux/semaphore.h>
34#include <asm/unaligned.h> 35#include <asm/unaligned.h>
35#include "compat.h" 36#include "compat.h"
36#include "ctree.h" 37#include "ctree.h"
@@ -302,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
302 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " 303 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
303 "failed on %llu wanted %X found %X " 304 "failed on %llu wanted %X found %X "
304 "level %d\n", 305 "level %d\n",
305 root->fs_info->sb->s_id, 306 root->fs_info->sb->s_id, buf->start,
306 (unsigned long long)buf->start, val, found, 307 val, found, btrfs_header_level(buf));
307 btrfs_header_level(buf));
308 if (result != (char *)&inline_result) 308 if (result != (char *)&inline_result)
309 kfree(result); 309 kfree(result);
310 return 1; 310 return 1;
@@ -345,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
345 } 345 }
346 printk_ratelimited("parent transid verify failed on %llu wanted %llu " 346 printk_ratelimited("parent transid verify failed on %llu wanted %llu "
347 "found %llu\n", 347 "found %llu\n",
348 (unsigned long long)eb->start, 348 eb->start, parent_transid, btrfs_header_generation(eb));
349 (unsigned long long)parent_transid,
350 (unsigned long long)btrfs_header_generation(eb));
351 ret = 1; 349 ret = 1;
352 clear_extent_buffer_uptodate(eb); 350 clear_extent_buffer_uptodate(eb);
353out: 351out:
@@ -497,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
497 u8 fsid[BTRFS_UUID_SIZE]; 495 u8 fsid[BTRFS_UUID_SIZE];
498 int ret = 1; 496 int ret = 1;
499 497
500 read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), 498 read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE);
501 BTRFS_FSID_SIZE);
502 while (fs_devices) { 499 while (fs_devices) {
503 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { 500 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
504 ret = 0; 501 ret = 0;
@@ -512,8 +509,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
512#define CORRUPT(reason, eb, root, slot) \ 509#define CORRUPT(reason, eb, root, slot) \
513 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ 510 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
514 "root=%llu, slot=%d\n", reason, \ 511 "root=%llu, slot=%d\n", reason, \
515 (unsigned long long)btrfs_header_bytenr(eb), \ 512 btrfs_header_bytenr(eb), root->objectid, slot)
516 (unsigned long long)root->objectid, slot)
517 513
518static noinline int check_leaf(struct btrfs_root *root, 514static noinline int check_leaf(struct btrfs_root *root,
519 struct extent_buffer *leaf) 515 struct extent_buffer *leaf)
@@ -576,8 +572,9 @@ static noinline int check_leaf(struct btrfs_root *root,
576 return 0; 572 return 0;
577} 573}
578 574
579static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 575static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
580 struct extent_state *state, int mirror) 576 u64 phy_offset, struct page *page,
577 u64 start, u64 end, int mirror)
581{ 578{
582 struct extent_io_tree *tree; 579 struct extent_io_tree *tree;
583 u64 found_start; 580 u64 found_start;
@@ -612,14 +609,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
612 if (found_start != eb->start) { 609 if (found_start != eb->start) {
613 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 610 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
614 "%llu %llu\n", 611 "%llu %llu\n",
615 (unsigned long long)found_start, 612 found_start, eb->start);
616 (unsigned long long)eb->start);
617 ret = -EIO; 613 ret = -EIO;
618 goto err; 614 goto err;
619 } 615 }
620 if (check_tree_block_fsid(root, eb)) { 616 if (check_tree_block_fsid(root, eb)) {
621 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 617 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
622 (unsigned long long)eb->start); 618 eb->start);
623 ret = -EIO; 619 ret = -EIO;
624 goto err; 620 goto err;
625 } 621 }
@@ -1148,6 +1144,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1148 return NULL; 1144 return NULL;
1149 1145
1150 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 1146 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
1147 if (ret) {
1148 free_extent_buffer(buf);
1149 return NULL;
1150 }
1151 return buf; 1151 return buf;
1152 1152
1153} 1153}
@@ -1291,11 +1291,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 btrfs_set_header_owner(leaf, objectid); 1291 btrfs_set_header_owner(leaf, objectid);
1292 root->node = leaf; 1292 root->node = leaf;
1293 1293
1294 write_extent_buffer(leaf, fs_info->fsid, 1294 write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf),
1295 (unsigned long)btrfs_header_fsid(leaf),
1296 BTRFS_FSID_SIZE); 1295 BTRFS_FSID_SIZE);
1297 write_extent_buffer(leaf, fs_info->chunk_tree_uuid, 1296 write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
1298 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 1297 btrfs_header_chunk_tree_uuid(leaf),
1299 BTRFS_UUID_SIZE); 1298 BTRFS_UUID_SIZE);
1300 btrfs_mark_buffer_dirty(leaf); 1299 btrfs_mark_buffer_dirty(leaf);
1301 1300
@@ -1379,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1379 root->node = leaf; 1378 root->node = leaf;
1380 1379
1381 write_extent_buffer(root->node, root->fs_info->fsid, 1380 write_extent_buffer(root->node, root->fs_info->fsid,
1382 (unsigned long)btrfs_header_fsid(root->node), 1381 btrfs_header_fsid(root->node), BTRFS_FSID_SIZE);
1383 BTRFS_FSID_SIZE);
1384 btrfs_mark_buffer_dirty(root->node); 1382 btrfs_mark_buffer_dirty(root->node);
1385 btrfs_tree_unlock(root->node); 1383 btrfs_tree_unlock(root->node);
1386 return root; 1384 return root;
@@ -1413,11 +1411,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1413 log_root->root_key.offset = root->root_key.objectid; 1411 log_root->root_key.offset = root->root_key.objectid;
1414 1412
1415 inode_item = &log_root->root_item.inode; 1413 inode_item = &log_root->root_item.inode;
1416 inode_item->generation = cpu_to_le64(1); 1414 btrfs_set_stack_inode_generation(inode_item, 1);
1417 inode_item->size = cpu_to_le64(3); 1415 btrfs_set_stack_inode_size(inode_item, 3);
1418 inode_item->nlink = cpu_to_le32(1); 1416 btrfs_set_stack_inode_nlink(inode_item, 1);
1419 inode_item->nbytes = cpu_to_le64(root->leafsize); 1417 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
1420 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1418 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
1421 1419
1422 btrfs_set_root_node(&log_root->root_item, log_root->node); 1420 btrfs_set_root_node(&log_root->root_item, log_root->node);
1423 1421
@@ -1428,8 +1426,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1428 return 0; 1426 return 0;
1429} 1427}
1430 1428
1431struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, 1429static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1432 struct btrfs_key *key) 1430 struct btrfs_key *key)
1433{ 1431{
1434 struct btrfs_root *root; 1432 struct btrfs_root *root;
1435 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1433 struct btrfs_fs_info *fs_info = tree_root->fs_info;
@@ -1529,8 +1527,8 @@ fail:
1529 return ret; 1527 return ret;
1530} 1528}
1531 1529
1532struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1530static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1533 u64 root_id) 1531 u64 root_id)
1534{ 1532{
1535 struct btrfs_root *root; 1533 struct btrfs_root *root;
1536 1534
@@ -1581,10 +1579,16 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1581 if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) 1579 if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
1582 return fs_info->quota_root ? fs_info->quota_root : 1580 return fs_info->quota_root ? fs_info->quota_root :
1583 ERR_PTR(-ENOENT); 1581 ERR_PTR(-ENOENT);
1582 if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
1583 return fs_info->uuid_root ? fs_info->uuid_root :
1584 ERR_PTR(-ENOENT);
1584again: 1585again:
1585 root = btrfs_lookup_fs_root(fs_info, location->objectid); 1586 root = btrfs_lookup_fs_root(fs_info, location->objectid);
1586 if (root) 1587 if (root) {
1588 if (btrfs_root_refs(&root->root_item) == 0)
1589 return ERR_PTR(-ENOENT);
1587 return root; 1590 return root;
1591 }
1588 1592
1589 root = btrfs_read_fs_root(fs_info->tree_root, location); 1593 root = btrfs_read_fs_root(fs_info->tree_root, location);
1590 if (IS_ERR(root)) 1594 if (IS_ERR(root))
@@ -1737,7 +1741,7 @@ static int transaction_kthread(void *arg)
1737 1741
1738 do { 1742 do {
1739 cannot_commit = false; 1743 cannot_commit = false;
1740 delay = HZ * 30; 1744 delay = HZ * root->fs_info->commit_interval;
1741 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1745 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1742 1746
1743 spin_lock(&root->fs_info->trans_lock); 1747 spin_lock(&root->fs_info->trans_lock);
@@ -1749,7 +1753,8 @@ static int transaction_kthread(void *arg)
1749 1753
1750 now = get_seconds(); 1754 now = get_seconds();
1751 if (cur->state < TRANS_STATE_BLOCKED && 1755 if (cur->state < TRANS_STATE_BLOCKED &&
1752 (now < cur->start_time || now - cur->start_time < 30)) { 1756 (now < cur->start_time ||
1757 now - cur->start_time < root->fs_info->commit_interval)) {
1753 spin_unlock(&root->fs_info->trans_lock); 1758 spin_unlock(&root->fs_info->trans_lock);
1754 delay = HZ * 5; 1759 delay = HZ * 5;
1755 goto sleep; 1760 goto sleep;
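The transaction kthread now derives both its sleep period and the "is the running transaction old enough to commit" test from fs_info->commit_interval instead of a hardcoded 30 seconds; a commit=<seconds> mount option added elsewhere in this series presumably fills the field in. Condensed sketch of the resulting logic:

	unsigned long delay = HZ * root->fs_info->commit_interval;
	bool old_enough = now >= cur->start_time &&
			  now - cur->start_time >= root->fs_info->commit_interval;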
@@ -2038,6 +2043,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
2038 info->quota_root->node = NULL; 2043 info->quota_root->node = NULL;
2039 info->quota_root->commit_root = NULL; 2044 info->quota_root->commit_root = NULL;
2040 } 2045 }
2046 if (info->uuid_root) {
2047 free_extent_buffer(info->uuid_root->node);
2048 free_extent_buffer(info->uuid_root->commit_root);
2049 info->uuid_root->node = NULL;
2050 info->uuid_root->commit_root = NULL;
2051 }
2041 if (chunk_root) { 2052 if (chunk_root) {
2042 free_extent_buffer(info->chunk_root->node); 2053 free_extent_buffer(info->chunk_root->node);
2043 free_extent_buffer(info->chunk_root->commit_root); 2054 free_extent_buffer(info->chunk_root->commit_root);
@@ -2098,11 +2109,14 @@ int open_ctree(struct super_block *sb,
2098 struct btrfs_root *chunk_root; 2109 struct btrfs_root *chunk_root;
2099 struct btrfs_root *dev_root; 2110 struct btrfs_root *dev_root;
2100 struct btrfs_root *quota_root; 2111 struct btrfs_root *quota_root;
2112 struct btrfs_root *uuid_root;
2101 struct btrfs_root *log_tree_root; 2113 struct btrfs_root *log_tree_root;
2102 int ret; 2114 int ret;
2103 int err = -EINVAL; 2115 int err = -EINVAL;
2104 int num_backups_tried = 0; 2116 int num_backups_tried = 0;
2105 int backup_index = 0; 2117 int backup_index = 0;
2118 bool create_uuid_tree;
2119 bool check_uuid_tree;
2106 2120
2107 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2121 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2108 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2122 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2189,6 +2203,7 @@ int open_ctree(struct super_block *sb,
2189 fs_info->defrag_inodes = RB_ROOT; 2203 fs_info->defrag_inodes = RB_ROOT;
2190 fs_info->free_chunk_space = 0; 2204 fs_info->free_chunk_space = 0;
2191 fs_info->tree_mod_log = RB_ROOT; 2205 fs_info->tree_mod_log = RB_ROOT;
2206 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2192 2207
2193 /* readahead state */ 2208 /* readahead state */
2194 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2209 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
@@ -2270,6 +2285,7 @@ int open_ctree(struct super_block *sb,
2270 2285
2271 2286
2272 mutex_init(&fs_info->ordered_operations_mutex); 2287 mutex_init(&fs_info->ordered_operations_mutex);
2288 mutex_init(&fs_info->ordered_extent_flush_mutex);
2273 mutex_init(&fs_info->tree_log_mutex); 2289 mutex_init(&fs_info->tree_log_mutex);
2274 mutex_init(&fs_info->chunk_mutex); 2290 mutex_init(&fs_info->chunk_mutex);
2275 mutex_init(&fs_info->transaction_kthread_mutex); 2291 mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2278,6 +2294,7 @@ int open_ctree(struct super_block *sb,
2278 init_rwsem(&fs_info->extent_commit_sem); 2294 init_rwsem(&fs_info->extent_commit_sem);
2279 init_rwsem(&fs_info->cleanup_work_sem); 2295 init_rwsem(&fs_info->cleanup_work_sem);
2280 init_rwsem(&fs_info->subvol_sem); 2296 init_rwsem(&fs_info->subvol_sem);
2297 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
2281 fs_info->dev_replace.lock_owner = 0; 2298 fs_info->dev_replace.lock_owner = 0;
2282 atomic_set(&fs_info->dev_replace.nesting_level, 0); 2299 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2283 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); 2300 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
@@ -2383,7 +2400,7 @@ int open_ctree(struct super_block *sb,
2383 if (features) { 2400 if (features) {
2384 printk(KERN_ERR "BTRFS: couldn't mount because of " 2401 printk(KERN_ERR "BTRFS: couldn't mount because of "
2385 "unsupported optional features (%Lx).\n", 2402 "unsupported optional features (%Lx).\n",
2386 (unsigned long long)features); 2403 features);
2387 err = -EINVAL; 2404 err = -EINVAL;
2388 goto fail_alloc; 2405 goto fail_alloc;
2389 } 2406 }
@@ -2453,7 +2470,7 @@ int open_ctree(struct super_block *sb,
2453 if (!(sb->s_flags & MS_RDONLY) && features) { 2470 if (!(sb->s_flags & MS_RDONLY) && features) {
2454 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 2471 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
2455 "unsupported option features (%Lx).\n", 2472 "unsupported option features (%Lx).\n",
2456 (unsigned long long)features); 2473 features);
2457 err = -EINVAL; 2474 err = -EINVAL;
2458 goto fail_alloc; 2475 goto fail_alloc;
2459 } 2476 }
@@ -2466,20 +2483,17 @@ int open_ctree(struct super_block *sb,
2466 &fs_info->generic_worker); 2483 &fs_info->generic_worker);
2467 2484
2468 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", 2485 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
2469 fs_info->thread_pool_size, 2486 fs_info->thread_pool_size, NULL);
2470 &fs_info->generic_worker);
2471 2487
2472 btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", 2488 btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
2473 fs_info->thread_pool_size, 2489 fs_info->thread_pool_size, NULL);
2474 &fs_info->generic_worker);
2475 2490
2476 btrfs_init_workers(&fs_info->submit_workers, "submit", 2491 btrfs_init_workers(&fs_info->submit_workers, "submit",
2477 min_t(u64, fs_devices->num_devices, 2492 min_t(u64, fs_devices->num_devices,
2478 fs_info->thread_pool_size), 2493 fs_info->thread_pool_size), NULL);
2479 &fs_info->generic_worker);
2480 2494
2481 btrfs_init_workers(&fs_info->caching_workers, "cache", 2495 btrfs_init_workers(&fs_info->caching_workers, "cache",
2482 2, &fs_info->generic_worker); 2496 fs_info->thread_pool_size, NULL);
2483 2497
2484 /* a higher idle thresh on the submit workers makes it much more 2498 /* a higher idle thresh on the submit workers makes it much more
2485 * likely that bios will be send down in a sane order to the 2499 * likely that bios will be send down in a sane order to the
@@ -2575,7 +2589,7 @@ int open_ctree(struct super_block *sb,
2575 sb->s_blocksize = sectorsize; 2589 sb->s_blocksize = sectorsize;
2576 sb->s_blocksize_bits = blksize_bits(sectorsize); 2590 sb->s_blocksize_bits = blksize_bits(sectorsize);
2577 2591
2578 if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { 2592 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
2579 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2593 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
2580 goto fail_sb_buffer; 2594 goto fail_sb_buffer;
2581 } 2595 }
@@ -2615,8 +2629,7 @@ int open_ctree(struct super_block *sb,
2615 chunk_root->commit_root = btrfs_root_node(chunk_root); 2629 chunk_root->commit_root = btrfs_root_node(chunk_root);
2616 2630
2617 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 2631 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
2618 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 2632 btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE);
2619 BTRFS_UUID_SIZE);
2620 2633
2621 ret = btrfs_read_chunk_tree(chunk_root); 2634 ret = btrfs_read_chunk_tree(chunk_root);
2622 if (ret) { 2635 if (ret) {
@@ -2696,6 +2709,22 @@ retry_root_backup:
2696 fs_info->quota_root = quota_root; 2709 fs_info->quota_root = quota_root;
2697 } 2710 }
2698 2711
2712 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2713 uuid_root = btrfs_read_tree_root(tree_root, &location);
2714 if (IS_ERR(uuid_root)) {
2715 ret = PTR_ERR(uuid_root);
2716 if (ret != -ENOENT)
2717 goto recovery_tree_root;
2718 create_uuid_tree = true;
2719 check_uuid_tree = false;
2720 } else {
2721 uuid_root->track_dirty = 1;
2722 fs_info->uuid_root = uuid_root;
2723 create_uuid_tree = false;
2724 check_uuid_tree =
2725 generation != btrfs_super_uuid_tree_generation(disk_super);
2726 }
2727
2699 fs_info->generation = generation; 2728 fs_info->generation = generation;
2700 fs_info->last_trans_committed = generation; 2729 fs_info->last_trans_committed = generation;
2701 2730
@@ -2882,6 +2911,29 @@ retry_root_backup:
2882 2911
2883 btrfs_qgroup_rescan_resume(fs_info); 2912 btrfs_qgroup_rescan_resume(fs_info);
2884 2913
2914 if (create_uuid_tree) {
2915 pr_info("btrfs: creating UUID tree\n");
2916 ret = btrfs_create_uuid_tree(fs_info);
2917 if (ret) {
2918 pr_warn("btrfs: failed to create the UUID tree %d\n",
2919 ret);
2920 close_ctree(tree_root);
2921 return ret;
2922 }
2923 } else if (check_uuid_tree ||
2924 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
2925 pr_info("btrfs: checking UUID tree\n");
2926 ret = btrfs_check_uuid_tree(fs_info);
2927 if (ret) {
2928 pr_warn("btrfs: failed to check the UUID tree %d\n",
2929 ret);
2930 close_ctree(tree_root);
2931 return ret;
2932 }
2933 } else {
2934 fs_info->update_uuid_tree_gen = 1;
2935 }
2936
2885 return 0; 2937 return 0;
2886 2938
2887fail_qgroup: 2939fail_qgroup:
@@ -2983,15 +3035,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2983 */ 3035 */
2984 for (i = 0; i < 1; i++) { 3036 for (i = 0; i < 1; i++) {
2985 bytenr = btrfs_sb_offset(i); 3037 bytenr = btrfs_sb_offset(i);
2986 if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) 3038 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
3039 i_size_read(bdev->bd_inode))
2987 break; 3040 break;
2988 bh = __bread(bdev, bytenr / 4096, 4096); 3041 bh = __bread(bdev, bytenr / 4096,
3042 BTRFS_SUPER_INFO_SIZE);
2989 if (!bh) 3043 if (!bh)
2990 continue; 3044 continue;
2991 3045
2992 super = (struct btrfs_super_block *)bh->b_data; 3046 super = (struct btrfs_super_block *)bh->b_data;
2993 if (btrfs_super_bytenr(super) != bytenr || 3047 if (btrfs_super_bytenr(super) != bytenr ||
2994 super->magic != cpu_to_le64(BTRFS_MAGIC)) { 3048 btrfs_super_magic(super) != BTRFS_MAGIC) {
2995 brelse(bh); 3049 brelse(bh);
2996 continue; 3050 continue;
2997 } 3051 }
@@ -3311,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3311 int total_errors = 0; 3365 int total_errors = 0;
3312 u64 flags; 3366 u64 flags;
3313 3367
3314 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
3315 do_barriers = !btrfs_test_opt(root, NOBARRIER); 3368 do_barriers = !btrfs_test_opt(root, NOBARRIER);
3316 backup_super_roots(root->fs_info); 3369 backup_super_roots(root->fs_info);
3317 3370
@@ -3320,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3320 3373
3321 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 3374 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3322 head = &root->fs_info->fs_devices->devices; 3375 head = &root->fs_info->fs_devices->devices;
3376 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
3323 3377
3324 if (do_barriers) { 3378 if (do_barriers) {
3325 ret = barrier_all_devices(root->fs_info); 3379 ret = barrier_all_devices(root->fs_info);
@@ -3362,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3362 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3416 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
3363 total_errors); 3417 total_errors);
3364 3418
3365 /* This shouldn't happen. FUA is masked off if unsupported */ 3419 /* FUA is masked off if unsupported and can't be the reason */
3366 BUG(); 3420 btrfs_error(root->fs_info, -EIO,
3421 "%d errors while writing supers", total_errors);
3422 return -EIO;
3367 } 3423 }
3368 3424
3369 total_errors = 0; 3425 total_errors = 0;
@@ -3421,6 +3477,8 @@ static void free_fs_root(struct btrfs_root *root)
3421{ 3477{
3422 iput(root->cache_inode); 3478 iput(root->cache_inode);
3423 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 3479 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
3480 btrfs_free_block_rsv(root, root->orphan_block_rsv);
3481 root->orphan_block_rsv = NULL;
3424 if (root->anon_dev) 3482 if (root->anon_dev)
3425 free_anon_bdev(root->anon_dev); 3483 free_anon_bdev(root->anon_dev);
3426 free_extent_buffer(root->node); 3484 free_extent_buffer(root->node);
@@ -3510,6 +3568,11 @@ int close_ctree(struct btrfs_root *root)
3510 fs_info->closing = 1; 3568 fs_info->closing = 1;
3511 smp_mb(); 3569 smp_mb();
3512 3570
3571 /* wait for the uuid_scan task to finish */
3572 down(&fs_info->uuid_tree_rescan_sem);
3573 /* avoid complains from lockdep et al., set sem back to initial state */
3574 up(&fs_info->uuid_tree_rescan_sem);
3575
3513 /* pause restriper - we want to resume on mount */ 3576 /* pause restriper - we want to resume on mount */
3514 btrfs_pause_balance(fs_info); 3577 btrfs_pause_balance(fs_info);
3515 3578
@@ -3573,6 +3636,9 @@ int close_ctree(struct btrfs_root *root)
3573 3636
3574 btrfs_free_stripe_hash_table(fs_info); 3637 btrfs_free_stripe_hash_table(fs_info);
3575 3638
3639 btrfs_free_block_rsv(root, root->orphan_block_rsv);
3640 root->orphan_block_rsv = NULL;
3641
3576 return 0; 3642 return 0;
3577} 3643}
3578 3644
@@ -3608,9 +3674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3608 if (transid != root->fs_info->generation) 3674 if (transid != root->fs_info->generation)
3609 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " 3675 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
3610 "found %llu running %llu\n", 3676 "found %llu running %llu\n",
3611 (unsigned long long)buf->start, 3677 buf->start, transid, root->fs_info->generation);
3612 (unsigned long long)transid,
3613 (unsigned long long)root->fs_info->generation);
3614 was_dirty = set_extent_buffer_dirty(buf); 3678 was_dirty = set_extent_buffer_dirty(buf);
3615 if (!was_dirty) 3679 if (!was_dirty)
3616 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, 3680 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
@@ -3744,8 +3808,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
3744 spin_unlock(&fs_info->ordered_root_lock); 3808 spin_unlock(&fs_info->ordered_root_lock);
3745} 3809}
3746 3810
3747int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 3811static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3748 struct btrfs_root *root) 3812 struct btrfs_root *root)
3749{ 3813{
3750 struct rb_node *node; 3814 struct rb_node *node;
3751 struct btrfs_delayed_ref_root *delayed_refs; 3815 struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1204c8ef6f32..cfb3cf711b34 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -113,7 +113,8 @@ static noinline int
113block_group_cache_done(struct btrfs_block_group_cache *cache) 113block_group_cache_done(struct btrfs_block_group_cache *cache)
114{ 114{
115 smp_mb(); 115 smp_mb();
116 return cache->cached == BTRFS_CACHE_FINISHED; 116 return cache->cached == BTRFS_CACHE_FINISHED ||
117 cache->cached == BTRFS_CACHE_ERROR;
117} 118}
118 119
119static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 120static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
@@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work)
389 u64 total_found = 0; 390 u64 total_found = 0;
390 u64 last = 0; 391 u64 last = 0;
391 u32 nritems; 392 u32 nritems;
392 int ret = 0; 393 int ret = -ENOMEM;
393 394
394 caching_ctl = container_of(work, struct btrfs_caching_control, work); 395 caching_ctl = container_of(work, struct btrfs_caching_control, work);
395 block_group = caching_ctl->block_group; 396 block_group = caching_ctl->block_group;
@@ -420,6 +421,7 @@ again:
420 /* need to make sure the commit_root doesn't disappear */ 421 /* need to make sure the commit_root doesn't disappear */
421 down_read(&fs_info->extent_commit_sem); 422 down_read(&fs_info->extent_commit_sem);
422 423
424next:
423 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 425 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
424 if (ret < 0) 426 if (ret < 0)
425 goto err; 427 goto err;
@@ -459,6 +461,16 @@ again:
459 continue; 461 continue;
460 } 462 }
461 463
464 if (key.objectid < last) {
465 key.objectid = last;
466 key.offset = 0;
467 key.type = BTRFS_EXTENT_ITEM_KEY;
468
469 caching_ctl->progress = last;
470 btrfs_release_path(path);
471 goto next;
472 }
473
462 if (key.objectid < block_group->key.objectid) { 474 if (key.objectid < block_group->key.objectid) {
463 path->slots[0]++; 475 path->slots[0]++;
464 continue; 476 continue;
@@ -506,6 +518,12 @@ err:
506 518
507 mutex_unlock(&caching_ctl->mutex); 519 mutex_unlock(&caching_ctl->mutex);
508out: 520out:
521 if (ret) {
522 spin_lock(&block_group->lock);
523 block_group->caching_ctl = NULL;
524 block_group->cached = BTRFS_CACHE_ERROR;
525 spin_unlock(&block_group->lock);
526 }
509 wake_up(&caching_ctl->wait); 527 wake_up(&caching_ctl->wait);
510 528
511 put_caching_control(caching_ctl); 529 put_caching_control(caching_ctl);
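
With the hunks above, a failed caching thread parks the block group in BTRFS_CACHE_ERROR and wakes everyone sleeping on caching_ctl->wait; block_group_cache_done() treats that state as finished so nobody sleeps forever, and a later hunk makes wait_block_group_cache_done() turn it into -EIO. An illustrative pthread sketch of that "wait until finished or failed" shape (the names and types are stand-ins, not btrfs structures):

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>

    enum cache_state { CACHE_STARTED, CACHE_FINISHED, CACHE_ERROR };

    struct block_group {
        pthread_mutex_t lock;
        pthread_cond_t  wait;
        enum cache_state cached;
    };

    static bool cache_done(const struct block_group *bg)
    {
        return bg->cached == CACHE_FINISHED || bg->cached == CACHE_ERROR;
    }

    /* Caching worker: record success or failure and wake every waiter. */
    static void caching_finish(struct block_group *bg, int ret)
    {
        pthread_mutex_lock(&bg->lock);
        bg->cached = ret ? CACHE_ERROR : CACHE_FINISHED;
        pthread_cond_broadcast(&bg->wait);
        pthread_mutex_unlock(&bg->lock);
    }

    /* Waiter: sleeps until the worker is done; -EIO if it gave up. */
    static int wait_cache_done(struct block_group *bg)
    {
        int err;

        pthread_mutex_lock(&bg->lock);
        while (!cache_done(bg))
            pthread_cond_wait(&bg->wait, &bg->lock);
        err = (bg->cached == CACHE_ERROR) ? -EIO : 0;
        pthread_mutex_unlock(&bg->lock);
        return err;
    }
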
@@ -771,10 +789,23 @@ again:
771 goto out_free; 789 goto out_free;
772 790
773 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { 791 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
774 key.type = BTRFS_EXTENT_ITEM_KEY; 792 metadata = 0;
775 key.offset = root->leafsize; 793 if (path->slots[0]) {
776 btrfs_release_path(path); 794 path->slots[0]--;
777 goto again; 795 btrfs_item_key_to_cpu(path->nodes[0], &key,
796 path->slots[0]);
797 if (key.objectid == bytenr &&
798 key.type == BTRFS_EXTENT_ITEM_KEY &&
799 key.offset == root->leafsize)
800 ret = 0;
801 }
802 if (ret) {
803 key.objectid = bytenr;
804 key.type = BTRFS_EXTENT_ITEM_KEY;
805 key.offset = root->leafsize;
806 btrfs_release_path(path);
807 goto again;
808 }
778 } 809 }
779 810
780 if (ret == 0) { 811 if (ret == 0) {
@@ -2011,6 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2011 ins.type = BTRFS_EXTENT_ITEM_KEY; 2042 ins.type = BTRFS_EXTENT_ITEM_KEY;
2012 2043
2013 ref = btrfs_delayed_node_to_data_ref(node); 2044 ref = btrfs_delayed_node_to_data_ref(node);
2045 trace_run_delayed_data_ref(node, ref, node->action);
2046
2014 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2047 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2015 parent = ref->parent; 2048 parent = ref->parent;
2016 else 2049 else
@@ -2154,6 +2187,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2154 SKINNY_METADATA); 2187 SKINNY_METADATA);
2155 2188
2156 ref = btrfs_delayed_node_to_tree_ref(node); 2189 ref = btrfs_delayed_node_to_tree_ref(node);
2190 trace_run_delayed_tree_ref(node, ref, node->action);
2191
2157 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2192 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2158 parent = ref->parent; 2193 parent = ref->parent;
2159 else 2194 else
@@ -2212,6 +2247,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2212 */ 2247 */
2213 BUG_ON(extent_op); 2248 BUG_ON(extent_op);
2214 head = btrfs_delayed_node_to_head(node); 2249 head = btrfs_delayed_node_to_head(node);
2250 trace_run_delayed_ref_head(node, head, node->action);
2251
2215 if (insert_reserved) { 2252 if (insert_reserved) {
2216 btrfs_pin_extent(root, node->bytenr, 2253 btrfs_pin_extent(root, node->bytenr,
2217 node->num_bytes, 1); 2254 node->num_bytes, 1);
@@ -2403,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2403 default: 2440 default:
2404 WARN_ON(1); 2441 WARN_ON(1);
2405 } 2442 }
2443 } else {
2444 list_del_init(&locked_ref->cluster);
2406 } 2445 }
2407 spin_unlock(&delayed_refs->lock); 2446 spin_unlock(&delayed_refs->lock);
2408 2447
@@ -2425,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2425 * list before we release it. 2464 * list before we release it.
2426 */ 2465 */
2427 if (btrfs_delayed_ref_is_head(ref)) { 2466 if (btrfs_delayed_ref_is_head(ref)) {
2428 list_del_init(&locked_ref->cluster);
2429 btrfs_delayed_ref_unlock(locked_ref); 2467 btrfs_delayed_ref_unlock(locked_ref);
2430 locked_ref = NULL; 2468 locked_ref = NULL;
2431 } 2469 }
@@ -3799,8 +3837,12 @@ again:
3799 if (force < space_info->force_alloc) 3837 if (force < space_info->force_alloc)
3800 force = space_info->force_alloc; 3838 force = space_info->force_alloc;
3801 if (space_info->full) { 3839 if (space_info->full) {
3840 if (should_alloc_chunk(extent_root, space_info, force))
3841 ret = -ENOSPC;
3842 else
3843 ret = 0;
3802 spin_unlock(&space_info->lock); 3844 spin_unlock(&space_info->lock);
3803 return 0; 3845 return ret;
3804 } 3846 }
3805 3847
3806 if (!should_alloc_chunk(extent_root, space_info, force)) { 3848 if (!should_alloc_chunk(extent_root, space_info, force)) {
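
The change above makes do_chunk_alloc() report -ENOSPC when the space_info is already full and the request would actually have triggered an allocation, instead of unconditionally returning 0. Roughly, and purely as an illustration:

    #include <errno.h>
    #include <stdbool.h>

    /*
     * When the space is already full there is no chunk left to allocate:
     * a request that would really have allocated now fails with -ENOSPC
     * so the caller can react, while a best-effort request still gets 0.
     */
    static int chunk_alloc_when_full(bool would_have_allocated)
    {
        return would_have_allocated ? -ENOSPC : 0;
    }
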
@@ -4320,6 +4362,9 @@ static struct btrfs_block_rsv *get_block_rsv(
4320 if (root == root->fs_info->csum_root && trans->adding_csums) 4362 if (root == root->fs_info->csum_root && trans->adding_csums)
4321 block_rsv = trans->block_rsv; 4363 block_rsv = trans->block_rsv;
4322 4364
4365 if (root == root->fs_info->uuid_root)
4366 block_rsv = trans->block_rsv;
4367
4323 if (!block_rsv) 4368 if (!block_rsv)
4324 block_rsv = root->block_rsv; 4369 block_rsv = root->block_rsv;
4325 4370
@@ -4729,10 +4774,12 @@ void btrfs_orphan_release_metadata(struct inode *inode)
4729int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, 4774int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
4730 struct btrfs_block_rsv *rsv, 4775 struct btrfs_block_rsv *rsv,
4731 int items, 4776 int items,
4732 u64 *qgroup_reserved) 4777 u64 *qgroup_reserved,
4778 bool use_global_rsv)
4733{ 4779{
4734 u64 num_bytes; 4780 u64 num_bytes;
4735 int ret; 4781 int ret;
4782 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4736 4783
4737 if (root->fs_info->quota_enabled) { 4784 if (root->fs_info->quota_enabled) {
4738 /* One for parent inode, two for dir entries */ 4785 /* One for parent inode, two for dir entries */
@@ -4751,6 +4798,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
4751 BTRFS_BLOCK_GROUP_METADATA); 4798 BTRFS_BLOCK_GROUP_METADATA);
4752 ret = btrfs_block_rsv_add(root, rsv, num_bytes, 4799 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
4753 BTRFS_RESERVE_FLUSH_ALL); 4800 BTRFS_RESERVE_FLUSH_ALL);
4801
4802 if (ret == -ENOSPC && use_global_rsv)
4803 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
4804
4754 if (ret) { 4805 if (ret) {
4755 if (*qgroup_reserved) 4806 if (*qgroup_reserved)
4756 btrfs_qgroup_free(root, *qgroup_reserved); 4807 btrfs_qgroup_free(root, *qgroup_reserved);
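
btrfs_subvolume_reserve_metadata() now takes use_global_rsv and, when the normal reservation fails with -ENOSPC, pulls the bytes out of the global block reserve instead of failing the whole operation. A toy sketch of that fallback-reservation pattern; struct rsv and the helpers are invented for the example:

    #include <errno.h>
    #include <stdbool.h>

    /* Toy reserves tracked as byte counters; stand-ins for btrfs_block_rsv. */
    struct rsv { long long bytes; };

    static int rsv_take(struct rsv *r, long long num_bytes)
    {
        if (r->bytes < num_bytes)
            return -ENOSPC;
        r->bytes -= num_bytes;
        return 0;
    }

    /*
     * Reserve metadata for a subvolume operation; if the normal path is out
     * of space and the caller allows it, migrate the bytes from the global
     * reserve instead of failing outright.
     */
    static int reserve_with_fallback(struct rsv *normal, struct rsv *global,
                                     struct rsv *target, long long num_bytes,
                                     bool use_global)
    {
        int ret = rsv_take(normal, num_bytes);

        if (ret == -ENOSPC && use_global)
            ret = rsv_take(global, num_bytes);
        if (!ret)
            target->bytes += num_bytes;
        return ret;
    }
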
@@ -5668,7 +5719,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5668 5719
5669 if (ret) { 5720 if (ret) {
5670 btrfs_err(info, "umm, got %d back from search, was looking for %llu", 5721 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5671 ret, (unsigned long long)bytenr); 5722 ret, bytenr);
5672 if (ret > 0) 5723 if (ret > 0)
5673 btrfs_print_leaf(extent_root, 5724 btrfs_print_leaf(extent_root,
5674 path->nodes[0]); 5725 path->nodes[0]);
@@ -5684,11 +5735,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5684 WARN_ON(1); 5735 WARN_ON(1);
5685 btrfs_err(info, 5736 btrfs_err(info,
5686 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 5737 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5687 (unsigned long long)bytenr, 5738 bytenr, parent, root_objectid, owner_objectid,
5688 (unsigned long long)parent, 5739 owner_offset);
5689 (unsigned long long)root_objectid,
5690 (unsigned long long)owner_objectid,
5691 (unsigned long long)owner_offset);
5692 } else { 5740 } else {
5693 btrfs_abort_transaction(trans, extent_root, ret); 5741 btrfs_abort_transaction(trans, extent_root, ret);
5694 goto out; 5742 goto out;
@@ -5717,7 +5765,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5717 -1, 1); 5765 -1, 1);
5718 if (ret) { 5766 if (ret) {
5719 btrfs_err(info, "umm, got %d back from search, was looking for %llu", 5767 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5720 ret, (unsigned long long)bytenr); 5768 ret, bytenr);
5721 btrfs_print_leaf(extent_root, path->nodes[0]); 5769 btrfs_print_leaf(extent_root, path->nodes[0]);
5722 } 5770 }
5723 if (ret < 0) { 5771 if (ret < 0) {
@@ -5999,8 +6047,11 @@ static u64 stripe_align(struct btrfs_root *root,
5999 * for our min num_bytes. Another option is to have it go ahead 6047 * for our min num_bytes. Another option is to have it go ahead
6000 * and look in the rbtree for a free extent of a given size, but this 6048 * and look in the rbtree for a free extent of a given size, but this
6001 * is a good start. 6049 * is a good start.
6050 *
6051 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
6052 * any of the information in this block group.
6002 */ 6053 */
6003static noinline int 6054static noinline void
6004wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 6055wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6005 u64 num_bytes) 6056 u64 num_bytes)
6006{ 6057{
@@ -6008,28 +6059,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6008 6059
6009 caching_ctl = get_caching_control(cache); 6060 caching_ctl = get_caching_control(cache);
6010 if (!caching_ctl) 6061 if (!caching_ctl)
6011 return 0; 6062 return;
6012 6063
6013 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 6064 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
6014 (cache->free_space_ctl->free_space >= num_bytes)); 6065 (cache->free_space_ctl->free_space >= num_bytes));
6015 6066
6016 put_caching_control(caching_ctl); 6067 put_caching_control(caching_ctl);
6017 return 0;
6018} 6068}
6019 6069
6020static noinline int 6070static noinline int
6021wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 6071wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6022{ 6072{
6023 struct btrfs_caching_control *caching_ctl; 6073 struct btrfs_caching_control *caching_ctl;
6074 int ret = 0;
6024 6075
6025 caching_ctl = get_caching_control(cache); 6076 caching_ctl = get_caching_control(cache);
6026 if (!caching_ctl) 6077 if (!caching_ctl)
6027 return 0; 6078 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
6028 6079
6029 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 6080 wait_event(caching_ctl->wait, block_group_cache_done(cache));
6030 6081 if (cache->cached == BTRFS_CACHE_ERROR)
6082 ret = -EIO;
6031 put_caching_control(caching_ctl); 6083 put_caching_control(caching_ctl);
6032 return 0; 6084 return ret;
6033} 6085}
6034 6086
6035int __get_raid_index(u64 flags) 6087int __get_raid_index(u64 flags)
@@ -6070,8 +6122,7 @@ enum btrfs_loop_type {
6070 * ins->offset == number of blocks 6122 * ins->offset == number of blocks
6071 * Any available blocks before search_start are skipped. 6123 * Any available blocks before search_start are skipped.
6072 */ 6124 */
6073static noinline int find_free_extent(struct btrfs_trans_handle *trans, 6125static noinline int find_free_extent(struct btrfs_root *orig_root,
6074 struct btrfs_root *orig_root,
6075 u64 num_bytes, u64 empty_size, 6126 u64 num_bytes, u64 empty_size,
6076 u64 hint_byte, struct btrfs_key *ins, 6127 u64 hint_byte, struct btrfs_key *ins,
6077 u64 flags) 6128 u64 flags)
@@ -6212,6 +6263,8 @@ have_block_group:
6212 ret = 0; 6263 ret = 0;
6213 } 6264 }
6214 6265
6266 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
6267 goto loop;
6215 if (unlikely(block_group->ro)) 6268 if (unlikely(block_group->ro))
6216 goto loop; 6269 goto loop;
6217 6270
@@ -6292,10 +6345,10 @@ refill_cluster:
6292 block_group->full_stripe_len); 6345 block_group->full_stripe_len);
6293 6346
6294 /* allocate a cluster in this block group */ 6347 /* allocate a cluster in this block group */
6295 ret = btrfs_find_space_cluster(trans, root, 6348 ret = btrfs_find_space_cluster(root, block_group,
6296 block_group, last_ptr, 6349 last_ptr, search_start,
6297 search_start, num_bytes, 6350 num_bytes,
6298 aligned_cluster); 6351 aligned_cluster);
6299 if (ret == 0) { 6352 if (ret == 0) {
6300 /* 6353 /*
6301 * now pull our allocation out of this 6354 * now pull our allocation out of this
@@ -6426,17 +6479,28 @@ loop:
6426 index = 0; 6479 index = 0;
6427 loop++; 6480 loop++;
6428 if (loop == LOOP_ALLOC_CHUNK) { 6481 if (loop == LOOP_ALLOC_CHUNK) {
6482 struct btrfs_trans_handle *trans;
6483
6484 trans = btrfs_join_transaction(root);
6485 if (IS_ERR(trans)) {
6486 ret = PTR_ERR(trans);
6487 goto out;
6488 }
6489
6429 ret = do_chunk_alloc(trans, root, flags, 6490 ret = do_chunk_alloc(trans, root, flags,
6430 CHUNK_ALLOC_FORCE); 6491 CHUNK_ALLOC_FORCE);
6431 /* 6492 /*
6432 * Do not bail out on ENOSPC since we 6493 * Do not bail out on ENOSPC since we
6433 * can do more things. 6494 * can do more things.
6434 */ 6495 */
6435 if (ret < 0 && ret != -ENOSPC) { 6496 if (ret < 0 && ret != -ENOSPC)
6436 btrfs_abort_transaction(trans, 6497 btrfs_abort_transaction(trans,
6437 root, ret); 6498 root, ret);
6499 else
6500 ret = 0;
6501 btrfs_end_transaction(trans, root);
6502 if (ret)
6438 goto out; 6503 goto out;
6439 }
6440 } 6504 }
6441 6505
6442 if (loop == LOOP_NO_EMPTY_SIZE) { 6506 if (loop == LOOP_NO_EMPTY_SIZE) {
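
find_free_extent() no longer carries a transaction handle around; it joins one only on the slow path, when it really has to force a chunk allocation, and ends it again right away. A compilable sketch of that "take the expensive context lazily" pattern, with stub stand-ins for the transaction and allocation helpers (not the real btrfs interfaces):

    #include <errno.h>
    #include <stdlib.h>

    struct trans { int dummy; };

    static struct trans *trans_join(void)   { return malloc(sizeof(struct trans)); }
    static void trans_end(struct trans *t)  { free(t); }
    static int alloc_chunk(struct trans *t) { (void)t; return 0; }
    static int alloc_from_groups(void)      { return -ENOSPC; } /* pretend full */

    static int find_free_extent_sketch(void)
    {
        int ret = alloc_from_groups();

        if (ret != -ENOSPC)
            return ret;                   /* fast path: no transaction needed */

        struct trans *t = trans_join();   /* slow path: join only to force a chunk */
        if (!t)
            return -ENOMEM;
        ret = alloc_chunk(t);
        if (ret == -ENOSPC)
            ret = 0;                      /* ENOSPC here is not fatal; keep trying */
        trans_end(t);
        return ret ? ret : alloc_from_groups();
    }
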
@@ -6463,19 +6527,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6463 6527
6464 spin_lock(&info->lock); 6528 spin_lock(&info->lock);
6465 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", 6529 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
6466 (unsigned long long)info->flags, 6530 info->flags,
6467 (unsigned long long)(info->total_bytes - info->bytes_used - 6531 info->total_bytes - info->bytes_used - info->bytes_pinned -
6468 info->bytes_pinned - info->bytes_reserved - 6532 info->bytes_reserved - info->bytes_readonly,
6469 info->bytes_readonly),
6470 (info->full) ? "" : "not "); 6533 (info->full) ? "" : "not ");
6471 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 6534 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
6472 "reserved=%llu, may_use=%llu, readonly=%llu\n", 6535 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6473 (unsigned long long)info->total_bytes, 6536 info->total_bytes, info->bytes_used, info->bytes_pinned,
6474 (unsigned long long)info->bytes_used, 6537 info->bytes_reserved, info->bytes_may_use,
6475 (unsigned long long)info->bytes_pinned, 6538 info->bytes_readonly);
6476 (unsigned long long)info->bytes_reserved,
6477 (unsigned long long)info->bytes_may_use,
6478 (unsigned long long)info->bytes_readonly);
6479 spin_unlock(&info->lock); 6539 spin_unlock(&info->lock);
6480 6540
6481 if (!dump_block_groups) 6541 if (!dump_block_groups)
@@ -6486,12 +6546,9 @@ again:
6486 list_for_each_entry(cache, &info->block_groups[index], list) { 6546 list_for_each_entry(cache, &info->block_groups[index], list) {
6487 spin_lock(&cache->lock); 6547 spin_lock(&cache->lock);
6488 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", 6548 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
6489 (unsigned long long)cache->key.objectid, 6549 cache->key.objectid, cache->key.offset,
6490 (unsigned long long)cache->key.offset, 6550 btrfs_block_group_used(&cache->item), cache->pinned,
6491 (unsigned long long)btrfs_block_group_used(&cache->item), 6551 cache->reserved, cache->ro ? "[readonly]" : "");
6492 (unsigned long long)cache->pinned,
6493 (unsigned long long)cache->reserved,
6494 cache->ro ? "[readonly]" : "");
6495 btrfs_dump_free_space(cache, bytes); 6552 btrfs_dump_free_space(cache, bytes);
6496 spin_unlock(&cache->lock); 6553 spin_unlock(&cache->lock);
6497 } 6554 }
@@ -6500,8 +6557,7 @@ again:
6500 up_read(&info->groups_sem); 6557 up_read(&info->groups_sem);
6501} 6558}
6502 6559
6503int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 6560int btrfs_reserve_extent(struct btrfs_root *root,
6504 struct btrfs_root *root,
6505 u64 num_bytes, u64 min_alloc_size, 6561 u64 num_bytes, u64 min_alloc_size,
6506 u64 empty_size, u64 hint_byte, 6562 u64 empty_size, u64 hint_byte,
6507 struct btrfs_key *ins, int is_data) 6563 struct btrfs_key *ins, int is_data)
@@ -6513,8 +6569,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
6513 flags = btrfs_get_alloc_profile(root, is_data); 6569 flags = btrfs_get_alloc_profile(root, is_data);
6514again: 6570again:
6515 WARN_ON(num_bytes < root->sectorsize); 6571 WARN_ON(num_bytes < root->sectorsize);
6516 ret = find_free_extent(trans, root, num_bytes, empty_size, 6572 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6517 hint_byte, ins, flags); 6573 flags);
6518 6574
6519 if (ret == -ENOSPC) { 6575 if (ret == -ENOSPC) {
6520 if (!final_tried) { 6576 if (!final_tried) {
@@ -6529,8 +6585,7 @@ again:
6529 6585
6530 sinfo = __find_space_info(root->fs_info, flags); 6586 sinfo = __find_space_info(root->fs_info, flags);
6531 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", 6587 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6532 (unsigned long long)flags, 6588 flags, num_bytes);
6533 (unsigned long long)num_bytes);
6534 if (sinfo) 6589 if (sinfo)
6535 dump_space_info(sinfo, num_bytes, 1); 6590 dump_space_info(sinfo, num_bytes, 1);
6536 } 6591 }
@@ -6550,7 +6605,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6550 cache = btrfs_lookup_block_group(root->fs_info, start); 6605 cache = btrfs_lookup_block_group(root->fs_info, start);
6551 if (!cache) { 6606 if (!cache) {
6552 btrfs_err(root->fs_info, "Unable to find block group for %llu", 6607 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6553 (unsigned long long)start); 6608 start);
6554 return -ENOSPC; 6609 return -ENOSPC;
6555 } 6610 }
6556 6611
@@ -6646,8 +6701,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6646 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6701 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6647 if (ret) { /* -ENOENT, logic error */ 6702 if (ret) { /* -ENOENT, logic error */
6648 btrfs_err(fs_info, "update block group failed for %llu %llu", 6703 btrfs_err(fs_info, "update block group failed for %llu %llu",
6649 (unsigned long long)ins->objectid, 6704 ins->objectid, ins->offset);
6650 (unsigned long long)ins->offset);
6651 BUG(); 6705 BUG();
6652 } 6706 }
6653 return ret; 6707 return ret;
@@ -6719,8 +6773,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6719 ret = update_block_group(root, ins->objectid, root->leafsize, 1); 6773 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6720 if (ret) { /* -ENOENT, logic error */ 6774 if (ret) { /* -ENOENT, logic error */
6721 btrfs_err(fs_info, "update block group failed for %llu %llu", 6775 btrfs_err(fs_info, "update block group failed for %llu %llu",
6722 (unsigned long long)ins->objectid, 6776 ins->objectid, ins->offset);
6723 (unsigned long long)ins->offset);
6724 BUG(); 6777 BUG();
6725 } 6778 }
6726 return ret; 6779 return ret;
@@ -6902,7 +6955,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6902 if (IS_ERR(block_rsv)) 6955 if (IS_ERR(block_rsv))
6903 return ERR_CAST(block_rsv); 6956 return ERR_CAST(block_rsv);
6904 6957
6905 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 6958 ret = btrfs_reserve_extent(root, blocksize, blocksize,
6906 empty_size, hint, &ins, 0); 6959 empty_size, hint, &ins, 0);
6907 if (ret) { 6960 if (ret) {
6908 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 6961 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
@@ -7173,6 +7226,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7173 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 7226 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
7174 if (!next) 7227 if (!next)
7175 return -ENOMEM; 7228 return -ENOMEM;
7229 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
7230 level - 1);
7176 reada = 1; 7231 reada = 1;
7177 } 7232 }
7178 btrfs_tree_lock(next); 7233 btrfs_tree_lock(next);
@@ -7658,7 +7713,7 @@ out:
7658 * don't have it in the radix (like when we recover after a power fail 7713 * don't have it in the radix (like when we recover after a power fail
7659 * or unmount) so we don't leak memory. 7714 * or unmount) so we don't leak memory.
7660 */ 7715 */
7661 if (root_dropped == false) 7716 if (!for_reloc && root_dropped == false)
7662 btrfs_add_dead_root(root); 7717 btrfs_add_dead_root(root);
7663 if (err) 7718 if (err)
7664 btrfs_std_error(root->fs_info, err); 7719 btrfs_std_error(root->fs_info, err);
@@ -8192,7 +8247,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8192 * We haven't cached this block group, which means we could 8247 * We haven't cached this block group, which means we could
8193 * possibly have excluded extents on this block group. 8248 * possibly have excluded extents on this block group.
8194 */ 8249 */
8195 if (block_group->cached == BTRFS_CACHE_NO) 8250 if (block_group->cached == BTRFS_CACHE_NO ||
8251 block_group->cached == BTRFS_CACHE_ERROR)
8196 free_excluded_extents(info->extent_root, block_group); 8252 free_excluded_extents(info->extent_root, block_group);
8197 8253
8198 btrfs_remove_free_space_cache(block_group); 8254 btrfs_remove_free_space_cache(block_group);
@@ -8409,9 +8465,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8409 * avoid allocating from un-mirrored block group if there are 8465 * avoid allocating from un-mirrored block group if there are
8410 * mirrored block groups. 8466 * mirrored block groups.
8411 */ 8467 */
8412 list_for_each_entry(cache, &space_info->block_groups[3], list) 8468 list_for_each_entry(cache,
8469 &space_info->block_groups[BTRFS_RAID_RAID0],
8470 list)
8413 set_block_group_ro(cache, 1); 8471 set_block_group_ro(cache, 1);
8414 list_for_each_entry(cache, &space_info->block_groups[4], list) 8472 list_for_each_entry(cache,
8473 &space_info->block_groups[BTRFS_RAID_SINGLE],
8474 list)
8415 set_block_group_ro(cache, 1); 8475 set_block_group_ro(cache, 1);
8416 } 8476 }
8417 8477
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fece851..09582b81640c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
61 state = list_entry(states.next, struct extent_state, leak_list); 61 state = list_entry(states.next, struct extent_state, leak_list);
62 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 62 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
63 "state %lu in tree %p refs %d\n", 63 "state %lu in tree %p refs %d\n",
64 (unsigned long long)state->start, 64 state->start, state->end, state->state, state->tree,
65 (unsigned long long)state->end, 65 atomic_read(&state->refs));
66 state->state, state->tree, atomic_read(&state->refs));
67 list_del(&state->leak_list); 66 list_del(&state->leak_list);
68 kmem_cache_free(extent_state_cache, state); 67 kmem_cache_free(extent_state_cache, state);
69 } 68 }
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
71 while (!list_empty(&buffers)) { 70 while (!list_empty(&buffers)) {
72 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
73 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
74 "refs %d\n", (unsigned long long)eb->start, 73 "refs %d\n",
75 eb->len, atomic_read(&eb->refs)); 74 eb->start, eb->len, atomic_read(&eb->refs));
76 list_del(&eb->leak_list); 75 list_del(&eb->leak_list);
77 kmem_cache_free(extent_buffer_cache, eb); 76 kmem_cache_free(extent_buffer_cache, eb);
78 } 77 }
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 87 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
89 printk_ratelimited(KERN_DEBUG 88 printk_ratelimited(KERN_DEBUG
90 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", 89 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
91 caller, 90 caller, btrfs_ino(inode), isize, start, end);
92 (unsigned long long)btrfs_ino(inode),
93 (unsigned long long)isize,
94 (unsigned long long)start,
95 (unsigned long long)end);
96 } 91 }
97} 92}
98#else 93#else
@@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree,
388 383
389 if (end < start) 384 if (end < start)
390 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", 385 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
391 (unsigned long long)end, 386 end, start);
392 (unsigned long long)start);
393 state->start = start; 387 state->start = start;
394 state->end = end; 388 state->end = end;
395 389
@@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree,
400 struct extent_state *found; 394 struct extent_state *found;
401 found = rb_entry(node, struct extent_state, rb_node); 395 found = rb_entry(node, struct extent_state, rb_node);
402 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 396 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
403 "%llu %llu\n", (unsigned long long)found->start, 397 "%llu %llu\n",
404 (unsigned long long)found->end, 398 found->start, found->end, start, end);
405 (unsigned long long)start, (unsigned long long)end);
406 return -EEXIST; 399 return -EEXIST;
407 } 400 }
408 state->tree = tree; 401 state->tree = tree;
@@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state,
762 } 755 }
763} 756}
764 757
765static void uncache_state(struct extent_state **cached_ptr)
766{
767 if (cached_ptr && (*cached_ptr)) {
768 struct extent_state *state = *cached_ptr;
769 *cached_ptr = NULL;
770 free_extent_state(state);
771 }
772}
773
774/* 758/*
775 * set some bits on a range in the tree. This may require allocations or 759 * set some bits on a range in the tree. This may require allocations or
776 * sleeping, so the gfp mask is used to indicate what is allowed. 760 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1687,31 +1671,21 @@ out_failed:
1687 return found; 1671 return found;
1688} 1672}
1689 1673
1690int extent_clear_unlock_delalloc(struct inode *inode, 1674int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1691 struct extent_io_tree *tree, 1675 struct page *locked_page,
1692 u64 start, u64 end, struct page *locked_page, 1676 unsigned long clear_bits,
1693 unsigned long op) 1677 unsigned long page_ops)
1694{ 1678{
1679 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1695 int ret; 1680 int ret;
1696 struct page *pages[16]; 1681 struct page *pages[16];
1697 unsigned long index = start >> PAGE_CACHE_SHIFT; 1682 unsigned long index = start >> PAGE_CACHE_SHIFT;
1698 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1683 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1699 unsigned long nr_pages = end_index - index + 1; 1684 unsigned long nr_pages = end_index - index + 1;
1700 int i; 1685 int i;
1701 unsigned long clear_bits = 0;
1702
1703 if (op & EXTENT_CLEAR_UNLOCK)
1704 clear_bits |= EXTENT_LOCKED;
1705 if (op & EXTENT_CLEAR_DIRTY)
1706 clear_bits |= EXTENT_DIRTY;
1707
1708 if (op & EXTENT_CLEAR_DELALLOC)
1709 clear_bits |= EXTENT_DELALLOC;
1710 1686
1711 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1687 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1712 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 1688 if (page_ops == 0)
1713 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
1714 EXTENT_SET_PRIVATE2)))
1715 return 0; 1689 return 0;
1716 1690
1717 while (nr_pages > 0) { 1691 while (nr_pages > 0) {
@@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1720 nr_pages, ARRAY_SIZE(pages)), pages); 1694 nr_pages, ARRAY_SIZE(pages)), pages);
1721 for (i = 0; i < ret; i++) { 1695 for (i = 0; i < ret; i++) {
1722 1696
1723 if (op & EXTENT_SET_PRIVATE2) 1697 if (page_ops & PAGE_SET_PRIVATE2)
1724 SetPagePrivate2(pages[i]); 1698 SetPagePrivate2(pages[i]);
1725 1699
1726 if (pages[i] == locked_page) { 1700 if (pages[i] == locked_page) {
1727 page_cache_release(pages[i]); 1701 page_cache_release(pages[i]);
1728 continue; 1702 continue;
1729 } 1703 }
1730 if (op & EXTENT_CLEAR_DIRTY) 1704 if (page_ops & PAGE_CLEAR_DIRTY)
1731 clear_page_dirty_for_io(pages[i]); 1705 clear_page_dirty_for_io(pages[i]);
1732 if (op & EXTENT_SET_WRITEBACK) 1706 if (page_ops & PAGE_SET_WRITEBACK)
1733 set_page_writeback(pages[i]); 1707 set_page_writeback(pages[i]);
1734 if (op & EXTENT_END_WRITEBACK) 1708 if (page_ops & PAGE_END_WRITEBACK)
1735 end_page_writeback(pages[i]); 1709 end_page_writeback(pages[i]);
1736 if (op & EXTENT_CLEAR_UNLOCK_PAGE) 1710 if (page_ops & PAGE_UNLOCK)
1737 unlock_page(pages[i]); 1711 unlock_page(pages[i]);
1738 page_cache_release(pages[i]); 1712 page_cache_release(pages[i]);
1739 } 1713 }
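
The old EXTENT_CLEAR_*/EXTENT_SET_* op mask is split here into clear_bits for the extent tree and page_ops for the per-page actions (PAGE_UNLOCK, PAGE_CLEAR_DIRTY, PAGE_SET_WRITEBACK, PAGE_END_WRITEBACK, PAGE_SET_PRIVATE2). A minimal illustration of driving per-page work from such a bitmask, using a made-up page struct rather than the kernel's; the flag values are illustrative:

    /* Minimal stand-in for per-page state, for illustration only. */
    struct page_sketch { int locked, dirty, writeback; };

    #define PAGE_UNLOCK        (1u << 0)
    #define PAGE_CLEAR_DIRTY   (1u << 1)
    #define PAGE_SET_WRITEBACK (1u << 2)
    #define PAGE_END_WRITEBACK (1u << 3)

    /* Apply the requested per-page operations to every page in the range. */
    static void apply_page_ops(struct page_sketch *pages, int nr,
                               unsigned long page_ops)
    {
        for (int i = 0; i < nr; i++) {
            if (page_ops & PAGE_CLEAR_DIRTY)
                pages[i].dirty = 0;
            if (page_ops & PAGE_SET_WRITEBACK)
                pages[i].writeback = 1;
            if (page_ops & PAGE_END_WRITEBACK)
                pages[i].writeback = 0;
            if (page_ops & PAGE_UNLOCK)
                pages[i].locked = 0;
        }
    }
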
@@ -1810,7 +1784,7 @@ out:
1810 * set the private field for a given byte offset in the tree. If there isn't 1784 * set the private field for a given byte offset in the tree. If there isn't
1811 * an extent_state there already, this does nothing. 1785 * an extent_state there already, this does nothing.
1812 */ 1786 */
1813int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1787static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1814{ 1788{
1815 struct rb_node *node; 1789 struct rb_node *node;
1816 struct extent_state *state; 1790 struct extent_state *state;
@@ -1837,64 +1811,6 @@ out:
1837 return ret; 1811 return ret;
1838} 1812}
1839 1813
1840void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1841 int count)
1842{
1843 struct rb_node *node;
1844 struct extent_state *state;
1845
1846 spin_lock(&tree->lock);
1847 /*
1848 * this search will find all the extents that end after
1849 * our range starts.
1850 */
1851 node = tree_search(tree, start);
1852 BUG_ON(!node);
1853
1854 state = rb_entry(node, struct extent_state, rb_node);
1855 BUG_ON(state->start != start);
1856
1857 while (count) {
1858 state->private = *csums++;
1859 count--;
1860 state = next_state(state);
1861 }
1862 spin_unlock(&tree->lock);
1863}
1864
1865static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1866{
1867 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1868
1869 return page_offset(bvec->bv_page) + bvec->bv_offset;
1870}
1871
1872void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1873 u32 csums[], int count)
1874{
1875 struct rb_node *node;
1876 struct extent_state *state = NULL;
1877 u64 start;
1878
1879 spin_lock(&tree->lock);
1880 do {
1881 start = __btrfs_get_bio_offset(bio, bio_index);
1882 if (state == NULL || state->start != start) {
1883 node = tree_search(tree, start);
1884 BUG_ON(!node);
1885
1886 state = rb_entry(node, struct extent_state, rb_node);
1887 BUG_ON(state->start != start);
1888 }
1889 state->private = *csums++;
1890 count--;
1891 bio_index++;
1892
1893 state = next_state(state);
1894 } while (count);
1895 spin_unlock(&tree->lock);
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1814int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{ 1815{
1900 struct rb_node *node; 1816 struct rb_node *node;
@@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
2173 EXTENT_LOCKED); 2089 EXTENT_LOCKED);
2174 spin_unlock(&BTRFS_I(inode)->io_tree.lock); 2090 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2175 2091
2176 if (state && state->start == failrec->start) { 2092 if (state && state->start <= failrec->start &&
2093 state->end >= failrec->start + failrec->len - 1) {
2177 fs_info = BTRFS_I(inode)->root->fs_info; 2094 fs_info = BTRFS_I(inode)->root->fs_info;
2178 num_copies = btrfs_num_copies(fs_info, failrec->logical, 2095 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2179 failrec->len); 2096 failrec->len);
@@ -2201,9 +2118,9 @@ out:
2201 * needed 2118 * needed
2202 */ 2119 */
2203 2120
2204static int bio_readpage_error(struct bio *failed_bio, struct page *page, 2121static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2205 u64 start, u64 end, int failed_mirror, 2122 struct page *page, u64 start, u64 end,
2206 struct extent_state *state) 2123 int failed_mirror)
2207{ 2124{
2208 struct io_failure_record *failrec = NULL; 2125 struct io_failure_record *failrec = NULL;
2209 u64 private; 2126 u64 private;
@@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2213 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2130 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2214 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2131 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2215 struct bio *bio; 2132 struct bio *bio;
2133 struct btrfs_io_bio *btrfs_failed_bio;
2134 struct btrfs_io_bio *btrfs_bio;
2216 int num_copies; 2135 int num_copies;
2217 int ret; 2136 int ret;
2218 int read_mode; 2137 int read_mode;
@@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2296 * all the retry and error correction code that follows. no 2215 * all the retry and error correction code that follows. no
2297 * matter what the error is, it is very likely to persist. 2216 * matter what the error is, it is very likely to persist.
2298 */ 2217 */
2299 pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " 2218 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2300 "state=%p, num_copies=%d, next_mirror %d, " 2219 num_copies, failrec->this_mirror, failed_mirror);
2301 "failed_mirror %d\n", state, num_copies,
2302 failrec->this_mirror, failed_mirror);
2303 free_io_failure(inode, failrec, 0); 2220 free_io_failure(inode, failrec, 0);
2304 return -EIO; 2221 return -EIO;
2305 } 2222 }
2306 2223
2307 if (!state) {
2308 spin_lock(&tree->lock);
2309 state = find_first_extent_bit_state(tree, failrec->start,
2310 EXTENT_LOCKED);
2311 if (state && state->start != failrec->start)
2312 state = NULL;
2313 spin_unlock(&tree->lock);
2314 }
2315
2316 /* 2224 /*
2317 * there are two premises: 2225 * there are two premises:
2318 * a) deliver good data to the caller 2226 * a) deliver good data to the caller
@@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2349 read_mode = READ_SYNC; 2257 read_mode = READ_SYNC;
2350 } 2258 }
2351 2259
2352 if (!state || failrec->this_mirror > num_copies) { 2260 if (failrec->this_mirror > num_copies) {
2353 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " 2261 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2354 "next_mirror %d, failed_mirror %d\n", state,
2355 num_copies, failrec->this_mirror, failed_mirror); 2262 num_copies, failrec->this_mirror, failed_mirror);
2356 free_io_failure(inode, failrec, 0); 2263 free_io_failure(inode, failrec, 0);
2357 return -EIO; 2264 return -EIO;
@@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2362 free_io_failure(inode, failrec, 0); 2269 free_io_failure(inode, failrec, 0);
2363 return -EIO; 2270 return -EIO;
2364 } 2271 }
2365 bio->bi_private = state;
2366 bio->bi_end_io = failed_bio->bi_end_io; 2272 bio->bi_end_io = failed_bio->bi_end_io;
2367 bio->bi_sector = failrec->logical >> 9; 2273 bio->bi_sector = failrec->logical >> 9;
2368 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2274 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2369 bio->bi_size = 0; 2275 bio->bi_size = 0;
2370 2276
2277 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2278 if (btrfs_failed_bio->csum) {
2279 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2280 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2281
2282 btrfs_bio = btrfs_io_bio(bio);
2283 btrfs_bio->csum = btrfs_bio->csum_inline;
2284 phy_offset >>= inode->i_sb->s_blocksize_bits;
2285 phy_offset *= csum_size;
2286 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2287 csum_size);
2288 }
2289
2371 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2290 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2372 2291
2373 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2292 pr_debug("bio_readpage_error: submitting new read[%#x] to "
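
When a read is retried, the new code above copies just the slice of the original bio's checksum array that covers the failed block, using phy_offset to index into it. A hedged sketch of that index computation only; the types and names are illustrative, and csum_size would be 4 for crc32c:

    #include <stdint.h>
    #include <string.h>

    /*
     * phy_offset: byte offset of the failed block inside the original bio.
     * Each block contributes one csum_size-byte checksum to orig_csums.
     */
    static void copy_csums_for_retry(uint8_t *dst, const uint8_t *orig_csums,
                                     uint64_t phy_offset, unsigned blocksize_bits,
                                     unsigned csum_size, unsigned nr_blocks)
    {
        size_t slot = (size_t)(phy_offset >> blocksize_bits); /* block index */

        memcpy(dst, orig_csums + slot * csum_size,
               (size_t)nr_blocks * csum_size);
    }
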
@@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2450 bio_put(bio); 2369 bio_put(bio);
2451} 2370}
2452 2371
2372static void
2373endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2374 int uptodate)
2375{
2376 struct extent_state *cached = NULL;
2377 u64 end = start + len - 1;
2378
2379 if (uptodate && tree->track_uptodate)
2380 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2381 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2382}
2383
2453/* 2384/*
2454 * after a readpage IO is done, we need to: 2385 * after a readpage IO is done, we need to:
2455 * clear the uptodate bits on error 2386 * clear the uptodate bits on error
@@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2466 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2397 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2467 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 2398 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2468 struct bio_vec *bvec = bio->bi_io_vec; 2399 struct bio_vec *bvec = bio->bi_io_vec;
2400 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2469 struct extent_io_tree *tree; 2401 struct extent_io_tree *tree;
2402 u64 offset = 0;
2470 u64 start; 2403 u64 start;
2471 u64 end; 2404 u64 end;
2405 u64 len;
2406 u64 extent_start = 0;
2407 u64 extent_len = 0;
2472 int mirror; 2408 int mirror;
2473 int ret; 2409 int ret;
2474 2410
@@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2477 2413
2478 do { 2414 do {
2479 struct page *page = bvec->bv_page; 2415 struct page *page = bvec->bv_page;
2480 struct extent_state *cached = NULL;
2481 struct extent_state *state;
2482 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2483 struct inode *inode = page->mapping->host; 2416 struct inode *inode = page->mapping->host;
2484 2417
2485 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2418 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2500 2433
2501 start = page_offset(page); 2434 start = page_offset(page);
2502 end = start + bvec->bv_offset + bvec->bv_len - 1; 2435 end = start + bvec->bv_offset + bvec->bv_len - 1;
2436 len = bvec->bv_len;
2503 2437
2504 if (++bvec <= bvec_end) 2438 if (++bvec <= bvec_end)
2505 prefetchw(&bvec->bv_page->flags); 2439 prefetchw(&bvec->bv_page->flags);
2506 2440
2507 spin_lock(&tree->lock);
2508 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
2509 if (state && state->start == start) {
2510 /*
2511 * take a reference on the state, unlock will drop
2512 * the ref
2513 */
2514 cache_state(state, &cached);
2515 }
2516 spin_unlock(&tree->lock);
2517
2518 mirror = io_bio->mirror_num; 2441 mirror = io_bio->mirror_num;
2519 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2442 if (likely(uptodate && tree->ops &&
2520 ret = tree->ops->readpage_end_io_hook(page, start, end, 2443 tree->ops->readpage_end_io_hook)) {
2521 state, mirror); 2444 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2445 page, start, end,
2446 mirror);
2522 if (ret) 2447 if (ret)
2523 uptodate = 0; 2448 uptodate = 0;
2524 else 2449 else
2525 clean_io_failure(start, page); 2450 clean_io_failure(start, page);
2526 } 2451 }
2527 2452
2528 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2453 if (likely(uptodate))
2454 goto readpage_ok;
2455
2456 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2529 ret = tree->ops->readpage_io_failed_hook(page, mirror); 2457 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2530 if (!ret && !err && 2458 if (!ret && !err &&
2531 test_bit(BIO_UPTODATE, &bio->bi_flags)) 2459 test_bit(BIO_UPTODATE, &bio->bi_flags))
2532 uptodate = 1; 2460 uptodate = 1;
2533 } else if (!uptodate) { 2461 } else {
2534 /* 2462 /*
2535 * The generic bio_readpage_error handles errors the 2463 * The generic bio_readpage_error handles errors the
2536 * following way: If possible, new read requests are 2464 * following way: If possible, new read requests are
@@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2541 * can't handle the error it will return -EIO and we 2469 * can't handle the error it will return -EIO and we
2542 * remain responsible for that page. 2470 * remain responsible for that page.
2543 */ 2471 */
2544 ret = bio_readpage_error(bio, page, start, end, mirror, NULL); 2472 ret = bio_readpage_error(bio, offset, page, start, end,
2473 mirror);
2545 if (ret == 0) { 2474 if (ret == 0) {
2546 uptodate = 2475 uptodate =
2547 test_bit(BIO_UPTODATE, &bio->bi_flags); 2476 test_bit(BIO_UPTODATE, &bio->bi_flags);
2548 if (err) 2477 if (err)
2549 uptodate = 0; 2478 uptodate = 0;
2550 uncache_state(&cached);
2551 continue; 2479 continue;
2552 } 2480 }
2553 } 2481 }
2554 2482readpage_ok:
2555 if (uptodate && tree->track_uptodate) { 2483 if (likely(uptodate)) {
2556 set_extent_uptodate(tree, start, end, &cached,
2557 GFP_ATOMIC);
2558 }
2559 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2560
2561 if (uptodate) {
2562 loff_t i_size = i_size_read(inode); 2484 loff_t i_size = i_size_read(inode);
2563 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2485 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2564 unsigned offset; 2486 unsigned offset;
@@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2573 SetPageError(page); 2495 SetPageError(page);
2574 } 2496 }
2575 unlock_page(page); 2497 unlock_page(page);
2498 offset += len;
2499
2500 if (unlikely(!uptodate)) {
2501 if (extent_len) {
2502 endio_readpage_release_extent(tree,
2503 extent_start,
2504 extent_len, 1);
2505 extent_start = 0;
2506 extent_len = 0;
2507 }
2508 endio_readpage_release_extent(tree, start,
2509 end - start + 1, 0);
2510 } else if (!extent_len) {
2511 extent_start = start;
2512 extent_len = end + 1 - start;
2513 } else if (extent_start + extent_len == start) {
2514 extent_len += end + 1 - start;
2515 } else {
2516 endio_readpage_release_extent(tree, extent_start,
2517 extent_len, uptodate);
2518 extent_start = start;
2519 extent_len = end + 1 - start;
2520 }
2576 } while (bvec <= bvec_end); 2521 } while (bvec <= bvec_end);
2577 2522
2523 if (extent_len)
2524 endio_readpage_release_extent(tree, extent_start, extent_len,
2525 uptodate);
2526 if (io_bio->end_io)
2527 io_bio->end_io(io_bio, err);
2578 bio_put(bio); 2528 bio_put(bio);
2579} 2529}
2580 2530
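
end_bio_extent_readpage() now accumulates contiguous, successfully read ranges and hands them to endio_readpage_release_extent() in one go instead of unlocking page by page. The runnable sketch below shows the same coalescing logic, with flush() standing in for the release call:

    #include <stdint.h>
    #include <stdio.h>

    struct range_acc { uint64_t start, len; };

    static void flush(uint64_t start, uint64_t len)
    {
        printf("release [%llu, %llu)\n",
               (unsigned long long)start, (unsigned long long)(start + len));
    }

    static void add_range(struct range_acc *acc, uint64_t start, uint64_t len)
    {
        if (acc->len == 0) {                      /* first range */
            acc->start = start;
            acc->len = len;
        } else if (acc->start + acc->len == start) {
            acc->len += len;                      /* contiguous: extend */
        } else {
            flush(acc->start, acc->len);          /* gap: flush and restart */
            acc->start = start;
            acc->len = len;
        }
    }

    static void finish(struct range_acc *acc)
    {
        if (acc->len)
            flush(acc->start, acc->len);
    }

    int main(void)
    {
        struct range_acc acc = {0, 0};

        add_range(&acc, 0, 4096);
        add_range(&acc, 4096, 4096);   /* merged with the previous range */
        add_range(&acc, 16384, 4096);  /* gap: first two flushed as one range */
        finish(&acc);
        return 0;
    }
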
@@ -2586,6 +2536,7 @@ struct bio *
2586btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2536btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2587 gfp_t gfp_flags) 2537 gfp_t gfp_flags)
2588{ 2538{
2539 struct btrfs_io_bio *btrfs_bio;
2589 struct bio *bio; 2540 struct bio *bio;
2590 2541
2591 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2542 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2601 bio->bi_size = 0; 2552 bio->bi_size = 0;
2602 bio->bi_bdev = bdev; 2553 bio->bi_bdev = bdev;
2603 bio->bi_sector = first_sector; 2554 bio->bi_sector = first_sector;
2555 btrfs_bio = btrfs_io_bio(bio);
2556 btrfs_bio->csum = NULL;
2557 btrfs_bio->csum_allocated = NULL;
2558 btrfs_bio->end_io = NULL;
2604 } 2559 }
2605 return bio; 2560 return bio;
2606} 2561}
@@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2614/* this also allocates from the btrfs_bioset */ 2569/* this also allocates from the btrfs_bioset */
2615struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2570struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2616{ 2571{
2617 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2572 struct btrfs_io_bio *btrfs_bio;
2573 struct bio *bio;
2574
2575 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2576 if (bio) {
2577 btrfs_bio = btrfs_io_bio(bio);
2578 btrfs_bio->csum = NULL;
2579 btrfs_bio->csum_allocated = NULL;
2580 btrfs_bio->end_io = NULL;
2581 }
2582 return bio;
2618} 2583}
2619 2584
2620 2585
@@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page)
2738 } 2703 }
2739} 2704}
2740 2705
2706static struct extent_map *
2707__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2708 u64 start, u64 len, get_extent_t *get_extent,
2709 struct extent_map **em_cached)
2710{
2711 struct extent_map *em;
2712
2713 if (em_cached && *em_cached) {
2714 em = *em_cached;
2715 if (em->in_tree && start >= em->start &&
2716 start < extent_map_end(em)) {
2717 atomic_inc(&em->refs);
2718 return em;
2719 }
2720
2721 free_extent_map(em);
2722 *em_cached = NULL;
2723 }
2724
2725 em = get_extent(inode, page, pg_offset, start, len, 0);
2726 if (em_cached && !IS_ERR_OR_NULL(em)) {
2727 BUG_ON(*em_cached);
2728 atomic_inc(&em->refs);
2729 *em_cached = em;
2730 }
2731 return em;
2732}
2741/* 2733/*
2742 * basic readpage implementation. Locked extent state structs are inserted 2734 * basic readpage implementation. Locked extent state structs are inserted
2743 * into the tree that are removed when the IO is done (by the end_io 2735 * into the tree that are removed when the IO is done (by the end_io
2744 * handlers) 2736 * handlers)
2745 * XXX JDM: This needs looking at to ensure proper page locking 2737 * XXX JDM: This needs looking at to ensure proper page locking
2746 */ 2738 */
2747static int __extent_read_full_page(struct extent_io_tree *tree, 2739static int __do_readpage(struct extent_io_tree *tree,
2748 struct page *page, 2740 struct page *page,
2749 get_extent_t *get_extent, 2741 get_extent_t *get_extent,
2750 struct bio **bio, int mirror_num, 2742 struct extent_map **em_cached,
2751 unsigned long *bio_flags, int rw) 2743 struct bio **bio, int mirror_num,
2744 unsigned long *bio_flags, int rw)
2752{ 2745{
2753 struct inode *inode = page->mapping->host; 2746 struct inode *inode = page->mapping->host;
2754 u64 start = page_offset(page); 2747 u64 start = page_offset(page);
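
__get_extent_map() above adds a single-slot cache so a run of contiguous pages can reuse the extent map found for the first one instead of repeating the lookup; the cache keeps its own reference and drops it as soon as the cached map no longer covers the requested offset. A self-contained sketch of the same one-entry, reference-counted cache around a hypothetical lookup():

    #include <stdlib.h>

    /* Toy reference-counted object covering [start, start + len). */
    struct map {
        unsigned long start, len;
        int refs;
    };

    static void map_put(struct map *m)
    {
        if (m && --m->refs == 0)
            free(m);
    }

    /* Expensive lookup we want to avoid repeating; stands in for get_extent(). */
    static struct map *lookup(unsigned long offset)
    {
        struct map *m = malloc(sizeof(*m));

        if (!m)
            return NULL;
        m->start = offset & ~4095UL;   /* pretend maps are 4 KiB aligned */
        m->len = 4096;
        m->refs = 1;
        return m;
    }

    /* Single-slot cache: reuse *cached while it still covers offset. */
    static struct map *get_map_cached(unsigned long offset, struct map **cached)
    {
        struct map *m;

        if (*cached) {
            m = *cached;
            if (offset >= m->start && offset < m->start + m->len) {
                m->refs++;             /* hand out an extra reference */
                return m;
            }
            map_put(m);                /* stale: drop the cache's reference */
            *cached = NULL;
        }

        m = lookup(offset);
        if (m) {
            m->refs++;                 /* one ref for the cache, one for the caller */
            *cached = m;
        }
        return m;
    }
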
@@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2762 sector_t sector; 2755 sector_t sector;
2763 struct extent_map *em; 2756 struct extent_map *em;
2764 struct block_device *bdev; 2757 struct block_device *bdev;
2765 struct btrfs_ordered_extent *ordered;
2766 int ret; 2758 int ret;
2767 int nr = 0; 2759 int nr = 0;
2760 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2768 size_t pg_offset = 0; 2761 size_t pg_offset = 0;
2769 size_t iosize; 2762 size_t iosize;
2770 size_t disk_io_size; 2763 size_t disk_io_size;
2771 size_t blocksize = inode->i_sb->s_blocksize; 2764 size_t blocksize = inode->i_sb->s_blocksize;
2772 unsigned long this_bio_flag = 0; 2765 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2773 2766
2774 set_page_extent_mapped(page); 2767 set_page_extent_mapped(page);
2775 2768
2769 end = page_end;
2776 if (!PageUptodate(page)) { 2770 if (!PageUptodate(page)) {
2777 if (cleancache_get_page(page) == 0) { 2771 if (cleancache_get_page(page) == 0) {
2778 BUG_ON(blocksize != PAGE_SIZE); 2772 BUG_ON(blocksize != PAGE_SIZE);
2773 unlock_extent(tree, start, end);
2779 goto out; 2774 goto out;
2780 } 2775 }
2781 } 2776 }
2782 2777
2783 end = page_end;
2784 while (1) {
2785 lock_extent(tree, start, end);
2786 ordered = btrfs_lookup_ordered_extent(inode, start);
2787 if (!ordered)
2788 break;
2789 unlock_extent(tree, start, end);
2790 btrfs_start_ordered_extent(inode, ordered, 1);
2791 btrfs_put_ordered_extent(ordered);
2792 }
2793
2794 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 2778 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2795 char *userpage; 2779 char *userpage;
2796 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 2780 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2817 kunmap_atomic(userpage); 2801 kunmap_atomic(userpage);
2818 set_extent_uptodate(tree, cur, cur + iosize - 1, 2802 set_extent_uptodate(tree, cur, cur + iosize - 1,
2819 &cached, GFP_NOFS); 2803 &cached, GFP_NOFS);
2820 unlock_extent_cached(tree, cur, cur + iosize - 1, 2804 if (!parent_locked)
2821 &cached, GFP_NOFS); 2805 unlock_extent_cached(tree, cur,
2806 cur + iosize - 1,
2807 &cached, GFP_NOFS);
2822 break; 2808 break;
2823 } 2809 }
2824 em = get_extent(inode, page, pg_offset, cur, 2810 em = __get_extent_map(inode, page, pg_offset, cur,
2825 end - cur + 1, 0); 2811 end - cur + 1, get_extent, em_cached);
2826 if (IS_ERR_OR_NULL(em)) { 2812 if (IS_ERR_OR_NULL(em)) {
2827 SetPageError(page); 2813 SetPageError(page);
2828 unlock_extent(tree, cur, end); 2814 if (!parent_locked)
2815 unlock_extent(tree, cur, end);
2829 break; 2816 break;
2830 } 2817 }
2831 extent_offset = cur - em->start; 2818 extent_offset = cur - em->start;
@@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2833 BUG_ON(end < cur); 2820 BUG_ON(end < cur);
2834 2821
2835 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2822 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2836 this_bio_flag = EXTENT_BIO_COMPRESSED; 2823 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2837 extent_set_compress_type(&this_bio_flag, 2824 extent_set_compress_type(&this_bio_flag,
2838 em->compress_type); 2825 em->compress_type);
2839 } 2826 }
@@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2877 if (test_range_bit(tree, cur, cur_end, 2864 if (test_range_bit(tree, cur, cur_end,
2878 EXTENT_UPTODATE, 1, NULL)) { 2865 EXTENT_UPTODATE, 1, NULL)) {
2879 check_page_uptodate(tree, page); 2866 check_page_uptodate(tree, page);
2880 unlock_extent(tree, cur, cur + iosize - 1); 2867 if (!parent_locked)
2868 unlock_extent(tree, cur, cur + iosize - 1);
2881 cur = cur + iosize; 2869 cur = cur + iosize;
2882 pg_offset += iosize; 2870 pg_offset += iosize;
2883 continue; 2871 continue;
@@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2887 */ 2875 */
2888 if (block_start == EXTENT_MAP_INLINE) { 2876 if (block_start == EXTENT_MAP_INLINE) {
2889 SetPageError(page); 2877 SetPageError(page);
2890 unlock_extent(tree, cur, cur + iosize - 1); 2878 if (!parent_locked)
2879 unlock_extent(tree, cur, cur + iosize - 1);
2891 cur = cur + iosize; 2880 cur = cur + iosize;
2892 pg_offset += iosize; 2881 pg_offset += iosize;
2893 continue; 2882 continue;
@@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2905 *bio_flags = this_bio_flag; 2894 *bio_flags = this_bio_flag;
2906 } else { 2895 } else {
2907 SetPageError(page); 2896 SetPageError(page);
2908 unlock_extent(tree, cur, cur + iosize - 1); 2897 if (!parent_locked)
2898 unlock_extent(tree, cur, cur + iosize - 1);
2909 } 2899 }
2910 cur = cur + iosize; 2900 cur = cur + iosize;
2911 pg_offset += iosize; 2901 pg_offset += iosize;
@@ -2919,6 +2909,104 @@ out:
2919 return 0; 2909 return 0;
2920} 2910}
2921 2911
2912static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
2913 struct page *pages[], int nr_pages,
2914 u64 start, u64 end,
2915 get_extent_t *get_extent,
2916 struct extent_map **em_cached,
2917 struct bio **bio, int mirror_num,
2918 unsigned long *bio_flags, int rw)
2919{
2920 struct inode *inode;
2921 struct btrfs_ordered_extent *ordered;
2922 int index;
2923
2924 inode = pages[0]->mapping->host;
2925 while (1) {
2926 lock_extent(tree, start, end);
2927 ordered = btrfs_lookup_ordered_range(inode, start,
2928 end - start + 1);
2929 if (!ordered)
2930 break;
2931 unlock_extent(tree, start, end);
2932 btrfs_start_ordered_extent(inode, ordered, 1);
2933 btrfs_put_ordered_extent(ordered);
2934 }
2935
2936 for (index = 0; index < nr_pages; index++) {
2937 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
2938 mirror_num, bio_flags, rw);
2939 page_cache_release(pages[index]);
2940 }
2941}
2942
2943static void __extent_readpages(struct extent_io_tree *tree,
2944 struct page *pages[],
2945 int nr_pages, get_extent_t *get_extent,
2946 struct extent_map **em_cached,
2947 struct bio **bio, int mirror_num,
2948 unsigned long *bio_flags, int rw)
2949{
2950 u64 start = 0;
2951 u64 end = 0;
2952 u64 page_start;
2953 int index;
2954 int first_index = 0;
2955
2956 for (index = 0; index < nr_pages; index++) {
2957 page_start = page_offset(pages[index]);
2958 if (!end) {
2959 start = page_start;
2960 end = start + PAGE_CACHE_SIZE - 1;
2961 first_index = index;
2962 } else if (end + 1 == page_start) {
2963 end += PAGE_CACHE_SIZE;
2964 } else {
2965 __do_contiguous_readpages(tree, &pages[first_index],
2966 index - first_index, start,
2967 end, get_extent, em_cached,
2968 bio, mirror_num, bio_flags,
2969 rw);
2970 start = page_start;
2971 end = start + PAGE_CACHE_SIZE - 1;
2972 first_index = index;
2973 }
2974 }
2975
2976 if (end)
2977 __do_contiguous_readpages(tree, &pages[first_index],
2978 index - first_index, start,
2979 end, get_extent, em_cached, bio,
2980 mirror_num, bio_flags, rw);
2981}
2982
2983static int __extent_read_full_page(struct extent_io_tree *tree,
2984 struct page *page,
2985 get_extent_t *get_extent,
2986 struct bio **bio, int mirror_num,
2987 unsigned long *bio_flags, int rw)
2988{
2989 struct inode *inode = page->mapping->host;
2990 struct btrfs_ordered_extent *ordered;
2991 u64 start = page_offset(page);
2992 u64 end = start + PAGE_CACHE_SIZE - 1;
2993 int ret;
2994
2995 while (1) {
2996 lock_extent(tree, start, end);
2997 ordered = btrfs_lookup_ordered_extent(inode, start);
2998 if (!ordered)
2999 break;
3000 unlock_extent(tree, start, end);
3001 btrfs_start_ordered_extent(inode, ordered, 1);
3002 btrfs_put_ordered_extent(ordered);
3003 }
3004
3005 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3006 bio_flags, rw);
3007 return ret;
3008}
3009
2922int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 3010int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2923 get_extent_t *get_extent, int mirror_num) 3011 get_extent_t *get_extent, int mirror_num)
2924{ 3012{
@@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2933 return ret; 3021 return ret;
2934} 3022}
2935 3023
3024int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3025 get_extent_t *get_extent, int mirror_num)
3026{
3027 struct bio *bio = NULL;
3028 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3029 int ret;
3030
3031 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3032 &bio_flags, READ);
3033 if (bio)
3034 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3035 return ret;
3036}
3037
2936static noinline void update_nr_written(struct page *page, 3038static noinline void update_nr_written(struct page *page,
2937 struct writeback_control *wbc, 3039 struct writeback_control *wbc,
2938 unsigned long nr_written) 3040 unsigned long nr_written)
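A hypothetical caller sketch (not part of this patch) of the new nolock read path: the caller is assumed to already hold the extent range locked, and because EXTENT_BIO_PARENT_LOCKED is carried in the bio flags, __do_readpage leaves the range locked when it returns (see the !parent_locked check in the first hunk above).

	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	u64 start = page_offset(page);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	int ret;

	/* the caller owns the extent lock across the read */
	lock_extent(tree, start, end);
	ret = extent_read_full_page_nolock(tree, page, btrfs_get_extent, 0);
	/* waiting for the read to complete is omitted from this sketch */
	unlock_extent(tree, start, end);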
@@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3189 if (!PageWriteback(page)) { 3291 if (!PageWriteback(page)) {
3190 printk(KERN_ERR "btrfs warning page %lu not " 3292 printk(KERN_ERR "btrfs warning page %lu not "
3191 "writeback, cur %llu end %llu\n", 3293 "writeback, cur %llu end %llu\n",
3192 page->index, (unsigned long long)cur, 3294 page->index, cur, end);
3193 (unsigned long long)end);
3194 } 3295 }
3195 3296
3196 ret = submit_extent_page(write_flags, tree, page, 3297 ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree,
3769 unsigned long bio_flags = 0; 3870 unsigned long bio_flags = 0;
3770 struct page *pagepool[16]; 3871 struct page *pagepool[16];
3771 struct page *page; 3872 struct page *page;
3772 int i = 0; 3873 struct extent_map *em_cached = NULL;
3773 int nr = 0; 3874 int nr = 0;
3774 3875
3775 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 3876 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree,
3786 pagepool[nr++] = page; 3887 pagepool[nr++] = page;
3787 if (nr < ARRAY_SIZE(pagepool)) 3888 if (nr < ARRAY_SIZE(pagepool))
3788 continue; 3889 continue;
3789 for (i = 0; i < nr; i++) { 3890 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3790 __extent_read_full_page(tree, pagepool[i], get_extent, 3891 &bio, 0, &bio_flags, READ);
3791 &bio, 0, &bio_flags, READ);
3792 page_cache_release(pagepool[i]);
3793 }
3794 nr = 0; 3892 nr = 0;
3795 } 3893 }
3796 for (i = 0; i < nr; i++) { 3894 if (nr)
3797 __extent_read_full_page(tree, pagepool[i], get_extent, 3895 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3798 &bio, 0, &bio_flags, READ); 3896 &bio, 0, &bio_flags, READ);
3799 page_cache_release(pagepool[i]); 3897
3800 } 3898 if (em_cached)
3899 free_extent_map(em_cached);
3801 3900
3802 BUG_ON(!list_empty(pages)); 3901 BUG_ON(!list_empty(pages));
3803 if (bio) 3902 if (bio)
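The batching in extent_readpages() feeds __extent_readpages(), which walks the page array and coalesces pages whose file offsets are contiguous, flushing each run through __do_contiguous_readpages() so the whole run is locked and submitted as one unit. A small stand-alone model of that grouping logic (plain user-space C with illustrative offsets, not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096ULL

/* stand-in for __do_contiguous_readpages(): just report the byte run */
static void submit_run(unsigned long long start, unsigned long long end)
{
	printf("submit contiguous run [%llu, %llu]\n", start, end);
}

int main(void)
{
	/* three contiguous page offsets, a gap, then two more */
	unsigned long long offsets[] = { 0, 4096, 8192, 20480, 24576 };
	int nr = sizeof(offsets) / sizeof(offsets[0]);
	unsigned long long start = 0, end = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (!end) {				/* first page of a run */
			start = offsets[i];
			end = start + PAGE_SIZE - 1;
		} else if (end + 1 == offsets[i]) {	/* still contiguous */
			end += PAGE_SIZE;
		} else {				/* gap: flush the run, start a new one */
			submit_run(start, end);
			start = offsets[i];
			end = start + PAGE_SIZE - 1;
		}
	}
	if (end)					/* flush whatever is left */
		submit_run(start, end);
	return 0;
}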
@@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4136 kmem_cache_free(extent_buffer_cache, eb); 4235 kmem_cache_free(extent_buffer_cache, eb);
4137} 4236}
4138 4237
4238static int extent_buffer_under_io(struct extent_buffer *eb)
4239{
4240 return (atomic_read(&eb->io_pages) ||
4241 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4242 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4243}
4244
4245/*
4246 * Helper for releasing extent buffer page.
4247 */
4248static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4249 unsigned long start_idx)
4250{
4251 unsigned long index;
4252 unsigned long num_pages;
4253 struct page *page;
4254 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4255
4256 BUG_ON(extent_buffer_under_io(eb));
4257
4258 num_pages = num_extent_pages(eb->start, eb->len);
4259 index = start_idx + num_pages;
4260 if (start_idx >= index)
4261 return;
4262
4263 do {
4264 index--;
4265 page = extent_buffer_page(eb, index);
4266 if (page && mapped) {
4267 spin_lock(&page->mapping->private_lock);
4268 /*
4269 * We do this since we'll remove the pages after we've
4270 * removed the eb from the radix tree, so we could race
4271 * and have this page now attached to the new eb. So
4272 * only clear page_private if it's still connected to
4273 * this eb.
4274 */
4275 if (PagePrivate(page) &&
4276 page->private == (unsigned long)eb) {
4277 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4278 BUG_ON(PageDirty(page));
4279 BUG_ON(PageWriteback(page));
4280 /*
 4281 * We need to make sure we haven't been attached
4282 * to a new eb.
4283 */
4284 ClearPagePrivate(page);
4285 set_page_private(page, 0);
4286 /* One for the page private */
4287 page_cache_release(page);
4288 }
4289 spin_unlock(&page->mapping->private_lock);
4290
4291 }
4292 if (page) {
4293 /* One for when we alloced the page */
4294 page_cache_release(page);
4295 }
4296 } while (index != start_idx);
4297}
4298
4299/*
4300 * Helper for releasing the extent buffer.
4301 */
4302static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4303{
4304 btrfs_release_extent_buffer_page(eb, 0);
4305 __free_extent_buffer(eb);
4306}
4307
4139static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 4308static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4140 u64 start, 4309 u64 start,
4141 unsigned long len, 4310 unsigned long len,
@@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4184 struct extent_buffer *new; 4353 struct extent_buffer *new;
4185 unsigned long num_pages = num_extent_pages(src->start, src->len); 4354 unsigned long num_pages = num_extent_pages(src->start, src->len);
4186 4355
4187 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); 4356 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
4188 if (new == NULL) 4357 if (new == NULL)
4189 return NULL; 4358 return NULL;
4190 4359
4191 for (i = 0; i < num_pages; i++) { 4360 for (i = 0; i < num_pages; i++) {
4192 p = alloc_page(GFP_ATOMIC); 4361 p = alloc_page(GFP_NOFS);
4193 BUG_ON(!p); 4362 if (!p) {
4363 btrfs_release_extent_buffer(new);
4364 return NULL;
4365 }
4194 attach_extent_buffer_page(new, p); 4366 attach_extent_buffer_page(new, p);
4195 WARN_ON(PageDirty(p)); 4367 WARN_ON(PageDirty(p));
4196 SetPageUptodate(p); 4368 SetPageUptodate(p);
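btrfs_clone_extent_buffer() now uses GFP_NOFS and, instead of BUG_ON(), backs out of a partial allocation by releasing the pages already attached to the new buffer. A stand-alone sketch of the same unwind-on-failure pattern (plain C, hypothetical resource array, not kernel code):

#include <stdlib.h>

#define NR_SLOTS 4

/* allocate all slots or none: on failure, free what was already obtained */
static int alloc_all(void *slots[NR_SLOTS])
{
	int i;

	for (i = 0; i < NR_SLOTS; i++) {
		slots[i] = malloc(64);
		if (!slots[i]) {
			while (i--)
				free(slots[i]);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	void *slots[NR_SLOTS];
	int i;

	if (alloc_all(slots))
		return 1;
	for (i = 0; i < NR_SLOTS; i++)
		free(slots[i]);
	return 0;
}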
@@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
4210 unsigned long num_pages = num_extent_pages(0, len); 4382 unsigned long num_pages = num_extent_pages(0, len);
4211 unsigned long i; 4383 unsigned long i;
4212 4384
4213 eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); 4385 eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
4214 if (!eb) 4386 if (!eb)
4215 return NULL; 4387 return NULL;
4216 4388
4217 for (i = 0; i < num_pages; i++) { 4389 for (i = 0; i < num_pages; i++) {
4218 eb->pages[i] = alloc_page(GFP_ATOMIC); 4390 eb->pages[i] = alloc_page(GFP_NOFS);
4219 if (!eb->pages[i]) 4391 if (!eb->pages[i])
4220 goto err; 4392 goto err;
4221 } 4393 }
@@ -4231,76 +4403,6 @@ err:
4231 return NULL; 4403 return NULL;
4232} 4404}
4233 4405
4234static int extent_buffer_under_io(struct extent_buffer *eb)
4235{
4236 return (atomic_read(&eb->io_pages) ||
4237 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4238 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4239}
4240
4241/*
4242 * Helper for releasing extent buffer page.
4243 */
4244static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4245 unsigned long start_idx)
4246{
4247 unsigned long index;
4248 unsigned long num_pages;
4249 struct page *page;
4250 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4251
4252 BUG_ON(extent_buffer_under_io(eb));
4253
4254 num_pages = num_extent_pages(eb->start, eb->len);
4255 index = start_idx + num_pages;
4256 if (start_idx >= index)
4257 return;
4258
4259 do {
4260 index--;
4261 page = extent_buffer_page(eb, index);
4262 if (page && mapped) {
4263 spin_lock(&page->mapping->private_lock);
4264 /*
4265 * We do this since we'll remove the pages after we've
4266 * removed the eb from the radix tree, so we could race
4267 * and have this page now attached to the new eb. So
4268 * only clear page_private if it's still connected to
4269 * this eb.
4270 */
4271 if (PagePrivate(page) &&
4272 page->private == (unsigned long)eb) {
4273 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4274 BUG_ON(PageDirty(page));
4275 BUG_ON(PageWriteback(page));
4276 /*
 4277 * We need to make sure we haven't been attached
4278 * to a new eb.
4279 */
4280 ClearPagePrivate(page);
4281 set_page_private(page, 0);
4282 /* One for the page private */
4283 page_cache_release(page);
4284 }
4285 spin_unlock(&page->mapping->private_lock);
4286
4287 }
4288 if (page) {
4289 /* One for when we alloced the page */
4290 page_cache_release(page);
4291 }
4292 } while (index != start_idx);
4293}
4294
4295/*
4296 * Helper for releasing the extent buffer.
4297 */
4298static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4299{
4300 btrfs_release_extent_buffer_page(eb, 0);
4301 __free_extent_buffer(eb);
4302}
4303
4304static void check_buffer_tree_ref(struct extent_buffer *eb) 4406static void check_buffer_tree_ref(struct extent_buffer *eb)
4305{ 4407{
4306 int refs; 4408 int refs;
@@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
4771 WARN_ON(start > eb->len); 4873 WARN_ON(start > eb->len);
4772 WARN_ON(start + len > eb->start + eb->len); 4874 WARN_ON(start + len > eb->start + eb->len);
4773 4875
4774 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4876 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4775 4877
4776 while (len > 0) { 4878 while (len > 0) {
4777 page = extent_buffer_page(eb, i); 4879 page = extent_buffer_page(eb, i);
@@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
4813 4915
4814 if (start + min_len > eb->len) { 4916 if (start + min_len > eb->len) {
4815 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 4917 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
4816 "wanted %lu %lu\n", (unsigned long long)eb->start, 4918 "wanted %lu %lu\n",
4817 eb->len, start, min_len); 4919 eb->start, eb->len, start, min_len);
4818 return -EINVAL; 4920 return -EINVAL;
4819 } 4921 }
4820 4922
@@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
4841 WARN_ON(start > eb->len); 4943 WARN_ON(start > eb->len);
4842 WARN_ON(start + len > eb->start + eb->len); 4944 WARN_ON(start + len > eb->start + eb->len);
4843 4945
4844 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4946 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4845 4947
4846 while (len > 0) { 4948 while (len > 0) {
4847 page = extent_buffer_page(eb, i); 4949 page = extent_buffer_page(eb, i);
@@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
4875 WARN_ON(start > eb->len); 4977 WARN_ON(start > eb->len);
4876 WARN_ON(start + len > eb->start + eb->len); 4978 WARN_ON(start + len > eb->start + eb->len);
4877 4979
4878 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4980 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4879 4981
4880 while (len > 0) { 4982 while (len > 0) {
4881 page = extent_buffer_page(eb, i); 4983 page = extent_buffer_page(eb, i);
@@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
4905 WARN_ON(start > eb->len); 5007 WARN_ON(start > eb->len);
4906 WARN_ON(start + len > eb->start + eb->len); 5008 WARN_ON(start + len > eb->start + eb->len);
4907 5009
4908 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 5010 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4909 5011
4910 while (len > 0) { 5012 while (len > 0) {
4911 page = extent_buffer_page(eb, i); 5013 page = extent_buffer_page(eb, i);
@@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
4936 WARN_ON(src->len != dst_len); 5038 WARN_ON(src->len != dst_len);
4937 5039
4938 offset = (start_offset + dst_offset) & 5040 offset = (start_offset + dst_offset) &
4939 ((unsigned long)PAGE_CACHE_SIZE - 1); 5041 (PAGE_CACHE_SIZE - 1);
4940 5042
4941 while (len > 0) { 5043 while (len > 0) {
4942 page = extent_buffer_page(dst, i); 5044 page = extent_buffer_page(dst, i);
@@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5022 5124
5023 while (len > 0) { 5125 while (len > 0) {
5024 dst_off_in_page = (start_offset + dst_offset) & 5126 dst_off_in_page = (start_offset + dst_offset) &
5025 ((unsigned long)PAGE_CACHE_SIZE - 1); 5127 (PAGE_CACHE_SIZE - 1);
5026 src_off_in_page = (start_offset + src_offset) & 5128 src_off_in_page = (start_offset + src_offset) &
5027 ((unsigned long)PAGE_CACHE_SIZE - 1); 5129 (PAGE_CACHE_SIZE - 1);
5028 5130
5029 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 5131 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5030 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 5132 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5075 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 5177 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5076 5178
5077 dst_off_in_page = (start_offset + dst_end) & 5179 dst_off_in_page = (start_offset + dst_end) &
5078 ((unsigned long)PAGE_CACHE_SIZE - 1); 5180 (PAGE_CACHE_SIZE - 1);
5079 src_off_in_page = (start_offset + src_end) & 5181 src_off_in_page = (start_offset + src_end) &
5080 ((unsigned long)PAGE_CACHE_SIZE - 1); 5182 (PAGE_CACHE_SIZE - 1);
5081 5183
5082 cur = min_t(unsigned long, len, src_off_in_page + 1); 5184 cur = min_t(unsigned long, len, src_off_in_page + 1);
5083 cur = min(cur, dst_off_in_page + 1); 5185 cur = min(cur, dst_off_in_page + 1);
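All of the extent-buffer copy helpers above split a logical offset into a page index and an offset inside that page; dropping the (unsigned long) cast does not change the arithmetic, since PAGE_CACHE_SIZE is already an unsigned long value and PAGE_CACHE_SIZE - 1 masks correctly on its own. A stand-alone illustration (4K pages assumed):

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned long start_offset = 1024;	/* offset of the buffer in its first page */
	unsigned long start = 10000;		/* offset within the extent buffer */

	unsigned long page_index  = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long off_in_page = (start_offset + start) & (PAGE_CACHE_SIZE - 1);

	/* 11024 = 2 * 4096 + 2832, so page 2, offset 2832 */
	printf("page %lu, offset %lu\n", page_index, off_in_page);
	return 0;
}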
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3b8c4e26e1da..6dbc645f1f3d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,6 +29,7 @@
29 */ 29 */
30#define EXTENT_BIO_COMPRESSED 1 30#define EXTENT_BIO_COMPRESSED 1
31#define EXTENT_BIO_TREE_LOG 2 31#define EXTENT_BIO_TREE_LOG 2
32#define EXTENT_BIO_PARENT_LOCKED 4
32#define EXTENT_BIO_FLAG_SHIFT 16 33#define EXTENT_BIO_FLAG_SHIFT 16
33 34
34/* these are bit numbers for test/set bit */ 35/* these are bit numbers for test/set bit */
@@ -44,14 +45,11 @@
44#define EXTENT_BUFFER_DUMMY 9 45#define EXTENT_BUFFER_DUMMY 9
45 46
46/* these are flags for extent_clear_unlock_delalloc */ 47/* these are flags for extent_clear_unlock_delalloc */
47#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 48#define PAGE_UNLOCK (1 << 0)
48#define EXTENT_CLEAR_UNLOCK 0x2 49#define PAGE_CLEAR_DIRTY (1 << 1)
49#define EXTENT_CLEAR_DELALLOC 0x4 50#define PAGE_SET_WRITEBACK (1 << 2)
50#define EXTENT_CLEAR_DIRTY 0x8 51#define PAGE_END_WRITEBACK (1 << 3)
51#define EXTENT_SET_WRITEBACK 0x10 52#define PAGE_SET_PRIVATE2 (1 << 4)
52#define EXTENT_END_WRITEBACK 0x20
53#define EXTENT_SET_PRIVATE2 0x40
54#define EXTENT_CLEAR_ACCOUNTING 0x80
55 53
56/* 54/*
57 * page->private values. Every page that is controlled by the extent 55 * page->private values. Every page that is controlled by the extent
@@ -62,6 +60,7 @@
62 60
63struct extent_state; 61struct extent_state;
64struct btrfs_root; 62struct btrfs_root;
63struct btrfs_io_bio;
65 64
66typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 65typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
67 struct bio *bio, int mirror_num, 66 struct bio *bio, int mirror_num,
@@ -77,8 +76,9 @@ struct extent_io_ops {
77 size_t size, struct bio *bio, 76 size_t size, struct bio *bio,
78 unsigned long bio_flags); 77 unsigned long bio_flags);
79 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 78 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
80 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 79 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
81 struct extent_state *state, int mirror); 80 struct page *page, u64 start, u64 end,
81 int mirror);
82 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 82 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
83 struct extent_state *state, int uptodate); 83 struct extent_state *state, int uptodate);
84 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 84 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
200int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 200int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
201int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 201int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
202 get_extent_t *get_extent, int mirror_num); 202 get_extent_t *get_extent, int mirror_num);
203int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
204 get_extent_t *get_extent, int mirror_num);
203int __init extent_io_init(void); 205int __init extent_io_init(void);
204void extent_io_exit(void); 206void extent_io_exit(void);
205 207
@@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree,
261 get_extent_t get_extent); 263 get_extent_t get_extent);
262int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 264int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
263 __u64 start, __u64 len, get_extent_t *get_extent); 265 __u64 start, __u64 len, get_extent_t *get_extent);
264int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
265void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
266 int count);
267void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
268 int bvec_index, u32 csums[], int count);
269int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 266int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
270void set_page_extent_mapped(struct page *page); 267void set_page_extent_mapped(struct page *page);
271 268
@@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
330 unsigned long *map_len); 327 unsigned long *map_len);
331int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
332int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
333int extent_clear_unlock_delalloc(struct inode *inode, 330int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
334 struct extent_io_tree *tree, 331 struct page *locked_page,
335 u64 start, u64 end, struct page *locked_page, 332 unsigned long bits_to_clear,
336 unsigned long op); 333 unsigned long page_ops);
337struct bio * 334struct bio *
338btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 335btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
339 gfp_t gfp_flags); 336 gfp_t gfp_flags);
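The reworked extent_clear_unlock_delalloc() separates the extent bits to clear from the page operations to perform, with the latter built from the new PAGE_* values defined above. A sketch of a typical call, mirroring the converted caller in the inode.c hunk later in this patch (the values shown are that caller's, not a required combination):

	unsigned long clear_bits = EXTENT_DELALLOC | EXTENT_DEFRAG;

	extent_clear_unlock_delalloc(inode, start, end, NULL /* locked_page */,
				     clear_bits,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);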
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a7bfc9541803..4f53159bdb9d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -23,6 +23,7 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "disk-io.h" 24#include "disk-io.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "volumes.h"
26#include "print-tree.h" 27#include "print-tree.h"
27 28
28#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 29#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
@@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
152 return ret; 153 return ret;
153} 154}
154 155
156static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
157{
158 kfree(bio->csum_allocated);
159}
160
155static int __btrfs_lookup_bio_sums(struct btrfs_root *root, 161static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
156 struct inode *inode, struct bio *bio, 162 struct inode *inode, struct bio *bio,
157 u64 logical_offset, u32 *dst, int dio) 163 u64 logical_offset, u32 *dst, int dio)
158{ 164{
159 u32 sum[16];
160 int len;
161 struct bio_vec *bvec = bio->bi_io_vec; 165 struct bio_vec *bvec = bio->bi_io_vec;
162 int bio_index = 0; 166 struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
167 struct btrfs_csum_item *item = NULL;
168 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
169 struct btrfs_path *path;
170 u8 *csum;
163 u64 offset = 0; 171 u64 offset = 0;
164 u64 item_start_offset = 0; 172 u64 item_start_offset = 0;
165 u64 item_last_offset = 0; 173 u64 item_last_offset = 0;
166 u64 disk_bytenr; 174 u64 disk_bytenr;
167 u32 diff; 175 u32 diff;
168 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 176 int nblocks;
177 int bio_index = 0;
169 int count; 178 int count;
170 struct btrfs_path *path; 179 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
171 struct btrfs_csum_item *item = NULL;
172 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
173 180
174 path = btrfs_alloc_path(); 181 path = btrfs_alloc_path();
175 if (!path) 182 if (!path)
176 return -ENOMEM; 183 return -ENOMEM;
184
185 nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
189 GFP_NOFS);
190 if (!btrfs_bio->csum_allocated) {
191 btrfs_free_path(path);
192 return -ENOMEM;
193 }
194 btrfs_bio->csum = btrfs_bio->csum_allocated;
195 btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
196 } else {
197 btrfs_bio->csum = btrfs_bio->csum_inline;
198 }
199 csum = btrfs_bio->csum;
200 } else {
201 csum = (u8 *)dst;
202 }
203
177 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 204 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
178 path->reada = 2; 205 path->reada = 2;
179 206
@@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
194 if (dio) 221 if (dio)
195 offset = logical_offset; 222 offset = logical_offset;
196 while (bio_index < bio->bi_vcnt) { 223 while (bio_index < bio->bi_vcnt) {
197 len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
198 if (!dio) 224 if (!dio)
199 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 225 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
200 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, 226 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
201 len); 227 (u32 *)csum, nblocks);
202 if (count) 228 if (count)
203 goto found; 229 goto found;
204 230
@@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
213 path, disk_bytenr, 0); 239 path, disk_bytenr, 0);
214 if (IS_ERR(item)) { 240 if (IS_ERR(item)) {
215 count = 1; 241 count = 1;
216 sum[0] = 0; 242 memset(csum, 0, csum_size);
217 if (BTRFS_I(inode)->root->root_key.objectid == 243 if (BTRFS_I(inode)->root->root_key.objectid ==
218 BTRFS_DATA_RELOC_TREE_OBJECTID) { 244 BTRFS_DATA_RELOC_TREE_OBJECTID) {
219 set_extent_bits(io_tree, offset, 245 set_extent_bits(io_tree, offset,
@@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
222 } else { 248 } else {
223 printk(KERN_INFO "btrfs no csum found " 249 printk(KERN_INFO "btrfs no csum found "
224 "for inode %llu start %llu\n", 250 "for inode %llu start %llu\n",
225 (unsigned long long) 251 btrfs_ino(inode), offset);
226 btrfs_ino(inode),
227 (unsigned long long)offset);
228 } 252 }
229 item = NULL; 253 item = NULL;
230 btrfs_release_path(path); 254 btrfs_release_path(path);
@@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
249 diff = disk_bytenr - item_start_offset; 273 diff = disk_bytenr - item_start_offset;
250 diff = diff / root->sectorsize; 274 diff = diff / root->sectorsize;
251 diff = diff * csum_size; 275 diff = diff * csum_size;
252 count = min_t(int, len, (item_last_offset - disk_bytenr) >> 276 count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
253 inode->i_sb->s_blocksize_bits); 277 inode->i_sb->s_blocksize_bits);
254 read_extent_buffer(path->nodes[0], sum, 278 read_extent_buffer(path->nodes[0], csum,
255 ((unsigned long)item) + diff, 279 ((unsigned long)item) + diff,
256 csum_size * count); 280 csum_size * count);
257found: 281found:
258 if (dst) { 282 csum += count * csum_size;
259 memcpy(dst, sum, count * csum_size); 283 nblocks -= count;
260 dst += count;
261 } else {
262 if (dio)
263 extent_cache_csums_dio(io_tree, offset, sum,
264 count);
265 else
266 extent_cache_csums(io_tree, bio, bio_index, sum,
267 count);
268 }
269 while (count--) { 284 while (count--) {
270 disk_bytenr += bvec->bv_len; 285 disk_bytenr += bvec->bv_len;
271 offset += bvec->bv_len; 286 offset += bvec->bv_len;
@@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
284} 299}
285 300
286int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 301int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
287 struct bio *bio, u64 offset) 302 struct btrfs_dio_private *dip, struct bio *bio,
303 u64 offset)
288{ 304{
289 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); 305 int len = (bio->bi_sector << 9) - dip->disk_bytenr;
306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
307 int ret;
308
309 len >>= inode->i_sb->s_blocksize_bits;
310 len *= csum_size;
311
312 ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
313 (u32 *)(dip->csum + len), 1);
314 return ret;
290} 315}
291 316
292int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 317int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
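__btrfs_lookup_bio_sums() replaces the fixed sum[16] stack array with a per-bio checksum buffer sized from the number of blocks in the bio: small bios use the inline array embedded in struct btrfs_io_bio, larger ones get a kmalloc'd buffer that the new end_io callback frees. A stand-alone model of that sizing decision (illustrative constants; the 64-byte inline size is an assumption, standing in for BTRFS_BIO_INLINE_CSUM_SIZE):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCKSIZE_BITS   12	/* 4K blocks */
#define CSUM_SIZE        4	/* crc32c */
#define INLINE_CSUM_SIZE 64	/* assumed inline capacity */

int main(void)
{
	unsigned long bio_size = 256 * 1024;	/* a 256K read bio */
	int nblocks = bio_size >> BLOCKSIZE_BITS;
	unsigned char inline_csum[INLINE_CSUM_SIZE];
	unsigned char *csum;

	if (nblocks * CSUM_SIZE > INLINE_CSUM_SIZE)
		csum = malloc(nblocks * CSUM_SIZE);	/* csum_allocated case */
	else
		csum = inline_csum;			/* csum_inline case */

	if (!csum)
		return 1;
	memset(csum, 0, nblocks * CSUM_SIZE);
	printf("%d blocks need %d csum bytes (%s)\n", nblocks,
	       nblocks * CSUM_SIZE,
	       csum == inline_csum ? "inline" : "allocated");
	if (csum != inline_csum)
		free(csum);
	return 0;
}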
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4d2eb6417145..bc5072b2db53 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ fail:
1334static noinline int check_can_nocow(struct inode *inode, loff_t pos, 1334static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1335 size_t *write_bytes) 1335 size_t *write_bytes)
1336{ 1336{
1337 struct btrfs_trans_handle *trans;
1338 struct btrfs_root *root = BTRFS_I(inode)->root; 1337 struct btrfs_root *root = BTRFS_I(inode)->root;
1339 struct btrfs_ordered_extent *ordered; 1338 struct btrfs_ordered_extent *ordered;
1340 u64 lockstart, lockend; 1339 u64 lockstart, lockend;
@@ -1356,16 +1355,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1356 btrfs_put_ordered_extent(ordered); 1355 btrfs_put_ordered_extent(ordered);
1357 } 1356 }
1358 1357
1359 trans = btrfs_join_transaction(root);
1360 if (IS_ERR(trans)) {
1361 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1362 return PTR_ERR(trans);
1363 }
1364
1365 num_bytes = lockend - lockstart + 1; 1358 num_bytes = lockend - lockstart + 1;
1366 ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, 1359 ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
1367 NULL);
1368 btrfs_end_transaction(trans, root);
1369 if (ret <= 0) { 1360 if (ret <= 0) {
1370 ret = 0; 1361 ret = 0;
1371 } else { 1362 } else {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b21a3cd667d8..ef3bea7bb257 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -308,7 +308,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
308 308
309static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 309static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
310{ 310{
311 BUG_ON(io_ctl->index >= io_ctl->num_pages); 311 ASSERT(io_ctl->index < io_ctl->num_pages);
312 io_ctl->page = io_ctl->pages[io_ctl->index++]; 312 io_ctl->page = io_ctl->pages[io_ctl->index++];
313 io_ctl->cur = kmap(io_ctl->page); 313 io_ctl->cur = kmap(io_ctl->page);
314 io_ctl->orig = io_ctl->cur; 314 io_ctl->orig = io_ctl->cur;
@@ -673,8 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
673 btrfs_err(root->fs_info, 673 btrfs_err(root->fs_info,
674 "free space inode generation (%llu) " 674 "free space inode generation (%llu) "
675 "did not match free space cache generation (%llu)", 675 "did not match free space cache generation (%llu)",
676 (unsigned long long)BTRFS_I(inode)->generation, 676 BTRFS_I(inode)->generation, generation);
677 (unsigned long long)generation);
678 return 0; 677 return 0;
679 } 678 }
680 679
@@ -729,7 +728,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
729 goto free_cache; 728 goto free_cache;
730 } 729 }
731 } else { 730 } else {
732 BUG_ON(!num_bitmaps); 731 ASSERT(num_bitmaps);
733 num_bitmaps--; 732 num_bitmaps--;
734 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 733 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
735 if (!e->bitmap) { 734 if (!e->bitmap) {
@@ -1029,7 +1028,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1029 leaf = path->nodes[0]; 1028 leaf = path->nodes[0];
1030 if (ret > 0) { 1029 if (ret > 0) {
1031 struct btrfs_key found_key; 1030 struct btrfs_key found_key;
1032 BUG_ON(!path->slots[0]); 1031 ASSERT(path->slots[0]);
1033 path->slots[0]--; 1032 path->slots[0]--;
1034 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1033 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1035 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1034 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
@@ -1117,7 +1116,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1117static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1116static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1118 u64 offset) 1117 u64 offset)
1119{ 1118{
1120 BUG_ON(offset < bitmap_start); 1119 ASSERT(offset >= bitmap_start);
1121 offset -= bitmap_start; 1120 offset -= bitmap_start;
1122 return (unsigned long)(div_u64(offset, unit)); 1121 return (unsigned long)(div_u64(offset, unit));
1123} 1122}
@@ -1272,7 +1271,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
1272 if (n) { 1271 if (n) {
1273 entry = rb_entry(n, struct btrfs_free_space, 1272 entry = rb_entry(n, struct btrfs_free_space,
1274 offset_index); 1273 offset_index);
1275 BUG_ON(entry->offset > offset); 1274 ASSERT(entry->offset <= offset);
1276 } else { 1275 } else {
1277 if (fuzzy) 1276 if (fuzzy)
1278 return entry; 1277 return entry;
@@ -1336,7 +1335,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
1336{ 1335{
1337 int ret = 0; 1336 int ret = 0;
1338 1337
1339 BUG_ON(!info->bitmap && !info->bytes); 1338 ASSERT(info->bytes || info->bitmap);
1340 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1339 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1341 &info->offset_index, (info->bitmap != NULL)); 1340 &info->offset_index, (info->bitmap != NULL));
1342 if (ret) 1341 if (ret)
@@ -1359,7 +1358,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1359 1358
1360 max_bitmaps = max(max_bitmaps, 1); 1359 max_bitmaps = max(max_bitmaps, 1);
1361 1360
1362 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1361 ASSERT(ctl->total_bitmaps <= max_bitmaps);
1363 1362
1364 /* 1363 /*
1365 * The goal is to keep the total amount of memory used per 1gb of space 1364 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1403,7 +1402,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1403 1402
1404 start = offset_to_bit(info->offset, ctl->unit, offset); 1403 start = offset_to_bit(info->offset, ctl->unit, offset);
1405 count = bytes_to_bits(bytes, ctl->unit); 1404 count = bytes_to_bits(bytes, ctl->unit);
1406 BUG_ON(start + count > BITS_PER_BITMAP); 1405 ASSERT(start + count <= BITS_PER_BITMAP);
1407 1406
1408 bitmap_clear(info->bitmap, start, count); 1407 bitmap_clear(info->bitmap, start, count);
1409 1408
@@ -1426,7 +1425,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1426 1425
1427 start = offset_to_bit(info->offset, ctl->unit, offset); 1426 start = offset_to_bit(info->offset, ctl->unit, offset);
1428 count = bytes_to_bits(bytes, ctl->unit); 1427 count = bytes_to_bits(bytes, ctl->unit);
1429 BUG_ON(start + count > BITS_PER_BITMAP); 1428 ASSERT(start + count <= BITS_PER_BITMAP);
1430 1429
1431 bitmap_set(info->bitmap, start, count); 1430 bitmap_set(info->bitmap, start, count);
1432 1431
@@ -1742,7 +1741,7 @@ no_cluster_bitmap:
1742 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1741 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1743 1, 0); 1742 1, 0);
1744 if (!bitmap_info) { 1743 if (!bitmap_info) {
1745 BUG_ON(added); 1744 ASSERT(added == 0);
1746 goto new_bitmap; 1745 goto new_bitmap;
1747 } 1746 }
1748 1747
@@ -1882,7 +1881,7 @@ out:
1882 1881
1883 if (ret) { 1882 if (ret) {
1884 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1883 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1885 BUG_ON(ret == -EEXIST); 1884 ASSERT(ret != -EEXIST);
1886 } 1885 }
1887 1886
1888 return ret; 1887 return ret;
@@ -1991,8 +1990,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1991 if (info->bytes >= bytes && !block_group->ro) 1990 if (info->bytes >= bytes && !block_group->ro)
1992 count++; 1991 count++;
1993 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 1992 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1994 (unsigned long long)info->offset, 1993 info->offset, info->bytes,
1995 (unsigned long long)info->bytes,
1996 (info->bitmap) ? "yes" : "no"); 1994 (info->bitmap) ? "yes" : "no");
1997 } 1995 }
1998 printk(KERN_INFO "block group has cluster?: %s\n", 1996 printk(KERN_INFO "block group has cluster?: %s\n",
@@ -2371,7 +2369,7 @@ again:
2371 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2369 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2372 ret = tree_insert_offset(&cluster->root, entry->offset, 2370 ret = tree_insert_offset(&cluster->root, entry->offset,
2373 &entry->offset_index, 1); 2371 &entry->offset_index, 1);
2374 BUG_ON(ret); /* -EEXIST; Logic error */ 2372 ASSERT(!ret); /* -EEXIST; Logic error */
2375 2373
2376 trace_btrfs_setup_cluster(block_group, cluster, 2374 trace_btrfs_setup_cluster(block_group, cluster,
2377 total_found * ctl->unit, 1); 2375 total_found * ctl->unit, 1);
@@ -2464,7 +2462,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2464 ret = tree_insert_offset(&cluster->root, entry->offset, 2462 ret = tree_insert_offset(&cluster->root, entry->offset,
2465 &entry->offset_index, 0); 2463 &entry->offset_index, 0);
2466 total_size += entry->bytes; 2464 total_size += entry->bytes;
2467 BUG_ON(ret); /* -EEXIST; Logic error */ 2465 ASSERT(!ret); /* -EEXIST; Logic error */
2468 } while (node && entry != last); 2466 } while (node && entry != last);
2469 2467
2470 cluster->max_size = max_extent; 2468 cluster->max_size = max_extent;
@@ -2525,8 +2523,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2525 * returns zero and sets up cluster if things worked out, otherwise 2523 * returns zero and sets up cluster if things worked out, otherwise
2526 * it returns -enospc 2524 * it returns -enospc
2527 */ 2525 */
2528int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 2526int btrfs_find_space_cluster(struct btrfs_root *root,
2529 struct btrfs_root *root,
2530 struct btrfs_block_group_cache *block_group, 2527 struct btrfs_block_group_cache *block_group,
2531 struct btrfs_free_cluster *cluster, 2528 struct btrfs_free_cluster *cluster,
2532 u64 offset, u64 bytes, u64 empty_size) 2529 u64 offset, u64 bytes, u64 empty_size)
@@ -2856,7 +2853,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2856 2853
2857 ret = search_bitmap(ctl, entry, &offset, &count); 2854 ret = search_bitmap(ctl, entry, &offset, &count);
2858 /* Logic error; Should be empty if it can't find anything */ 2855 /* Logic error; Should be empty if it can't find anything */
2859 BUG_ON(ret); 2856 ASSERT(!ret);
2860 2857
2861 ino = offset; 2858 ino = offset;
2862 bitmap_clear_bits(ctl, entry, offset, 1); 2859 bitmap_clear_bits(ctl, entry, offset, 1);
@@ -2973,33 +2970,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2973} 2970}
2974 2971
2975#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 2972#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
2976static struct btrfs_block_group_cache *init_test_block_group(void) 2973/*
2974 * Use this if you need to make a bitmap or extent entry specifically, it
2975 * doesn't do any of the merging that add_free_space does, this acts a lot like
2976 * how the free space cache loading stuff works, so you can get really weird
2977 * configurations.
2978 */
2979int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
2980 u64 offset, u64 bytes, bool bitmap)
2977{ 2981{
2978 struct btrfs_block_group_cache *cache; 2982 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
2983 struct btrfs_free_space *info = NULL, *bitmap_info;
2984 void *map = NULL;
2985 u64 bytes_added;
2986 int ret;
2979 2987
2980 cache = kzalloc(sizeof(*cache), GFP_NOFS); 2988again:
2981 if (!cache) 2989 if (!info) {
2982 return NULL; 2990 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2983 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 2991 if (!info)
2984 GFP_NOFS); 2992 return -ENOMEM;
2985 if (!cache->free_space_ctl) {
2986 kfree(cache);
2987 return NULL;
2988 } 2993 }
2989 2994
2990 cache->key.objectid = 0; 2995 if (!bitmap) {
2991 cache->key.offset = 1024 * 1024 * 1024; 2996 spin_lock(&ctl->tree_lock);
2992 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 2997 info->offset = offset;
2993 cache->sectorsize = 4096; 2998 info->bytes = bytes;
2999 ret = link_free_space(ctl, info);
3000 spin_unlock(&ctl->tree_lock);
3001 if (ret)
3002 kmem_cache_free(btrfs_free_space_cachep, info);
3003 return ret;
3004 }
3005
3006 if (!map) {
3007 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3008 if (!map) {
3009 kmem_cache_free(btrfs_free_space_cachep, info);
3010 return -ENOMEM;
3011 }
3012 }
3013
3014 spin_lock(&ctl->tree_lock);
3015 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3016 1, 0);
3017 if (!bitmap_info) {
3018 info->bitmap = map;
3019 map = NULL;
3020 add_new_bitmap(ctl, info, offset);
3021 bitmap_info = info;
3022 }
2994 3023
2995 spin_lock_init(&cache->lock); 3024 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
2996 INIT_LIST_HEAD(&cache->list); 3025 bytes -= bytes_added;
2997 INIT_LIST_HEAD(&cache->cluster_list); 3026 offset += bytes_added;
2998 INIT_LIST_HEAD(&cache->new_bg_list); 3027 spin_unlock(&ctl->tree_lock);
2999 3028
3000 btrfs_init_free_space_ctl(cache); 3029 if (bytes)
3030 goto again;
3001 3031
3002 return cache; 3032 if (map)
3033 kfree(map);
3034 return 0;
3003} 3035}
3004 3036
3005/* 3037/*
@@ -3007,8 +3039,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
3007 * just used to check the absence of space, so if there is free space in the 3039 * just used to check the absence of space, so if there is free space in the
3008 * range at all we will return 1. 3040 * range at all we will return 1.
3009 */ 3041 */
3010static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, 3042int test_check_exists(struct btrfs_block_group_cache *cache,
3011 u64 bytes) 3043 u64 offset, u64 bytes)
3012{ 3044{
3013 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3045 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3014 struct btrfs_free_space *info; 3046 struct btrfs_free_space *info;
@@ -3085,411 +3117,4 @@ out:
3085 spin_unlock(&ctl->tree_lock); 3117 spin_unlock(&ctl->tree_lock);
3086 return ret; 3118 return ret;
3087} 3119}
3088 3120#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
3089/*
3090 * Use this if you need to make a bitmap or extent entry specifically, it
3091 * doesn't do any of the merging that add_free_space does, this acts a lot like
3092 * how the free space cache loading stuff works, so you can get really weird
3093 * configurations.
3094 */
3095static int add_free_space_entry(struct btrfs_block_group_cache *cache,
3096 u64 offset, u64 bytes, bool bitmap)
3097{
3098 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3099 struct btrfs_free_space *info = NULL, *bitmap_info;
3100 void *map = NULL;
3101 u64 bytes_added;
3102 int ret;
3103
3104again:
3105 if (!info) {
3106 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3107 if (!info)
3108 return -ENOMEM;
3109 }
3110
3111 if (!bitmap) {
3112 spin_lock(&ctl->tree_lock);
3113 info->offset = offset;
3114 info->bytes = bytes;
3115 ret = link_free_space(ctl, info);
3116 spin_unlock(&ctl->tree_lock);
3117 if (ret)
3118 kmem_cache_free(btrfs_free_space_cachep, info);
3119 return ret;
3120 }
3121
3122 if (!map) {
3123 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3124 if (!map) {
3125 kmem_cache_free(btrfs_free_space_cachep, info);
3126 return -ENOMEM;
3127 }
3128 }
3129
3130 spin_lock(&ctl->tree_lock);
3131 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3132 1, 0);
3133 if (!bitmap_info) {
3134 info->bitmap = map;
3135 map = NULL;
3136 add_new_bitmap(ctl, info, offset);
3137 bitmap_info = info;
3138 }
3139
3140 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
3141 bytes -= bytes_added;
3142 offset += bytes_added;
3143 spin_unlock(&ctl->tree_lock);
3144
3145 if (bytes)
3146 goto again;
3147
3148 if (map)
3149 kfree(map);
3150 return 0;
3151}
3152
3153#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__)
3154
3155/*
3156 * This test just does basic sanity checking, making sure we can add an extent
3157 * entry and remove space from either end and the middle, and make sure we can
3158 * remove space that covers adjacent extent entries.
3159 */
3160static int test_extents(struct btrfs_block_group_cache *cache)
3161{
3162 int ret = 0;
3163
3164 test_msg("Running extent only tests\n");
3165
3166 /* First just make sure we can remove an entire entry */
3167 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3168 if (ret) {
3169 test_msg("Error adding initial extents %d\n", ret);
3170 return ret;
3171 }
3172
3173 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3174 if (ret) {
3175 test_msg("Error removing extent %d\n", ret);
3176 return ret;
3177 }
3178
3179 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3180 test_msg("Full remove left some lingering space\n");
3181 return -1;
3182 }
3183
3184 /* Ok edge and middle cases now */
3185 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3186 if (ret) {
3187 test_msg("Error adding half extent %d\n", ret);
3188 return ret;
3189 }
3190
3191 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
3192 if (ret) {
3193 test_msg("Error removing tail end %d\n", ret);
3194 return ret;
3195 }
3196
3197 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3198 if (ret) {
3199 test_msg("Error removing front end %d\n", ret);
3200 return ret;
3201 }
3202
3203 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
3204 if (ret) {
3205 test_msg("Error removing middle piece %d\n", ret);
3206 return ret;
3207 }
3208
3209 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3210 test_msg("Still have space at the front\n");
3211 return -1;
3212 }
3213
3214 if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
3215 test_msg("Still have space in the middle\n");
3216 return -1;
3217 }
3218
3219 if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
3220 test_msg("Still have space at the end\n");
3221 return -1;
3222 }
3223
3224 /* Cleanup */
3225 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3226
3227 return 0;
3228}
3229
3230static int test_bitmaps(struct btrfs_block_group_cache *cache)
3231{
3232 u64 next_bitmap_offset;
3233 int ret;
3234
3235 test_msg("Running bitmap only tests\n");
3236
3237 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3238 if (ret) {
3239 test_msg("Couldn't create a bitmap entry %d\n", ret);
3240 return ret;
3241 }
3242
3243 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3244 if (ret) {
3245 test_msg("Error removing bitmap full range %d\n", ret);
3246 return ret;
3247 }
3248
3249 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3250 test_msg("Left some space in bitmap\n");
3251 return -1;
3252 }
3253
3254 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3255 if (ret) {
3256 test_msg("Couldn't add to our bitmap entry %d\n", ret);
3257 return ret;
3258 }
3259
3260 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
3261 if (ret) {
3262 test_msg("Couldn't remove middle chunk %d\n", ret);
3263 return ret;
3264 }
3265
3266 /*
3267 * The first bitmap we have starts at offset 0 so the next one is just
3268 * at the end of the first bitmap.
3269 */
3270 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3271
3272 /* Test a bit straddling two bitmaps */
3273 ret = add_free_space_entry(cache, next_bitmap_offset -
3274 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
3275 if (ret) {
3276 test_msg("Couldn't add space that straddles two bitmaps %d\n",
3277 ret);
3278 return ret;
3279 }
3280
3281 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
3282 (1 * 1024 * 1024), 2 * 1024 * 1024);
3283 if (ret) {
3284 test_msg("Couldn't remove overlapping space %d\n", ret);
3285 return ret;
3286 }
3287
3288 if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
3289 2 * 1024 * 1024)) {
3290 test_msg("Left some space when removing overlapping\n");
3291 return -1;
3292 }
3293
3294 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3295
3296 return 0;
3297}
3298
3299/* This is the high grade jackassery */
3300static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
3301{
3302 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3303 int ret;
3304
3305 test_msg("Running bitmap and extent tests\n");
3306
3307 /*
3308 * First let's do something simple, an extent at the same offset as the
3309 * bitmap, but the free space completely in the extent and then
3310 * completely in the bitmap.
3311 */
3312 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
3313 if (ret) {
3314 test_msg("Couldn't create bitmap entry %d\n", ret);
3315 return ret;
3316 }
3317
3318 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3319 if (ret) {
3320 test_msg("Couldn't add extent entry %d\n", ret);
3321 return ret;
3322 }
3323
3324 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3325 if (ret) {
3326 test_msg("Couldn't remove extent entry %d\n", ret);
3327 return ret;
3328 }
3329
3330 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3331 test_msg("Left remnants after our remove\n");
3332 return -1;
3333 }
3334
3335 /* Now to add back the extent entry and remove from the bitmap */
3336 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3337 if (ret) {
3338 test_msg("Couldn't re-add extent entry %d\n", ret);
3339 return ret;
3340 }
3341
3342 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
3343 if (ret) {
3344 test_msg("Couldn't remove from bitmap %d\n", ret);
3345 return ret;
3346 }
3347
3348 if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
3349 test_msg("Left remnants in the bitmap\n");
3350 return -1;
3351 }
3352
3353 /*
3354 * Ok so a little more evil, extent entry and bitmap at the same offset,
3355 * removing an overlapping chunk.
3356 */
3357 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
3358 if (ret) {
3359 test_msg("Couldn't add to a bitmap %d\n", ret);
3360 return ret;
3361 }
3362
3363 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
3364 if (ret) {
3365 test_msg("Couldn't remove overlapping space %d\n", ret);
3366 return ret;
3367 }
3368
3369 if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
3370 test_msg("Left over pieces after removing overlapping\n");
3371 return -1;
3372 }
3373
3374 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3375
3376 /* Now with the extent entry offset into the bitmap */
3377 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
3378 if (ret) {
3379 test_msg("Couldn't add space to the bitmap %d\n", ret);
3380 return ret;
3381 }
3382
3383 ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
3384 if (ret) {
3385 test_msg("Couldn't add extent to the cache %d\n", ret);
3386 return ret;
3387 }
3388
3389 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
3390 if (ret) {
3391 test_msg("Problem removing overlapping space %d\n", ret);
3392 return ret;
3393 }
3394
3395 if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
3396 test_msg("Left something behind when removing space\n");
3397 return -1;
3398 }
3399
3400 /*
3401 * This has blown up in the past, the extent entry starts before the
3402 * bitmap entry, but we're trying to remove an offset that falls
3403 * completely within the bitmap range and is in both the extent entry
3404 * and the bitmap entry, looks like this
3405 *
3406 * [ extent ]
3407 * [ bitmap ]
3408 * [ del ]
3409 */
3410 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3411 ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
3412 4 * 1024 * 1024, 1);
3413 if (ret) {
3414 test_msg("Couldn't add bitmap %d\n", ret);
3415 return ret;
3416 }
3417
3418 ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
3419 5 * 1024 * 1024, 0);
3420 if (ret) {
3421 test_msg("Couldn't add extent entry %d\n", ret);
3422 return ret;
3423 }
3424
3425 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
3426 5 * 1024 * 1024);
3427 if (ret) {
3428 test_msg("Failed to free our space %d\n", ret);
3429 return ret;
3430 }
3431
3432 if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
3433 5 * 1024 * 1024)) {
3434 test_msg("Left stuff over\n");
3435 return -1;
3436 }
3437
3438 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3439
3440 /*
3441 * This blew up before, we have part of the free space in a bitmap and
3442 * then the entirety of the rest of the space in an extent. This used
3443 * to return -EAGAIN back from btrfs_remove_extent, make sure this
3444 * doesn't happen.
3445 */
3446 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
3447 if (ret) {
3448 test_msg("Couldn't add bitmap entry %d\n", ret);
3449 return ret;
3450 }
3451
3452 ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
3453 if (ret) {
3454 test_msg("Couldn't add extent entry %d\n", ret);
3455 return ret;
3456 }
3457
3458 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
3459 if (ret) {
3460 test_msg("Error removing bitmap and extent overlapping %d\n", ret);
3461 return ret;
3462 }
3463
3464 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3465 return 0;
3466}
3467
3468void btrfs_test_free_space_cache(void)
3469{
3470 struct btrfs_block_group_cache *cache;
3471
3472 test_msg("Running btrfs free space cache tests\n");
3473
3474 cache = init_test_block_group();
3475 if (!cache) {
3476 test_msg("Couldn't run the tests\n");
3477 return;
3478 }
3479
3480 if (test_extents(cache))
3481 goto out;
3482 if (test_bitmaps(cache))
3483 goto out;
3484 if (test_bitmaps_and_extents(cache))
3485 goto out;
3486out:
3487 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3488 kfree(cache->free_space_ctl);
3489 kfree(cache);
3490 test_msg("Free space cache tests finished\n");
3491}
3492#undef test_msg
3493#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
3494void btrfs_test_free_space_cache(void) {}
3495#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 894116b71304..c74904167476 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
100 u64 bytes); 100 u64 bytes);
101int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 101int btrfs_find_space_cluster(struct btrfs_root *root,
102 struct btrfs_root *root,
103 struct btrfs_block_group_cache *block_group, 102 struct btrfs_block_group_cache *block_group,
104 struct btrfs_free_cluster *cluster, 103 struct btrfs_free_cluster *cluster,
105 u64 offset, u64 bytes, u64 empty_size); 104 u64 offset, u64 bytes, u64 empty_size);
@@ -113,6 +112,12 @@ int btrfs_return_cluster_to_free_space(
113int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 112int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
114 u64 *trimmed, u64 start, u64 end, u64 minlen); 113 u64 *trimmed, u64 start, u64 end, u64 minlen);
115 114
116void btrfs_test_free_space_cache(void); 115/* Support functions for running our sanity tests */
116#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
117int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
118 u64 offset, u64 bytes, bool bitmap);
119int test_check_exists(struct btrfs_block_group_cache *cache,
120 u64 offset, u64 bytes);
121#endif
117 122
118#endif 123#endif
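With the helpers exported under CONFIG_BTRFS_FS_RUN_SANITY_TESTS, self-tests living outside free-space-cache.c can build unusual cache layouts and check removals. A hypothetical sketch modelled on the bitmap test removed above (the prepared block group is assumed to come from the test harness):

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* hypothetical external self-test; "cache" is a prepared block group,
 * as in the removed in-file tests */
static int sanity_bitmap_roundtrip(struct btrfs_block_group_cache *cache)
{
	int ret;

	ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
	if (ret)
		return ret;

	ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
	if (ret)
		return ret;

	/* nothing of the bitmap range may survive the removal */
	return test_check_exists(cache, 0, 4 * 1024 * 1024) ? -EINVAL : 0;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */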
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bdc83d04d54..db1e43948579 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -230,12 +230,13 @@ fail:
230 * does the checks required to make sure the data is small enough 230 * does the checks required to make sure the data is small enough
231 * to fit as an inline extent. 231 * to fit as an inline extent.
232 */ 232 */
233static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 233static noinline int cow_file_range_inline(struct btrfs_root *root,
234 struct btrfs_root *root, 234 struct inode *inode, u64 start,
235 struct inode *inode, u64 start, u64 end, 235 u64 end, size_t compressed_size,
236 size_t compressed_size, int compress_type, 236 int compress_type,
237 struct page **compressed_pages) 237 struct page **compressed_pages)
238{ 238{
239 struct btrfs_trans_handle *trans;
239 u64 isize = i_size_read(inode); 240 u64 isize = i_size_read(inode);
240 u64 actual_end = min(end + 1, isize); 241 u64 actual_end = min(end + 1, isize);
241 u64 inline_len = actual_end - start; 242 u64 inline_len = actual_end - start;
@@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
256 return 1; 257 return 1;
257 } 258 }
258 259
260 trans = btrfs_join_transaction(root);
261 if (IS_ERR(trans))
262 return PTR_ERR(trans);
263 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
264
259 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 265 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
260 if (ret) 266 if (ret) {
261 return ret; 267 btrfs_abort_transaction(trans, root, ret);
268 goto out;
269 }
262 270
263 if (isize > actual_end) 271 if (isize > actual_end)
264 inline_len = min_t(u64, isize, actual_end); 272 inline_len = min_t(u64, isize, actual_end);
@@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
267 compress_type, compressed_pages); 275 compress_type, compressed_pages);
268 if (ret && ret != -ENOSPC) { 276 if (ret && ret != -ENOSPC) {
269 btrfs_abort_transaction(trans, root, ret); 277 btrfs_abort_transaction(trans, root, ret);
270 return ret; 278 goto out;
271 } else if (ret == -ENOSPC) { 279 } else if (ret == -ENOSPC) {
272 return 1; 280 ret = 1;
281 goto out;
273 } 282 }
274 283
275 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); 284 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
276 btrfs_delalloc_release_metadata(inode, end + 1 - start); 285 btrfs_delalloc_release_metadata(inode, end + 1 - start);
277 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
278 return 0; 287out:
288 btrfs_end_transaction(trans, root);
289 return ret;
279} 290}
280 291
281struct async_extent { 292struct async_extent {
@@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode,
343 int *num_added) 354 int *num_added)
344{ 355{
345 struct btrfs_root *root = BTRFS_I(inode)->root; 356 struct btrfs_root *root = BTRFS_I(inode)->root;
346 struct btrfs_trans_handle *trans;
347 u64 num_bytes; 357 u64 num_bytes;
348 u64 blocksize = root->sectorsize; 358 u64 blocksize = root->sectorsize;
349 u64 actual_end; 359 u64 actual_end;
@@ -461,45 +471,36 @@ again:
461 } 471 }
462cont: 472cont:
463 if (start == 0) { 473 if (start == 0) {
464 trans = btrfs_join_transaction(root);
465 if (IS_ERR(trans)) {
466 ret = PTR_ERR(trans);
467 trans = NULL;
468 goto cleanup_and_out;
469 }
470 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
471
472 /* lets try to make an inline extent */ 474 /* lets try to make an inline extent */
473 if (ret || total_in < (actual_end - start)) { 475 if (ret || total_in < (actual_end - start)) {
474 /* we didn't compress the entire range, try 476 /* we didn't compress the entire range, try
475 * to make an uncompressed inline extent. 477 * to make an uncompressed inline extent.
476 */ 478 */
477 ret = cow_file_range_inline(trans, root, inode, 479 ret = cow_file_range_inline(root, inode, start, end,
478 start, end, 0, 0, NULL); 480 0, 0, NULL);
479 } else { 481 } else {
480 /* try making a compressed inline extent */ 482 /* try making a compressed inline extent */
481 ret = cow_file_range_inline(trans, root, inode, 483 ret = cow_file_range_inline(root, inode, start, end,
482 start, end,
483 total_compressed, 484 total_compressed,
484 compress_type, pages); 485 compress_type, pages);
485 } 486 }
486 if (ret <= 0) { 487 if (ret <= 0) {
488 unsigned long clear_flags = EXTENT_DELALLOC |
489 EXTENT_DEFRAG;
490 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
491
487 /* 492 /*
488 * inline extent creation worked or returned error, 493 * inline extent creation worked or returned error,
489 * we don't need to create any more async work items. 494 * we don't need to create any more async work items.
490 * Unlock and free up our temp pages. 495 * Unlock and free up our temp pages.
491 */ 496 */
492 extent_clear_unlock_delalloc(inode, 497 extent_clear_unlock_delalloc(inode, start, end, NULL,
493 &BTRFS_I(inode)->io_tree, 498 clear_flags, PAGE_UNLOCK |
494 start, end, NULL, 499 PAGE_CLEAR_DIRTY |
495 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 500 PAGE_SET_WRITEBACK |
496 EXTENT_CLEAR_DELALLOC | 501 PAGE_END_WRITEBACK);
497 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
498
499 btrfs_end_transaction(trans, root);
500 goto free_pages_out; 502 goto free_pages_out;
501 } 503 }
502 btrfs_end_transaction(trans, root);
503 } 504 }
504 505
505 if (will_compress) { 506 if (will_compress) {
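The calls rewritten above follow the new extent_clear_unlock_delalloc() signature used throughout this diff: the io_tree argument is gone (the tree is derived from the inode), and the extent bits to clear are passed separately from the page operations. A before/after comparison of the call shape from the hunk above; the values shown are taken from that hunk and stand in for whatever bits a given call site needs.

/* old shape: io_tree passed explicitly, one combined flag word */
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, end, NULL,
			     EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
			     EXTENT_CLEAR_DELALLOC | EXTENT_SET_WRITEBACK |
			     EXTENT_END_WRITEBACK);

/* new shape: extent bits to clear, then the page operations to apply */
extent_clear_unlock_delalloc(inode, start, end, NULL,
			     EXTENT_DELALLOC | EXTENT_DEFRAG,
			     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
			     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);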
@@ -590,20 +591,6 @@ free_pages_out:
590 kfree(pages); 591 kfree(pages);
591 592
592 goto out; 593 goto out;
593
594cleanup_and_out:
595 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
596 start, end, NULL,
597 EXTENT_CLEAR_UNLOCK_PAGE |
598 EXTENT_CLEAR_DIRTY |
599 EXTENT_CLEAR_DELALLOC |
600 EXTENT_SET_WRITEBACK |
601 EXTENT_END_WRITEBACK);
602 if (!trans || IS_ERR(trans))
603 btrfs_error(root->fs_info, ret, "Failed to join transaction");
604 else
605 btrfs_abort_transaction(trans, root, ret);
606 goto free_pages_out;
607} 594}
608 595
609/* 596/*
@@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
617{ 604{
618 struct async_extent *async_extent; 605 struct async_extent *async_extent;
619 u64 alloc_hint = 0; 606 u64 alloc_hint = 0;
620 struct btrfs_trans_handle *trans;
621 struct btrfs_key ins; 607 struct btrfs_key ins;
622 struct extent_map *em; 608 struct extent_map *em;
623 struct btrfs_root *root = BTRFS_I(inode)->root; 609 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -678,20 +664,10 @@ retry:
678 lock_extent(io_tree, async_extent->start, 664 lock_extent(io_tree, async_extent->start,
679 async_extent->start + async_extent->ram_size - 1); 665 async_extent->start + async_extent->ram_size - 1);
680 666
681 trans = btrfs_join_transaction(root); 667 ret = btrfs_reserve_extent(root,
682 if (IS_ERR(trans)) {
683 ret = PTR_ERR(trans);
684 } else {
685 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
686 ret = btrfs_reserve_extent(trans, root,
687 async_extent->compressed_size, 668 async_extent->compressed_size,
688 async_extent->compressed_size, 669 async_extent->compressed_size,
689 0, alloc_hint, &ins, 1); 670 0, alloc_hint, &ins, 1);
690 if (ret && ret != -ENOSPC)
691 btrfs_abort_transaction(trans, root, ret);
692 btrfs_end_transaction(trans, root);
693 }
694
695 if (ret) { 671 if (ret) {
696 int i; 672 int i;
697 673
@@ -770,16 +746,12 @@ retry:
770 /* 746 /*
771 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
772 */ 748 */
773 extent_clear_unlock_delalloc(inode, 749 extent_clear_unlock_delalloc(inode, async_extent->start,
774 &BTRFS_I(inode)->io_tree,
775 async_extent->start,
776 async_extent->start + 750 async_extent->start +
777 async_extent->ram_size - 1, 751 async_extent->ram_size - 1,
778 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 752 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
779 EXTENT_CLEAR_UNLOCK | 753 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
780 EXTENT_CLEAR_DELALLOC | 754 PAGE_SET_WRITEBACK);
781 EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
782
783 ret = btrfs_submit_compressed_write(inode, 755 ret = btrfs_submit_compressed_write(inode,
784 async_extent->start, 756 async_extent->start,
785 async_extent->ram_size, 757 async_extent->ram_size,
@@ -798,16 +770,13 @@ out:
798out_free_reserve: 770out_free_reserve:
799 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 771 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
800out_free: 772out_free:
801 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 773 extent_clear_unlock_delalloc(inode, async_extent->start,
802 async_extent->start,
803 async_extent->start + 774 async_extent->start +
804 async_extent->ram_size - 1, 775 async_extent->ram_size - 1,
805 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 776 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
806 EXTENT_CLEAR_UNLOCK | 777 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
807 EXTENT_CLEAR_DELALLOC | 778 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
808 EXTENT_CLEAR_DIRTY | 779 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
809 EXTENT_SET_WRITEBACK |
810 EXTENT_END_WRITEBACK);
811 kfree(async_extent); 780 kfree(async_extent);
812 goto again; 781 goto again;
813} 782}
@@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
857 * required to start IO on it. It may be clean and already done with 826 * required to start IO on it. It may be clean and already done with
858 * IO when we return. 827 * IO when we return.
859 */ 828 */
860static noinline int __cow_file_range(struct btrfs_trans_handle *trans, 829static noinline int cow_file_range(struct inode *inode,
861 struct inode *inode, 830 struct page *locked_page,
862 struct btrfs_root *root, 831 u64 start, u64 end, int *page_started,
863 struct page *locked_page, 832 unsigned long *nr_written,
864 u64 start, u64 end, int *page_started, 833 int unlock)
865 unsigned long *nr_written,
866 int unlock)
867{ 834{
835 struct btrfs_root *root = BTRFS_I(inode)->root;
868 u64 alloc_hint = 0; 836 u64 alloc_hint = 0;
869 u64 num_bytes; 837 u64 num_bytes;
870 unsigned long ram_size; 838 unsigned long ram_size;
@@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
885 /* if this is a small write inside eof, kick off defrag */ 853 /* if this is a small write inside eof, kick off defrag */
886 if (num_bytes < 64 * 1024 && 854 if (num_bytes < 64 * 1024 &&
887 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) 855 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
888 btrfs_add_inode_defrag(trans, inode); 856 btrfs_add_inode_defrag(NULL, inode);
889 857
890 if (start == 0) { 858 if (start == 0) {
891 /* lets try to make an inline extent */ 859 /* lets try to make an inline extent */
892 ret = cow_file_range_inline(trans, root, inode, 860 ret = cow_file_range_inline(root, inode, start, end, 0, 0,
893 start, end, 0, 0, NULL); 861 NULL);
894 if (ret == 0) { 862 if (ret == 0) {
895 extent_clear_unlock_delalloc(inode, 863 extent_clear_unlock_delalloc(inode, start, end, NULL,
896 &BTRFS_I(inode)->io_tree, 864 EXTENT_LOCKED | EXTENT_DELALLOC |
897 start, end, NULL, 865 EXTENT_DEFRAG, PAGE_UNLOCK |
898 EXTENT_CLEAR_UNLOCK_PAGE | 866 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
899 EXTENT_CLEAR_UNLOCK | 867 PAGE_END_WRITEBACK);
900 EXTENT_CLEAR_DELALLOC |
901 EXTENT_CLEAR_DIRTY |
902 EXTENT_SET_WRITEBACK |
903 EXTENT_END_WRITEBACK);
904 868
905 *nr_written = *nr_written + 869 *nr_written = *nr_written +
906 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 870 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
907 *page_started = 1; 871 *page_started = 1;
908 goto out; 872 goto out;
909 } else if (ret < 0) { 873 } else if (ret < 0) {
910 btrfs_abort_transaction(trans, root, ret);
911 goto out_unlock; 874 goto out_unlock;
912 } 875 }
913 } 876 }
@@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
922 unsigned long op; 885 unsigned long op;
923 886
924 cur_alloc_size = disk_num_bytes; 887 cur_alloc_size = disk_num_bytes;
925 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 888 ret = btrfs_reserve_extent(root, cur_alloc_size,
926 root->sectorsize, 0, alloc_hint, 889 root->sectorsize, 0, alloc_hint,
927 &ins, 1); 890 &ins, 1);
928 if (ret < 0) { 891 if (ret < 0)
929 btrfs_abort_transaction(trans, root, ret);
930 goto out_unlock; 892 goto out_unlock;
931 }
932 893
933 em = alloc_extent_map(); 894 em = alloc_extent_map();
934 if (!em) { 895 if (!em) {
@@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
974 BTRFS_DATA_RELOC_TREE_OBJECTID) { 935 BTRFS_DATA_RELOC_TREE_OBJECTID) {
975 ret = btrfs_reloc_clone_csums(inode, start, 936 ret = btrfs_reloc_clone_csums(inode, start,
976 cur_alloc_size); 937 cur_alloc_size);
977 if (ret) { 938 if (ret)
978 btrfs_abort_transaction(trans, root, ret);
979 goto out_reserve; 939 goto out_reserve;
980 }
981 } 940 }
982 941
983 if (disk_num_bytes < cur_alloc_size) 942 if (disk_num_bytes < cur_alloc_size)
@@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
990 * Do set the Private2 bit so we know this page was properly 949 * Do set the Private2 bit so we know this page was properly
991 * setup for writepage 950 * setup for writepage
992 */ 951 */
993 op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; 952 op = unlock ? PAGE_UNLOCK : 0;
994 op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 953 op |= PAGE_SET_PRIVATE2;
995 EXTENT_SET_PRIVATE2;
996 954
997 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 955 extent_clear_unlock_delalloc(inode, start,
998 start, start + ram_size - 1, 956 start + ram_size - 1, locked_page,
999 locked_page, op); 957 EXTENT_LOCKED | EXTENT_DELALLOC,
958 op);
1000 disk_num_bytes -= cur_alloc_size; 959 disk_num_bytes -= cur_alloc_size;
1001 num_bytes -= cur_alloc_size; 960 num_bytes -= cur_alloc_size;
1002 alloc_hint = ins.objectid + ins.offset; 961 alloc_hint = ins.objectid + ins.offset;
@@ -1008,52 +967,14 @@ out:
1008out_reserve: 967out_reserve:
1009 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 968 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
1010out_unlock: 969out_unlock:
1011 extent_clear_unlock_delalloc(inode, 970 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1012 &BTRFS_I(inode)->io_tree, 971 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1013 start, end, locked_page, 972 EXTENT_DELALLOC | EXTENT_DEFRAG,
1014 EXTENT_CLEAR_UNLOCK_PAGE | 973 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1015 EXTENT_CLEAR_UNLOCK | 974 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1016 EXTENT_CLEAR_DELALLOC |
1017 EXTENT_CLEAR_DIRTY |
1018 EXTENT_SET_WRITEBACK |
1019 EXTENT_END_WRITEBACK);
1020
1021 goto out; 975 goto out;
1022} 976}
1023 977
1024static noinline int cow_file_range(struct inode *inode,
1025 struct page *locked_page,
1026 u64 start, u64 end, int *page_started,
1027 unsigned long *nr_written,
1028 int unlock)
1029{
1030 struct btrfs_trans_handle *trans;
1031 struct btrfs_root *root = BTRFS_I(inode)->root;
1032 int ret;
1033
1034 trans = btrfs_join_transaction(root);
1035 if (IS_ERR(trans)) {
1036 extent_clear_unlock_delalloc(inode,
1037 &BTRFS_I(inode)->io_tree,
1038 start, end, locked_page,
1039 EXTENT_CLEAR_UNLOCK_PAGE |
1040 EXTENT_CLEAR_UNLOCK |
1041 EXTENT_CLEAR_DELALLOC |
1042 EXTENT_CLEAR_DIRTY |
1043 EXTENT_SET_WRITEBACK |
1044 EXTENT_END_WRITEBACK);
1045 return PTR_ERR(trans);
1046 }
1047 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1048
1049 ret = __cow_file_range(trans, inode, root, locked_page, start, end,
1050 page_started, nr_written, unlock);
1051
1052 btrfs_end_transaction(trans, root);
1053
1054 return ret;
1055}
1056
1057/* 978/*
1058 * work queue call back to started compression on a file and pages 979 * work queue call back to started compression on a file and pages
1059 */ 980 */
@@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1221 1142
1222 path = btrfs_alloc_path(); 1143 path = btrfs_alloc_path();
1223 if (!path) { 1144 if (!path) {
1224 extent_clear_unlock_delalloc(inode, 1145 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1225 &BTRFS_I(inode)->io_tree, 1146 EXTENT_LOCKED | EXTENT_DELALLOC |
1226 start, end, locked_page, 1147 EXTENT_DO_ACCOUNTING |
1227 EXTENT_CLEAR_UNLOCK_PAGE | 1148 EXTENT_DEFRAG, PAGE_UNLOCK |
1228 EXTENT_CLEAR_UNLOCK | 1149 PAGE_CLEAR_DIRTY |
1229 EXTENT_CLEAR_DELALLOC | 1150 PAGE_SET_WRITEBACK |
1230 EXTENT_CLEAR_DIRTY | 1151 PAGE_END_WRITEBACK);
1231 EXTENT_SET_WRITEBACK |
1232 EXTENT_END_WRITEBACK);
1233 return -ENOMEM; 1152 return -ENOMEM;
1234 } 1153 }
1235 1154
@@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1241 trans = btrfs_join_transaction(root); 1160 trans = btrfs_join_transaction(root);
1242 1161
1243 if (IS_ERR(trans)) { 1162 if (IS_ERR(trans)) {
1244 extent_clear_unlock_delalloc(inode, 1163 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1245 &BTRFS_I(inode)->io_tree, 1164 EXTENT_LOCKED | EXTENT_DELALLOC |
1246 start, end, locked_page, 1165 EXTENT_DO_ACCOUNTING |
1247 EXTENT_CLEAR_UNLOCK_PAGE | 1166 EXTENT_DEFRAG, PAGE_UNLOCK |
1248 EXTENT_CLEAR_UNLOCK | 1167 PAGE_CLEAR_DIRTY |
1249 EXTENT_CLEAR_DELALLOC | 1168 PAGE_SET_WRITEBACK |
1250 EXTENT_CLEAR_DIRTY | 1169 PAGE_END_WRITEBACK);
1251 EXTENT_SET_WRITEBACK |
1252 EXTENT_END_WRITEBACK);
1253 btrfs_free_path(path); 1170 btrfs_free_path(path);
1254 return PTR_ERR(trans); 1171 return PTR_ERR(trans);
1255 } 1172 }
@@ -1369,9 +1286,9 @@ out_check:
1369 1286
1370 btrfs_release_path(path); 1287 btrfs_release_path(path);
1371 if (cow_start != (u64)-1) { 1288 if (cow_start != (u64)-1) {
1372 ret = __cow_file_range(trans, inode, root, locked_page, 1289 ret = cow_file_range(inode, locked_page,
1373 cow_start, found_key.offset - 1, 1290 cow_start, found_key.offset - 1,
1374 page_started, nr_written, 1); 1291 page_started, nr_written, 1);
1375 if (ret) { 1292 if (ret) {
1376 btrfs_abort_transaction(trans, root, ret); 1293 btrfs_abort_transaction(trans, root, ret);
1377 goto error; 1294 goto error;
@@ -1428,11 +1345,11 @@ out_check:
1428 } 1345 }
1429 } 1346 }
1430 1347
1431 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1348 extent_clear_unlock_delalloc(inode, cur_offset,
1432 cur_offset, cur_offset + num_bytes - 1, 1349 cur_offset + num_bytes - 1,
1433 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1350 locked_page, EXTENT_LOCKED |
1434 EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 1351 EXTENT_DELALLOC, PAGE_UNLOCK |
1435 EXTENT_SET_PRIVATE2); 1352 PAGE_SET_PRIVATE2);
1436 cur_offset = extent_end; 1353 cur_offset = extent_end;
1437 if (cur_offset > end) 1354 if (cur_offset > end)
1438 break; 1355 break;
@@ -1445,9 +1362,8 @@ out_check:
1445 } 1362 }
1446 1363
1447 if (cow_start != (u64)-1) { 1364 if (cow_start != (u64)-1) {
1448 ret = __cow_file_range(trans, inode, root, locked_page, 1365 ret = cow_file_range(inode, locked_page, cow_start, end,
1449 cow_start, end, 1366 page_started, nr_written, 1);
1450 page_started, nr_written, 1);
1451 if (ret) { 1367 if (ret) {
1452 btrfs_abort_transaction(trans, root, ret); 1368 btrfs_abort_transaction(trans, root, ret);
1453 goto error; 1369 goto error;
@@ -1460,16 +1376,13 @@ error:
1460 ret = err; 1376 ret = err;
1461 1377
1462 if (ret && cur_offset < end) 1378 if (ret && cur_offset < end)
1463 extent_clear_unlock_delalloc(inode, 1379 extent_clear_unlock_delalloc(inode, cur_offset, end,
1464 &BTRFS_I(inode)->io_tree, 1380 locked_page, EXTENT_LOCKED |
1465 cur_offset, end, locked_page, 1381 EXTENT_DELALLOC | EXTENT_DEFRAG |
1466 EXTENT_CLEAR_UNLOCK_PAGE | 1382 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1467 EXTENT_CLEAR_UNLOCK | 1383 PAGE_CLEAR_DIRTY |
1468 EXTENT_CLEAR_DELALLOC | 1384 PAGE_SET_WRITEBACK |
1469 EXTENT_CLEAR_DIRTY | 1385 PAGE_END_WRITEBACK);
1470 EXTENT_SET_WRITEBACK |
1471 EXTENT_END_WRITEBACK);
1472
1473 btrfs_free_path(path); 1386 btrfs_free_path(path);
1474 return ret; 1387 return ret;
1475} 1388}
@@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2132 WARN_ON(1); 2045 WARN_ON(1);
2133 return ret; 2046 return ret;
2134 } 2047 }
2048 ret = 0;
2135 2049
2136 while (1) { 2050 while (1) {
2137 cond_resched(); 2051 cond_resched();
@@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2181 old->len || extent_offset + num_bytes <= 2095 old->len || extent_offset + num_bytes <=
2182 old->extent_offset + old->offset) 2096 old->extent_offset + old->offset)
2183 continue; 2097 continue;
2184
2185 ret = 0;
2186 break; 2098 break;
2187 } 2099 }
2188 2100
@@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2238 2150
2239static int relink_is_mergable(struct extent_buffer *leaf, 2151static int relink_is_mergable(struct extent_buffer *leaf,
2240 struct btrfs_file_extent_item *fi, 2152 struct btrfs_file_extent_item *fi,
2241 u64 disk_bytenr) 2153 struct new_sa_defrag_extent *new)
2242{ 2154{
2243 if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) 2155 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2244 return 0; 2156 return 0;
2245 2157
2246 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) 2158 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2247 return 0; 2159 return 0;
2248 2160
2249 if (btrfs_file_extent_compression(leaf, fi) || 2161 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2250 btrfs_file_extent_encryption(leaf, fi) || 2162 return 0;
2163
2164 if (btrfs_file_extent_encryption(leaf, fi) ||
2251 btrfs_file_extent_other_encoding(leaf, fi)) 2165 btrfs_file_extent_other_encoding(leaf, fi))
2252 return 0; 2166 return 0;
2253 2167
@@ -2391,8 +2305,8 @@ again:
2391 struct btrfs_file_extent_item); 2305 struct btrfs_file_extent_item);
2392 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 2306 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2393 2307
2394 if (relink_is_mergable(leaf, fi, new->bytenr) && 2308 if (extent_len + found_key.offset == start &&
2395 extent_len + found_key.offset == start) { 2309 relink_is_mergable(leaf, fi, new)) {
2396 btrfs_set_file_extent_num_bytes(leaf, fi, 2310 btrfs_set_file_extent_num_bytes(leaf, fi,
2397 extent_len + len); 2311 extent_len + len);
2398 btrfs_mark_buffer_dirty(leaf); 2312 btrfs_mark_buffer_dirty(leaf);
@@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2648 struct extent_state *cached_state = NULL; 2562 struct extent_state *cached_state = NULL;
2649 struct new_sa_defrag_extent *new = NULL; 2563 struct new_sa_defrag_extent *new = NULL;
2650 int compress_type = 0; 2564 int compress_type = 0;
2651 int ret; 2565 int ret = 0;
2566 u64 logical_len = ordered_extent->len;
2652 bool nolock; 2567 bool nolock;
2568 bool truncated = false;
2653 2569
2654 nolock = btrfs_is_free_space_inode(inode); 2570 nolock = btrfs_is_free_space_inode(inode);
2655 2571
@@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2658 goto out; 2574 goto out;
2659 } 2575 }
2660 2576
2577 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
2578 truncated = true;
2579 logical_len = ordered_extent->truncated_len;
2580 /* Truncated the entire extent, don't bother adding */
2581 if (!logical_len)
2582 goto out;
2583 }
2584
2661 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 2585 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
2662 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ 2586 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
2663 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 2587 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2713 ret = btrfs_mark_extent_written(trans, inode, 2637 ret = btrfs_mark_extent_written(trans, inode,
2714 ordered_extent->file_offset, 2638 ordered_extent->file_offset,
2715 ordered_extent->file_offset + 2639 ordered_extent->file_offset +
2716 ordered_extent->len); 2640 logical_len);
2717 } else { 2641 } else {
2718 BUG_ON(root == root->fs_info->tree_root); 2642 BUG_ON(root == root->fs_info->tree_root);
2719 ret = insert_reserved_file_extent(trans, inode, 2643 ret = insert_reserved_file_extent(trans, inode,
2720 ordered_extent->file_offset, 2644 ordered_extent->file_offset,
2721 ordered_extent->start, 2645 ordered_extent->start,
2722 ordered_extent->disk_len, 2646 ordered_extent->disk_len,
2723 ordered_extent->len, 2647 logical_len, logical_len,
2724 ordered_extent->len,
2725 compress_type, 0, 0, 2648 compress_type, 0, 0,
2726 BTRFS_FILE_EXTENT_REG); 2649 BTRFS_FILE_EXTENT_REG);
2727 } 2650 }
@@ -2753,17 +2676,27 @@ out:
2753 if (trans) 2676 if (trans)
2754 btrfs_end_transaction(trans, root); 2677 btrfs_end_transaction(trans, root);
2755 2678
2756 if (ret) { 2679 if (ret || truncated) {
2757 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2680 u64 start, end;
2758 ordered_extent->file_offset + 2681
2759 ordered_extent->len - 1, NULL, GFP_NOFS); 2682 if (truncated)
2683 start = ordered_extent->file_offset + logical_len;
2684 else
2685 start = ordered_extent->file_offset;
2686 end = ordered_extent->file_offset + ordered_extent->len - 1;
2687 clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
2688
2689 /* Drop the cache for the part of the extent we didn't write. */
2690 btrfs_drop_extent_cache(inode, start, end, 0);
2760 2691
2761 /* 2692 /*
2762 * If the ordered extent had an IOERR or something else went 2693 * If the ordered extent had an IOERR or something else went
2763 * wrong we need to return the space for this ordered extent 2694 * wrong we need to return the space for this ordered extent
2764 * back to the allocator. 2695 * back to the allocator. We only free the extent in the
2696 * truncated case if we didn't write out the extent at all.
2765 */ 2697 */
2766 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2698 if ((ret || !logical_len) &&
2699 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2767 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2700 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2768 btrfs_free_reserved_extent(root, ordered_extent->start, 2701 btrfs_free_reserved_extent(root, ordered_extent->start,
2769 ordered_extent->disk_len); 2702 ordered_extent->disk_len);
@@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2827 * if there's a match, we allow the bio to finish. If not, the code in 2760 * if there's a match, we allow the bio to finish. If not, the code in
2828 * extent_io.c will try to find good copies for us. 2761 * extent_io.c will try to find good copies for us.
2829 */ 2762 */
2830static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2763static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2831 struct extent_state *state, int mirror) 2764 u64 phy_offset, struct page *page,
2765 u64 start, u64 end, int mirror)
2832{ 2766{
2833 size_t offset = start - page_offset(page); 2767 size_t offset = start - page_offset(page);
2834 struct inode *inode = page->mapping->host; 2768 struct inode *inode = page->mapping->host;
2835 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2769 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2836 char *kaddr; 2770 char *kaddr;
2837 u64 private = ~(u32)0;
2838 int ret;
2839 struct btrfs_root *root = BTRFS_I(inode)->root; 2771 struct btrfs_root *root = BTRFS_I(inode)->root;
2772 u32 csum_expected;
2840 u32 csum = ~(u32)0; 2773 u32 csum = ~(u32)0;
2841 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 2774 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2842 DEFAULT_RATELIMIT_BURST); 2775 DEFAULT_RATELIMIT_BURST);
@@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2856 return 0; 2789 return 0;
2857 } 2790 }
2858 2791
2859 if (state && state->start == start) { 2792 phy_offset >>= inode->i_sb->s_blocksize_bits;
2860 private = state->private; 2793 csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
2861 ret = 0;
2862 } else {
2863 ret = get_state_private(io_tree, start, &private);
2864 }
2865 kaddr = kmap_atomic(page);
2866 if (ret)
2867 goto zeroit;
2868 2794
2795 kaddr = kmap_atomic(page);
2869 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); 2796 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2870 btrfs_csum_final(csum, (char *)&csum); 2797 btrfs_csum_final(csum, (char *)&csum);
2871 if (csum != private) 2798 if (csum != csum_expected)
2872 goto zeroit; 2799 goto zeroit;
2873 2800
2874 kunmap_atomic(kaddr); 2801 kunmap_atomic(kaddr);
@@ -2877,14 +2804,12 @@ good:
2877 2804
2878zeroit: 2805zeroit:
2879 if (__ratelimit(&_rs)) 2806 if (__ratelimit(&_rs))
2880 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", 2807 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
2881 (unsigned long long)btrfs_ino(page->mapping->host), 2808 btrfs_ino(page->mapping->host), start, csum, csum_expected);
2882 (unsigned long long)start, csum,
2883 (unsigned long long)private);
2884 memset(kaddr + offset, 1, end - start + 1); 2809 memset(kaddr + offset, 1, end - start + 1);
2885 flush_dcache_page(page); 2810 flush_dcache_page(page);
2886 kunmap_atomic(kaddr); 2811 kunmap_atomic(kaddr);
2887 if (private == 0) 2812 if (csum_expected == 0)
2888 return 0; 2813 return 0;
2889 return -EIO; 2814 return -EIO;
2890} 2815}
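After this change the expected checksum for a readpage completion no longer comes from the extent state's private field; it is read out of the csum array carried by the btrfs_io_bio, indexed by the block offset within the bio. The indexing step in isolation, as a sketch: the helper name is made up, but the shift and the array lookup mirror the hunk above.

/* phy_offset arrives as a byte offset from the start of the bio's data */
static u32 lookup_expected_csum(struct btrfs_io_bio *io_bio, struct inode *inode,
				u64 phy_offset)
{
	/* convert the byte offset into a block index ... */
	phy_offset >>= inode->i_sb->s_blocksize_bits;

	/* ... and use it to pick the matching entry from the csum array */
	return *(((u32 *)io_bio->csum) + phy_offset);
}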
@@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2971 btrfs_root_refs(&root->root_item) > 0) { 2896 btrfs_root_refs(&root->root_item) > 0) {
2972 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, 2897 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2973 root->root_key.objectid); 2898 root->root_key.objectid);
2974 BUG_ON(ret); 2899 if (ret)
2975 root->orphan_item_inserted = 0; 2900 btrfs_abort_transaction(trans, root, ret);
2901 else
2902 root->orphan_item_inserted = 0;
2976 } 2903 }
2977 2904
2978 if (block_rsv) { 2905 if (block_rsv) {
@@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3041 /* insert an orphan item to track this unlinked/truncated file */ 2968 /* insert an orphan item to track this unlinked/truncated file */
3042 if (insert >= 1) { 2969 if (insert >= 1) {
3043 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2970 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
3044 if (ret && ret != -EEXIST) { 2971 if (ret) {
3045 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 2972 if (reserve) {
3046 &BTRFS_I(inode)->runtime_flags); 2973 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3047 btrfs_abort_transaction(trans, root, ret); 2974 &BTRFS_I(inode)->runtime_flags);
3048 return ret; 2975 btrfs_orphan_release_metadata(inode);
2976 }
2977 if (ret != -EEXIST) {
2978 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2979 &BTRFS_I(inode)->runtime_flags);
2980 btrfs_abort_transaction(trans, root, ret);
2981 return ret;
2982 }
3049 } 2983 }
3050 ret = 0; 2984 ret = 0;
3051 } 2985 }
@@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3084 release_rsv = 1; 3018 release_rsv = 1;
3085 spin_unlock(&root->orphan_lock); 3019 spin_unlock(&root->orphan_lock);
3086 3020
3087 if (trans && delete_item) { 3021 if (trans && delete_item)
3088 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); 3022 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
3089 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3090 }
3091 3023
3092 if (release_rsv) { 3024 if (release_rsv) {
3093 btrfs_orphan_release_metadata(inode); 3025 btrfs_orphan_release_metadata(inode);
3094 atomic_dec(&root->orphan_inodes); 3026 atomic_dec(&root->orphan_inodes);
3095 } 3027 }
3096 3028
3097 return 0; 3029 return ret;
3098} 3030}
3099 3031
3100/* 3032/*
@@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3224 found_key.objectid); 3156 found_key.objectid);
3225 ret = btrfs_del_orphan_item(trans, root, 3157 ret = btrfs_del_orphan_item(trans, root,
3226 found_key.objectid); 3158 found_key.objectid);
3227 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3228 btrfs_end_transaction(trans, root); 3159 btrfs_end_transaction(trans, root);
3160 if (ret)
3161 goto out;
3229 continue; 3162 continue;
3230 } 3163 }
3231 3164
@@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3657 if (ret) { 3590 if (ret) {
3658 btrfs_info(root->fs_info, 3591 btrfs_info(root->fs_info,
3659 "failed to delete reference to %.*s, inode %llu parent %llu", 3592 "failed to delete reference to %.*s, inode %llu parent %llu",
3660 name_len, name, 3593 name_len, name, ino, dir_ino);
3661 (unsigned long long)ino, (unsigned long long)dir_ino);
3662 btrfs_abort_transaction(trans, root, ret); 3594 btrfs_abort_transaction(trans, root, ret);
3663 goto err; 3595 goto err;
3664 } 3596 }
@@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3929 u64 extent_num_bytes = 0; 3861 u64 extent_num_bytes = 0;
3930 u64 extent_offset = 0; 3862 u64 extent_offset = 0;
3931 u64 item_end = 0; 3863 u64 item_end = 0;
3864 u64 last_size = (u64)-1;
3932 u32 found_type = (u8)-1; 3865 u32 found_type = (u8)-1;
3933 int found_extent; 3866 int found_extent;
3934 int del_item; 3867 int del_item;
@@ -4026,6 +3959,11 @@ search_again:
4026 if (found_type != BTRFS_EXTENT_DATA_KEY) 3959 if (found_type != BTRFS_EXTENT_DATA_KEY)
4027 goto delete; 3960 goto delete;
4028 3961
3962 if (del_item)
3963 last_size = found_key.offset;
3964 else
3965 last_size = new_size;
3966
4029 if (extent_type != BTRFS_FILE_EXTENT_INLINE) { 3967 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4030 u64 num_dec; 3968 u64 num_dec;
4031 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); 3969 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -4137,6 +4075,8 @@ out:
4137 btrfs_abort_transaction(trans, root, ret); 4075 btrfs_abort_transaction(trans, root, ret);
4138 } 4076 }
4139error: 4077error:
4078 if (last_size != (u64)-1)
4079 btrfs_ordered_update_i_size(inode, last_size, NULL);
4140 btrfs_free_path(path); 4080 btrfs_free_path(path);
4141 return err; 4081 return err;
4142} 4082}
@@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4465 btrfs_inode_resume_unlocked_dio(inode); 4405 btrfs_inode_resume_unlocked_dio(inode);
4466 4406
4467 ret = btrfs_truncate(inode); 4407 ret = btrfs_truncate(inode);
4468 if (ret && inode->i_nlink) 4408 if (ret && inode->i_nlink) {
4469 btrfs_orphan_del(NULL, inode); 4409 int err;
4410
4411 /*
4412 * failed to truncate, disk_i_size is only adjusted down
4413 * as we remove extents, so it should represent the true
4414 * size of the inode, so reset the in memory size and
4415 * delete our orphan entry.
4416 */
4417 trans = btrfs_join_transaction(root);
4418 if (IS_ERR(trans)) {
4419 btrfs_orphan_del(NULL, inode);
4420 return ret;
4421 }
4422 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4423 err = btrfs_orphan_del(trans, inode);
4424 if (err)
4425 btrfs_abort_transaction(trans, root, err);
4426 btrfs_end_transaction(trans, root);
4427 }
4470 } 4428 }
4471 4429
4472 return ret; 4430 return ret;
@@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode)
4601 4559
4602 btrfs_free_block_rsv(root, rsv); 4560 btrfs_free_block_rsv(root, rsv);
4603 4561
4562 /*
4563 * Errors here aren't a big deal, it just means we leave orphan items
4564 * in the tree. They will be cleaned up on the next mount.
4565 */
4604 if (ret == 0) { 4566 if (ret == 0) {
4605 trans->block_rsv = root->orphan_block_rsv; 4567 trans->block_rsv = root->orphan_block_rsv;
4606 ret = btrfs_orphan_del(trans, inode); 4568 btrfs_orphan_del(trans, inode);
4607 BUG_ON(ret); 4569 } else {
4570 btrfs_orphan_del(NULL, inode);
4608 } 4571 }
4609 4572
4610 trans->block_rsv = &root->fs_info->trans_block_rsv; 4573 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -6161,10 +6124,7 @@ insert:
6161 btrfs_release_path(path); 6124 btrfs_release_path(path);
6162 if (em->start > start || extent_map_end(em) <= start) { 6125 if (em->start > start || extent_map_end(em) <= start) {
6163 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", 6126 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6164 (unsigned long long)em->start, 6127 em->start, em->len, start, len);
6165 (unsigned long long)em->len,
6166 (unsigned long long)start,
6167 (unsigned long long)len);
6168 err = -EIO; 6128 err = -EIO;
6169 goto out; 6129 goto out;
6170 } 6130 }
@@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6362 u64 start, u64 len) 6322 u64 start, u64 len)
6363{ 6323{
6364 struct btrfs_root *root = BTRFS_I(inode)->root; 6324 struct btrfs_root *root = BTRFS_I(inode)->root;
6365 struct btrfs_trans_handle *trans;
6366 struct extent_map *em; 6325 struct extent_map *em;
6367 struct btrfs_key ins; 6326 struct btrfs_key ins;
6368 u64 alloc_hint; 6327 u64 alloc_hint;
6369 int ret; 6328 int ret;
6370 6329
6371 trans = btrfs_join_transaction(root);
6372 if (IS_ERR(trans))
6373 return ERR_CAST(trans);
6374
6375 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
6376
6377 alloc_hint = get_extent_allocation_hint(inode, start, len); 6330 alloc_hint = get_extent_allocation_hint(inode, start, len);
6378 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, 6331 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6379 alloc_hint, &ins, 1); 6332 alloc_hint, &ins, 1);
6380 if (ret) { 6333 if (ret)
6381 em = ERR_PTR(ret); 6334 return ERR_PTR(ret);
6382 goto out;
6383 }
6384 6335
6385 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6336 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6386 ins.offset, ins.offset, ins.offset, 0); 6337 ins.offset, ins.offset, ins.offset, 0);
6387 if (IS_ERR(em)) 6338 if (IS_ERR(em)) {
6388 goto out; 6339 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6340 return em;
6341 }
6389 6342
6390 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6343 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6391 ins.offset, ins.offset, 0); 6344 ins.offset, ins.offset, 0);
6392 if (ret) { 6345 if (ret) {
6393 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6346 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6394 em = ERR_PTR(ret); 6347 free_extent_map(em);
6348 return ERR_PTR(ret);
6395 } 6349 }
6396out: 6350
6397 btrfs_end_transaction(trans, root);
6398 return em; 6351 return em;
6399} 6352}
6400 6353
@@ -6402,11 +6355,11 @@ out:
6402 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6355 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6403 * block must be cow'd 6356 * block must be cow'd
6404 */ 6357 */
6405noinline int can_nocow_extent(struct btrfs_trans_handle *trans, 6358noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6406 struct inode *inode, u64 offset, u64 *len,
6407 u64 *orig_start, u64 *orig_block_len, 6359 u64 *orig_start, u64 *orig_block_len,
6408 u64 *ram_bytes) 6360 u64 *ram_bytes)
6409{ 6361{
6362 struct btrfs_trans_handle *trans;
6410 struct btrfs_path *path; 6363 struct btrfs_path *path;
6411 int ret; 6364 int ret;
6412 struct extent_buffer *leaf; 6365 struct extent_buffer *leaf;
@@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6424 if (!path) 6377 if (!path)
6425 return -ENOMEM; 6378 return -ENOMEM;
6426 6379
6427 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 6380 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
6428 offset, 0); 6381 offset, 0);
6429 if (ret < 0) 6382 if (ret < 0)
6430 goto out; 6383 goto out;
@@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6489 * look for other files referencing this extent, if we 6442 * look for other files referencing this extent, if we
6490 * find any we must cow 6443 * find any we must cow
6491 */ 6444 */
6492 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), 6445 trans = btrfs_join_transaction(root);
6493 key.offset - backref_offset, disk_bytenr)) 6446 if (IS_ERR(trans)) {
6447 ret = 0;
6494 goto out; 6448 goto out;
6449 }
6450
6451 ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
6452 key.offset - backref_offset, disk_bytenr);
6453 btrfs_end_transaction(trans, root);
6454 if (ret) {
6455 ret = 0;
6456 goto out;
6457 }
6495 6458
6496 /* 6459 /*
6497 * adjust disk_bytenr and num_bytes to cover just the bytes 6460 * adjust disk_bytenr and num_bytes to cover just the bytes
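can_nocow_extent() now takes no transaction handle; when it needs btrfs_cross_ref_exist() it joins a transaction just for that check and ends it immediately, and a failure to join is treated as "must cow" rather than as an error. A sketch of what a caller looks like after the change; the wrapper name is illustrative and only the can_nocow_extent() signature comes from the patch.

/* decide whether a write at 'start' can safely skip COW */
static bool nocow_ok(struct inode *inode, u64 start, u64 *len)
{
	u64 orig_start, orig_block_len, ram_bytes;

	/* returns 1 when nocow is safe, 0 when the range must be cow'd,
	 * and < 0 on error; no transaction handle is needed any more */
	return can_nocow_extent(inode, start, len, &orig_start,
				&orig_block_len, &ram_bytes) == 1;
}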
@@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6633 u64 start = iblock << inode->i_blkbits; 6596 u64 start = iblock << inode->i_blkbits;
6634 u64 lockstart, lockend; 6597 u64 lockstart, lockend;
6635 u64 len = bh_result->b_size; 6598 u64 len = bh_result->b_size;
6636 struct btrfs_trans_handle *trans;
6637 int unlock_bits = EXTENT_LOCKED; 6599 int unlock_bits = EXTENT_LOCKED;
6638 int ret = 0; 6600 int ret = 0;
6639 6601
@@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6715 len = min(len, em->len - (start - em->start)); 6677 len = min(len, em->len - (start - em->start));
6716 block_start = em->block_start + (start - em->start); 6678 block_start = em->block_start + (start - em->start);
6717 6679
6718 /* 6680 if (can_nocow_extent(inode, start, &len, &orig_start,
6719 * we're not going to log anything, but we do need
6720 * to make sure the current transaction stays open
6721 * while we look for nocow cross refs
6722 */
6723 trans = btrfs_join_transaction(root);
6724 if (IS_ERR(trans))
6725 goto must_cow;
6726
6727 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6728 &orig_block_len, &ram_bytes) == 1) { 6681 &orig_block_len, &ram_bytes) == 1) {
6729 if (type == BTRFS_ORDERED_PREALLOC) { 6682 if (type == BTRFS_ORDERED_PREALLOC) {
6730 free_extent_map(em); 6683 free_extent_map(em);
@@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6733 block_start, len, 6686 block_start, len,
6734 orig_block_len, 6687 orig_block_len,
6735 ram_bytes, type); 6688 ram_bytes, type);
6736 if (IS_ERR(em)) { 6689 if (IS_ERR(em))
6737 btrfs_end_transaction(trans, root);
6738 goto unlock_err; 6690 goto unlock_err;
6739 }
6740 } 6691 }
6741 6692
6742 ret = btrfs_add_ordered_extent_dio(inode, start, 6693 ret = btrfs_add_ordered_extent_dio(inode, start,
6743 block_start, len, len, type); 6694 block_start, len, len, type);
6744 btrfs_end_transaction(trans, root);
6745 if (ret) { 6695 if (ret) {
6746 free_extent_map(em); 6696 free_extent_map(em);
6747 goto unlock_err; 6697 goto unlock_err;
6748 } 6698 }
6749 goto unlock; 6699 goto unlock;
6750 } 6700 }
6751 btrfs_end_transaction(trans, root);
6752 } 6701 }
6753must_cow: 6702
6754 /* 6703 /*
6755 * this will cow the extent, reset the len in case we changed 6704 * this will cow the extent, reset the len in case we changed
6756 * it above 6705 * it above
@@ -6813,26 +6762,6 @@ unlock_err:
6813 return ret; 6762 return ret;
6814} 6763}
6815 6764
6816struct btrfs_dio_private {
6817 struct inode *inode;
6818 u64 logical_offset;
6819 u64 disk_bytenr;
6820 u64 bytes;
6821 void *private;
6822
6823 /* number of bios pending for this dio */
6824 atomic_t pending_bios;
6825
6826 /* IO errors */
6827 int errors;
6828
6829 /* orig_bio is our btrfs_io_bio */
6830 struct bio *orig_bio;
6831
6832 /* dio_bio came from fs/direct-io.c */
6833 struct bio *dio_bio;
6834};
6835
6836static void btrfs_endio_direct_read(struct bio *bio, int err) 6765static void btrfs_endio_direct_read(struct bio *bio, int err)
6837{ 6766{
6838 struct btrfs_dio_private *dip = bio->bi_private; 6767 struct btrfs_dio_private *dip = bio->bi_private;
@@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6841 struct inode *inode = dip->inode; 6770 struct inode *inode = dip->inode;
6842 struct btrfs_root *root = BTRFS_I(inode)->root; 6771 struct btrfs_root *root = BTRFS_I(inode)->root;
6843 struct bio *dio_bio; 6772 struct bio *dio_bio;
6773 u32 *csums = (u32 *)dip->csum;
6774 int index = 0;
6844 u64 start; 6775 u64 start;
6845 6776
6846 start = dip->logical_offset; 6777 start = dip->logical_offset;
@@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6849 struct page *page = bvec->bv_page; 6780 struct page *page = bvec->bv_page;
6850 char *kaddr; 6781 char *kaddr;
6851 u32 csum = ~(u32)0; 6782 u32 csum = ~(u32)0;
6852 u64 private = ~(u32)0;
6853 unsigned long flags; 6783 unsigned long flags;
6854 6784
6855 if (get_state_private(&BTRFS_I(inode)->io_tree,
6856 start, &private))
6857 goto failed;
6858 local_irq_save(flags); 6785 local_irq_save(flags);
6859 kaddr = kmap_atomic(page); 6786 kaddr = kmap_atomic(page);
6860 csum = btrfs_csum_data(kaddr + bvec->bv_offset, 6787 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6864 local_irq_restore(flags); 6791 local_irq_restore(flags);
6865 6792
6866 flush_dcache_page(bvec->bv_page); 6793 flush_dcache_page(bvec->bv_page);
6867 if (csum != private) { 6794 if (csum != csums[index]) {
6868failed: 6795 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6869 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", 6796 btrfs_ino(inode), start, csum,
6870 (unsigned long long)btrfs_ino(inode), 6797 csums[index]);
6871 (unsigned long long)start,
6872 csum, (unsigned)private);
6873 err = -EIO; 6798 err = -EIO;
6874 } 6799 }
6875 } 6800 }
6876 6801
6877 start += bvec->bv_len; 6802 start += bvec->bv_len;
6878 bvec++; 6803 bvec++;
6804 index++;
6879 } while (bvec <= bvec_end); 6805 } while (bvec <= bvec_end);
6880 6806
6881 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6807 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6956 if (err) { 6882 if (err) {
6957 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 6883 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
6958 "sector %#Lx len %u err no %d\n", 6884 "sector %#Lx len %u err no %d\n",
6959 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, 6885 btrfs_ino(dip->inode), bio->bi_rw,
6960 (unsigned long long)bio->bi_sector, bio->bi_size, err); 6886 (unsigned long long)bio->bi_sector, bio->bi_size, err);
6961 dip->errors = 1; 6887 dip->errors = 1;
6962 6888
@@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6992 int rw, u64 file_offset, int skip_sum, 6918 int rw, u64 file_offset, int skip_sum,
6993 int async_submit) 6919 int async_submit)
6994{ 6920{
6921 struct btrfs_dio_private *dip = bio->bi_private;
6995 int write = rw & REQ_WRITE; 6922 int write = rw & REQ_WRITE;
6996 struct btrfs_root *root = BTRFS_I(inode)->root; 6923 struct btrfs_root *root = BTRFS_I(inode)->root;
6997 int ret; 6924 int ret;
@@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7026 if (ret) 6953 if (ret)
7027 goto err; 6954 goto err;
7028 } else if (!skip_sum) { 6955 } else if (!skip_sum) {
7029 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); 6956 ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
6957 file_offset);
7030 if (ret) 6958 if (ret)
7031 goto err; 6959 goto err;
7032 } 6960 }
@@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7061 bio_put(orig_bio); 6989 bio_put(orig_bio);
7062 return -EIO; 6990 return -EIO;
7063 } 6991 }
6992
7064 if (map_length >= orig_bio->bi_size) { 6993 if (map_length >= orig_bio->bi_size) {
7065 bio = orig_bio; 6994 bio = orig_bio;
7066 goto submit; 6995 goto submit;
@@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7156 struct btrfs_dio_private *dip; 7085 struct btrfs_dio_private *dip;
7157 struct bio *io_bio; 7086 struct bio *io_bio;
7158 int skip_sum; 7087 int skip_sum;
7088 int sum_len;
7159 int write = rw & REQ_WRITE; 7089 int write = rw & REQ_WRITE;
7160 int ret = 0; 7090 int ret = 0;
7091 u16 csum_size;
7161 7092
7162 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7093 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7163 7094
7164 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); 7095 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7165
7166 if (!io_bio) { 7096 if (!io_bio) {
7167 ret = -ENOMEM; 7097 ret = -ENOMEM;
7168 goto free_ordered; 7098 goto free_ordered;
7169 } 7099 }
7170 7100
7171 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7101 if (!skip_sum && !write) {
7102 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7103 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
7104 sum_len *= csum_size;
7105 } else {
7106 sum_len = 0;
7107 }
7108
7109 dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
7172 if (!dip) { 7110 if (!dip) {
7173 ret = -ENOMEM; 7111 ret = -ENOMEM;
7174 goto free_io_bio; 7112 goto free_io_bio;
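The direct-IO private struct is now allocated with the per-block checksum array appended to it, so the read completion can index csums the same way the buffered path does. As a worked example under common assumptions (4KiB blocks, 4-byte crc32c checksums), a 128KiB read covers 32 blocks, so sum_len is 32 * 4 = 128 extra bytes tacked onto the dip.

/* e.g. 128KiB bio, 4KiB blocks, 4-byte crc32c: 32 blocks * 4 = 128 bytes */
sum_len = (dio_bio->bi_size >> inode->i_sb->s_blocksize_bits) * csum_size;
dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);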
@@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7443 * whoever cleared the private bit is responsible 7381 * whoever cleared the private bit is responsible
7444 * for the finish_ordered_io 7382 * for the finish_ordered_io
7445 */ 7383 */
7446 if (TestClearPagePrivate2(page) && 7384 if (TestClearPagePrivate2(page)) {
7447 btrfs_dec_test_ordered_pending(inode, &ordered, page_start, 7385 struct btrfs_ordered_inode_tree *tree;
7448 PAGE_CACHE_SIZE, 1)) { 7386 u64 new_len;
7449 btrfs_finish_ordered_io(ordered); 7387
7388 tree = &BTRFS_I(inode)->ordered_tree;
7389
7390 spin_lock_irq(&tree->lock);
7391 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
7392 new_len = page_start - ordered->file_offset;
7393 if (new_len < ordered->truncated_len)
7394 ordered->truncated_len = new_len;
7395 spin_unlock_irq(&tree->lock);
7396
7397 if (btrfs_dec_test_ordered_pending(inode, &ordered,
7398 page_start,
7399 PAGE_CACHE_SIZE, 1))
7400 btrfs_finish_ordered_io(ordered);
7450 } 7401 }
7451 btrfs_put_ordered_extent(ordered); 7402 btrfs_put_ordered_extent(ordered);
7452 cached_state = NULL; 7403 cached_state = NULL;
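The invalidatepage path above introduces BTRFS_ORDERED_TRUNCATED: instead of pretending the whole ordered extent completed, the invalidated page's offset is recorded as a monotonically shrinking truncated_len, which btrfs_finish_ordered_io() later uses to decide how much of the extent to record and whether to hand the reservation back. The clamping step in isolation; the helper name is illustrative, the locking and fields mirror the hunk.

/* remember how far into the ordered extent the invalidated page falls */
static void mark_ordered_truncated(struct btrfs_ordered_inode_tree *tree,
				   struct btrfs_ordered_extent *ordered,
				   u64 page_start)
{
	u64 new_len = page_start - ordered->file_offset;

	spin_lock_irq(&tree->lock);
	set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
	if (new_len < ordered->truncated_len)	/* only ever shrink it */
		ordered->truncated_len = new_len;
	spin_unlock_irq(&tree->lock);
}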
@@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode)
7612 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7563 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7613 7564
7614 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7565 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
7615 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
7616 7566
7617 /* 7567 /*
7618 * Yes ladies and gentelment, this is indeed ugly. The fact is we have 7568 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode)
7876 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7826 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
7877 &BTRFS_I(inode)->runtime_flags)) { 7827 &BTRFS_I(inode)->runtime_flags)) {
7878 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 7828 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
7879 (unsigned long long)btrfs_ino(inode)); 7829 btrfs_ino(inode));
7880 atomic_dec(&root->orphan_inodes); 7830 atomic_dec(&root->orphan_inodes);
7881 } 7831 }
7882 7832
@@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode)
7886 break; 7836 break;
7887 else { 7837 else {
7888 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", 7838 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
7889 (unsigned long long)ordered->file_offset, 7839 ordered->file_offset, ordered->len);
7890 (unsigned long long)ordered->len);
7891 btrfs_remove_ordered_extent(inode, ordered); 7840 btrfs_remove_ordered_extent(inode, ordered);
7892 btrfs_put_ordered_extent(ordered); 7841 btrfs_put_ordered_extent(ordered);
7893 btrfs_put_ordered_extent(ordered); 7842 btrfs_put_ordered_extent(ordered);
@@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8161 new_dentry->d_name.name, 8110 new_dentry->d_name.name,
8162 new_dentry->d_name.len); 8111 new_dentry->d_name.len);
8163 } 8112 }
8164 if (!ret && new_inode->i_nlink == 0) { 8113 if (!ret && new_inode->i_nlink == 0)
8165 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 8114 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
8166 BUG_ON(ret);
8167 }
8168 if (ret) { 8115 if (ret) {
8169 btrfs_abort_transaction(trans, root, ret); 8116 btrfs_abort_transaction(trans, root, ret);
8170 goto out_fail; 8117 goto out_fail;
@@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8525 8472
8526 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); 8473 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8527 cur_bytes = max(cur_bytes, min_size); 8474 cur_bytes = max(cur_bytes, min_size);
8528 ret = btrfs_reserve_extent(trans, root, cur_bytes, 8475 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8529 min_size, 0, *alloc_hint, &ins, 1); 8476 *alloc_hint, &ins, 1);
8530 if (ret) { 8477 if (ret) {
8531 if (own_trans) 8478 if (own_trans)
8532 btrfs_end_transaction(trans, root); 8479 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 238a05545ee2..1a5b9462dd9a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,7 @@
43#include <linux/blkdev.h> 43#include <linux/blkdev.h>
44#include <linux/uuid.h> 44#include <linux/uuid.h>
45#include <linux/btrfs.h> 45#include <linux/btrfs.h>
46#include <linux/uaccess.h>
46#include "compat.h" 47#include "compat.h"
47#include "ctree.h" 48#include "ctree.h"
48#include "disk-io.h" 49#include "disk-io.h"
@@ -57,6 +58,9 @@
57#include "send.h" 58#include "send.h"
58#include "dev-replace.h" 59#include "dev-replace.h"
59 60
61static int btrfs_clone(struct inode *src, struct inode *inode,
62 u64 off, u64 olen, u64 olen_aligned, u64 destoff);
63
60/* Mask out flags that are inappropriate for the given type of inode. */ 64/* Mask out flags that are inappropriate for the given type of inode. */
61static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 65static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
62{ 66{
@@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
363 return 0; 367 return 0;
364} 368}
365 369
370int btrfs_is_empty_uuid(u8 *uuid)
371{
372 static char empty_uuid[BTRFS_UUID_SIZE] = {0};
373
374 return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
375}
376
366static noinline int create_subvol(struct inode *dir, 377static noinline int create_subvol(struct inode *dir,
367 struct dentry *dentry, 378 struct dentry *dentry,
368 char *name, int namelen, 379 char *name, int namelen,
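btrfs_is_empty_uuid(), added above, is a simple all-zero check over the 16-byte BTRFS_UUID_SIZE buffer. A sketch of the kind of guard it enables; trans, root, root_item and objectid are assumed from the surrounding subvolume code, not defined here.

/* skip uuid-tree bookkeeping for roots that never got a UUID assigned */
if (!btrfs_is_empty_uuid(root_item.uuid))
	ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
				  root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
				  objectid);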
@@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir,
396 * of create_snapshot(). 407 * of create_snapshot().
397 */ 408 */
398 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 409 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
399 7, &qgroup_reserved); 410 8, &qgroup_reserved, false);
400 if (ret) 411 if (ret)
401 return ret; 412 return ret;
402 413
@@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir,
425 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 436 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
426 btrfs_set_header_owner(leaf, objectid); 437 btrfs_set_header_owner(leaf, objectid);
427 438
428 write_extent_buffer(leaf, root->fs_info->fsid, 439 write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
429 (unsigned long)btrfs_header_fsid(leaf),
430 BTRFS_FSID_SIZE); 440 BTRFS_FSID_SIZE);
431 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 441 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
432 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 442 btrfs_header_chunk_tree_uuid(leaf),
433 BTRFS_UUID_SIZE); 443 BTRFS_UUID_SIZE);
434 btrfs_mark_buffer_dirty(leaf); 444 btrfs_mark_buffer_dirty(leaf);
435 445
436 memset(&root_item, 0, sizeof(root_item)); 446 memset(&root_item, 0, sizeof(root_item));
437 447
438 inode_item = &root_item.inode; 448 inode_item = &root_item.inode;
439 inode_item->generation = cpu_to_le64(1); 449 btrfs_set_stack_inode_generation(inode_item, 1);
440 inode_item->size = cpu_to_le64(3); 450 btrfs_set_stack_inode_size(inode_item, 3);
441 inode_item->nlink = cpu_to_le32(1); 451 btrfs_set_stack_inode_nlink(inode_item, 1);
442 inode_item->nbytes = cpu_to_le64(root->leafsize); 452 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
443 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 453 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
444 454
445 root_item.flags = 0; 455 btrfs_set_root_flags(&root_item, 0);
446 root_item.byte_limit = 0; 456 btrfs_set_root_limit(&root_item, 0);
447 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); 457 btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
448 458
449 btrfs_set_root_bytenr(&root_item, leaf->start); 459 btrfs_set_root_bytenr(&root_item, leaf->start);
450 btrfs_set_root_generation(&root_item, trans->transid); 460 btrfs_set_root_generation(&root_item, trans->transid);
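The rewritten initialisation above stops storing cpu_to_le values directly into the on-disk structure and goes through the btrfs_set_stack_* accessors, which keep the endianness conversion in one place. The before/after for a single field, as a minimal illustration taken from the hunk:

/* old: open-coded little-endian store into the stack copy */
inode_item->generation = cpu_to_le64(1);

/* new: the accessor performs the cpu_to_le64() internally */
btrfs_set_stack_inode_generation(inode_item, 1);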
@@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir,
457 btrfs_root_generation(&root_item)); 467 btrfs_root_generation(&root_item));
458 uuid_le_gen(&new_uuid); 468 uuid_le_gen(&new_uuid);
459 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 469 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
460 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); 470 btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
461 root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); 471 btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
462 root_item.ctime = root_item.otime; 472 root_item.ctime = root_item.otime;
463 btrfs_set_root_ctransid(&root_item, trans->transid); 473 btrfs_set_root_ctransid(&root_item, trans->transid);
464 btrfs_set_root_otransid(&root_item, trans->transid); 474 btrfs_set_root_otransid(&root_item, trans->transid);
@@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir,
518 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 528 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
519 objectid, root->root_key.objectid, 529 objectid, root->root_key.objectid,
520 btrfs_ino(dir), index, name, namelen); 530 btrfs_ino(dir), index, name, namelen);
521
522 BUG_ON(ret); 531 BUG_ON(ret);
523 532
533 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
534 root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
535 objectid);
536 if (ret)
537 btrfs_abort_transaction(trans, root, ret);
538
524fail: 539fail:
525 trans->block_rsv = NULL; 540 trans->block_rsv = NULL;
526 trans->bytes_reserved = 0; 541 trans->bytes_reserved = 0;
@@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
573 * 1 - root item 588 * 1 - root item
574 * 2 - root ref/backref 589 * 2 - root ref/backref
575 * 1 - root of snapshot 590 * 1 - root of snapshot
591 * 1 - UUID item
576 */ 592 */
577 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, 593 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
578 &pending_snapshot->block_rsv, 7, 594 &pending_snapshot->block_rsv, 8,
579 &pending_snapshot->qgroup_reserved); 595 &pending_snapshot->qgroup_reserved,
596 false);
580 if (ret) 597 if (ret)
581 goto out; 598 goto out;
582 599
@@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1267 cluster = max_cluster; 1284 cluster = max_cluster;
1268 } 1285 }
1269 1286
1270 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1271 BTRFS_I(inode)->force_compress = compress_type;
1272
1273 if (i + cluster > ra_index) { 1287 if (i + cluster > ra_index) {
1274 ra_index = max(i, ra_index); 1288 ra_index = max(i, ra_index);
1275 btrfs_force_ra(inode->i_mapping, ra, file, ra_index, 1289 btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
@@ -1278,6 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1278 } 1292 }
1279 1293
1280 mutex_lock(&inode->i_mutex); 1294 mutex_lock(&inode->i_mutex);
1295 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1296 BTRFS_I(inode)->force_compress = compress_type;
1281 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1297 ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1282 if (ret < 0) { 1298 if (ret < 0) {
1283 mutex_unlock(&inode->i_mutex); 1299 mutex_unlock(&inode->i_mutex);
@@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1334 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 1350 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
1335 } 1351 }
1336 atomic_dec(&root->fs_info->async_submit_draining); 1352 atomic_dec(&root->fs_info->async_submit_draining);
1337
1338 mutex_lock(&inode->i_mutex);
1339 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1340 mutex_unlock(&inode->i_mutex);
1341 } 1353 }
1342 1354
1343 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1355 if (range->compress_type == BTRFS_COMPRESS_LZO) {
@@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1347 ret = defrag_count; 1359 ret = defrag_count;
1348 1360
1349out_ra: 1361out_ra:
1362 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1363 mutex_lock(&inode->i_mutex);
1364 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1365 mutex_unlock(&inode->i_mutex);
1366 }
1350 if (!file) 1367 if (!file)
1351 kfree(ra); 1368 kfree(ra);
1352 kfree(pages); 1369 kfree(pages);
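
Note on the defrag hunks above: the force_compress update now happens under i_mutex, and it is reset at out_ra on every exit path whenever BTRFS_DEFRAG_RANGE_COMPRESS was requested, so an error or early return can no longer leave the inode stuck in forced-compression mode. For context, a minimal, hypothetical userspace sketch of issuing such a compressing defrag request follows; it assumes the uapi definitions from <linux/btrfs.h> (BTRFS_IOC_DEFRAG_RANGE, BTRFS_DEFRAG_RANGE_COMPRESS, BTRFS_COMPRESS_LZO), and the path and range are illustrative only.

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Ask the kernel to defragment and lzo-recompress the given range. */
	static int defrag_compress_lzo(const char *path, __u64 start, __u64 len)
	{
		struct btrfs_ioctl_defrag_range_args args;
		int fd, ret;

		fd = open(path, O_RDWR);
		if (fd < 0) {
			perror("open");
			return -1;
		}

		memset(&args, 0, sizeof(args));
		args.start = start;
		args.len = len;
		args.flags = BTRFS_DEFRAG_RANGE_COMPRESS; /* triggers force_compress above */
		args.compress_type = BTRFS_COMPRESS_LZO;

		ret = ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &args);
		if (ret < 0)
			perror("BTRFS_IOC_DEFRAG_RANGE");

		close(fd);
		return ret;
	}
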
@@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1377 1394
1378 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1395 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1379 1)) { 1396 1)) {
1380 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1381 mnt_drop_write_file(file); 1397 mnt_drop_write_file(file);
1382 return -EINVAL; 1398 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
1383 } 1399 }
1384 1400
1385 mutex_lock(&root->fs_info->volume_mutex); 1401 mutex_lock(&root->fs_info->volume_mutex);
@@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1403 ret = -EINVAL; 1419 ret = -EINVAL;
1404 goto out_free; 1420 goto out_free;
1405 } 1421 }
1406 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1422 printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
1407 (unsigned long long)devid);
1408 } 1423 }
1409 1424
1410 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1425 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1411 if (!device) { 1426 if (!device) {
1412 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1427 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1413 (unsigned long long)devid); 1428 devid);
1414 ret = -ENODEV; 1429 ret = -ENODEV;
1415 goto out_free; 1430 goto out_free;
1416 } 1431 }
@@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1418 if (!device->writeable) { 1433 if (!device->writeable) {
1419 printk(KERN_INFO "btrfs: resizer unable to apply on " 1434 printk(KERN_INFO "btrfs: resizer unable to apply on "
1420 "readonly device %llu\n", 1435 "readonly device %llu\n",
1421 (unsigned long long)devid); 1436 devid);
1422 ret = -EPERM; 1437 ret = -EPERM;
1423 goto out_free; 1438 goto out_free;
1424 } 1439 }
@@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1470 new_size *= root->sectorsize; 1485 new_size *= root->sectorsize;
1471 1486
1472 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1487 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1473 rcu_str_deref(device->name), 1488 rcu_str_deref(device->name), new_size);
1474 (unsigned long long)new_size);
1475 1489
1476 if (new_size > old_size) { 1490 if (new_size > old_size) {
1477 trans = btrfs_start_transaction(root, 0); 1491 trans = btrfs_start_transaction(root, 0);
@@ -1721,13 +1735,28 @@ out:
1721static noinline int may_destroy_subvol(struct btrfs_root *root) 1735static noinline int may_destroy_subvol(struct btrfs_root *root)
1722{ 1736{
1723 struct btrfs_path *path; 1737 struct btrfs_path *path;
1738 struct btrfs_dir_item *di;
1724 struct btrfs_key key; 1739 struct btrfs_key key;
1740 u64 dir_id;
1725 int ret; 1741 int ret;
1726 1742
1727 path = btrfs_alloc_path(); 1743 path = btrfs_alloc_path();
1728 if (!path) 1744 if (!path)
1729 return -ENOMEM; 1745 return -ENOMEM;
1730 1746
1747 /* Make sure this root isn't set as the default subvol */
1748 dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
1749 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
1750 dir_id, "default", 7, 0);
1751 if (di && !IS_ERR(di)) {
1752 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1753 if (key.objectid == root->root_key.objectid) {
1754 ret = -ENOTEMPTY;
1755 goto out;
1756 }
1757 btrfs_release_path(path);
1758 }
1759
1731 key.objectid = root->root_key.objectid; 1760 key.objectid = root->root_key.objectid;
1732 key.type = BTRFS_ROOT_REF_KEY; 1761 key.type = BTRFS_ROOT_REF_KEY;
1733 key.offset = (u64)-1; 1762 key.offset = (u64)-1;
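
The new block in may_destroy_subvol() refuses to delete a subvolume that is currently the default: it looks up the "default" dir item under the super block's root_dir objectid in the tree root and returns -ENOTEMPTY when that item points at the root being destroyed. A hypothetical userspace recovery step (pointing the default back at the top-level tree before retrying the delete) is sketched below; BTRFS_IOC_DEFAULT_SUBVOL is assumed to come from the uapi <linux/btrfs.h>, and the literal 5 stands in for the top-level subvolume id (BTRFS_FS_TREE_OBJECTID in kernel headers).

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/*
	 * If subvolume deletion fails with ENOTEMPTY because the subvolume is
	 * the default one, point the default back at the top-level tree first.
	 */
	static int reset_default_subvol(const char *mnt)
	{
		__u64 objectid = 5;	/* top-level subvolume id */
		int fd, ret;

		fd = open(mnt, O_RDONLY);
		if (fd < 0) {
			perror("open");
			return -1;
		}

		ret = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid);
		if (ret < 0)
			perror("BTRFS_IOC_DEFAULT_SUBVOL");

		close(fd);
		return ret;
	}
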
@@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1993 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2022 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1994 if (ret < 0) 2023 if (ret < 0)
1995 goto out; 2024 goto out;
2025 else if (ret > 0) {
2026 ret = btrfs_previous_item(root, path, dirid,
2027 BTRFS_INODE_REF_KEY);
2028 if (ret < 0)
2029 goto out;
2030 else if (ret > 0) {
2031 ret = -ENOENT;
2032 goto out;
2033 }
2034 }
1996 2035
1997 l = path->nodes[0]; 2036 l = path->nodes[0];
1998 slot = path->slots[0]; 2037 slot = path->slots[0];
1999 if (ret > 0 && slot > 0)
2000 slot--;
2001 btrfs_item_key_to_cpu(l, &key, slot); 2038 btrfs_item_key_to_cpu(l, &key, slot);
2002 2039
2003 if (ret > 0 && (key.objectid != dirid ||
2004 key.type != BTRFS_INODE_REF_KEY)) {
2005 ret = -ENOENT;
2006 goto out;
2007 }
2008
2009 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2040 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
2010 len = btrfs_inode_ref_name_len(l, iref); 2041 len = btrfs_inode_ref_name_len(l, iref);
2011 ptr -= len + 1; 2042 ptr -= len + 1;
2012 total_len += len + 1; 2043 total_len += len + 1;
2013 if (ptr < name) 2044 if (ptr < name) {
2045 ret = -ENAMETOOLONG;
2014 goto out; 2046 goto out;
2047 }
2015 2048
2016 *(ptr + len) = '/'; 2049 *(ptr + len) = '/';
2017 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 2050 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
@@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2024 key.offset = (u64)-1; 2057 key.offset = (u64)-1;
2025 dirid = key.objectid; 2058 dirid = key.objectid;
2026 } 2059 }
2027 if (ptr < name)
2028 goto out;
2029 memmove(name, ptr, total_len); 2060 memmove(name, ptr, total_len);
2030 name[total_len]='\0'; 2061 name[total_len]='\0';
2031 ret = 0; 2062 ret = 0;
@@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2174 * ref/backref. 2205 * ref/backref.
2175 */ 2206 */
2176 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 2207 err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
2177 5, &qgroup_reserved); 2208 5, &qgroup_reserved, true);
2178 if (err) 2209 if (err)
2179 goto out_up_write; 2210 goto out_up_write;
2180 2211
@@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2213 goto out_end_trans; 2244 goto out_end_trans;
2214 } 2245 }
2215 } 2246 }
2247
2248 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
2249 dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
2250 dest->root_key.objectid);
2251 if (ret && ret != -ENOENT) {
2252 btrfs_abort_transaction(trans, root, ret);
2253 err = ret;
2254 goto out_end_trans;
2255 }
2256 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
2257 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
2258 dest->root_item.received_uuid,
2259 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
2260 dest->root_key.objectid);
2261 if (ret && ret != -ENOENT) {
2262 btrfs_abort_transaction(trans, root, ret);
2263 err = ret;
2264 goto out_end_trans;
2265 }
2266 }
2267
2216out_end_trans: 2268out_end_trans:
2217 trans->block_rsv = NULL; 2269 trans->block_rsv = NULL;
2218 trans->bytes_reserved = 0; 2270 trans->bytes_reserved = 0;
@@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2326 2378
2327 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2379 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2328 1)) { 2380 1)) {
2329 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2381 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
2330 return -EINVAL;
2331 } 2382 }
2332 2383
2333 mutex_lock(&root->fs_info->volume_mutex); 2384 mutex_lock(&root->fs_info->volume_mutex);
@@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2400 if (!fi_args) 2451 if (!fi_args)
2401 return -ENOMEM; 2452 return -ENOMEM;
2402 2453
2454 mutex_lock(&fs_devices->device_list_mutex);
2403 fi_args->num_devices = fs_devices->num_devices; 2455 fi_args->num_devices = fs_devices->num_devices;
2404 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2456 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2405 2457
2406 mutex_lock(&fs_devices->device_list_mutex);
2407 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2458 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2408 if (device->devid > fi_args->max_id) 2459 if (device->devid > fi_args->max_id)
2409 fi_args->max_id = device->devid; 2460 fi_args->max_id = device->devid;
@@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2424 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2475 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2425 int ret = 0; 2476 int ret = 0;
2426 char *s_uuid = NULL; 2477 char *s_uuid = NULL;
2427 char empty_uuid[BTRFS_UUID_SIZE] = {0};
2428 2478
2429 if (!capable(CAP_SYS_ADMIN)) 2479 if (!capable(CAP_SYS_ADMIN))
2430 return -EPERM; 2480 return -EPERM;
@@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2433 if (IS_ERR(di_args)) 2483 if (IS_ERR(di_args))
2434 return PTR_ERR(di_args); 2484 return PTR_ERR(di_args);
2435 2485
2436 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) 2486 if (!btrfs_is_empty_uuid(di_args->uuid))
2437 s_uuid = di_args->uuid; 2487 s_uuid = di_args->uuid;
2438 2488
2439 mutex_lock(&fs_devices->device_list_mutex); 2489 mutex_lock(&fs_devices->device_list_mutex);
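
btrfs_ioctl_dev_info() now uses a btrfs_is_empty_uuid() helper instead of memcmp() against a stack buffer of zeroes, and the same helper shows up in the snapshot-destroy and received-subvol paths in this series. Its definition is not part of these hunks; a plausible equivalent, matching how it is called here, would be something like the following sketch (the 16-byte size mirrors BTRFS_UUID_SIZE as used throughout this file):

	#include <stdint.h>

	#define BTRFS_UUID_SIZE 16

	/* Presumed shape of the helper: true when every uuid byte is zero. */
	static int btrfs_is_empty_uuid(const uint8_t *uuid)
	{
		int i;

		for (i = 0; i < BTRFS_UUID_SIZE; i++) {
			if (uuid[i])
				return 0;
		}
		return 1;
	}
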
@@ -2469,150 +2519,336 @@ out:
2469 return ret; 2519 return ret;
2470} 2520}
2471 2521
2472static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2522static struct page *extent_same_get_page(struct inode *inode, u64 off)
2473 u64 off, u64 olen, u64 destoff) 2523{
2524 struct page *page;
2525 pgoff_t index;
2526 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2527
2528 index = off >> PAGE_CACHE_SHIFT;
2529
2530 page = grab_cache_page(inode->i_mapping, index);
2531 if (!page)
2532 return NULL;
2533
2534 if (!PageUptodate(page)) {
2535 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
2536 0))
2537 return NULL;
2538 lock_page(page);
2539 if (!PageUptodate(page)) {
2540 unlock_page(page);
2541 page_cache_release(page);
2542 return NULL;
2543 }
2544 }
2545 unlock_page(page);
2546
2547 return page;
2548}
2549
2550static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2551{
2552 /* do any pending delalloc/csum calc on src, one way or
2553 another, and lock file content */
2554 while (1) {
2555 struct btrfs_ordered_extent *ordered;
2556 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
2557 ordered = btrfs_lookup_first_ordered_extent(inode,
2558 off + len - 1);
2559 if (!ordered &&
2560 !test_range_bit(&BTRFS_I(inode)->io_tree, off,
2561 off + len - 1, EXTENT_DELALLOC, 0, NULL))
2562 break;
2563 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
2564 if (ordered)
2565 btrfs_put_ordered_extent(ordered);
2566 btrfs_wait_ordered_range(inode, off, len);
2567 }
2568}
2569
2570static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
2571 struct inode *inode2, u64 loff2, u64 len)
2572{
2573 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2574 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2575
2576 mutex_unlock(&inode1->i_mutex);
2577 mutex_unlock(&inode2->i_mutex);
2578}
2579
2580static void btrfs_double_lock(struct inode *inode1, u64 loff1,
2581 struct inode *inode2, u64 loff2, u64 len)
2582{
2583 if (inode1 < inode2) {
2584 swap(inode1, inode2);
2585 swap(loff1, loff2);
2586 }
2587
2588 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2589 lock_extent_range(inode1, loff1, len);
2590 if (inode1 != inode2) {
2591 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2592 lock_extent_range(inode2, loff2, len);
2593 }
2594}
2595
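
btrfs_double_lock() above always takes the two inodes' i_mutex locks in a fixed order based on their addresses (swapping the pair when needed) before locking the extent ranges, so two concurrent dedupe calls on the same pair of files can never acquire the mutexes in opposite order and deadlock. The same idea in isolation, as a small pthread sketch with illustrative names:

	#include <pthread.h>

	struct object {
		pthread_mutex_t lock;
	};

	/*
	 * Always lock the lower-addressed object first, so callers passing the
	 * same pair in either order acquire the mutexes in one global order.
	 */
	static void lock_pair(struct object *a, struct object *b)
	{
		if (a == b) {
			pthread_mutex_lock(&a->lock);	/* only one lock to take */
			return;
		}
		if (a > b) {
			struct object *tmp = a;
			a = b;
			b = tmp;
		}
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	}
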
2596static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2597 u64 dst_loff, u64 len)
2598{
2599 int ret = 0;
2600 struct page *src_page, *dst_page;
2601 unsigned int cmp_len = PAGE_CACHE_SIZE;
2602 void *addr, *dst_addr;
2603
2604 while (len) {
2605 if (len < PAGE_CACHE_SIZE)
2606 cmp_len = len;
2607
2608 src_page = extent_same_get_page(src, loff);
2609 if (!src_page)
2610 return -EINVAL;
2611 dst_page = extent_same_get_page(dst, dst_loff);
2612 if (!dst_page) {
2613 page_cache_release(src_page);
2614 return -EINVAL;
2615 }
2616 addr = kmap_atomic(src_page);
2617 dst_addr = kmap_atomic(dst_page);
2618
2619 flush_dcache_page(src_page);
2620 flush_dcache_page(dst_page);
2621
2622 if (memcmp(addr, dst_addr, cmp_len))
2623 ret = BTRFS_SAME_DATA_DIFFERS;
2624
2625 kunmap_atomic(addr);
2626 kunmap_atomic(dst_addr);
2627 page_cache_release(src_page);
2628 page_cache_release(dst_page);
2629
2630 if (ret)
2631 break;
2632
2633 loff += cmp_len;
2634 dst_loff += cmp_len;
2635 len -= cmp_len;
2636 }
2637
2638 return ret;
2639}
2640
2641static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
2642{
2643 u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
2644
2645 if (off + len > inode->i_size || off + len < off)
2646 return -EINVAL;
2647 /* Check that we are block aligned - btrfs_clone() requires this */
2648 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
2649 return -EINVAL;
2650
2651 return 0;
2652}
2653
2654static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
2655 struct inode *dst, u64 dst_loff)
2474{ 2656{
2475 struct inode *inode = file_inode(file);
2476 struct btrfs_root *root = BTRFS_I(inode)->root;
2477 struct fd src_file;
2478 struct inode *src;
2479 struct btrfs_trans_handle *trans;
2480 struct btrfs_path *path;
2481 struct extent_buffer *leaf;
2482 char *buf;
2483 struct btrfs_key key;
2484 u32 nritems;
2485 int slot;
2486 int ret; 2657 int ret;
2487 u64 len = olen;
2488 u64 bs = root->fs_info->sb->s_blocksize;
2489 int same_inode = 0;
2490 2658
2491 /* 2659 /*
2492 * TODO: 2660 * btrfs_clone() can't handle extents in the same file
2493 * - split compressed inline extents. annoying: we need to 2661 * yet. Once that works, we can drop this check and replace it
2494 * decompress into destination's address_space (the file offset 2662 * with a check for the same inode, but overlapping extents.
2495 * may change, so source mapping won't do), then recompress (or
2496 * otherwise reinsert) a subrange.
2497 * - allow ranges within the same file to be cloned (provided
2498 * they don't overlap)?
2499 */ 2663 */
2500 2664 if (src == dst)
2501 /* the destination must be opened for writing */
2502 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
2503 return -EINVAL; 2665 return -EINVAL;
2504 2666
2505 if (btrfs_root_readonly(root)) 2667 btrfs_double_lock(src, loff, dst, dst_loff, len);
2506 return -EROFS; 2668
2669 ret = extent_same_check_offsets(src, loff, len);
2670 if (ret)
2671 goto out_unlock;
2672
2673 ret = extent_same_check_offsets(dst, dst_loff, len);
2674 if (ret)
2675 goto out_unlock;
2676
2677 /* don't make the dst file partly checksummed */
2678 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2679 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
2680 ret = -EINVAL;
2681 goto out_unlock;
2682 }
2683
2684 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
2685 if (ret == 0)
2686 ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
2687
2688out_unlock:
2689 btrfs_double_unlock(src, loff, dst, dst_loff, len);
2690
2691 return ret;
2692}
2693
2694#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
2695
2696static long btrfs_ioctl_file_extent_same(struct file *file,
2697 void __user *argp)
2698{
2699 struct btrfs_ioctl_same_args *args = argp;
2700 struct btrfs_ioctl_same_args same;
2701 struct btrfs_ioctl_same_extent_info info;
2702 struct inode *src = file->f_dentry->d_inode;
2703 struct file *dst_file = NULL;
2704 struct inode *dst;
2705 u64 off;
2706 u64 len;
2707 int i;
2708 int ret;
2709 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2710 bool is_admin = capable(CAP_SYS_ADMIN);
2711
2712 if (!(file->f_mode & FMODE_READ))
2713 return -EINVAL;
2507 2714
2508 ret = mnt_want_write_file(file); 2715 ret = mnt_want_write_file(file);
2509 if (ret) 2716 if (ret)
2510 return ret; 2717 return ret;
2511 2718
2512 src_file = fdget(srcfd); 2719 if (copy_from_user(&same,
2513 if (!src_file.file) { 2720 (struct btrfs_ioctl_same_args __user *)argp,
2514 ret = -EBADF; 2721 sizeof(same))) {
2515 goto out_drop_write; 2722 ret = -EFAULT;
2723 goto out;
2516 } 2724 }
2517 2725
2518 ret = -EXDEV; 2726 off = same.logical_offset;
2519 if (src_file.file->f_path.mnt != file->f_path.mnt) 2727 len = same.length;
2520 goto out_fput;
2521 2728
2522 src = file_inode(src_file.file); 2729 /*
2730 * Limit the total length we will dedupe for each operation.
2731 * This is intended to bound the total time spent in this
2732 * ioctl to something sane.
2733 */
2734 if (len > BTRFS_MAX_DEDUPE_LEN)
2735 len = BTRFS_MAX_DEDUPE_LEN;
2523 2736
2524 ret = -EINVAL; 2737 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
2525 if (src == inode) 2738 /*
2526 same_inode = 1; 2739 * Btrfs does not support blocksize < page_size. As a
2740 * result, btrfs_cmp_data() won't correctly handle
2741 * this situation without an update.
2742 */
2743 ret = -EINVAL;
2744 goto out;
2745 }
2527 2746
2528 /* the src must be open for reading */ 2747 ret = -EISDIR;
2529 if (!(src_file.file->f_mode & FMODE_READ)) 2748 if (S_ISDIR(src->i_mode))
2530 goto out_fput; 2749 goto out;
2531 2750
2532 /* don't make the dst file partly checksummed */ 2751 ret = -EACCES;
2533 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2752 if (!S_ISREG(src->i_mode))
2534 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 2753 goto out;
2535 goto out_fput;
2536 2754
2537 ret = -EISDIR; 2755 ret = 0;
2538 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 2756 for (i = 0; i < same.dest_count; i++) {
2539 goto out_fput; 2757 if (copy_from_user(&info, &args->info[i], sizeof(info))) {
2758 ret = -EFAULT;
2759 goto out;
2760 }
2540 2761
2541 ret = -EXDEV; 2762 info.bytes_deduped = 0;
2542 if (src->i_sb != inode->i_sb)
2543 goto out_fput;
2544 2763
2545 ret = -ENOMEM; 2764 dst_file = fget(info.fd);
2546 buf = vmalloc(btrfs_level_size(root, 0)); 2765 if (!dst_file) {
2547 if (!buf) 2766 info.status = -EBADF;
2548 goto out_fput; 2767 goto next;
2768 }
2549 2769
2550 path = btrfs_alloc_path(); 2770 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2551 if (!path) { 2771 info.status = -EINVAL;
2552 vfree(buf); 2772 goto next;
2553 goto out_fput; 2773 }
2554 }
2555 path->reada = 2;
2556 2774
2557 if (!same_inode) { 2775 info.status = -EXDEV;
2558 if (inode < src) { 2776 if (file->f_path.mnt != dst_file->f_path.mnt)
2559 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 2777 goto next;
2560 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 2778
2561 } else { 2779 dst = dst_file->f_dentry->d_inode;
2562 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 2780 if (src->i_sb != dst->i_sb)
2563 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 2781 goto next;
2782
2783 if (S_ISDIR(dst->i_mode)) {
2784 info.status = -EISDIR;
2785 goto next;
2564 } 2786 }
2565 } else {
2566 mutex_lock(&src->i_mutex);
2567 }
2568 2787
2569 /* determine range to clone */ 2788 if (!S_ISREG(dst->i_mode)) {
2570 ret = -EINVAL; 2789 info.status = -EACCES;
2571 if (off + len > src->i_size || off + len < off) 2790 goto next;
2572 goto out_unlock; 2791 }
2573 if (len == 0)
2574 olen = len = src->i_size - off;
2575 /* if we extend to eof, continue to block boundary */
2576 if (off + len == src->i_size)
2577 len = ALIGN(src->i_size, bs) - off;
2578 2792
2579 /* verify the end result is block aligned */ 2793 info.status = btrfs_extent_same(src, off, len, dst,
2580 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 2794 info.logical_offset);
2581 !IS_ALIGNED(destoff, bs)) 2795 if (info.status == 0)
2582 goto out_unlock; 2796 info.bytes_deduped += len;
2583 2797
2584 /* verify if ranges are overlapped within the same file */ 2798next:
2585 if (same_inode) { 2799 if (dst_file)
2586 if (destoff + len > off && destoff < off + len) 2800 fput(dst_file);
2587 goto out_unlock;
2588 }
2589 2801
2590 if (destoff > inode->i_size) { 2802 if (__put_user_unaligned(info.status, &args->info[i].status) ||
2591 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 2803 __put_user_unaligned(info.bytes_deduped,
2592 if (ret) 2804 &args->info[i].bytes_deduped)) {
2593 goto out_unlock; 2805 ret = -EFAULT;
2806 goto out;
2807 }
2594 } 2808 }
2595 2809
2596 /* truncate page cache pages from target inode range */ 2810out:
2597 truncate_inode_pages_range(&inode->i_data, destoff, 2811 mnt_drop_write_file(file);
2598 PAGE_CACHE_ALIGN(destoff + len) - 1); 2812 return ret;
2813}
2599 2814
2600 /* do any pending delalloc/csum calc on src, one way or 2815/**
2601 another, and lock file content */ 2816 * btrfs_clone() - clone a range from inode file to another
2602 while (1) { 2817 *
2603 struct btrfs_ordered_extent *ordered; 2818 * @src: Inode to clone from
2604 lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 2819 * @inode: Inode to clone to
2605 ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); 2820 * @off: Offset within source to start clone from
2606 if (!ordered && 2821 * @olen: Original length, passed by user, of range to clone
2607 !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, 2822 * @olen_aligned: Block-aligned value of olen, extent_same uses
2608 EXTENT_DELALLOC, 0, NULL)) 2823 * identical values here
2609 break; 2824 * @destoff: Offset within @inode to start clone
2610 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 2825 */
2611 if (ordered) 2826static int btrfs_clone(struct inode *src, struct inode *inode,
2612 btrfs_put_ordered_extent(ordered); 2827 u64 off, u64 olen, u64 olen_aligned, u64 destoff)
2613 btrfs_wait_ordered_range(src, off, len); 2828{
2829 struct btrfs_root *root = BTRFS_I(inode)->root;
2830 struct btrfs_path *path = NULL;
2831 struct extent_buffer *leaf;
2832 struct btrfs_trans_handle *trans;
2833 char *buf = NULL;
2834 struct btrfs_key key;
2835 u32 nritems;
2836 int slot;
2837 int ret;
2838 u64 len = olen_aligned;
2839
2840 ret = -ENOMEM;
2841 buf = vmalloc(btrfs_level_size(root, 0));
2842 if (!buf)
2843 return ret;
2844
2845 path = btrfs_alloc_path();
2846 if (!path) {
2847 vfree(buf);
2848 return ret;
2614 } 2849 }
2615 2850
2851 path->reada = 2;
2616 /* clone data */ 2852 /* clone data */
2617 key.objectid = btrfs_ino(src); 2853 key.objectid = btrfs_ino(src);
2618 key.type = BTRFS_EXTENT_DATA_KEY; 2854 key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2858,15 +3094,132 @@ next:
2858 key.offset++; 3094 key.offset++;
2859 } 3095 }
2860 ret = 0; 3096 ret = 0;
3097
2861out: 3098out:
2862 btrfs_release_path(path); 3099 btrfs_release_path(path);
3100 btrfs_free_path(path);
3101 vfree(buf);
3102 return ret;
3103}
3104
3105static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3106 u64 off, u64 olen, u64 destoff)
3107{
3108 struct inode *inode = fdentry(file)->d_inode;
3109 struct btrfs_root *root = BTRFS_I(inode)->root;
3110 struct fd src_file;
3111 struct inode *src;
3112 int ret;
3113 u64 len = olen;
3114 u64 bs = root->fs_info->sb->s_blocksize;
3115 int same_inode = 0;
3116
3117 /*
3118 * TODO:
3119 * - split compressed inline extents. annoying: we need to
3120 * decompress into destination's address_space (the file offset
3121 * may change, so source mapping won't do), then recompress (or
3122 * otherwise reinsert) a subrange.
3123 * - allow ranges within the same file to be cloned (provided
3124 * they don't overlap)?
3125 */
3126
3127 /* the destination must be opened for writing */
3128 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
3129 return -EINVAL;
3130
3131 if (btrfs_root_readonly(root))
3132 return -EROFS;
3133
3134 ret = mnt_want_write_file(file);
3135 if (ret)
3136 return ret;
3137
3138 src_file = fdget(srcfd);
3139 if (!src_file.file) {
3140 ret = -EBADF;
3141 goto out_drop_write;
3142 }
3143
3144 ret = -EXDEV;
3145 if (src_file.file->f_path.mnt != file->f_path.mnt)
3146 goto out_fput;
3147
3148 src = file_inode(src_file.file);
3149
3150 ret = -EINVAL;
3151 if (src == inode)
3152 same_inode = 1;
3153
3154 /* the src must be open for reading */
3155 if (!(src_file.file->f_mode & FMODE_READ))
3156 goto out_fput;
3157
3158 /* don't make the dst file partly checksummed */
3159 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
3160 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
3161 goto out_fput;
3162
3163 ret = -EISDIR;
3164 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
3165 goto out_fput;
3166
3167 ret = -EXDEV;
3168 if (src->i_sb != inode->i_sb)
3169 goto out_fput;
3170
3171 if (!same_inode) {
3172 if (inode < src) {
3173 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
3174 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
3175 } else {
3176 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
3177 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
3178 }
3179 } else {
3180 mutex_lock(&src->i_mutex);
3181 }
3182
3183 /* determine range to clone */
3184 ret = -EINVAL;
3185 if (off + len > src->i_size || off + len < off)
3186 goto out_unlock;
3187 if (len == 0)
3188 olen = len = src->i_size - off;
3189 /* if we extend to eof, continue to block boundary */
3190 if (off + len == src->i_size)
3191 len = ALIGN(src->i_size, bs) - off;
3192
3193 /* verify the end result is block aligned */
3194 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
3195 !IS_ALIGNED(destoff, bs))
3196 goto out_unlock;
3197
3198 /* verify if ranges are overlapped within the same file */
3199 if (same_inode) {
3200 if (destoff + len > off && destoff < off + len)
3201 goto out_unlock;
3202 }
3203
3204 if (destoff > inode->i_size) {
3205 ret = btrfs_cont_expand(inode, inode->i_size, destoff);
3206 if (ret)
3207 goto out_unlock;
3208 }
3209
3210 /* truncate page cache pages from target inode range */
3211 truncate_inode_pages_range(&inode->i_data, destoff,
3212 PAGE_CACHE_ALIGN(destoff + len) - 1);
3213
3214 lock_extent_range(src, off, len);
3215
3216 ret = btrfs_clone(src, inode, off, olen, len, destoff);
3217
2863 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3218 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2864out_unlock: 3219out_unlock:
2865 mutex_unlock(&src->i_mutex); 3220 mutex_unlock(&src->i_mutex);
2866 if (!same_inode) 3221 if (!same_inode)
2867 mutex_unlock(&inode->i_mutex); 3222 mutex_unlock(&inode->i_mutex);
2868 vfree(buf);
2869 btrfs_free_path(path);
2870out_fput: 3223out_fput:
2871 fdput(src_file); 3224 fdput(src_file);
2872out_drop_write: 3225out_drop_write:
@@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
3312 3665
3313 switch (p->cmd) { 3666 switch (p->cmd) {
3314 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3667 case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3668 if (root->fs_info->sb->s_flags & MS_RDONLY)
3669 return -EROFS;
3670
3315 if (atomic_xchg( 3671 if (atomic_xchg(
3316 &root->fs_info->mutually_exclusive_operation_running, 3672 &root->fs_info->mutually_exclusive_operation_running,
3317 1)) { 3673 1)) {
3318 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3674 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3319 ret = -EINPROGRESS;
3320 } else { 3675 } else {
3321 ret = btrfs_dev_replace_start(root, p); 3676 ret = btrfs_dev_replace_start(root, p);
3322 atomic_set( 3677 atomic_set(
@@ -3560,8 +3915,7 @@ again:
3560 } else { 3915 } else {
3561 /* this is (1) */ 3916 /* this is (1) */
3562 mutex_unlock(&fs_info->balance_mutex); 3917 mutex_unlock(&fs_info->balance_mutex);
3563 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3918 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3564 ret = -EINVAL;
3565 goto out; 3919 goto out;
3566 } 3920 }
3567 3921
@@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3967 struct btrfs_trans_handle *trans; 4321 struct btrfs_trans_handle *trans;
3968 struct timespec ct = CURRENT_TIME; 4322 struct timespec ct = CURRENT_TIME;
3969 int ret = 0; 4323 int ret = 0;
4324 int received_uuid_changed;
3970 4325
3971 ret = mnt_want_write_file(file); 4326 ret = mnt_want_write_file(file);
3972 if (ret < 0) 4327 if (ret < 0)
@@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3996 goto out; 4351 goto out;
3997 } 4352 }
3998 4353
3999 trans = btrfs_start_transaction(root, 1); 4354 /*
4355 * 1 - root item
4356 * 2 - uuid items (received uuid + subvol uuid)
4357 */
4358 trans = btrfs_start_transaction(root, 3);
4000 if (IS_ERR(trans)) { 4359 if (IS_ERR(trans)) {
4001 ret = PTR_ERR(trans); 4360 ret = PTR_ERR(trans);
4002 trans = NULL; 4361 trans = NULL;
@@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4007 sa->rtime.sec = ct.tv_sec; 4366 sa->rtime.sec = ct.tv_sec;
4008 sa->rtime.nsec = ct.tv_nsec; 4367 sa->rtime.nsec = ct.tv_nsec;
4009 4368
4369 received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
4370 BTRFS_UUID_SIZE);
4371 if (received_uuid_changed &&
4372 !btrfs_is_empty_uuid(root_item->received_uuid))
4373 btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
4374 root_item->received_uuid,
4375 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4376 root->root_key.objectid);
4010 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 4377 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
4011 btrfs_set_root_stransid(root_item, sa->stransid); 4378 btrfs_set_root_stransid(root_item, sa->stransid);
4012 btrfs_set_root_rtransid(root_item, sa->rtransid); 4379 btrfs_set_root_rtransid(root_item, sa->rtransid);
4013 root_item->stime.sec = cpu_to_le64(sa->stime.sec); 4380 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
4014 root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); 4381 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
4015 root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); 4382 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
4016 root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); 4383 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
4017 4384
4018 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4385 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4019 &root->root_key, &root->root_item); 4386 &root->root_key, &root->root_item);
4020 if (ret < 0) { 4387 if (ret < 0) {
4021 btrfs_end_transaction(trans, root); 4388 btrfs_end_transaction(trans, root);
4022 trans = NULL;
4023 goto out; 4389 goto out;
4024 } else { 4390 }
4025 ret = btrfs_commit_transaction(trans, root); 4391 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
4026 if (ret < 0) 4392 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
4393 sa->uuid,
4394 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4395 root->root_key.objectid);
4396 if (ret < 0 && ret != -EEXIST) {
4397 btrfs_abort_transaction(trans, root, ret);
4027 goto out; 4398 goto out;
4399 }
4400 }
4401 ret = btrfs_commit_transaction(trans, root);
4402 if (ret < 0) {
4403 btrfs_abort_transaction(trans, root, ret);
4404 goto out;
4028 } 4405 }
4029 4406
4030 ret = copy_to_user(arg, sa, sizeof(*sa)); 4407 ret = copy_to_user(arg, sa, sizeof(*sa));
@@ -4041,18 +4418,22 @@ out:
4041static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) 4418static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
4042{ 4419{
4043 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4420 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4044 const char *label = root->fs_info->super_copy->label; 4421 size_t len;
4045 size_t len = strnlen(label, BTRFS_LABEL_SIZE);
4046 int ret; 4422 int ret;
4423 char label[BTRFS_LABEL_SIZE];
4424
4425 spin_lock(&root->fs_info->super_lock);
4426 memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
4427 spin_unlock(&root->fs_info->super_lock);
4428
4429 len = strnlen(label, BTRFS_LABEL_SIZE);
4047 4430
4048 if (len == BTRFS_LABEL_SIZE) { 4431 if (len == BTRFS_LABEL_SIZE) {
4049 pr_warn("btrfs: label is too long, return the first %zu bytes\n", 4432 pr_warn("btrfs: label is too long, return the first %zu bytes\n",
4050 --len); 4433 --len);
4051 } 4434 }
4052 4435
4053 mutex_lock(&root->fs_info->volume_mutex);
4054 ret = copy_to_user(arg, label, len); 4436 ret = copy_to_user(arg, label, len);
4055 mutex_unlock(&root->fs_info->volume_mutex);
4056 4437
4057 return ret ? -EFAULT : 0; 4438 return ret ? -EFAULT : 0;
4058} 4439}
@@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
4081 if (ret) 4462 if (ret)
4082 return ret; 4463 return ret;
4083 4464
4084 mutex_lock(&root->fs_info->volume_mutex);
4085 trans = btrfs_start_transaction(root, 0); 4465 trans = btrfs_start_transaction(root, 0);
4086 if (IS_ERR(trans)) { 4466 if (IS_ERR(trans)) {
4087 ret = PTR_ERR(trans); 4467 ret = PTR_ERR(trans);
4088 goto out_unlock; 4468 goto out_unlock;
4089 } 4469 }
4090 4470
4471 spin_lock(&root->fs_info->super_lock);
4091 strcpy(super_block->label, label); 4472 strcpy(super_block->label, label);
4473 spin_unlock(&root->fs_info->super_lock);
4092 ret = btrfs_end_transaction(trans, root); 4474 ret = btrfs_end_transaction(trans, root);
4093 4475
4094out_unlock: 4476out_unlock:
4095 mutex_unlock(&root->fs_info->volume_mutex);
4096 mnt_drop_write_file(file); 4477 mnt_drop_write_file(file);
4097 return ret; 4478 return ret;
4098} 4479}
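
With these two hunks the label is copied to and from the super block under fs_info->super_lock instead of volume_mutex, and btrfs_ioctl_get_fslabel() snapshots the label into a stack buffer before copy_to_user(), so a concurrent SET_FSLABEL can no longer be observed half-written. For reference, a hypothetical userspace use of the pair of ioctls; BTRFS_IOC_GET_FSLABEL, BTRFS_IOC_SET_FSLABEL and BTRFS_LABEL_SIZE are assumed to come from the uapi <linux/btrfs.h>, and the mount point is made up.

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Print the current label of the filesystem at "mnt", then change it. */
	static int relabel(const char *mnt, const char *new_label)
	{
		char label[BTRFS_LABEL_SIZE] = { 0 };
		int fd, ret;

		fd = open(mnt, O_RDONLY);
		if (fd < 0) {
			perror("open");
			return -1;
		}

		if (ioctl(fd, BTRFS_IOC_GET_FSLABEL, label) == 0)
			printf("old label: %.*s\n", BTRFS_LABEL_SIZE, label);

		/* the new label must leave room for a terminating NUL */
		strncpy(label, new_label, BTRFS_LABEL_SIZE - 1);
		label[BTRFS_LABEL_SIZE - 1] = '\0';

		ret = ioctl(fd, BTRFS_IOC_SET_FSLABEL, label);
		if (ret < 0)
			perror("BTRFS_IOC_SET_FSLABEL");

		close(fd);
		return ret;
	}
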
@@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int
4207 return btrfs_ioctl_get_fslabel(file, argp); 4588 return btrfs_ioctl_get_fslabel(file, argp);
4208 case BTRFS_IOC_SET_FSLABEL: 4589 case BTRFS_IOC_SET_FSLABEL:
4209 return btrfs_ioctl_set_fslabel(file, argp); 4590 return btrfs_ioctl_set_fslabel(file, argp);
4591 case BTRFS_IOC_FILE_EXTENT_SAME:
4592 return btrfs_ioctl_file_extent_same(file, argp);
4210 } 4593 }
4211 4594
4212 return -ENOTTY; 4595 return -ENOTTY;
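
The new BTRFS_IOC_FILE_EXTENT_SAME handler added above requires the source fd to be readable, each destination to be writable (or the caller to have CAP_SYS_ADMIN), both files to be regular files on the same mount, and it caps every call at BTRFS_MAX_DEDUPE_LEN (16 MiB); only byte-for-byte identical ranges are then shared via btrfs_extent_same()/btrfs_clone(). A minimal, hypothetical userspace sketch of driving the ioctl is below; the struct and constant names are assumed to come from the uapi <linux/btrfs.h> shipped with a kernel containing this change, and the paths, offsets and length are made up.

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Dedupe "len" bytes at offset 0 of src_path into offset 0 of dst_path. */
	static int dedupe_range(const char *src_path, const char *dst_path, __u64 len)
	{
		struct btrfs_ioctl_same_args *args;
		struct btrfs_ioctl_same_extent_info *info;
		int src_fd, dst_fd, ret = -1;

		/* one destination, so one extent_info entry follows the header */
		args = calloc(1, sizeof(*args) + sizeof(*info));
		if (!args)
			return -1;

		src_fd = open(src_path, O_RDONLY);	/* source only needs read access */
		dst_fd = open(dst_path, O_RDWR);	/* destinations need write access */
		if (src_fd < 0 || dst_fd < 0) {
			perror("open");
			goto out;
		}

		args->logical_offset = 0;		/* start of the source range */
		args->length = len;			/* kernel caps this at 16 MiB per call */
		args->dest_count = 1;
		info = &args->info[0];
		info->fd = dst_fd;
		info->logical_offset = 0;		/* start of the destination range */

		ret = ioctl(src_fd, BTRFS_IOC_FILE_EXTENT_SAME, args);
		if (ret < 0)
			perror("BTRFS_IOC_FILE_EXTENT_SAME");
		else if (info->status == BTRFS_SAME_DATA_DIFFERS)
			fprintf(stderr, "ranges differ, nothing deduped\n");
		else if (info->status < 0)
			fprintf(stderr, "dedupe failed: %s\n", strerror(-info->status));
		else
			printf("deduped %llu bytes\n",
			       (unsigned long long)info->bytes_deduped);

	out:
		if (src_fd >= 0)
			close(src_fd);
		if (dst_fd >= 0)
			close(dst_fd);
		free(args);
		return ret;
	}
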
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index f93151a98886..b6a6f07c5ce2 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws,
207 } 207 }
208 208
209 /* we're making it bigger, give up */ 209 /* we're making it bigger, give up */
210 if (tot_in > 8192 && tot_in < tot_out) 210 if (tot_in > 8192 && tot_in < tot_out) {
211 ret = -1;
211 goto out; 212 goto out;
213 }
212 214
213 /* we're all done */ 215 /* we're all done */
214 if (tot_in >= len) 216 if (tot_in >= len)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 81369827e514..966b413a33b8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno,
67{ 67{
68 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 68 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
69 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " 69 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
70 "%llu\n", (unsigned long long)offset); 70 "%llu\n", offset);
71} 71}
72 72
73/* 73/*
@@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
205 entry->bytes_left = len; 205 entry->bytes_left = len;
206 entry->inode = igrab(inode); 206 entry->inode = igrab(inode);
207 entry->compress_type = compress_type; 207 entry->compress_type = compress_type;
208 entry->truncated_len = (u64)-1;
208 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 209 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
209 set_bit(type, &entry->flags); 210 set_bit(type, &entry->flags);
210 211
@@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
336 *file_offset = dec_end; 337 *file_offset = dec_end;
337 if (dec_start > dec_end) { 338 if (dec_start > dec_end) {
338 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", 339 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
339 (unsigned long long)dec_start, 340 dec_start, dec_end);
340 (unsigned long long)dec_end);
341 } 341 }
342 to_dec = dec_end - dec_start; 342 to_dec = dec_end - dec_start;
343 if (to_dec > entry->bytes_left) { 343 if (to_dec > entry->bytes_left) {
344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
345 (unsigned long long)entry->bytes_left, 345 entry->bytes_left, to_dec);
346 (unsigned long long)to_dec);
347 } 346 }
348 entry->bytes_left -= to_dec; 347 entry->bytes_left -= to_dec;
349 if (!uptodate) 348 if (!uptodate)
@@ -403,8 +402,7 @@ have_entry:
403 402
404 if (io_size > entry->bytes_left) { 403 if (io_size > entry->bytes_left) {
405 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 404 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
406 (unsigned long long)entry->bytes_left, 405 entry->bytes_left, io_size);
407 (unsigned long long)io_size);
408 } 406 }
409 entry->bytes_left -= io_size; 407 entry->bytes_left -= io_size;
410 if (!uptodate) 408 if (!uptodate)
@@ -671,7 +669,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
671 INIT_LIST_HEAD(&splice); 669 INIT_LIST_HEAD(&splice);
672 INIT_LIST_HEAD(&works); 670 INIT_LIST_HEAD(&works);
673 671
674 mutex_lock(&root->fs_info->ordered_operations_mutex); 672 mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
675 spin_lock(&root->fs_info->ordered_root_lock); 673 spin_lock(&root->fs_info->ordered_root_lock);
676 list_splice_init(&cur_trans->ordered_operations, &splice); 674 list_splice_init(&cur_trans->ordered_operations, &splice);
677 while (!list_empty(&splice)) { 675 while (!list_empty(&splice)) {
@@ -718,7 +716,7 @@ out:
718 list_del_init(&work->list); 716 list_del_init(&work->list);
719 btrfs_wait_and_free_delalloc_work(work); 717 btrfs_wait_and_free_delalloc_work(work);
720 } 718 }
721 mutex_unlock(&root->fs_info->ordered_operations_mutex); 719 mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
722 return ret; 720 return ret;
723} 721}
724 722
@@ -923,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
923 struct btrfs_ordered_extent *test; 921 struct btrfs_ordered_extent *test;
924 int ret = 1; 922 int ret = 1;
925 923
926 if (ordered) 924 spin_lock_irq(&tree->lock);
925 if (ordered) {
927 offset = entry_end(ordered); 926 offset = entry_end(ordered);
928 else 927 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
928 offset = min(offset,
929 ordered->file_offset +
930 ordered->truncated_len);
931 } else {
929 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 932 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
930 933 }
931 spin_lock_irq(&tree->lock);
932 disk_i_size = BTRFS_I(inode)->disk_i_size; 934 disk_i_size = BTRFS_I(inode)->disk_i_size;
933 935
934 /* truncate file */ 936 /* truncate file */
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 68844d59ee6f..d9a5aa097b4f 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -69,6 +69,7 @@ struct btrfs_ordered_sum {
69 * the isize. */ 69 * the isize. */
70#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered 70#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
71 ordered extent */ 71 ordered extent */
72#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
72 73
73struct btrfs_ordered_extent { 74struct btrfs_ordered_extent {
74 /* logical offset in the file */ 75 /* logical offset in the file */
@@ -96,6 +97,12 @@ struct btrfs_ordered_extent {
96 */ 97 */
97 u64 outstanding_isize; 98 u64 outstanding_isize;
98 99
100 /*
101 * If we get truncated we need to adjust the file extent we enter for
102 * this ordered extent so that we do not expose stale data.
103 */
104 u64 truncated_len;
105
99 /* flags (described above) */ 106 /* flags (described above) */
100 unsigned long flags; 107 unsigned long flags;
101 108
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index dc0024f17c1f..0088bedc8631 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
26 int i; 26 int i;
27 printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " 27 printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
28 "num_stripes %d\n", 28 "num_stripes %d\n",
29 (unsigned long long)btrfs_chunk_length(eb, chunk), 29 btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk),
30 (unsigned long long)btrfs_chunk_owner(eb, chunk), 30 btrfs_chunk_type(eb, chunk), num_stripes);
31 (unsigned long long)btrfs_chunk_type(eb, chunk),
32 num_stripes);
33 for (i = 0 ; i < num_stripes ; i++) { 31 for (i = 0 ; i < num_stripes ; i++) {
34 printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, 32 printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
35 (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), 33 btrfs_stripe_devid_nr(eb, chunk, i),
36 (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); 34 btrfs_stripe_offset_nr(eb, chunk, i));
37 } 35 }
38} 36}
39static void print_dev_item(struct extent_buffer *eb, 37static void print_dev_item(struct extent_buffer *eb,
@@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb,
41{ 39{
42 printk(KERN_INFO "\t\tdev item devid %llu " 40 printk(KERN_INFO "\t\tdev item devid %llu "
43 "total_bytes %llu bytes used %llu\n", 41 "total_bytes %llu bytes used %llu\n",
44 (unsigned long long)btrfs_device_id(eb, dev_item), 42 btrfs_device_id(eb, dev_item),
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item), 43 btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); 44 btrfs_device_bytes_used(eb, dev_item));
47} 45}
48static void print_extent_data_ref(struct extent_buffer *eb, 46static void print_extent_data_ref(struct extent_buffer *eb,
49 struct btrfs_extent_data_ref *ref) 47 struct btrfs_extent_data_ref *ref)
50{ 48{
51 printk(KERN_INFO "\t\textent data backref root %llu " 49 printk(KERN_INFO "\t\textent data backref root %llu "
52 "objectid %llu offset %llu count %u\n", 50 "objectid %llu offset %llu count %u\n",
53 (unsigned long long)btrfs_extent_data_ref_root(eb, ref), 51 btrfs_extent_data_ref_root(eb, ref),
54 (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), 52 btrfs_extent_data_ref_objectid(eb, ref),
55 (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), 53 btrfs_extent_data_ref_offset(eb, ref),
56 btrfs_extent_data_ref_count(eb, ref)); 54 btrfs_extent_data_ref_count(eb, ref));
57} 55}
58 56
@@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
87 flags = btrfs_extent_flags(eb, ei); 85 flags = btrfs_extent_flags(eb, ei);
88 86
89 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", 87 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
90 (unsigned long long)btrfs_extent_refs(eb, ei), 88 btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
91 (unsigned long long)btrfs_extent_generation(eb, ei), 89 flags);
92 (unsigned long long)flags);
93 90
94 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 91 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
95 struct btrfs_tree_block_info *info; 92 struct btrfs_tree_block_info *info;
96 info = (struct btrfs_tree_block_info *)(ei + 1); 93 info = (struct btrfs_tree_block_info *)(ei + 1);
97 btrfs_tree_block_key(eb, info, &key); 94 btrfs_tree_block_key(eb, info, &key);
98 printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " 95 printk(KERN_INFO "\t\ttree block key (%llu %u %llu) "
99 "level %d\n", 96 "level %d\n",
100 (unsigned long long)btrfs_disk_key_objectid(&key), 97 btrfs_disk_key_objectid(&key), key.type,
101 key.type, 98 btrfs_disk_key_offset(&key),
102 (unsigned long long)btrfs_disk_key_offset(&key),
103 btrfs_tree_block_level(eb, info)); 99 btrfs_tree_block_level(eb, info));
104 iref = (struct btrfs_extent_inline_ref *)(info + 1); 100 iref = (struct btrfs_extent_inline_ref *)(info + 1);
105 } else { 101 } else {
@@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
115 switch (type) { 111 switch (type) {
116 case BTRFS_TREE_BLOCK_REF_KEY: 112 case BTRFS_TREE_BLOCK_REF_KEY:
117 printk(KERN_INFO "\t\ttree block backref " 113 printk(KERN_INFO "\t\ttree block backref "
118 "root %llu\n", (unsigned long long)offset); 114 "root %llu\n", offset);
119 break; 115 break;
120 case BTRFS_SHARED_BLOCK_REF_KEY: 116 case BTRFS_SHARED_BLOCK_REF_KEY:
121 printk(KERN_INFO "\t\tshared block backref " 117 printk(KERN_INFO "\t\tshared block backref "
122 "parent %llu\n", (unsigned long long)offset); 118 "parent %llu\n", offset);
123 break; 119 break;
124 case BTRFS_EXTENT_DATA_REF_KEY: 120 case BTRFS_EXTENT_DATA_REF_KEY:
125 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 121 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
129 sref = (struct btrfs_shared_data_ref *)(iref + 1); 125 sref = (struct btrfs_shared_data_ref *)(iref + 1);
130 printk(KERN_INFO "\t\tshared data backref " 126 printk(KERN_INFO "\t\tshared data backref "
131 "parent %llu count %u\n", 127 "parent %llu count %u\n",
132 (unsigned long long)offset, 128 offset, btrfs_shared_data_ref_count(eb, sref));
133 btrfs_shared_data_ref_count(eb, sref));
134 break; 129 break;
135 default: 130 default:
136 BUG(); 131 BUG();
@@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
148 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); 143 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
149 printk("\t\textent back ref root %llu gen %llu " 144 printk("\t\textent back ref root %llu gen %llu "
150 "owner %llu num_refs %lu\n", 145 "owner %llu num_refs %lu\n",
151 (unsigned long long)btrfs_ref_root_v0(eb, ref0), 146 btrfs_ref_root_v0(eb, ref0),
152 (unsigned long long)btrfs_ref_generation_v0(eb, ref0), 147 btrfs_ref_generation_v0(eb, ref0),
153 (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), 148 btrfs_ref_objectid_v0(eb, ref0),
154 (unsigned long)btrfs_ref_count_v0(eb, ref0)); 149 (unsigned long)btrfs_ref_count_v0(eb, ref0));
155} 150}
156#endif 151#endif
157 152
153static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
154 u32 item_size)
155{
156 if (!IS_ALIGNED(item_size, sizeof(u64))) {
157 pr_warn("btrfs: uuid item with illegal size %lu!\n",
158 (unsigned long)item_size);
159 return;
160 }
161 while (item_size) {
162 __le64 subvol_id;
163
164 read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
165 printk(KERN_INFO "\t\tsubvol_id %llu\n",
166 (unsigned long long)le64_to_cpu(subvol_id));
167 item_size -= sizeof(u64);
168 offset += sizeof(u64);
169 }
170}
171
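
print_uuid_item() above documents the layout of the new UUID tree items: the key type is BTRFS_UUID_KEY_SUBVOL or BTRFS_UUID_KEY_RECEIVED_SUBVOL (with the uuid itself packed into the key by the uuid-tree code, which is not shown here), and the item body is simply a packed array of little-endian 64-bit subvolume ids. A hedged userspace-style sketch of decoding such an item body from a raw buffer; the helper name and buffer handling are illustrative, not taken from btrfs-progs.

	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Walk a raw UUID-tree item body and print each subvolume id it maps to. */
	static void dump_uuid_item(const void *item, uint32_t item_size)
	{
		const unsigned char *p = item;

		if (item_size % sizeof(uint64_t)) {
			fprintf(stderr, "uuid item with illegal size %u\n", item_size);
			return;
		}
		while (item_size) {
			uint64_t le_id;

			memcpy(&le_id, p, sizeof(le_id));	/* on-disk value is little endian */
			printf("subvol_id %llu\n", (unsigned long long)le64toh(le_id));
			p += sizeof(le_id);
			item_size -= sizeof(le_id);
		}
	}
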
158void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 172void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
159{ 173{
160 int i; 174 int i;
@@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
177 nr = btrfs_header_nritems(l); 191 nr = btrfs_header_nritems(l);
178 192
179 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", 193 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
180 (unsigned long long)btrfs_header_bytenr(l), nr, 194 btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
181 btrfs_leaf_free_space(root, l));
182 for (i = 0 ; i < nr ; i++) { 195 for (i = 0 ; i < nr ; i++) {
183 item = btrfs_item_nr(l, i); 196 item = btrfs_item_nr(l, i);
184 btrfs_item_key_to_cpu(l, &key, i); 197 btrfs_item_key_to_cpu(l, &key, i);
185 type = btrfs_key_type(&key); 198 type = btrfs_key_type(&key);
186 printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " 199 printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
187 "itemsize %d\n", 200 "itemsize %d\n",
188 i, 201 i, key.objectid, type, key.offset,
189 (unsigned long long)key.objectid, type,
190 (unsigned long long)key.offset,
191 btrfs_item_offset(l, item), btrfs_item_size(l, item)); 202 btrfs_item_offset(l, item), btrfs_item_size(l, item));
192 switch (type) { 203 switch (type) {
193 case BTRFS_INODE_ITEM_KEY: 204 case BTRFS_INODE_ITEM_KEY:
194 ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); 205 ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
195 printk(KERN_INFO "\t\tinode generation %llu size %llu " 206 printk(KERN_INFO "\t\tinode generation %llu size %llu "
196 "mode %o\n", 207 "mode %o\n",
197 (unsigned long long)
198 btrfs_inode_generation(l, ii), 208 btrfs_inode_generation(l, ii),
199 (unsigned long long)btrfs_inode_size(l, ii), 209 btrfs_inode_size(l, ii),
200 btrfs_inode_mode(l, ii)); 210 btrfs_inode_mode(l, ii));
201 break; 211 break;
202 case BTRFS_DIR_ITEM_KEY: 212 case BTRFS_DIR_ITEM_KEY:
203 di = btrfs_item_ptr(l, i, struct btrfs_dir_item); 213 di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
204 btrfs_dir_item_key_to_cpu(l, di, &found_key); 214 btrfs_dir_item_key_to_cpu(l, di, &found_key);
205 printk(KERN_INFO "\t\tdir oid %llu type %u\n", 215 printk(KERN_INFO "\t\tdir oid %llu type %u\n",
206 (unsigned long long)found_key.objectid, 216 found_key.objectid,
207 btrfs_dir_type(l, di)); 217 btrfs_dir_type(l, di));
208 break; 218 break;
209 case BTRFS_ROOT_ITEM_KEY: 219 case BTRFS_ROOT_ITEM_KEY:
210 ri = btrfs_item_ptr(l, i, struct btrfs_root_item); 220 ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
211 printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", 221 printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
212 (unsigned long long)
213 btrfs_disk_root_bytenr(l, ri), 222 btrfs_disk_root_bytenr(l, ri),
214 btrfs_disk_root_refs(l, ri)); 223 btrfs_disk_root_refs(l, ri));
215 break; 224 break;
@@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
245 } 254 }
246 printk(KERN_INFO "\t\textent data disk bytenr %llu " 255 printk(KERN_INFO "\t\textent data disk bytenr %llu "
247 "nr %llu\n", 256 "nr %llu\n",
248 (unsigned long long)
249 btrfs_file_extent_disk_bytenr(l, fi), 257 btrfs_file_extent_disk_bytenr(l, fi),
250 (unsigned long long)
251 btrfs_file_extent_disk_num_bytes(l, fi)); 258 btrfs_file_extent_disk_num_bytes(l, fi));
252 printk(KERN_INFO "\t\textent data offset %llu " 259 printk(KERN_INFO "\t\textent data offset %llu "
253 "nr %llu ram %llu\n", 260 "nr %llu ram %llu\n",
254 (unsigned long long)
255 btrfs_file_extent_offset(l, fi), 261 btrfs_file_extent_offset(l, fi),
256 (unsigned long long)
257 btrfs_file_extent_num_bytes(l, fi), 262 btrfs_file_extent_num_bytes(l, fi),
258 (unsigned long long)
259 btrfs_file_extent_ram_bytes(l, fi)); 263 btrfs_file_extent_ram_bytes(l, fi));
260 break; 264 break;
261 case BTRFS_EXTENT_REF_V0_KEY: 265 case BTRFS_EXTENT_REF_V0_KEY:
@@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
269 bi = btrfs_item_ptr(l, i, 273 bi = btrfs_item_ptr(l, i,
270 struct btrfs_block_group_item); 274 struct btrfs_block_group_item);
271 printk(KERN_INFO "\t\tblock group used %llu\n", 275 printk(KERN_INFO "\t\tblock group used %llu\n",
272 (unsigned long long)
273 btrfs_disk_block_group_used(l, bi)); 276 btrfs_disk_block_group_used(l, bi));
274 break; 277 break;
275 case BTRFS_CHUNK_ITEM_KEY: 278 case BTRFS_CHUNK_ITEM_KEY:
@@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
286 printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" 289 printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
287 "\t\tchunk objectid %llu chunk offset %llu " 290 "\t\tchunk objectid %llu chunk offset %llu "
288 "length %llu\n", 291 "length %llu\n",
289 (unsigned long long)
290 btrfs_dev_extent_chunk_tree(l, dev_extent), 292 btrfs_dev_extent_chunk_tree(l, dev_extent),
291 (unsigned long long)
292 btrfs_dev_extent_chunk_objectid(l, dev_extent), 293 btrfs_dev_extent_chunk_objectid(l, dev_extent),
293 (unsigned long long)
294 btrfs_dev_extent_chunk_offset(l, dev_extent), 294 btrfs_dev_extent_chunk_offset(l, dev_extent),
295 (unsigned long long)
296 btrfs_dev_extent_length(l, dev_extent)); 295 btrfs_dev_extent_length(l, dev_extent));
297 break; 296 break;
298 case BTRFS_DEV_STATS_KEY: 297 case BTRFS_DEV_STATS_KEY:
@@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
301 case BTRFS_DEV_REPLACE_KEY: 300 case BTRFS_DEV_REPLACE_KEY:
302 printk(KERN_INFO "\t\tdev replace\n"); 301 printk(KERN_INFO "\t\tdev replace\n");
303 break; 302 break;
303 case BTRFS_UUID_KEY_SUBVOL:
304 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
305 print_uuid_item(l, btrfs_item_ptr_offset(l, i),
306 btrfs_item_size_nr(l, i));
307 break;
304 }; 308 };
305 } 309 }
306} 310}
@@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
320 return; 324 return;
321 } 325 }
322 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", 326 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
323 (unsigned long long)btrfs_header_bytenr(c), 327 btrfs_header_bytenr(c), level, nr,
324 level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); 328 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
325 for (i = 0; i < nr; i++) { 329 for (i = 0; i < nr; i++) {
326 btrfs_node_key_to_cpu(c, &key, i); 330 btrfs_node_key_to_cpu(c, &key, i);
327 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", 331 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
328 i, 332 i, key.objectid, key.type, key.offset,
329 (unsigned long long)key.objectid, 333 btrfs_node_blockptr(c, i));
330 key.type,
331 (unsigned long long)key.offset,
332 (unsigned long long)btrfs_node_blockptr(c, i));
333 } 334 }
334 for (i = 0; i < nr; i++) { 335 for (i = 0; i < nr; i++) {
335 struct extent_buffer *next = read_tree_block(root, 336 struct extent_buffer *next = read_tree_block(root,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1280eff8af56..4e6ef490619e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
157 return qgroup; 157 return qgroup;
158} 158}
159 159
160/* must be called with qgroup_lock held */ 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
161static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
162{ 161{
163 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
164 struct btrfs_qgroup_list *list; 162 struct btrfs_qgroup_list *list;
165 163
166 if (!qgroup)
167 return -ENOENT;
168
169 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
170 list_del(&qgroup->dirty); 164 list_del(&qgroup->dirty);
171
172 while (!list_empty(&qgroup->groups)) { 165 while (!list_empty(&qgroup->groups)) {
173 list = list_first_entry(&qgroup->groups, 166 list = list_first_entry(&qgroup->groups,
174 struct btrfs_qgroup_list, next_group); 167 struct btrfs_qgroup_list, next_group);
@@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185 kfree(list); 178 kfree(list);
186 } 179 }
187 kfree(qgroup); 180 kfree(qgroup);
181}
188 182
183/* must be called with qgroup_lock held */
184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185{
186 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187
188 if (!qgroup)
189 return -ENOENT;
190
191 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 __del_qgroup_rb(qgroup);
189 return 0; 193 return 0;
190} 194}
191 195
@@ -394,8 +398,7 @@ next1:
394 if (ret == -ENOENT) { 398 if (ret == -ENOENT) {
395 printk(KERN_WARNING 399 printk(KERN_WARNING
396 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", 400 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
397 (unsigned long long)found_key.objectid, 401 found_key.objectid, found_key.offset);
398 (unsigned long long)found_key.offset);
399 ret = 0; /* ignore the error */ 402 ret = 0; /* ignore the error */
400 } 403 }
401 if (ret) 404 if (ret)
@@ -428,39 +431,28 @@ out:
428} 431}
429 432
430/* 433/*
431 * This is only called from close_ctree() or open_ctree(), both in single- 434 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
432 * treaded paths. Clean up the in-memory structures. No locking needed. 435 * first two are in single-threaded paths.And for the third one, we have set
436 * quota_root to be null with qgroup_lock held before, so it is safe to clean
437 * up the in-memory structures without qgroup_lock held.
433 */ 438 */
434void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 439void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
435{ 440{
436 struct rb_node *n; 441 struct rb_node *n;
437 struct btrfs_qgroup *qgroup; 442 struct btrfs_qgroup *qgroup;
438 struct btrfs_qgroup_list *list;
439 443
440 while ((n = rb_first(&fs_info->qgroup_tree))) { 444 while ((n = rb_first(&fs_info->qgroup_tree))) {
441 qgroup = rb_entry(n, struct btrfs_qgroup, node); 445 qgroup = rb_entry(n, struct btrfs_qgroup, node);
442 rb_erase(n, &fs_info->qgroup_tree); 446 rb_erase(n, &fs_info->qgroup_tree);
443 447 __del_qgroup_rb(qgroup);
444 while (!list_empty(&qgroup->groups)) {
445 list = list_first_entry(&qgroup->groups,
446 struct btrfs_qgroup_list,
447 next_group);
448 list_del(&list->next_group);
449 list_del(&list->next_member);
450 kfree(list);
451 }
452
453 while (!list_empty(&qgroup->members)) {
454 list = list_first_entry(&qgroup->members,
455 struct btrfs_qgroup_list,
456 next_member);
457 list_del(&list->next_group);
458 list_del(&list->next_member);
459 kfree(list);
460 }
461 kfree(qgroup);
462 } 448 }
449 /*
 450 * we call btrfs_free_qgroup_config() when unmounting the
 451 * filesystem and disabling quota, so we set qgroup_ulist
452 * to be null here to avoid double free.
453 */
463 ulist_free(fs_info->qgroup_ulist); 454 ulist_free(fs_info->qgroup_ulist);
455 fs_info->qgroup_ulist = NULL;
464} 456}
465 457
466static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 458static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
@@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
946 fs_info->pending_quota_state = 0; 938 fs_info->pending_quota_state = 0;
947 quota_root = fs_info->quota_root; 939 quota_root = fs_info->quota_root;
948 fs_info->quota_root = NULL; 940 fs_info->quota_root = NULL;
949 btrfs_free_qgroup_config(fs_info);
950 spin_unlock(&fs_info->qgroup_lock); 941 spin_unlock(&fs_info->qgroup_lock);
951 942
952 if (!quota_root) { 943 btrfs_free_qgroup_config(fs_info);
953 ret = -EINVAL;
954 goto out;
955 }
956 944
957 ret = btrfs_clean_quota_tree(trans, quota_root); 945 ret = btrfs_clean_quota_tree(trans, quota_root);
958 if (ret) 946 if (ret)
@@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1174 if (ret) { 1162 if (ret) {
1175 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1176 printk(KERN_INFO "unable to update quota limit for %llu\n", 1164 printk(KERN_INFO "unable to update quota limit for %llu\n",
1177 (unsigned long long)qgroupid); 1165 qgroupid);
1178 } 1166 }
1179 1167
1180 spin_lock(&fs_info->qgroup_lock); 1168 spin_lock(&fs_info->qgroup_lock);
@@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1884 path, 1, 0); 1872 path, 1, 0);
1885 1873
1886 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 1874 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1887 (unsigned long long)fs_info->qgroup_rescan_progress.objectid, 1875 fs_info->qgroup_rescan_progress.objectid,
1888 fs_info->qgroup_rescan_progress.type, 1876 fs_info->qgroup_rescan_progress.type,
1889 (unsigned long long)fs_info->qgroup_rescan_progress.offset, 1877 fs_info->qgroup_rescan_progress.offset, ret);
1890 ret);
1891 1878
1892 if (ret) { 1879 if (ret) {
1893 /* 1880 /*
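The qgroup hunks above factor the per-entry teardown into __del_qgroup_rb() so that del_qgroup_rb() and btrfs_free_qgroup_config() share it, and they NULL fs_info->qgroup_ulist after freeing it so the config can be torn down again (quota disable followed by umount) without a double free. A minimal userspace sketch of that idea follows; the names in it are illustrative, not btrfs symbols.

#include <stdio.h>
#include <stdlib.h>

struct cfg {                      /* stand-in for the qgroup config state */
    int *tree_entry;              /* like an entry hanging off qgroup_tree */
    int *scratch;                 /* like fs_info->qgroup_ulist */
};

static void __free_entry(int *e)  /* shared teardown helper */
{
    free(e);
}

static void free_config(struct cfg *c)
{
    __free_entry(c->tree_entry);
    c->tree_entry = NULL;
    free(c->scratch);
    c->scratch = NULL;            /* later calls see NULL and free nothing */
}

int main(void)
{
    struct cfg c = { malloc(sizeof(int)), malloc(sizeof(int)) };

    free_config(&c);              /* e.g. quota disable */
    free_config(&c);              /* e.g. umount: safe, no double free */
    printf("teardown ran twice safely\n");
    return 0;
}
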
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0525e1389f5b..d0ecfbd9cc9f 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio)
1540 int ret; 1540 int ret;
1541 1541
1542 ret = alloc_rbio_parity_pages(rbio); 1542 ret = alloc_rbio_parity_pages(rbio);
1543 if (ret) 1543 if (ret) {
1544 __free_raid_bio(rbio);
1544 return ret; 1545 return ret;
1546 }
1545 1547
1546 ret = lock_stripe_add(rbio); 1548 ret = lock_stripe_add(rbio);
1547 if (ret == 0) 1549 if (ret == 0)
@@ -1687,11 +1689,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1687 struct blk_plug_cb *cb; 1689 struct blk_plug_cb *cb;
1688 1690
1689 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1691 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1690 if (IS_ERR(rbio)) { 1692 if (IS_ERR(rbio))
1691 kfree(raid_map);
1692 kfree(bbio);
1693 return PTR_ERR(rbio); 1693 return PTR_ERR(rbio);
1694 }
1695 bio_list_add(&rbio->bio_list, bio); 1694 bio_list_add(&rbio->bio_list, bio);
1696 rbio->bio_list_bytes = bio->bi_size; 1695 rbio->bio_list_bytes = bio->bi_size;
1697 1696
@@ -2041,9 +2040,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2041 int ret; 2040 int ret;
2042 2041
2043 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2042 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2044 if (IS_ERR(rbio)) { 2043 if (IS_ERR(rbio))
2045 return PTR_ERR(rbio); 2044 return PTR_ERR(rbio);
2046 }
2047 2045
2048 rbio->read_rebuild = 1; 2046 rbio->read_rebuild = 1;
2049 bio_list_add(&rbio->bio_list, bio); 2047 bio_list_add(&rbio->bio_list, bio);
@@ -2052,6 +2050,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2052 rbio->faila = find_logical_bio_stripe(rbio, bio); 2050 rbio->faila = find_logical_bio_stripe(rbio, bio);
2053 if (rbio->faila == -1) { 2051 if (rbio->faila == -1) {
2054 BUG(); 2052 BUG();
2053 kfree(raid_map);
2054 kfree(bbio);
2055 kfree(rbio); 2055 kfree(rbio);
2056 return -EIO; 2056 return -EIO;
2057 } 2057 }
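The raid56 hunks drop the kfree(raid_map)/kfree(bbio) calls from the IS_ERR(rbio) paths and instead free the rbio where its parity pages fail to allocate, so cleanup responsibility shifts toward the allocation side. Below is a small hedged sketch of that general "allocator cleans up what it was handed on failure" style, using invented names rather than the btrfs functions.

#include <errno.h>
#include <stdlib.h>

struct rbio_like {
    void *map;
    void *bbio;
};

/* On failure the allocator frees what it was handed, so callers that get
 * back NULL must not free map/bbio themselves. */
static struct rbio_like *alloc_rbio_like(void *map, void *bbio)
{
    struct rbio_like *r = malloc(sizeof(*r));

    if (!r) {
        free(map);
        free(bbio);
        return NULL;
    }
    r->map = map;
    r->bbio = bbio;
    return r;
}

int main(void)
{
    struct rbio_like *r = alloc_rbio_like(malloc(32), malloc(32));

    if (!r)
        return ENOMEM;            /* nothing else to free here */
    free(r->map);
    free(r->bbio);
    free(r);
    return 0;
}
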
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 12096496cc99..aacc2121e87c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
335 if (bnode->root) 335 if (bnode->root)
336 fs_info = bnode->root->fs_info; 336 fs_info = bnode->root->fs_info;
337 btrfs_panic(fs_info, errno, "Inconsistency in backref cache " 337 btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
338 "found at offset %llu\n", (unsigned long long)bytenr); 338 "found at offset %llu\n", bytenr);
339} 339}
340 340
341/* 341/*
@@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
642 return 1; 642 return 1;
643 } 643 }
644 if (key.type == BTRFS_METADATA_ITEM_KEY &&
645 item_size <= sizeof(*ei)) {
646 WARN_ON(item_size < sizeof(*ei));
647 return 1;
648 }
644 649
645 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 650 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
646 bi = (struct btrfs_tree_block_info *)(ei + 1); 651 bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
691 int cowonly; 696 int cowonly;
692 int ret; 697 int ret;
693 int err = 0; 698 int err = 0;
699 bool need_check = true;
694 700
695 path1 = btrfs_alloc_path(); 701 path1 = btrfs_alloc_path();
696 path2 = btrfs_alloc_path(); 702 path2 = btrfs_alloc_path();
@@ -914,6 +920,7 @@ again:
914 cur->bytenr); 920 cur->bytenr);
915 921
916 lower = cur; 922 lower = cur;
923 need_check = true;
917 for (; level < BTRFS_MAX_LEVEL; level++) { 924 for (; level < BTRFS_MAX_LEVEL; level++) {
918 if (!path2->nodes[level]) { 925 if (!path2->nodes[level]) {
919 BUG_ON(btrfs_root_bytenr(&root->root_item) != 926 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,14 +964,12 @@ again:
957 964
958 /* 965 /*
959 * add the block to pending list if we 966 * add the block to pending list if we
960 * need check its backrefs. only block 967 * need check its backrefs, we only do this once
961 * at 'cur->level + 1' is added to the 968 * while walking up a tree as we will catch
962 * tail of pending list. this guarantees 969 * anything else later on.
963 * we check backrefs from lower level
964 * blocks to upper level blocks.
965 */ 970 */
966 if (!upper->checked && 971 if (!upper->checked && need_check) {
967 level == cur->level + 1) { 972 need_check = false;
968 list_add_tail(&edge->list[UPPER], 973 list_add_tail(&edge->list[UPPER],
969 &list); 974 &list);
970 } else 975 } else
@@ -2314,8 +2319,13 @@ again:
2314 BUG_ON(root->reloc_root != reloc_root); 2319 BUG_ON(root->reloc_root != reloc_root);
2315 2320
2316 ret = merge_reloc_root(rc, root); 2321 ret = merge_reloc_root(rc, root);
2317 if (ret) 2322 if (ret) {
2323 __update_reloc_root(reloc_root, 1);
2324 free_extent_buffer(reloc_root->node);
2325 free_extent_buffer(reloc_root->commit_root);
2326 kfree(reloc_root);
2318 goto out; 2327 goto out;
2328 }
2319 } else { 2329 } else {
2320 list_del_init(&reloc_root->root_list); 2330 list_del_init(&reloc_root->root_list);
2321 } 2331 }
@@ -2344,9 +2354,6 @@ again:
2344 if (IS_ERR(root)) 2354 if (IS_ERR(root))
2345 continue; 2355 continue;
2346 2356
2347 if (btrfs_root_refs(&root->root_item) == 0)
2348 continue;
2349
2350 trans = btrfs_join_transaction(root); 2357 trans = btrfs_join_transaction(root);
2351 BUG_ON(IS_ERR(trans)); 2358 BUG_ON(IS_ERR(trans));
2352 2359
@@ -3628,7 +3635,7 @@ int add_data_references(struct reloc_control *rc,
3628 unsigned long ptr; 3635 unsigned long ptr;
3629 unsigned long end; 3636 unsigned long end;
3630 u32 blocksize = btrfs_level_size(rc->extent_root, 0); 3637 u32 blocksize = btrfs_level_size(rc->extent_root, 0);
3631 int ret; 3638 int ret = 0;
3632 int err = 0; 3639 int err = 0;
3633 3640
3634 eb = path->nodes[0]; 3641 eb = path->nodes[0];
@@ -3655,6 +3662,10 @@ int add_data_references(struct reloc_control *rc,
3655 } else { 3662 } else {
3656 BUG(); 3663 BUG();
3657 } 3664 }
3665 if (ret) {
3666 err = ret;
3667 goto out;
3668 }
3658 ptr += btrfs_extent_inline_ref_size(key.type); 3669 ptr += btrfs_extent_inline_ref_size(key.type);
3659 } 3670 }
3660 WARN_ON(ptr > end); 3671 WARN_ON(ptr > end);
@@ -3700,6 +3711,7 @@ int add_data_references(struct reloc_control *rc,
3700 } 3711 }
3701 path->slots[0]++; 3712 path->slots[0]++;
3702 } 3713 }
3714out:
3703 btrfs_release_path(path); 3715 btrfs_release_path(path);
3704 if (err) 3716 if (err)
3705 free_block_list(blocks); 3717 free_block_list(blocks);
@@ -4219,8 +4231,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4219 } 4231 }
4220 4232
4221 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4233 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
4222 (unsigned long long)rc->block_group->key.objectid, 4234 rc->block_group->key.objectid, rc->block_group->flags);
4223 (unsigned long long)rc->block_group->flags);
4224 4235
4225 ret = btrfs_start_all_delalloc_inodes(fs_info, 0); 4236 ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
4226 if (ret < 0) { 4237 if (ret < 0) {
@@ -4242,7 +4253,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4242 break; 4253 break;
4243 4254
4244 printk(KERN_INFO "btrfs: found %llu extents\n", 4255 printk(KERN_INFO "btrfs: found %llu extents\n",
4245 (unsigned long long)rc->extents_found); 4256 rc->extents_found);
4246 4257
4247 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4258 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4248 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 4259 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ffb1036ef10d..0b1f4ef8db98 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,8 +29,8 @@
29 * generation numbers as then we know the root was once mounted with an older 29 * generation numbers as then we know the root was once mounted with an older
30 * kernel that was not aware of the root item structure change. 30 * kernel that was not aware of the root item structure change.
31 */ 31 */
32void btrfs_read_root_item(struct extent_buffer *eb, int slot, 32static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
33 struct btrfs_root_item *item) 33 struct btrfs_root_item *item)
34{ 34{
35 uuid_le uuid; 35 uuid_le uuid;
36 int len; 36 int len;
@@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
155 if (ret != 0) { 155 if (ret != 0) {
156 btrfs_print_leaf(root, path->nodes[0]); 156 btrfs_print_leaf(root, path->nodes[0]);
157 printk(KERN_CRIT "unable to update root key %llu %u %llu\n", 157 printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
158 (unsigned long long)key->objectid, key->type, 158 key->objectid, key->type, key->offset);
159 (unsigned long long)key->offset);
160 BUG_ON(1); 159 BUG_ON(1);
161 } 160 }
162 161
@@ -490,13 +489,13 @@ again:
490 */ 489 */
491void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) 490void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
492{ 491{
493 u64 inode_flags = le64_to_cpu(root_item->inode.flags); 492 u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
494 493
495 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { 494 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
496 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; 495 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
497 root_item->inode.flags = cpu_to_le64(inode_flags); 496 btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
498 root_item->flags = 0; 497 btrfs_set_root_flags(root_item, 0);
499 root_item->byte_limit = 0; 498 btrfs_set_root_limit(root_item, 0);
500 } 499 }
501} 500}
502 501
@@ -507,8 +506,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
507 struct timespec ct = CURRENT_TIME; 506 struct timespec ct = CURRENT_TIME;
508 507
509 spin_lock(&root->root_item_lock); 508 spin_lock(&root->root_item_lock);
510 item->ctransid = cpu_to_le64(trans->transid); 509 btrfs_set_root_ctransid(item, trans->transid);
511 item->ctime.sec = cpu_to_le64(ct.tv_sec); 510 btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
512 item->ctime.nsec = cpu_to_le32(ct.tv_nsec); 511 btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
513 spin_unlock(&root->root_item_lock); 512 spin_unlock(&root->root_item_lock);
514} 513}
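The root-tree.c hunks stop open-coding le64_to_cpu()/cpu_to_le64() on stack copies of on-disk items and use the btrfs_stack_*/btrfs_set_stack_* style accessors, keeping the byte-order handling in one place. A rough userspace sketch of that accessor pattern, assuming glibc's <endian.h>; the struct and helper names are made up:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct disk_item {                /* imagine this mirrors an on-disk layout */
    uint64_t flags;               /* always stored little-endian */
};

static inline uint64_t item_flags(const struct disk_item *it)
{
    return le64toh(it->flags);    /* one place that knows the byte order */
}

static inline void set_item_flags(struct disk_item *it, uint64_t v)
{
    it->flags = htole64(v);
}

int main(void)
{
    struct disk_item it = { 0 };

    set_item_flags(&it, item_flags(&it) | 0x1);   /* read-modify-write */
    printf("flags = %llu\n", (unsigned long long)item_flags(&it));
    return 0;
}
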
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 64a157becbe5..0afcd452fcb3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -754,8 +754,7 @@ out:
754 num_uncorrectable_read_errors); 754 num_uncorrectable_read_errors);
755 printk_ratelimited_in_rcu(KERN_ERR 755 printk_ratelimited_in_rcu(KERN_ERR
756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
757 (unsigned long long)fixup->logical, 757 fixup->logical, rcu_str_deref(fixup->dev->name));
758 rcu_str_deref(fixup->dev->name));
759 } 758 }
760 759
761 btrfs_free_path(path); 760 btrfs_free_path(path);
@@ -1154,8 +1153,7 @@ corrected_error:
1154 spin_unlock(&sctx->stat_lock); 1153 spin_unlock(&sctx->stat_lock);
1155 printk_ratelimited_in_rcu(KERN_ERR 1154 printk_ratelimited_in_rcu(KERN_ERR
1156 "btrfs: fixed up error at logical %llu on dev %s\n", 1155 "btrfs: fixed up error at logical %llu on dev %s\n",
1157 (unsigned long long)logical, 1156 logical, rcu_str_deref(dev->name));
1158 rcu_str_deref(dev->name));
1159 } 1157 }
1160 } else { 1158 } else {
1161did_not_correct_error: 1159did_not_correct_error:
@@ -1164,8 +1162,7 @@ did_not_correct_error:
1164 spin_unlock(&sctx->stat_lock); 1162 spin_unlock(&sctx->stat_lock);
1165 printk_ratelimited_in_rcu(KERN_ERR 1163 printk_ratelimited_in_rcu(KERN_ERR
1166 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 1164 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
1167 (unsigned long long)logical, 1165 logical, rcu_str_deref(dev->name));
1168 rcu_str_deref(dev->name));
1169 } 1166 }
1170 1167
1171out: 1168out:
@@ -1345,12 +1342,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1345 mapped_buffer = kmap_atomic(sblock->pagev[0]->page); 1342 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1346 h = (struct btrfs_header *)mapped_buffer; 1343 h = (struct btrfs_header *)mapped_buffer;
1347 1344
1348 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || 1345 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
1349 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || 1346 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1350 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 1347 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1351 BTRFS_UUID_SIZE)) { 1348 BTRFS_UUID_SIZE)) {
1352 sblock->header_error = 1; 1349 sblock->header_error = 1;
1353 } else if (generation != le64_to_cpu(h->generation)) { 1350 } else if (generation != btrfs_stack_header_generation(h)) {
1354 sblock->header_error = 1; 1351 sblock->header_error = 1;
1355 sblock->generation_error = 1; 1352 sblock->generation_error = 1;
1356 } 1353 }
@@ -1720,10 +1717,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1720 * b) the page is already kmapped 1717 * b) the page is already kmapped
1721 */ 1718 */
1722 1719
1723 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) 1720 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1724 ++fail; 1721 ++fail;
1725 1722
1726 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) 1723 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
1727 ++fail; 1724 ++fail;
1728 1725
1729 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1726 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1786,10 +1783,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1786 s = (struct btrfs_super_block *)mapped_buffer; 1783 s = (struct btrfs_super_block *)mapped_buffer;
1787 memcpy(on_disk_csum, s->csum, sctx->csum_size); 1784 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1788 1785
1789 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) 1786 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1790 ++fail_cor; 1787 ++fail_cor;
1791 1788
1792 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) 1789 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1793 ++fail_gen; 1790 ++fail_gen;
1794 1791
1795 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1792 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2455 printk(KERN_ERR 2452 printk(KERN_ERR
2456 "btrfs scrub: tree block %llu spanning " 2453 "btrfs scrub: tree block %llu spanning "
2457 "stripes, ignored. logical=%llu\n", 2454 "stripes, ignored. logical=%llu\n",
2458 (unsigned long long)key.objectid, 2455 key.objectid, logical);
2459 (unsigned long long)logical);
2460 goto next; 2456 goto next;
2461 } 2457 }
2462 2458
@@ -2863,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2863 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { 2859 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
2864 /* not supported for data w/o checksums */ 2860 /* not supported for data w/o checksums */
2865 printk(KERN_ERR 2861 printk(KERN_ERR
2866 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", 2862 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
2867 fs_info->chunk_root->sectorsize, 2863 fs_info->chunk_root->sectorsize, PAGE_SIZE);
2868 (unsigned long long)PAGE_SIZE);
2869 return -EINVAL; 2864 return -EINVAL;
2870 } 2865 }
2871 2866
@@ -3175,11 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3175 copy_nocow_pages_for_inode, 3170 copy_nocow_pages_for_inode,
3176 nocow_ctx); 3171 nocow_ctx);
3177 if (ret != 0 && ret != -ENOENT) { 3172 if (ret != 0 && ret != -ENOENT) {
3178 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", 3173 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
3179 (unsigned long long)logical, 3174 logical, physical_for_dev_replace, len, mirror_num,
3180 (unsigned long long)physical_for_dev_replace, 3175 ret);
3181 (unsigned long long)len,
3182 (unsigned long long)mirror_num, ret);
3183 not_written = 1; 3176 not_written = 1;
3184 goto out; 3177 goto out;
3185 } 3178 }
@@ -3224,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3224 return PTR_ERR(local_root); 3217 return PTR_ERR(local_root);
3225 } 3218 }
3226 3219
3227 if (btrfs_root_refs(&local_root->root_item) == 0) {
3228 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3229 return -ENOENT;
3230 }
3231
3232 key.type = BTRFS_INODE_ITEM_KEY; 3220 key.type = BTRFS_INODE_ITEM_KEY;
3233 key.objectid = inum; 3221 key.objectid = inum;
3234 key.offset = 0; 3222 key.offset = 0;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2e14fd89a8b4..e46e0ed74925 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -26,6 +26,7 @@
26#include <linux/radix-tree.h> 26#include <linux/radix-tree.h>
27#include <linux/crc32c.h> 27#include <linux/crc32c.h>
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/string.h>
29 30
30#include "send.h" 31#include "send.h"
31#include "backref.h" 32#include "backref.h"
@@ -54,8 +55,8 @@ struct fs_path {
54 55
55 char *buf; 56 char *buf;
56 int buf_len; 57 int buf_len;
57 int reversed:1; 58 unsigned int reversed:1;
58 int virtual_mem:1; 59 unsigned int virtual_mem:1;
59 char inline_buf[]; 60 char inline_buf[];
60 }; 61 };
61 char pad[PAGE_SIZE]; 62 char pad[PAGE_SIZE];
@@ -1668,6 +1669,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1668 u64 *who_ino, u64 *who_gen) 1669 u64 *who_ino, u64 *who_gen)
1669{ 1670{
1670 int ret = 0; 1671 int ret = 0;
1672 u64 gen;
1671 u64 other_inode = 0; 1673 u64 other_inode = 0;
1672 u8 other_type = 0; 1674 u8 other_type = 0;
1673 1675
@@ -1678,6 +1680,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1678 if (ret <= 0) 1680 if (ret <= 0)
1679 goto out; 1681 goto out;
1680 1682
1683 /*
1684 * If we have a parent root we need to verify that the parent dir was
 1685 * not deleted and then re-created; if it was, then we have no overwrite
1686 * and we can just unlink this entry.
1687 */
1688 if (sctx->parent_root) {
1689 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1690 NULL, NULL, NULL);
1691 if (ret < 0 && ret != -ENOENT)
1692 goto out;
1693 if (ret) {
1694 ret = 0;
1695 goto out;
1696 }
1697 if (gen != dir_gen)
1698 goto out;
1699 }
1700
1681 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, 1701 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1682 &other_inode, &other_type); 1702 &other_inode, &other_type);
1683 if (ret < 0 && ret != -ENOENT) 1703 if (ret < 0 && ret != -ENOENT)
@@ -2519,7 +2539,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
2519 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2539 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2520 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2540 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2521 2541
2522 if (di_key.objectid < sctx->send_progress) { 2542 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2543 di_key.objectid < sctx->send_progress) {
2523 ret = 1; 2544 ret = 1;
2524 goto out; 2545 goto out;
2525 } 2546 }
@@ -2581,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir,
2581 u64 dir_gen, struct fs_path *path) 2602 u64 dir_gen, struct fs_path *path)
2582{ 2603{
2583 struct recorded_ref *ref; 2604 struct recorded_ref *ref;
2584 char *tmp;
2585 2605
2586 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2606 ref = kmalloc(sizeof(*ref), GFP_NOFS);
2587 if (!ref) 2607 if (!ref)
@@ -2591,25 +2611,35 @@ static int record_ref(struct list_head *head, u64 dir,
2591 ref->dir_gen = dir_gen; 2611 ref->dir_gen = dir_gen;
2592 ref->full_path = path; 2612 ref->full_path = path;
2593 2613
2594 tmp = strrchr(ref->full_path->start, '/'); 2614 ref->name = (char *)kbasename(ref->full_path->start);
2595 if (!tmp) { 2615 ref->name_len = ref->full_path->end - ref->name;
2596 ref->name_len = ref->full_path->end - ref->full_path->start; 2616 ref->dir_path = ref->full_path->start;
2597 ref->name = ref->full_path->start; 2617 if (ref->name == ref->full_path->start)
2598 ref->dir_path_len = 0; 2618 ref->dir_path_len = 0;
2599 ref->dir_path = ref->full_path->start; 2619 else
2600 } else {
2601 tmp++;
2602 ref->name_len = ref->full_path->end - tmp;
2603 ref->name = tmp;
2604 ref->dir_path = ref->full_path->start;
2605 ref->dir_path_len = ref->full_path->end - 2620 ref->dir_path_len = ref->full_path->end -
2606 ref->full_path->start - 1 - ref->name_len; 2621 ref->full_path->start - 1 - ref->name_len;
2607 }
2608 2622
2609 list_add_tail(&ref->list, head); 2623 list_add_tail(&ref->list, head);
2610 return 0; 2624 return 0;
2611} 2625}
2612 2626
2627static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2628{
2629 struct recorded_ref *new;
2630
2631 new = kmalloc(sizeof(*ref), GFP_NOFS);
2632 if (!new)
2633 return -ENOMEM;
2634
2635 new->dir = ref->dir;
2636 new->dir_gen = ref->dir_gen;
2637 new->full_path = NULL;
2638 INIT_LIST_HEAD(&new->list);
2639 list_add_tail(&new->list, list);
2640 return 0;
2641}
2642
2613static void __free_recorded_refs(struct list_head *head) 2643static void __free_recorded_refs(struct list_head *head)
2614{ 2644{
2615 struct recorded_ref *cur; 2645 struct recorded_ref *cur;
@@ -2724,9 +2754,7 @@ static int process_recorded_refs(struct send_ctx *sctx)
2724 int ret = 0; 2754 int ret = 0;
2725 struct recorded_ref *cur; 2755 struct recorded_ref *cur;
2726 struct recorded_ref *cur2; 2756 struct recorded_ref *cur2;
2727 struct ulist *check_dirs = NULL; 2757 struct list_head check_dirs;
2728 struct ulist_iterator uit;
2729 struct ulist_node *un;
2730 struct fs_path *valid_path = NULL; 2758 struct fs_path *valid_path = NULL;
2731 u64 ow_inode = 0; 2759 u64 ow_inode = 0;
2732 u64 ow_gen; 2760 u64 ow_gen;
@@ -2740,6 +2768,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2740 * which is always '..' 2768 * which is always '..'
2741 */ 2769 */
2742 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 2770 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
2771 INIT_LIST_HEAD(&check_dirs);
2743 2772
2744 valid_path = fs_path_alloc(); 2773 valid_path = fs_path_alloc();
2745 if (!valid_path) { 2774 if (!valid_path) {
@@ -2747,12 +2776,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2747 goto out; 2776 goto out;
2748 } 2777 }
2749 2778
2750 check_dirs = ulist_alloc(GFP_NOFS);
2751 if (!check_dirs) {
2752 ret = -ENOMEM;
2753 goto out;
2754 }
2755
2756 /* 2779 /*
2757 * First, check if the first ref of the current inode was overwritten 2780 * First, check if the first ref of the current inode was overwritten
2758 * before. If yes, we know that the current inode was already orphanized 2781 * before. If yes, we know that the current inode was already orphanized
@@ -2889,8 +2912,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2889 goto out; 2912 goto out;
2890 } 2913 }
2891 } 2914 }
2892 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2915 ret = dup_ref(cur, &check_dirs);
2893 GFP_NOFS);
2894 if (ret < 0) 2916 if (ret < 0)
2895 goto out; 2917 goto out;
2896 } 2918 }
@@ -2918,8 +2940,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2918 } 2940 }
2919 2941
2920 list_for_each_entry(cur, &sctx->deleted_refs, list) { 2942 list_for_each_entry(cur, &sctx->deleted_refs, list) {
2921 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2943 ret = dup_ref(cur, &check_dirs);
2922 GFP_NOFS);
2923 if (ret < 0) 2944 if (ret < 0)
2924 goto out; 2945 goto out;
2925 } 2946 }
@@ -2930,8 +2951,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2930 */ 2951 */
2931 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 2952 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2932 list); 2953 list);
2933 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2954 ret = dup_ref(cur, &check_dirs);
2934 GFP_NOFS);
2935 if (ret < 0) 2955 if (ret < 0)
2936 goto out; 2956 goto out;
2937 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 2957 } else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@ -2951,12 +2971,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2951 if (ret < 0) 2971 if (ret < 0)
2952 goto out; 2972 goto out;
2953 } 2973 }
2954 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2974 ret = dup_ref(cur, &check_dirs);
2955 GFP_NOFS);
2956 if (ret < 0) 2975 if (ret < 0)
2957 goto out; 2976 goto out;
2958 } 2977 }
2959
2960 /* 2978 /*
2961 * If the inode is still orphan, unlink the orphan. This may 2979 * If the inode is still orphan, unlink the orphan. This may
2962 * happen when a previous inode did overwrite the first ref 2980 * happen when a previous inode did overwrite the first ref
@@ -2978,33 +2996,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2978 * deletion and if it's finally possible to perform the rmdir now. 2996 * deletion and if it's finally possible to perform the rmdir now.
2979 * We also update the inode stats of the parent dirs here. 2997 * We also update the inode stats of the parent dirs here.
2980 */ 2998 */
2981 ULIST_ITER_INIT(&uit); 2999 list_for_each_entry(cur, &check_dirs, list) {
2982 while ((un = ulist_next(check_dirs, &uit))) {
2983 /* 3000 /*
2984 * In case we had refs into dirs that were not processed yet, 3001 * In case we had refs into dirs that were not processed yet,
2985 * we don't need to do the utime and rmdir logic for these dirs. 3002 * we don't need to do the utime and rmdir logic for these dirs.
2986 * The dir will be processed later. 3003 * The dir will be processed later.
2987 */ 3004 */
2988 if (un->val > sctx->cur_ino) 3005 if (cur->dir > sctx->cur_ino)
2989 continue; 3006 continue;
2990 3007
2991 ret = get_cur_inode_state(sctx, un->val, un->aux); 3008 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2992 if (ret < 0) 3009 if (ret < 0)
2993 goto out; 3010 goto out;
2994 3011
2995 if (ret == inode_state_did_create || 3012 if (ret == inode_state_did_create ||
2996 ret == inode_state_no_change) { 3013 ret == inode_state_no_change) {
2997 /* TODO delayed utimes */ 3014 /* TODO delayed utimes */
2998 ret = send_utimes(sctx, un->val, un->aux); 3015 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
2999 if (ret < 0) 3016 if (ret < 0)
3000 goto out; 3017 goto out;
3001 } else if (ret == inode_state_did_delete) { 3018 } else if (ret == inode_state_did_delete) {
3002 ret = can_rmdir(sctx, un->val, sctx->cur_ino); 3019 ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
3003 if (ret < 0) 3020 if (ret < 0)
3004 goto out; 3021 goto out;
3005 if (ret) { 3022 if (ret) {
3006 ret = get_cur_path(sctx, un->val, un->aux, 3023 ret = get_cur_path(sctx, cur->dir,
3007 valid_path); 3024 cur->dir_gen, valid_path);
3008 if (ret < 0) 3025 if (ret < 0)
3009 goto out; 3026 goto out;
3010 ret = send_rmdir(sctx, valid_path); 3027 ret = send_rmdir(sctx, valid_path);
@@ -3017,8 +3034,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3017 ret = 0; 3034 ret = 0;
3018 3035
3019out: 3036out:
3037 __free_recorded_refs(&check_dirs);
3020 free_recorded_refs(sctx); 3038 free_recorded_refs(sctx);
3021 ulist_free(check_dirs);
3022 fs_path_free(valid_path); 3039 fs_path_free(valid_path);
3023 return ret; 3040 return ret;
3024} 3041}
@@ -3119,6 +3136,8 @@ out:
3119 3136
3120struct find_ref_ctx { 3137struct find_ref_ctx {
3121 u64 dir; 3138 u64 dir;
3139 u64 dir_gen;
3140 struct btrfs_root *root;
3122 struct fs_path *name; 3141 struct fs_path *name;
3123 int found_idx; 3142 int found_idx;
3124}; 3143};
@@ -3128,9 +3147,21 @@ static int __find_iref(int num, u64 dir, int index,
3128 void *ctx_) 3147 void *ctx_)
3129{ 3148{
3130 struct find_ref_ctx *ctx = ctx_; 3149 struct find_ref_ctx *ctx = ctx_;
3150 u64 dir_gen;
3151 int ret;
3131 3152
3132 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3153 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3133 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3154 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3155 /*
3156 * To avoid doing extra lookups we'll only do this if everything
3157 * else matches.
3158 */
3159 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3160 NULL, NULL, NULL);
3161 if (ret)
3162 return ret;
3163 if (dir_gen != ctx->dir_gen)
3164 return 0;
3134 ctx->found_idx = num; 3165 ctx->found_idx = num;
3135 return 1; 3166 return 1;
3136 } 3167 }
@@ -3140,14 +3171,16 @@ static int __find_iref(int num, u64 dir, int index,
3140static int find_iref(struct btrfs_root *root, 3171static int find_iref(struct btrfs_root *root,
3141 struct btrfs_path *path, 3172 struct btrfs_path *path,
3142 struct btrfs_key *key, 3173 struct btrfs_key *key,
3143 u64 dir, struct fs_path *name) 3174 u64 dir, u64 dir_gen, struct fs_path *name)
3144{ 3175{
3145 int ret; 3176 int ret;
3146 struct find_ref_ctx ctx; 3177 struct find_ref_ctx ctx;
3147 3178
3148 ctx.dir = dir; 3179 ctx.dir = dir;
3149 ctx.name = name; 3180 ctx.name = name;
3181 ctx.dir_gen = dir_gen;
3150 ctx.found_idx = -1; 3182 ctx.found_idx = -1;
3183 ctx.root = root;
3151 3184
3152 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3185 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3153 if (ret < 0) 3186 if (ret < 0)
@@ -3163,11 +3196,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index,
3163 struct fs_path *name, 3196 struct fs_path *name,
3164 void *ctx) 3197 void *ctx)
3165{ 3198{
3199 u64 dir_gen;
3166 int ret; 3200 int ret;
3167 struct send_ctx *sctx = ctx; 3201 struct send_ctx *sctx = ctx;
3168 3202
3203 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3204 NULL, NULL, NULL);
3205 if (ret)
3206 return ret;
3207
3169 ret = find_iref(sctx->parent_root, sctx->right_path, 3208 ret = find_iref(sctx->parent_root, sctx->right_path,
3170 sctx->cmp_key, dir, name); 3209 sctx->cmp_key, dir, dir_gen, name);
3171 if (ret == -ENOENT) 3210 if (ret == -ENOENT)
3172 ret = __record_new_ref(num, dir, index, name, sctx); 3211 ret = __record_new_ref(num, dir, index, name, sctx);
3173 else if (ret > 0) 3212 else if (ret > 0)
@@ -3180,11 +3219,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index,
3180 struct fs_path *name, 3219 struct fs_path *name,
3181 void *ctx) 3220 void *ctx)
3182{ 3221{
3222 u64 dir_gen;
3183 int ret; 3223 int ret;
3184 struct send_ctx *sctx = ctx; 3224 struct send_ctx *sctx = ctx;
3185 3225
3226 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3227 NULL, NULL, NULL);
3228 if (ret)
3229 return ret;
3230
3186 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 3231 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3187 dir, name); 3232 dir, dir_gen, name);
3188 if (ret == -ENOENT) 3233 if (ret == -ENOENT)
3189 ret = __record_deleted_ref(num, dir, index, name, sctx); 3234 ret = __record_deleted_ref(num, dir, index, name, sctx);
3190 else if (ret > 0) 3235 else if (ret > 0)
@@ -3869,7 +3914,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3869 btrfs_item_key_to_cpu(eb, &found_key, slot); 3914 btrfs_item_key_to_cpu(eb, &found_key, slot);
3870 if (found_key.objectid != key.objectid || 3915 if (found_key.objectid != key.objectid ||
3871 found_key.type != key.type) { 3916 found_key.type != key.type) {
3872 ret = 0; 3917 /* If we're a hole then just pretend nothing changed */
3918 ret = (left_disknr) ? 0 : 1;
3873 goto out; 3919 goto out;
3874 } 3920 }
3875 3921
@@ -3895,7 +3941,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3895 * This may only happen on the first iteration. 3941 * This may only happen on the first iteration.
3896 */ 3942 */
3897 if (found_key.offset + right_len <= ekey->offset) { 3943 if (found_key.offset + right_len <= ekey->offset) {
3898 ret = 0; 3944 /* If we're a hole just pretend nothing changed */
3945 ret = (left_disknr) ? 0 : 1;
3899 goto out; 3946 goto out;
3900 } 3947 }
3901 3948
@@ -3960,8 +4007,8 @@ static int process_extent(struct send_ctx *sctx,
3960 struct btrfs_path *path, 4007 struct btrfs_path *path,
3961 struct btrfs_key *key) 4008 struct btrfs_key *key)
3962{ 4009{
3963 int ret = 0;
3964 struct clone_root *found_clone = NULL; 4010 struct clone_root *found_clone = NULL;
4011 int ret = 0;
3965 4012
3966 if (S_ISLNK(sctx->cur_inode_mode)) 4013 if (S_ISLNK(sctx->cur_inode_mode))
3967 return 0; 4014 return 0;
@@ -3974,6 +4021,32 @@ static int process_extent(struct send_ctx *sctx,
3974 ret = 0; 4021 ret = 0;
3975 goto out; 4022 goto out;
3976 } 4023 }
4024 } else {
4025 struct btrfs_file_extent_item *ei;
4026 u8 type;
4027
4028 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4029 struct btrfs_file_extent_item);
4030 type = btrfs_file_extent_type(path->nodes[0], ei);
4031 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
4032 type == BTRFS_FILE_EXTENT_REG) {
4033 /*
4034 * The send spec does not have a prealloc command yet,
4035 * so just leave a hole for prealloc'ed extents until
4036 * we have enough commands queued up to justify rev'ing
4037 * the send spec.
4038 */
4039 if (type == BTRFS_FILE_EXTENT_PREALLOC) {
4040 ret = 0;
4041 goto out;
4042 }
4043
4044 /* Have a hole, just skip it. */
4045 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
4046 ret = 0;
4047 goto out;
4048 }
4049 }
3977 } 4050 }
3978 4051
3979 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 4052 ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@ -4361,6 +4434,64 @@ static int changed_extent(struct send_ctx *sctx,
4361 return ret; 4434 return ret;
4362} 4435}
4363 4436
4437static int dir_changed(struct send_ctx *sctx, u64 dir)
4438{
4439 u64 orig_gen, new_gen;
4440 int ret;
4441
4442 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
4443 NULL, NULL);
4444 if (ret)
4445 return ret;
4446
4447 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
4448 NULL, NULL, NULL);
4449 if (ret)
4450 return ret;
4451
4452 return (orig_gen != new_gen) ? 1 : 0;
4453}
4454
4455static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
4456 struct btrfs_key *key)
4457{
4458 struct btrfs_inode_extref *extref;
4459 struct extent_buffer *leaf;
4460 u64 dirid = 0, last_dirid = 0;
4461 unsigned long ptr;
4462 u32 item_size;
4463 u32 cur_offset = 0;
4464 int ref_name_len;
4465 int ret = 0;
4466
4467 /* Easy case, just check this one dirid */
4468 if (key->type == BTRFS_INODE_REF_KEY) {
4469 dirid = key->offset;
4470
4471 ret = dir_changed(sctx, dirid);
4472 goto out;
4473 }
4474
4475 leaf = path->nodes[0];
4476 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4477 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4478 while (cur_offset < item_size) {
4479 extref = (struct btrfs_inode_extref *)(ptr +
4480 cur_offset);
4481 dirid = btrfs_inode_extref_parent(leaf, extref);
4482 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
4483 cur_offset += ref_name_len + sizeof(*extref);
4484 if (dirid == last_dirid)
4485 continue;
4486 ret = dir_changed(sctx, dirid);
4487 if (ret)
4488 break;
4489 last_dirid = dirid;
4490 }
4491out:
4492 return ret;
4493}
4494
4364/* 4495/*
4365 * Updates compare related fields in sctx and simply forwards to the actual 4496 * Updates compare related fields in sctx and simply forwards to the actual
4366 * changed_xxx functions. 4497 * changed_xxx functions.
@@ -4376,6 +4507,19 @@ static int changed_cb(struct btrfs_root *left_root,
4376 int ret = 0; 4507 int ret = 0;
4377 struct send_ctx *sctx = ctx; 4508 struct send_ctx *sctx = ctx;
4378 4509
4510 if (result == BTRFS_COMPARE_TREE_SAME) {
4511 if (key->type != BTRFS_INODE_REF_KEY &&
4512 key->type != BTRFS_INODE_EXTREF_KEY)
4513 return 0;
4514 ret = compare_refs(sctx, left_path, key);
4515 if (!ret)
4516 return 0;
4517 if (ret < 0)
4518 return ret;
4519 result = BTRFS_COMPARE_TREE_CHANGED;
4520 ret = 0;
4521 }
4522
4379 sctx->left_path = left_path; 4523 sctx->left_path = left_path;
4380 sctx->right_path = right_path; 4524 sctx->right_path = right_path;
4381 sctx->cmp_key = key; 4525 sctx->cmp_key = key;
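In the send.c hunks, record_ref() switches from an open-coded strrchr() dance to kbasename(): the name is whatever follows the last '/', or the whole string when there is no '/', and dir_path_len falls out of the pointer difference. A small userspace stand-in for that split (not the kernel helper itself):

#include <stdio.h>
#include <string.h>

/* Userspace stand-in for kbasename(): pointer to the component after the
 * last '/', or the whole string if there is no '/'. */
static const char *basename_of(const char *path)
{
    const char *tail = strrchr(path, '/');

    return tail ? tail + 1 : path;
}

int main(void)
{
    const char *full = "dir/subdir/file";
    const char *name = basename_of(full);
    size_t dir_len = (name == full) ? 0 : (size_t)(name - full) - 1;

    /* prints: name=file dir_path_len=10 */
    printf("name=%s dir_path_len=%zu\n", name, dir_len);
    return 0;
}
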
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8eb6191d86da..3aab10ce63e8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,8 @@
56#include "rcu-string.h" 56#include "rcu-string.h"
57#include "dev-replace.h" 57#include "dev-replace.h"
58#include "free-space-cache.h" 58#include "free-space-cache.h"
59#include "backref.h"
60#include "tests/btrfs-tests.h"
59 61
60#define CREATE_TRACE_POINTS 62#define CREATE_TRACE_POINTS
61#include <trace/events/btrfs.h> 63#include <trace/events/btrfs.h>
@@ -320,14 +322,15 @@ enum {
320 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, 322 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
321 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 323 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
322 Opt_check_integrity, Opt_check_integrity_including_extent_data, 324 Opt_check_integrity, Opt_check_integrity_including_extent_data,
323 Opt_check_integrity_print_mask, Opt_fatal_errors, 325 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
326 Opt_commit_interval,
324 Opt_err, 327 Opt_err,
325}; 328};
326 329
327static match_table_t tokens = { 330static match_table_t tokens = {
328 {Opt_degraded, "degraded"}, 331 {Opt_degraded, "degraded"},
329 {Opt_subvol, "subvol=%s"}, 332 {Opt_subvol, "subvol=%s"},
330 {Opt_subvolid, "subvolid=%d"}, 333 {Opt_subvolid, "subvolid=%s"},
331 {Opt_device, "device=%s"}, 334 {Opt_device, "device=%s"},
332 {Opt_nodatasum, "nodatasum"}, 335 {Opt_nodatasum, "nodatasum"},
333 {Opt_nodatacow, "nodatacow"}, 336 {Opt_nodatacow, "nodatacow"},
@@ -360,7 +363,9 @@ static match_table_t tokens = {
360 {Opt_check_integrity, "check_int"}, 363 {Opt_check_integrity, "check_int"},
361 {Opt_check_integrity_including_extent_data, "check_int_data"}, 364 {Opt_check_integrity_including_extent_data, "check_int_data"},
362 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, 365 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
366 {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
363 {Opt_fatal_errors, "fatal_errors=%s"}, 367 {Opt_fatal_errors, "fatal_errors=%s"},
368 {Opt_commit_interval, "commit=%d"},
364 {Opt_err, NULL}, 369 {Opt_err, NULL},
365}; 370};
366 371
@@ -496,10 +501,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
496 btrfs_set_opt(info->mount_opt, NOBARRIER); 501 btrfs_set_opt(info->mount_opt, NOBARRIER);
497 break; 502 break;
498 case Opt_thread_pool: 503 case Opt_thread_pool:
499 intarg = 0; 504 ret = match_int(&args[0], &intarg);
500 match_int(&args[0], &intarg); 505 if (ret) {
501 if (intarg) 506 goto out;
507 } else if (intarg > 0) {
502 info->thread_pool_size = intarg; 508 info->thread_pool_size = intarg;
509 } else {
510 ret = -EINVAL;
511 goto out;
512 }
503 break; 513 break;
504 case Opt_max_inline: 514 case Opt_max_inline:
505 num = match_strdup(&args[0]); 515 num = match_strdup(&args[0]);
@@ -513,7 +523,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
513 root->sectorsize); 523 root->sectorsize);
514 } 524 }
515 printk(KERN_INFO "btrfs: max_inline at %llu\n", 525 printk(KERN_INFO "btrfs: max_inline at %llu\n",
516 (unsigned long long)info->max_inline); 526 info->max_inline);
527 } else {
528 ret = -ENOMEM;
529 goto out;
517 } 530 }
518 break; 531 break;
519 case Opt_alloc_start: 532 case Opt_alloc_start:
@@ -525,7 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
525 kfree(num); 538 kfree(num);
526 printk(KERN_INFO 539 printk(KERN_INFO
527 "btrfs: allocations start at %llu\n", 540 "btrfs: allocations start at %llu\n",
528 (unsigned long long)info->alloc_start); 541 info->alloc_start);
542 } else {
543 ret = -ENOMEM;
544 goto out;
529 } 545 }
530 break; 546 break;
531 case Opt_noacl: 547 case Opt_noacl:
@@ -540,12 +556,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
540 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 556 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
541 break; 557 break;
542 case Opt_ratio: 558 case Opt_ratio:
543 intarg = 0; 559 ret = match_int(&args[0], &intarg);
544 match_int(&args[0], &intarg); 560 if (ret) {
545 if (intarg) { 561 goto out;
562 } else if (intarg >= 0) {
546 info->metadata_ratio = intarg; 563 info->metadata_ratio = intarg;
547 printk(KERN_INFO "btrfs: metadata ratio %d\n", 564 printk(KERN_INFO "btrfs: metadata ratio %d\n",
548 info->metadata_ratio); 565 info->metadata_ratio);
566 } else {
567 ret = -EINVAL;
568 goto out;
549 } 569 }
550 break; 570 break;
551 case Opt_discard: 571 case Opt_discard:
@@ -554,6 +574,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
554 case Opt_space_cache: 574 case Opt_space_cache:
555 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 575 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
556 break; 576 break;
577 case Opt_rescan_uuid_tree:
578 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
579 break;
557 case Opt_no_space_cache: 580 case Opt_no_space_cache:
558 printk(KERN_INFO "btrfs: disabling disk space caching\n"); 581 printk(KERN_INFO "btrfs: disabling disk space caching\n");
559 btrfs_clear_opt(info->mount_opt, SPACE_CACHE); 582 btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
@@ -596,13 +619,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
596 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 619 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
597 break; 620 break;
598 case Opt_check_integrity_print_mask: 621 case Opt_check_integrity_print_mask:
599 intarg = 0; 622 ret = match_int(&args[0], &intarg);
600 match_int(&args[0], &intarg); 623 if (ret) {
601 if (intarg) { 624 goto out;
625 } else if (intarg >= 0) {
602 info->check_integrity_print_mask = intarg; 626 info->check_integrity_print_mask = intarg;
603 printk(KERN_INFO "btrfs:" 627 printk(KERN_INFO "btrfs:"
604 " check_integrity_print_mask 0x%x\n", 628 " check_integrity_print_mask 0x%x\n",
605 info->check_integrity_print_mask); 629 info->check_integrity_print_mask);
630 } else {
631 ret = -EINVAL;
632 goto out;
606 } 633 }
607 break; 634 break;
608#else 635#else
@@ -626,6 +653,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
626 goto out; 653 goto out;
627 } 654 }
628 break; 655 break;
656 case Opt_commit_interval:
657 intarg = 0;
658 ret = match_int(&args[0], &intarg);
659 if (ret < 0) {
660 printk(KERN_ERR
661 "btrfs: invalid commit interval\n");
662 ret = -EINVAL;
663 goto out;
664 }
665 if (intarg > 0) {
666 if (intarg > 300) {
667 printk(KERN_WARNING
668 "btrfs: excessive commit interval %d\n",
669 intarg);
670 }
671 info->commit_interval = intarg;
672 } else {
673 printk(KERN_INFO
674 "btrfs: using default commit interval %ds\n",
675 BTRFS_DEFAULT_COMMIT_INTERVAL);
676 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
677 }
678 break;
629 case Opt_err: 679 case Opt_err:
630 printk(KERN_INFO "btrfs: unrecognized mount option " 680 printk(KERN_INFO "btrfs: unrecognized mount option "
631 "'%s'\n", p); 681 "'%s'\n", p);
@@ -654,8 +704,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
654{ 704{
655 substring_t args[MAX_OPT_ARGS]; 705 substring_t args[MAX_OPT_ARGS];
656 char *device_name, *opts, *orig, *p; 706 char *device_name, *opts, *orig, *p;
707 char *num = NULL;
657 int error = 0; 708 int error = 0;
658 int intarg;
659 709
660 if (!options) 710 if (!options)
661 return 0; 711 return 0;
@@ -679,17 +729,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
679 case Opt_subvol: 729 case Opt_subvol:
680 kfree(*subvol_name); 730 kfree(*subvol_name);
681 *subvol_name = match_strdup(&args[0]); 731 *subvol_name = match_strdup(&args[0]);
732 if (!*subvol_name) {
733 error = -ENOMEM;
734 goto out;
735 }
682 break; 736 break;
683 case Opt_subvolid: 737 case Opt_subvolid:
684 intarg = 0; 738 num = match_strdup(&args[0]);
685 error = match_int(&args[0], &intarg); 739 if (num) {
686 if (!error) { 740 *subvol_objectid = memparse(num, NULL);
741 kfree(num);
687 /* we want the original fs_tree */ 742 /* we want the original fs_tree */
688 if (!intarg) 743 if (!*subvol_objectid)
689 *subvol_objectid = 744 *subvol_objectid =
690 BTRFS_FS_TREE_OBJECTID; 745 BTRFS_FS_TREE_OBJECTID;
691 else 746 } else {
692 *subvol_objectid = intarg; 747 error = -EINVAL;
748 goto out;
693 } 749 }
694 break; 750 break;
695 case Opt_subvolrootid: 751 case Opt_subvolrootid:
@@ -892,11 +948,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
892 if (btrfs_test_opt(root, NOBARRIER)) 948 if (btrfs_test_opt(root, NOBARRIER))
893 seq_puts(seq, ",nobarrier"); 949 seq_puts(seq, ",nobarrier");
894 if (info->max_inline != 8192 * 1024) 950 if (info->max_inline != 8192 * 1024)
895 seq_printf(seq, ",max_inline=%llu", 951 seq_printf(seq, ",max_inline=%llu", info->max_inline);
896 (unsigned long long)info->max_inline);
897 if (info->alloc_start != 0) 952 if (info->alloc_start != 0)
898 seq_printf(seq, ",alloc_start=%llu", 953 seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
899 (unsigned long long)info->alloc_start);
900 if (info->thread_pool_size != min_t(unsigned long, 954 if (info->thread_pool_size != min_t(unsigned long,
901 num_online_cpus() + 2, 8)) 955 num_online_cpus() + 2, 8))
902 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 956 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -928,6 +982,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
928 seq_puts(seq, ",space_cache"); 982 seq_puts(seq, ",space_cache");
929 else 983 else
930 seq_puts(seq, ",nospace_cache"); 984 seq_puts(seq, ",nospace_cache");
985 if (btrfs_test_opt(root, RESCAN_UUID_TREE))
986 seq_puts(seq, ",rescan_uuid_tree");
931 if (btrfs_test_opt(root, CLEAR_CACHE)) 987 if (btrfs_test_opt(root, CLEAR_CACHE))
932 seq_puts(seq, ",clear_cache"); 988 seq_puts(seq, ",clear_cache");
933 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 989 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -940,8 +996,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
940 seq_puts(seq, ",inode_cache"); 996 seq_puts(seq, ",inode_cache");
941 if (btrfs_test_opt(root, SKIP_BALANCE)) 997 if (btrfs_test_opt(root, SKIP_BALANCE))
942 seq_puts(seq, ",skip_balance"); 998 seq_puts(seq, ",skip_balance");
999 if (btrfs_test_opt(root, RECOVERY))
1000 seq_puts(seq, ",recovery");
1001#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1002 if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1003 seq_puts(seq, ",check_int_data");
1004 else if (btrfs_test_opt(root, CHECK_INTEGRITY))
1005 seq_puts(seq, ",check_int");
1006 if (info->check_integrity_print_mask)
1007 seq_printf(seq, ",check_int_print_mask=%d",
1008 info->check_integrity_print_mask);
1009#endif
1010 if (info->metadata_ratio)
1011 seq_printf(seq, ",metadata_ratio=%d",
1012 info->metadata_ratio);
943 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) 1013 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
944 seq_puts(seq, ",fatal_errors=panic"); 1014 seq_puts(seq, ",fatal_errors=panic");
1015 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1016 seq_printf(seq, ",commit=%d", info->commit_interval);
945 return 0; 1017 return 0;
946} 1018}
947 1019
@@ -1696,6 +1768,11 @@ static void btrfs_print_info(void)
1696 "\n"); 1768 "\n");
1697} 1769}
1698 1770
1771static int btrfs_run_sanity_tests(void)
1772{
1773 return btrfs_test_free_space_cache();
1774}
1775
1699static int __init init_btrfs_fs(void) 1776static int __init init_btrfs_fs(void)
1700{ 1777{
1701 int err; 1778 int err;
@@ -1734,23 +1811,32 @@ static int __init init_btrfs_fs(void)
1734 if (err) 1811 if (err)
1735 goto free_auto_defrag; 1812 goto free_auto_defrag;
1736 1813
1737 err = btrfs_interface_init(); 1814 err = btrfs_prelim_ref_init();
1738 if (err) 1815 if (err)
1739 goto free_delayed_ref; 1816 goto free_prelim_ref;
1740 1817
1741 err = register_filesystem(&btrfs_fs_type); 1818 err = btrfs_interface_init();
1742 if (err) 1819 if (err)
1743 goto unregister_ioctl; 1820 goto free_delayed_ref;
1744 1821
1745 btrfs_init_lockdep(); 1822 btrfs_init_lockdep();
1746 1823
1747 btrfs_print_info(); 1824 btrfs_print_info();
1748 btrfs_test_free_space_cache(); 1825
1826 err = btrfs_run_sanity_tests();
1827 if (err)
1828 goto unregister_ioctl;
1829
1830 err = register_filesystem(&btrfs_fs_type);
1831 if (err)
1832 goto unregister_ioctl;
1749 1833
1750 return 0; 1834 return 0;
1751 1835
1752unregister_ioctl: 1836unregister_ioctl:
1753 btrfs_interface_exit(); 1837 btrfs_interface_exit();
1838free_prelim_ref:
1839 btrfs_prelim_ref_exit();
1754free_delayed_ref: 1840free_delayed_ref:
1755 btrfs_delayed_ref_exit(); 1841 btrfs_delayed_ref_exit();
1756free_auto_defrag: 1842free_auto_defrag:
@@ -1777,6 +1863,7 @@ static void __exit exit_btrfs_fs(void)
1777 btrfs_delayed_ref_exit(); 1863 btrfs_delayed_ref_exit();
1778 btrfs_auto_defrag_exit(); 1864 btrfs_auto_defrag_exit();
1779 btrfs_delayed_inode_exit(); 1865 btrfs_delayed_inode_exit();
1866 btrfs_prelim_ref_exit();
1780 ordered_data_exit(); 1867 ordered_data_exit();
1781 extent_map_exit(); 1868 extent_map_exit();
1782 extent_io_exit(); 1869 extent_io_exit();
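The super.c hunks change subvolid= parsing from match_int() to match_strdup() plus memparse(), so the value is read as a full u64 rather than an int. Roughly, memparse() is a strtoull-style parse with optional size suffixes; the sketch below approximates only the part relevant here and is not the kernel implementation:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Very rough approximation of the suffix handling: parse a u64 and scale
 * it by an optional K/M/G suffix. The real memparse() does more. */
static uint64_t parse_u64_suffix(const char *s)
{
    char *end;
    uint64_t v = strtoull(s, &end, 0);

    switch (*end) {
    case 'G': case 'g': v <<= 10; /* fall through */
    case 'M': case 'm': v <<= 10; /* fall through */
    case 'K': case 'k': v <<= 10; break;
    default: break;
    }
    return v;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)parse_u64_suffix("257")); /* 257 */
    printf("%llu\n", (unsigned long long)parse_u64_suffix("4k"));  /* 4096 */
    return 0;
}
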
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
new file mode 100644
index 000000000000..580877625776
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_TESTS
20#define __BTRFS_TESTS
21
22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
23
24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
25
26int btrfs_test_free_space_cache(void);
27#else
28static inline int btrfs_test_free_space_cache(void)
29{
30 return 0;
31}
32#endif
33
34#endif
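btrfs-tests.h follows the usual kernel header pattern: a real prototype when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is set, and a static inline stub returning 0 otherwise, so init_btrfs_fs() can call btrfs_test_free_space_cache() (via btrfs_run_sanity_tests()) without any #ifdef at the call site. A tiny standalone illustration of that pattern, with an invented config symbol:

#include <stdio.h>

#ifdef CONFIG_RUN_SELFTESTS           /* invented stand-in config symbol */
int run_selftests(void);              /* real version lives elsewhere */
#else
static inline int run_selftests(void) /* compiled-out build: do nothing */
{
    return 0;
}
#endif

int main(void)
{
    int err = run_selftests();        /* no #ifdef needed at the call site */

    printf("selftests returned %d\n", err);
    return err;
}
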
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
new file mode 100644
index 000000000000..6fc82010dc15
--- /dev/null
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -0,0 +1,395 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/slab.h>
20#include "btrfs-tests.h"
21#include "../ctree.h"
22#include "../free-space-cache.h"
23
24#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
25static struct btrfs_block_group_cache *init_test_block_group(void)
26{
27 struct btrfs_block_group_cache *cache;
28
29 cache = kzalloc(sizeof(*cache), GFP_NOFS);
30 if (!cache)
31 return NULL;
32 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
33 GFP_NOFS);
34 if (!cache->free_space_ctl) {
35 kfree(cache);
36 return NULL;
37 }
38
39 cache->key.objectid = 0;
40 cache->key.offset = 1024 * 1024 * 1024;
41 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
42 cache->sectorsize = 4096;
43
44 spin_lock_init(&cache->lock);
45 INIT_LIST_HEAD(&cache->list);
46 INIT_LIST_HEAD(&cache->cluster_list);
47 INIT_LIST_HEAD(&cache->new_bg_list);
48
49 btrfs_init_free_space_ctl(cache);
50
51 return cache;
52}
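/*
 * The cache built above is a purely in-memory stand-in: a 1GB block group
 * at offset 0 with a 4K sectorsize and no device or transaction behind it,
 * so only the free_space_ctl extent/bitmap bookkeeping gets exercised.
 */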
53
54/*
55 * This test just does basic sanity checking, making sure we can add an extent
56 * entry and remove space from either end and the middle, and make sure we can
57 * remove space that covers adjacent extent entries.
58 */
59static int test_extents(struct btrfs_block_group_cache *cache)
60{
61 int ret = 0;
62
63 test_msg("Running extent only tests\n");
64
65 /* First just make sure we can remove an entire entry */
66 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
67 if (ret) {
68 test_msg("Error adding initial extents %d\n", ret);
69 return ret;
70 }
71
72 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
73 if (ret) {
74 test_msg("Error removing extent %d\n", ret);
75 return ret;
76 }
77
78 if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
79 test_msg("Full remove left some lingering space\n");
80 return -1;
81 }
82
83 /* Ok edge and middle cases now */
84 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
85 if (ret) {
86 test_msg("Error adding half extent %d\n", ret);
87 return ret;
88 }
89
90 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
91 if (ret) {
92 test_msg("Error removing tail end %d\n", ret);
93 return ret;
94 }
95
96 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
97 if (ret) {
98 test_msg("Error removing front end %d\n", ret);
99 return ret;
100 }
101
102 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
103 if (ret) {
104 test_msg("Error removing middle peice %d\n", ret);
105 return ret;
106 }
107
108 if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
109 test_msg("Still have space at the front\n");
110 return -1;
111 }
112
113 if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
114 test_msg("Still have space in the middle\n");
115 return -1;
116 }
117
118 if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
119 test_msg("Still have space at the end\n");
120 return -1;
121 }
122
123 /* Cleanup */
124 __btrfs_remove_free_space_cache(cache->free_space_ctl);
125
126 return 0;
127}
128
129static int test_bitmaps(struct btrfs_block_group_cache *cache)
130{
131 u64 next_bitmap_offset;
132 int ret;
133
134 test_msg("Running bitmap only tests\n");
135
136 ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
137 if (ret) {
138 test_msg("Couldn't create a bitmap entry %d\n", ret);
139 return ret;
140 }
141
142 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
143 if (ret) {
144 test_msg("Error removing bitmap full range %d\n", ret);
145 return ret;
146 }
147
148 if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
149 test_msg("Left some space in bitmap\n");
150 return -1;
151 }
152
153 ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
154 if (ret) {
155 test_msg("Couldn't add to our bitmap entry %d\n", ret);
156 return ret;
157 }
158
159 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
160 if (ret) {
161 test_msg("Couldn't remove middle chunk %d\n", ret);
162 return ret;
163 }
164
165 /*
166 * The first bitmap we have starts at offset 0 so the next one is just
167 * at the end of the first bitmap.
168 */
169 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
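/*
 * Worked out for the usual 4K page size: BITS_PER_BITMAP is 4096 * 8 =
 * 32768 bits, each bit covering one 4K sector, so a single bitmap spans
 * 32768 * 4096 bytes = 128MB and the second bitmap begins at offset 128MB.
 */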
170
171 /* Test a bit straddling two bitmaps */
172 ret = test_add_free_space_entry(cache, next_bitmap_offset -
173 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
174 if (ret) {
175 test_msg("Couldn't add space that straddles two bitmaps %d\n",
176 ret);
177 return ret;
178 }
179
180 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
181 (1 * 1024 * 1024), 2 * 1024 * 1024);
182 if (ret) {
183 test_msg("Couldn't remove overlapping space %d\n", ret);
184 return ret;
185 }
186
187 if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
188 2 * 1024 * 1024)) {
189 test_msg("Left some space when removing overlapping\n");
190 return -1;
191 }
192
193 __btrfs_remove_free_space_cache(cache->free_space_ctl);
194
195 return 0;
196}
197
198/* This is the high grade jackassery */
199static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
200{
201 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
202 int ret;
203
204 test_msg("Running bitmap and extent tests\n");
205
206 /*
207 * First let's do something simple, an extent at the same offset as the
208 * bitmap, but the free space completely in the extent and then
209 * completely in the bitmap.
210 */
211 ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
212 if (ret) {
213 test_msg("Couldn't create bitmap entry %d\n", ret);
214 return ret;
215 }
216
217 ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
218 if (ret) {
219 test_msg("Couldn't add extent entry %d\n", ret);
220 return ret;
221 }
222
223 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
224 if (ret) {
225 test_msg("Couldn't remove extent entry %d\n", ret);
226 return ret;
227 }
228
229 if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
230 test_msg("Left remnants after our remove\n");
231 return -1;
232 }
233
234 /* Now to add back the extent entry and remove from the bitmap */
235 ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
236 if (ret) {
237 test_msg("Couldn't re-add extent entry %d\n", ret);
238 return ret;
239 }
240
241 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
242 if (ret) {
243 test_msg("Couldn't remove from bitmap %d\n", ret);
244 return ret;
245 }
246
247 if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
248 test_msg("Left remnants in the bitmap\n");
249 return -1;
250 }
251
252 /*
253 * Ok so a little more evil, extent entry and bitmap at the same offset,
254 * removing an overlapping chunk.
255 */
256 ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
257 if (ret) {
258 test_msg("Couldn't add to a bitmap %d\n", ret);
259 return ret;
260 }
261
262 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
263 if (ret) {
264 test_msg("Couldn't remove overlapping space %d\n", ret);
265 return ret;
266 }
267
268 if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
269 test_msg("Left over peices after removing overlapping\n");
270 return -1;
271 }
272
273 __btrfs_remove_free_space_cache(cache->free_space_ctl);
274
275 /* Now with the extent entry offset into the bitmap */
276 ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
277 if (ret) {
278 test_msg("Couldn't add space to the bitmap %d\n", ret);
279 return ret;
280 }
281
282 ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
283 if (ret) {
284 test_msg("Couldn't add extent to the cache %d\n", ret);
285 return ret;
286 }
287
288 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
289 if (ret) {
290 test_msg("Problem removing overlapping space %d\n", ret);
291 return ret;
292 }
293
294 if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
295 test_msg("Left something behind when removing space");
296 return -1;
297 }
298
299 /*
300 * This has blown up in the past, the extent entry starts before the
301 * bitmap entry, but we're trying to remove an offset that falls
302 * completely within the bitmap range and is in both the extent entry
303 * and the bitmap entry, looks like this
304 *
305 * [ extent ]
306 * [ bitmap ]
307 * [ del ]
308 */
309 __btrfs_remove_free_space_cache(cache->free_space_ctl);
310 ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
311 4 * 1024 * 1024, 1);
312 if (ret) {
313 test_msg("Couldn't add bitmap %d\n", ret);
314 return ret;
315 }
316
317 ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
318 5 * 1024 * 1024, 0);
319 if (ret) {
320 test_msg("Couldn't add extent entry %d\n", ret);
321 return ret;
322 }
323
324 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
325 5 * 1024 * 1024);
326 if (ret) {
327 test_msg("Failed to free our space %d\n", ret);
328 return ret;
329 }
330
331 if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
332 5 * 1024 * 1024)) {
333 test_msg("Left stuff over\n");
334 return -1;
335 }
336
337 __btrfs_remove_free_space_cache(cache->free_space_ctl);
338
339 /*
340 * This blew up before, we have part of the free space in a bitmap and
341 * then the entirety of the rest of the space in an extent. This used
342 * to return -EAGAIN back from btrfs_remove_extent, make sure this
343 * doesn't happen.
344 */
345 ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
346 if (ret) {
347 test_msg("Couldn't add bitmap entry %d\n", ret);
348 return ret;
349 }
350
351 ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
352 if (ret) {
353 test_msg("Couldn't add extent entry %d\n", ret);
354 return ret;
355 }
356
357 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
358 if (ret) {
359 test_msg("Error removing bitmap and extent overlapping %d\n", ret);
360 return ret;
361 }
362
363 __btrfs_remove_free_space_cache(cache->free_space_ctl);
364 return 0;
365}
366
367int btrfs_test_free_space_cache(void)
368{
369 struct btrfs_block_group_cache *cache;
370 int ret;
371
372 test_msg("Running btrfs free space cache tests\n");
373
374 cache = init_test_block_group();
375 if (!cache) {
376 test_msg("Couldn't run the tests\n");
377 return 0;
378 }
379
380 ret = test_extents(cache);
381 if (ret)
382 goto out;
383 ret = test_bitmaps(cache);
384 if (ret)
385 goto out;
386 ret = test_bitmaps_and_extents(cache);
387 if (ret)
388 goto out;
389out:
390 __btrfs_remove_free_space_cache(cache->free_space_ctl);
391 kfree(cache->free_space_ctl);
392 kfree(cache);
393 test_msg("Free space cache tests finished\n");
394 return ret;
395}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index af1931a5960d..cac4a3f76323 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
837 * them in one of two extent_io trees. This is used to make sure all of 837 * them in one of two extent_io trees. This is used to make sure all of
838 * those extents are on disk for transaction or log commit 838 * those extents are on disk for transaction or log commit
839 */ 839 */
840int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 840static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
841 struct extent_io_tree *dirty_pages, int mark) 841 struct extent_io_tree *dirty_pages, int mark)
842{ 842{
843 int ret; 843 int ret;
@@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1225 btrfs_set_root_stransid(new_root_item, 0); 1225 btrfs_set_root_stransid(new_root_item, 0);
1226 btrfs_set_root_rtransid(new_root_item, 0); 1226 btrfs_set_root_rtransid(new_root_item, 0);
1227 } 1227 }
1228 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1228 btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
1229 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); 1229 btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
1230 btrfs_set_root_otransid(new_root_item, trans->transid); 1230 btrfs_set_root_otransid(new_root_item, trans->transid);
1231 1231
1232 old = btrfs_lock_root_node(root); 1232 old = btrfs_lock_root_node(root);
@@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1311 dentry->d_name.len * 2); 1311 dentry->d_name.len * 2);
1312 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 1312 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1313 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); 1313 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
1314 if (ret) 1314 if (ret) {
1315 btrfs_abort_transaction(trans, root, ret);
1316 goto fail;
1317 }
1318 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
1319 BTRFS_UUID_KEY_SUBVOL, objectid);
1320 if (ret) {
1315 btrfs_abort_transaction(trans, root, ret); 1321 btrfs_abort_transaction(trans, root, ret);
1322 goto fail;
1323 }
1324 if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
1325 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
1326 new_root_item->received_uuid,
1327 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
1328 objectid);
1329 if (ret && ret != -EEXIST) {
1330 btrfs_abort_transaction(trans, root, ret);
1331 goto fail;
1332 }
1333 }
1316fail: 1334fail:
1317 pending->error = ret; 1335 pending->error = ret;
1318dir_item_existed: 1336dir_item_existed:
@@ -1362,6 +1380,8 @@ static void update_super_roots(struct btrfs_root *root)
1362 super->root_level = root_item->level; 1380 super->root_level = root_item->level;
1363 if (btrfs_test_opt(root, SPACE_CACHE)) 1381 if (btrfs_test_opt(root, SPACE_CACHE))
1364 super->cache_generation = root_item->generation; 1382 super->cache_generation = root_item->generation;
1383 if (root->fs_info->update_uuid_tree_gen)
1384 super->uuid_tree_generation = root_item->generation;
1365} 1385}
1366 1386
1367int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1387int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -1928,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1928 list_del_init(&root->root_list); 1948 list_del_init(&root->root_list);
1929 spin_unlock(&fs_info->trans_lock); 1949 spin_unlock(&fs_info->trans_lock);
1930 1950
1931 pr_debug("btrfs: cleaner removing %llu\n", 1951 pr_debug("btrfs: cleaner removing %llu\n", root->objectid);
1932 (unsigned long long)root->objectid);
1933 1952
1934 btrfs_kill_all_delayed_nodes(root); 1953 btrfs_kill_all_delayed_nodes(root);
1935 1954
@@ -1942,6 +1961,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1942 * If we encounter a transaction abort during snapshot cleaning, we 1961 * If we encounter a transaction abort during snapshot cleaning, we
1943 * don't want to crash here 1962 * don't want to crash here
1944 */ 1963 */
1945 BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); 1964 return (ret < 0) ? 0 : 1;
1946 return 1;
1947} 1965}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index defbc4269897..5c2af8491621 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
160void btrfs_throttle(struct btrfs_root *root); 160void btrfs_throttle(struct btrfs_root *root);
161int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 161int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
162 struct btrfs_root *root); 162 struct btrfs_root *root);
163int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
164 struct extent_io_tree *dirty_pages, int mark);
165int btrfs_write_marked_extents(struct btrfs_root *root, 163int btrfs_write_marked_extents(struct btrfs_root *root,
166 struct extent_io_tree *dirty_pages, int mark); 164 struct extent_io_tree *dirty_pages, int mark);
167int btrfs_wait_marked_extents(struct btrfs_root *root, 165int btrfs_wait_marked_extents(struct btrfs_root *root,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ff60d8978ae2..0d9613c3f5e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
747 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 747 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
748 if (ret) 748 if (ret)
749 goto out; 749 goto out;
750 btrfs_run_delayed_items(trans, root); 750 else
751 ret = btrfs_run_delayed_items(trans, root);
751out: 752out:
752 kfree(name); 753 kfree(name);
753 iput(inode); 754 iput(inode);
@@ -923,7 +924,9 @@ again:
923 kfree(victim_name); 924 kfree(victim_name);
924 if (ret) 925 if (ret)
925 return ret; 926 return ret;
926 btrfs_run_delayed_items(trans, root); 927 ret = btrfs_run_delayed_items(trans, root);
928 if (ret)
929 return ret;
927 *search_done = 1; 930 *search_done = 1;
928 goto again; 931 goto again;
929 } 932 }
@@ -990,7 +993,9 @@ again:
990 inode, 993 inode,
991 victim_name, 994 victim_name,
992 victim_name_len); 995 victim_name_len);
993 btrfs_run_delayed_items(trans, root); 996 if (!ret)
997 ret = btrfs_run_delayed_items(
998 trans, root);
994 } 999 }
995 iput(victim_parent); 1000 iput(victim_parent);
996 kfree(victim_name); 1001 kfree(victim_name);
@@ -1536,8 +1541,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1536 1541
1537 name_len = btrfs_dir_name_len(eb, di); 1542 name_len = btrfs_dir_name_len(eb, di);
1538 name = kmalloc(name_len, GFP_NOFS); 1543 name = kmalloc(name_len, GFP_NOFS);
1539 if (!name) 1544 if (!name) {
1540 return -ENOMEM; 1545 ret = -ENOMEM;
1546 goto out;
1547 }
1541 1548
1542 log_type = btrfs_dir_type(eb, di); 1549 log_type = btrfs_dir_type(eb, di);
1543 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1550 read_extent_buffer(eb, name, (unsigned long)(di + 1),
@@ -1810,7 +1817,7 @@ again:
1810 ret = btrfs_unlink_inode(trans, root, dir, inode, 1817 ret = btrfs_unlink_inode(trans, root, dir, inode,
1811 name, name_len); 1818 name, name_len);
1812 if (!ret) 1819 if (!ret)
1813 btrfs_run_delayed_items(trans, root); 1820 ret = btrfs_run_delayed_items(trans, root);
1814 kfree(name); 1821 kfree(name);
1815 iput(inode); 1822 iput(inode);
1816 if (ret) 1823 if (ret)
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
new file mode 100644
index 000000000000..dd0dea3766f7
--- /dev/null
+++ b/fs/btrfs/uuid-tree.c
@@ -0,0 +1,358 @@
1/*
2 * Copyright (C) STRATO AG 2013. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#include <linux/uuid.h>
19#include <asm/unaligned.h>
20#include "ctree.h"
21#include "transaction.h"
22#include "disk-io.h"
23#include "print-tree.h"
24
25
26static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
27{
28 key->type = type;
29 key->objectid = get_unaligned_le64(uuid);
30 key->offset = get_unaligned_le64(uuid + sizeof(u64));
31}
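/*
 * Example: for the raw UUID bytes 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e
 * 0f 10, the two unaligned little-endian reads above give
 * key->objectid = 0x0807060504030201 and key->offset = 0x100f0e0d0c0b0a09;
 * the subvolume id itself is stored in the item payload, not in the key.
 */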
32
33/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
34static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
35 u8 type, u64 subid)
36{
37 int ret;
38 struct btrfs_path *path = NULL;
39 struct extent_buffer *eb;
40 int slot;
41 u32 item_size;
42 unsigned long offset;
43 struct btrfs_key key;
44
45 if (WARN_ON_ONCE(!uuid_root)) {
46 ret = -ENOENT;
47 goto out;
48 }
49
50 path = btrfs_alloc_path();
51 if (!path) {
52 ret = -ENOMEM;
53 goto out;
54 }
55
56 btrfs_uuid_to_key(uuid, type, &key);
57 ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
58 if (ret < 0) {
59 goto out;
60 } else if (ret > 0) {
61 ret = -ENOENT;
62 goto out;
63 }
64
65 eb = path->nodes[0];
66 slot = path->slots[0];
67 item_size = btrfs_item_size_nr(eb, slot);
68 offset = btrfs_item_ptr_offset(eb, slot);
69 ret = -ENOENT;
70
71 if (!IS_ALIGNED(item_size, sizeof(u64))) {
72 pr_warn("btrfs: uuid item with illegal size %lu!\n",
73 (unsigned long)item_size);
74 goto out;
75 }
76 while (item_size) {
77 __le64 data;
78
79 read_extent_buffer(eb, &data, offset, sizeof(data));
80 if (le64_to_cpu(data) == subid) {
81 ret = 0;
82 break;
83 }
84 offset += sizeof(data);
85 item_size -= sizeof(data);
86 }
87
88out:
89 btrfs_free_path(path);
90 return ret;
91}
92
93int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
94 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
95 u64 subid_cpu)
96{
97 int ret;
98 struct btrfs_path *path = NULL;
99 struct btrfs_key key;
100 struct extent_buffer *eb;
101 int slot;
102 unsigned long offset;
103 __le64 subid_le;
104
105 ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
106 if (ret != -ENOENT)
107 return ret;
108
109 if (WARN_ON_ONCE(!uuid_root)) {
110 ret = -EINVAL;
111 goto out;
112 }
113
114 btrfs_uuid_to_key(uuid, type, &key);
115
116 path = btrfs_alloc_path();
117 if (!path) {
118 ret = -ENOMEM;
119 goto out;
120 }
121
122 ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
123 sizeof(subid_le));
124 if (ret >= 0) {
125 /* Add an item for the type for the first time */
126 eb = path->nodes[0];
127 slot = path->slots[0];
128 offset = btrfs_item_ptr_offset(eb, slot);
129 } else if (ret == -EEXIST) {
130 /*
131 * An item with that type already exists.
132 * Extend the item and store the new subid at the end.
133 */
134 btrfs_extend_item(uuid_root, path, sizeof(subid_le));
135 eb = path->nodes[0];
136 slot = path->slots[0];
137 offset = btrfs_item_ptr_offset(eb, slot);
138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
139 } else if (ret < 0) {
140 pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
141 ret, (unsigned long long)key.objectid,
142 (unsigned long long)key.offset, type);
143 goto out;
144 }
145
146 ret = 0;
147 subid_le = cpu_to_le64(subid_cpu);
148 write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
149 btrfs_mark_buffer_dirty(eb);
150
151out:
152 btrfs_free_path(path);
153 return ret;
154}
155
156int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
157 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
158 u64 subid)
159{
160 int ret;
161 struct btrfs_path *path = NULL;
162 struct btrfs_key key;
163 struct extent_buffer *eb;
164 int slot;
165 unsigned long offset;
166 u32 item_size;
167 unsigned long move_dst;
168 unsigned long move_src;
169 unsigned long move_len;
170
171 if (WARN_ON_ONCE(!uuid_root)) {
172 ret = -EINVAL;
173 goto out;
174 }
175
176 btrfs_uuid_to_key(uuid, type, &key);
177
178 path = btrfs_alloc_path();
179 if (!path) {
180 ret = -ENOMEM;
181 goto out;
182 }
183
184 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
185 if (ret < 0) {
186 pr_warn("btrfs: error %d while searching for uuid item!\n",
187 ret);
188 goto out;
189 }
190 if (ret > 0) {
191 ret = -ENOENT;
192 goto out;
193 }
194
195 eb = path->nodes[0];
196 slot = path->slots[0];
197 offset = btrfs_item_ptr_offset(eb, slot);
198 item_size = btrfs_item_size_nr(eb, slot);
199 if (!IS_ALIGNED(item_size, sizeof(u64))) {
200 pr_warn("btrfs: uuid item with illegal size %lu!\n",
201 (unsigned long)item_size);
202 ret = -ENOENT;
203 goto out;
204 }
205 while (item_size) {
206 __le64 read_subid;
207
208 read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
209 if (le64_to_cpu(read_subid) == subid)
210 break;
211 offset += sizeof(read_subid);
212 item_size -= sizeof(read_subid);
213 }
214
215 if (!item_size) {
216 ret = -ENOENT;
217 goto out;
218 }
219
220 item_size = btrfs_item_size_nr(eb, slot);
221 if (item_size == sizeof(subid)) {
222 ret = btrfs_del_item(trans, uuid_root, path);
223 goto out;
224 }
225
226 move_dst = offset;
227 move_src = offset + sizeof(subid);
228 move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
229 memmove_extent_buffer(eb, move_dst, move_src, move_len);
230 btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
231
232out:
233 btrfs_free_path(path);
234 return ret;
235}
236
237static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
238 u64 subid)
239{
240 struct btrfs_trans_handle *trans;
241 int ret;
242
243 /* 1 - for the uuid item */
244 trans = btrfs_start_transaction(uuid_root, 1);
245 if (IS_ERR(trans)) {
246 ret = PTR_ERR(trans);
247 goto out;
248 }
249
250 ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid);
251 btrfs_end_transaction(trans, uuid_root);
252
253out:
254 return ret;
255}
256
257int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
258 int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
259 u64))
260{
261 struct btrfs_root *root = fs_info->uuid_root;
262 struct btrfs_key key;
263 struct btrfs_key max_key;
264 struct btrfs_path *path;
265 int ret = 0;
266 struct extent_buffer *leaf;
267 int slot;
268 u32 item_size;
269 unsigned long offset;
270
271 path = btrfs_alloc_path();
272 if (!path) {
273 ret = -ENOMEM;
274 goto out;
275 }
276
277 key.objectid = 0;
278 key.type = 0;
279 key.offset = 0;
280 max_key.objectid = (u64)-1;
281 max_key.type = (u8)-1;
282 max_key.offset = (u64)-1;
283
284again_search_slot:
285 path->keep_locks = 1;
286 ret = btrfs_search_forward(root, &key, &max_key, path, 0);
287 if (ret) {
288 if (ret > 0)
289 ret = 0;
290 goto out;
291 }
292
293 while (1) {
294 cond_resched();
295 leaf = path->nodes[0];
296 slot = path->slots[0];
297 btrfs_item_key_to_cpu(leaf, &key, slot);
298
299 if (key.type != BTRFS_UUID_KEY_SUBVOL &&
300 key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
301 goto skip;
302
303 offset = btrfs_item_ptr_offset(leaf, slot);
304 item_size = btrfs_item_size_nr(leaf, slot);
305 if (!IS_ALIGNED(item_size, sizeof(u64))) {
306 pr_warn("btrfs: uuid item with illegal size %lu!\n",
307 (unsigned long)item_size);
308 goto skip;
309 }
310 while (item_size) {
311 u8 uuid[BTRFS_UUID_SIZE];
312 __le64 subid_le;
313 u64 subid_cpu;
314
315 put_unaligned_le64(key.objectid, uuid);
316 put_unaligned_le64(key.offset, uuid + sizeof(u64));
317 read_extent_buffer(leaf, &subid_le, offset,
318 sizeof(subid_le));
319 subid_cpu = le64_to_cpu(subid_le);
320 ret = check_func(fs_info, uuid, key.type, subid_cpu);
321 if (ret < 0)
322 goto out;
323 if (ret > 0) {
324 btrfs_release_path(path);
325 ret = btrfs_uuid_iter_rem(root, uuid, key.type,
326 subid_cpu);
327 if (ret == 0) {
328 /*
329 * this might look inefficient, but the
330 * justification is that check_func
331 * returning 1 is the exception and
332 * that in the regular case only one
333 * entry per UUID exists.
334 */
335 goto again_search_slot;
336 }
337 if (ret < 0 && ret != -ENOENT)
338 goto out;
339 }
340 item_size -= sizeof(subid_le);
341 offset += sizeof(subid_le);
342 }
343
344skip:
345 ret = btrfs_next_item(root, path);
346 if (ret == 0)
347 continue;
348 else if (ret > 0)
349 ret = 0;
350 break;
351 }
352
353out:
354 btrfs_free_path(path);
355 if (ret)
356 pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret);
357 return 0;
358}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 67a085381845..0052ca8264d9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -26,6 +26,7 @@
26#include <linux/ratelimit.h> 26#include <linux/ratelimit.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/raid/pq.h> 28#include <linux/raid/pq.h>
29#include <linux/semaphore.h>
29#include <asm/div64.h> 30#include <asm/div64.h>
30#include "compat.h" 31#include "compat.h"
31#include "ctree.h" 32#include "ctree.h"
@@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root)
62 mutex_unlock(&root->fs_info->chunk_mutex); 63 mutex_unlock(&root->fs_info->chunk_mutex);
63} 64}
64 65
66static struct btrfs_fs_devices *__alloc_fs_devices(void)
67{
68 struct btrfs_fs_devices *fs_devs;
69
70 fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
71 if (!fs_devs)
72 return ERR_PTR(-ENOMEM);
73
74 mutex_init(&fs_devs->device_list_mutex);
75
76 INIT_LIST_HEAD(&fs_devs->devices);
77 INIT_LIST_HEAD(&fs_devs->alloc_list);
78 INIT_LIST_HEAD(&fs_devs->list);
79
80 return fs_devs;
81}
82
83/**
84 * alloc_fs_devices - allocate struct btrfs_fs_devices
85 * @fsid: a pointer to UUID for this FS. If NULL a new UUID is
86 * generated.
87 *
88 * Return: a pointer to a new &struct btrfs_fs_devices on success;
89 * ERR_PTR() on error. Returned struct is not linked onto any lists and
90 * can be destroyed with kfree() right away.
91 */
92static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
93{
94 struct btrfs_fs_devices *fs_devs;
95
96 fs_devs = __alloc_fs_devices();
97 if (IS_ERR(fs_devs))
98 return fs_devs;
99
100 if (fsid)
101 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
102 else
103 generate_random_uuid(fs_devs->fsid);
104
105 return fs_devs;
106}
107
65static void free_fs_devices(struct btrfs_fs_devices *fs_devices) 108static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
66{ 109{
67 struct btrfs_device *device; 110 struct btrfs_device *device;
@@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void)
101 } 144 }
102} 145}
103 146
147static struct btrfs_device *__alloc_device(void)
148{
149 struct btrfs_device *dev;
150
151 dev = kzalloc(sizeof(*dev), GFP_NOFS);
152 if (!dev)
153 return ERR_PTR(-ENOMEM);
154
155 INIT_LIST_HEAD(&dev->dev_list);
156 INIT_LIST_HEAD(&dev->dev_alloc_list);
157
158 spin_lock_init(&dev->io_lock);
159
160 spin_lock_init(&dev->reada_lock);
161 atomic_set(&dev->reada_in_flight, 0);
162 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
163 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
164
165 return dev;
166}
167
104static noinline struct btrfs_device *__find_device(struct list_head *head, 168static noinline struct btrfs_device *__find_device(struct list_head *head,
105 u64 devid, u8 *uuid) 169 u64 devid, u8 *uuid)
106{ 170{
@@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path,
395 459
396 fs_devices = find_fsid(disk_super->fsid); 460 fs_devices = find_fsid(disk_super->fsid);
397 if (!fs_devices) { 461 if (!fs_devices) {
398 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 462 fs_devices = alloc_fs_devices(disk_super->fsid);
399 if (!fs_devices) 463 if (IS_ERR(fs_devices))
400 return -ENOMEM; 464 return PTR_ERR(fs_devices);
401 INIT_LIST_HEAD(&fs_devices->devices); 465
402 INIT_LIST_HEAD(&fs_devices->alloc_list);
403 list_add(&fs_devices->list, &fs_uuids); 466 list_add(&fs_devices->list, &fs_uuids);
404 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
405 fs_devices->latest_devid = devid; 467 fs_devices->latest_devid = devid;
406 fs_devices->latest_trans = found_transid; 468 fs_devices->latest_trans = found_transid;
407 mutex_init(&fs_devices->device_list_mutex); 469
408 device = NULL; 470 device = NULL;
409 } else { 471 } else {
410 device = __find_device(&fs_devices->devices, devid, 472 device = __find_device(&fs_devices->devices, devid,
@@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path,
414 if (fs_devices->opened) 476 if (fs_devices->opened)
415 return -EBUSY; 477 return -EBUSY;
416 478
417 device = kzalloc(sizeof(*device), GFP_NOFS); 479 device = btrfs_alloc_device(NULL, &devid,
418 if (!device) { 480 disk_super->dev_item.uuid);
481 if (IS_ERR(device)) {
419 /* we can safely leave the fs_devices entry around */ 482 /* we can safely leave the fs_devices entry around */
420 return -ENOMEM; 483 return PTR_ERR(device);
421 } 484 }
422 device->devid = devid;
423 device->dev_stats_valid = 0;
424 device->work.func = pending_bios_fn;
425 memcpy(device->uuid, disk_super->dev_item.uuid,
426 BTRFS_UUID_SIZE);
427 spin_lock_init(&device->io_lock);
428 485
429 name = rcu_string_strdup(path, GFP_NOFS); 486 name = rcu_string_strdup(path, GFP_NOFS);
430 if (!name) { 487 if (!name) {
@@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path,
432 return -ENOMEM; 489 return -ENOMEM;
433 } 490 }
434 rcu_assign_pointer(device->name, name); 491 rcu_assign_pointer(device->name, name);
435 INIT_LIST_HEAD(&device->dev_alloc_list);
436
437 /* init readahead state */
438 spin_lock_init(&device->reada_lock);
439 device->reada_curr_zone = NULL;
440 atomic_set(&device->reada_in_flight, 0);
441 device->reada_next = 0;
442 INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
443 INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
444 492
445 mutex_lock(&fs_devices->device_list_mutex); 493 mutex_lock(&fs_devices->device_list_mutex);
446 list_add_rcu(&device->dev_list, &fs_devices->devices); 494 list_add_rcu(&device->dev_list, &fs_devices->devices);
495 fs_devices->num_devices++;
447 mutex_unlock(&fs_devices->device_list_mutex); 496 mutex_unlock(&fs_devices->device_list_mutex);
448 497
449 device->fs_devices = fs_devices; 498 device->fs_devices = fs_devices;
450 fs_devices->num_devices++;
451 } else if (!device->name || strcmp(device->name->str, path)) { 499 } else if (!device->name || strcmp(device->name->str, path)) {
452 name = rcu_string_strdup(path, GFP_NOFS); 500 name = rcu_string_strdup(path, GFP_NOFS);
453 if (!name) 501 if (!name)
@@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
474 struct btrfs_device *device; 522 struct btrfs_device *device;
475 struct btrfs_device *orig_dev; 523 struct btrfs_device *orig_dev;
476 524
477 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 525 fs_devices = alloc_fs_devices(orig->fsid);
478 if (!fs_devices) 526 if (IS_ERR(fs_devices))
479 return ERR_PTR(-ENOMEM); 527 return fs_devices;
480 528
481 INIT_LIST_HEAD(&fs_devices->devices);
482 INIT_LIST_HEAD(&fs_devices->alloc_list);
483 INIT_LIST_HEAD(&fs_devices->list);
484 mutex_init(&fs_devices->device_list_mutex);
485 fs_devices->latest_devid = orig->latest_devid; 529 fs_devices->latest_devid = orig->latest_devid;
486 fs_devices->latest_trans = orig->latest_trans; 530 fs_devices->latest_trans = orig->latest_trans;
487 fs_devices->total_devices = orig->total_devices; 531 fs_devices->total_devices = orig->total_devices;
488 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
489 532
490 /* We have held the volume lock, it is safe to get the devices. */ 533 /* We have held the volume lock, it is safe to get the devices. */
491 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 534 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
492 struct rcu_string *name; 535 struct rcu_string *name;
493 536
494 device = kzalloc(sizeof(*device), GFP_NOFS); 537 device = btrfs_alloc_device(NULL, &orig_dev->devid,
495 if (!device) 538 orig_dev->uuid);
539 if (IS_ERR(device))
496 goto error; 540 goto error;
497 541
498 /* 542 /*
@@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
506 } 550 }
507 rcu_assign_pointer(device->name, name); 551 rcu_assign_pointer(device->name, name);
508 552
509 device->devid = orig_dev->devid;
510 device->work.func = pending_bios_fn;
511 memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
512 spin_lock_init(&device->io_lock);
513 INIT_LIST_HEAD(&device->dev_list);
514 INIT_LIST_HEAD(&device->dev_alloc_list);
515
516 list_add(&device->dev_list, &fs_devices->devices); 553 list_add(&device->dev_list, &fs_devices->devices);
517 device->fs_devices = fs_devices; 554 device->fs_devices = fs_devices;
518 fs_devices->num_devices++; 555 fs_devices->num_devices++;
@@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
636 673
637 if (device->can_discard) 674 if (device->can_discard)
638 fs_devices->num_can_discard--; 675 fs_devices->num_can_discard--;
676 if (device->missing)
677 fs_devices->missing_devices--;
639 678
640 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 679 new_device = btrfs_alloc_device(NULL, &device->devid,
641 BUG_ON(!new_device); /* -ENOMEM */ 680 device->uuid);
642 memcpy(new_device, device, sizeof(*new_device)); 681 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
643 682
644 /* Safe because we are under uuid_mutex */ 683 /* Safe because we are under uuid_mutex */
645 if (device->name) { 684 if (device->name) {
646 name = rcu_string_strdup(device->name->str, GFP_NOFS); 685 name = rcu_string_strdup(device->name->str, GFP_NOFS);
647 BUG_ON(device->name && !name); /* -ENOMEM */ 686 BUG_ON(!name); /* -ENOMEM */
648 rcu_assign_pointer(new_device->name, name); 687 rcu_assign_pointer(new_device->name, name);
649 } 688 }
650 new_device->bdev = NULL; 689
651 new_device->writeable = 0;
652 new_device->in_fs_metadata = 0;
653 new_device->can_discard = 0;
654 spin_lock_init(&new_device->io_lock);
655 list_replace_rcu(&device->dev_list, &new_device->dev_list); 690 list_replace_rcu(&device->dev_list, &new_device->dev_list);
691 new_device->fs_devices = device->fs_devices;
656 692
657 call_rcu(&device->rcu, free_device); 693 call_rcu(&device->rcu, free_device);
658 } 694 }
@@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
865 disk_super = p + (bytenr & ~PAGE_CACHE_MASK); 901 disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
866 902
867 if (btrfs_super_bytenr(disk_super) != bytenr || 903 if (btrfs_super_bytenr(disk_super) != bytenr ||
868 disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) 904 btrfs_super_magic(disk_super) != BTRFS_MAGIC)
869 goto error_unmap; 905 goto error_unmap;
870 906
871 devid = btrfs_stack_device_id(&disk_super->dev_item); 907 devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
880 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 916 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
881 } 917 }
882 918
883 printk(KERN_CONT "devid %llu transid %llu %s\n", 919 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
884 (unsigned long long)devid, (unsigned long long)transid, path);
885 920
886 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 921 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
887 if (!ret && fs_devices_ret) 922 if (!ret && fs_devices_ret)
@@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1278 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); 1313 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1279 1314
1280 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 1315 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1281 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), 1316 btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
1282 BTRFS_UUID_SIZE);
1283 1317
1284 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 1318 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1285 btrfs_mark_buffer_dirty(leaf); 1319 btrfs_mark_buffer_dirty(leaf);
@@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1307 return ret; 1341 return ret;
1308} 1342}
1309 1343
1310static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) 1344static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
1345 u64 *devid_ret)
1311{ 1346{
1312 int ret; 1347 int ret;
1313 struct btrfs_key key; 1348 struct btrfs_key key;
1314 struct btrfs_key found_key; 1349 struct btrfs_key found_key;
1315 struct btrfs_path *path; 1350 struct btrfs_path *path;
1316 1351
1317 root = root->fs_info->chunk_root;
1318
1319 path = btrfs_alloc_path(); 1352 path = btrfs_alloc_path();
1320 if (!path) 1353 if (!path)
1321 return -ENOMEM; 1354 return -ENOMEM;
@@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1324 key.type = BTRFS_DEV_ITEM_KEY; 1357 key.type = BTRFS_DEV_ITEM_KEY;
1325 key.offset = (u64)-1; 1358 key.offset = (u64)-1;
1326 1359
1327 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1360 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
1328 if (ret < 0) 1361 if (ret < 0)
1329 goto error; 1362 goto error;
1330 1363
1331 BUG_ON(ret == 0); /* Corruption */ 1364 BUG_ON(ret == 0); /* Corruption */
1332 1365
1333 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, 1366 ret = btrfs_previous_item(fs_info->chunk_root, path,
1367 BTRFS_DEV_ITEMS_OBJECTID,
1334 BTRFS_DEV_ITEM_KEY); 1368 BTRFS_DEV_ITEM_KEY);
1335 if (ret) { 1369 if (ret) {
1336 *objectid = 1; 1370 *devid_ret = 1;
1337 } else { 1371 } else {
1338 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1372 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1339 path->slots[0]); 1373 path->slots[0]);
1340 *objectid = found_key.offset + 1; 1374 *devid_ret = found_key.offset + 1;
1341 } 1375 }
1342 ret = 0; 1376 ret = 0;
1343error: 1377error:
@@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
1391 btrfs_set_device_bandwidth(leaf, dev_item, 0); 1425 btrfs_set_device_bandwidth(leaf, dev_item, 0);
1392 btrfs_set_device_start_offset(leaf, dev_item, 0); 1426 btrfs_set_device_start_offset(leaf, dev_item, 0);
1393 1427
1394 ptr = (unsigned long)btrfs_device_uuid(dev_item); 1428 ptr = btrfs_device_uuid(dev_item);
1395 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 1429 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1396 ptr = (unsigned long)btrfs_device_fsid(dev_item); 1430 ptr = btrfs_device_fsid(dev_item);
1397 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); 1431 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
1398 btrfs_mark_buffer_dirty(leaf); 1432 btrfs_mark_buffer_dirty(leaf);
1399 1433
@@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1562 clear_super = true; 1596 clear_super = true;
1563 } 1597 }
1564 1598
1599 mutex_unlock(&uuid_mutex);
1565 ret = btrfs_shrink_device(device, 0); 1600 ret = btrfs_shrink_device(device, 0);
1601 mutex_lock(&uuid_mutex);
1566 if (ret) 1602 if (ret)
1567 goto error_undo; 1603 goto error_undo;
1568 1604
@@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1586 /* 1622 /*
1587 * the device list mutex makes sure that we don't change 1623 * the device list mutex makes sure that we don't change
1588 * the device list while someone else is writing out all 1624 * the device list while someone else is writing out all
1589 * the device supers. 1625 * the device supers. Whoever is writing all supers should
1626 * lock the device list mutex before getting the number of
1627 * devices in the super block (super_copy). Conversely,
1628 * whoever updates the number of devices in the super block
1629 * (super_copy) should hold the device list mutex.
1590 */ 1630 */
1591 1631
1592 cur_devices = device->fs_devices; 1632 cur_devices = device->fs_devices;
@@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1610 device->fs_devices->open_devices--; 1650 device->fs_devices->open_devices--;
1611 1651
1612 call_rcu(&device->rcu, free_device); 1652 call_rcu(&device->rcu, free_device);
1613 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1614 1653
1615 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; 1654 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
1616 btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); 1655 btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
1656 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1617 1657
1618 if (cur_devices->open_devices == 0) { 1658 if (cur_devices->open_devices == 0) {
1619 struct btrfs_fs_devices *fs_devices; 1659 struct btrfs_fs_devices *fs_devices;
@@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1793 if (!fs_devices->seeding) 1833 if (!fs_devices->seeding)
1794 return -EINVAL; 1834 return -EINVAL;
1795 1835
1796 seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 1836 seed_devices = __alloc_fs_devices();
1797 if (!seed_devices) 1837 if (IS_ERR(seed_devices))
1798 return -ENOMEM; 1838 return PTR_ERR(seed_devices);
1799 1839
1800 old_devices = clone_fs_devices(fs_devices); 1840 old_devices = clone_fs_devices(fs_devices);
1801 if (IS_ERR(old_devices)) { 1841 if (IS_ERR(old_devices)) {
@@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1814 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1854 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1815 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, 1855 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
1816 synchronize_rcu); 1856 synchronize_rcu);
1817 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1818 1857
1819 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1858 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1820 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1859 list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1830 generate_random_uuid(fs_devices->fsid); 1869 generate_random_uuid(fs_devices->fsid);
1831 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 1870 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1832 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 1871 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1872 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1873
1833 super_flags = btrfs_super_flags(disk_super) & 1874 super_flags = btrfs_super_flags(disk_super) &
1834 ~BTRFS_SUPER_FLAG_SEEDING; 1875 ~BTRFS_SUPER_FLAG_SEEDING;
1835 btrfs_set_super_flags(disk_super, super_flags); 1876 btrfs_set_super_flags(disk_super, super_flags);
@@ -1889,11 +1930,9 @@ next_slot:
1889 dev_item = btrfs_item_ptr(leaf, path->slots[0], 1930 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1890 struct btrfs_dev_item); 1931 struct btrfs_dev_item);
1891 devid = btrfs_device_id(leaf, dev_item); 1932 devid = btrfs_device_id(leaf, dev_item);
1892 read_extent_buffer(leaf, dev_uuid, 1933 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
1893 (unsigned long)btrfs_device_uuid(dev_item),
1894 BTRFS_UUID_SIZE); 1934 BTRFS_UUID_SIZE);
1895 read_extent_buffer(leaf, fs_uuid, 1935 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
1896 (unsigned long)btrfs_device_fsid(dev_item),
1897 BTRFS_UUID_SIZE); 1936 BTRFS_UUID_SIZE);
1898 device = btrfs_find_device(root->fs_info, devid, dev_uuid, 1937 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
1899 fs_uuid); 1938 fs_uuid);
@@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1956 } 1995 }
1957 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1996 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1958 1997
1959 device = kzalloc(sizeof(*device), GFP_NOFS); 1998 device = btrfs_alloc_device(root->fs_info, NULL, NULL);
1960 if (!device) { 1999 if (IS_ERR(device)) {
1961 /* we can safely leave the fs_devices entry around */ 2000 /* we can safely leave the fs_devices entry around */
1962 ret = -ENOMEM; 2001 ret = PTR_ERR(device);
1963 goto error; 2002 goto error;
1964 } 2003 }
1965 2004
@@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1971 } 2010 }
1972 rcu_assign_pointer(device->name, name); 2011 rcu_assign_pointer(device->name, name);
1973 2012
1974 ret = find_next_devid(root, &device->devid);
1975 if (ret) {
1976 rcu_string_free(device->name);
1977 kfree(device);
1978 goto error;
1979 }
1980
1981 trans = btrfs_start_transaction(root, 0); 2013 trans = btrfs_start_transaction(root, 0);
1982 if (IS_ERR(trans)) { 2014 if (IS_ERR(trans)) {
1983 rcu_string_free(device->name); 2015 rcu_string_free(device->name);
@@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1992 if (blk_queue_discard(q)) 2024 if (blk_queue_discard(q))
1993 device->can_discard = 1; 2025 device->can_discard = 1;
1994 device->writeable = 1; 2026 device->writeable = 1;
1995 device->work.func = pending_bios_fn;
1996 generate_random_uuid(device->uuid);
1997 spin_lock_init(&device->io_lock);
1998 device->generation = trans->transid; 2027 device->generation = trans->transid;
1999 device->io_width = root->sectorsize; 2028 device->io_width = root->sectorsize;
2000 device->io_align = root->sectorsize; 2029 device->io_align = root->sectorsize;
@@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2121 struct btrfs_fs_info *fs_info = root->fs_info; 2150 struct btrfs_fs_info *fs_info = root->fs_info;
2122 struct list_head *devices; 2151 struct list_head *devices;
2123 struct rcu_string *name; 2152 struct rcu_string *name;
2153 u64 devid = BTRFS_DEV_REPLACE_DEVID;
2124 int ret = 0; 2154 int ret = 0;
2125 2155
2126 *device_out = NULL; 2156 *device_out = NULL;
@@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2142 } 2172 }
2143 } 2173 }
2144 2174
2145 device = kzalloc(sizeof(*device), GFP_NOFS); 2175 device = btrfs_alloc_device(NULL, &devid, NULL);
2146 if (!device) { 2176 if (IS_ERR(device)) {
2147 ret = -ENOMEM; 2177 ret = PTR_ERR(device);
2148 goto error; 2178 goto error;
2149 } 2179 }
2150 2180
@@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2161 device->can_discard = 1; 2191 device->can_discard = 1;
2162 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2192 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2163 device->writeable = 1; 2193 device->writeable = 1;
2164 device->work.func = pending_bios_fn;
2165 generate_random_uuid(device->uuid);
2166 device->devid = BTRFS_DEV_REPLACE_DEVID;
2167 spin_lock_init(&device->io_lock);
2168 device->generation = 0; 2194 device->generation = 0;
2169 device->io_width = root->sectorsize; 2195 device->io_width = root->sectorsize;
2170 device->io_align = root->sectorsize; 2196 device->io_align = root->sectorsize;
@@ -2971,10 +2997,6 @@ again:
2971 if (found_key.objectid != key.objectid) 2997 if (found_key.objectid != key.objectid)
2972 break; 2998 break;
2973 2999
2974 /* chunk zero is special */
2975 if (found_key.offset == 0)
2976 break;
2977
2978 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3000 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2979 3001
2980 if (!counting) { 3002 if (!counting) {
@@ -3010,6 +3032,8 @@ again:
3010 spin_unlock(&fs_info->balance_lock); 3032 spin_unlock(&fs_info->balance_lock);
3011 } 3033 }
3012loop: 3034loop:
3035 if (found_key.offset == 0)
3036 break;
3013 key.offset = found_key.offset - 1; 3037 key.offset = found_key.offset - 1;
3014 } 3038 }
3015 3039
@@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
3074 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 3098 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3075} 3099}
3076 3100
3077void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3078 struct btrfs_ioctl_balance_args *bargs);
3079
3080/* 3101/*
3081 * Should be called with both balance and volume mutexes held 3102 * Should be called with both balance and volume mutexes held
3082 */ 3103 */
@@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3139 (bctl->data.target & ~allowed))) { 3160 (bctl->data.target & ~allowed))) {
3140 printk(KERN_ERR "btrfs: unable to start balance with target " 3161 printk(KERN_ERR "btrfs: unable to start balance with target "
3141 "data profile %llu\n", 3162 "data profile %llu\n",
3142 (unsigned long long)bctl->data.target); 3163 bctl->data.target);
3143 ret = -EINVAL; 3164 ret = -EINVAL;
3144 goto out; 3165 goto out;
3145 } 3166 }
@@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3148 (bctl->meta.target & ~allowed))) { 3169 (bctl->meta.target & ~allowed))) {
3149 printk(KERN_ERR "btrfs: unable to start balance with target " 3170 printk(KERN_ERR "btrfs: unable to start balance with target "
3150 "metadata profile %llu\n", 3171 "metadata profile %llu\n",
3151 (unsigned long long)bctl->meta.target); 3172 bctl->meta.target);
3152 ret = -EINVAL; 3173 ret = -EINVAL;
3153 goto out; 3174 goto out;
3154 } 3175 }
@@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3157 (bctl->sys.target & ~allowed))) { 3178 (bctl->sys.target & ~allowed))) {
3158 printk(KERN_ERR "btrfs: unable to start balance with target " 3179 printk(KERN_ERR "btrfs: unable to start balance with target "
3159 "system profile %llu\n", 3180 "system profile %llu\n",
3160 (unsigned long long)bctl->sys.target); 3181 bctl->sys.target);
3161 ret = -EINVAL; 3182 ret = -EINVAL;
3162 goto out; 3183 goto out;
3163 } 3184 }
@@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
3430 return 0; 3451 return 0;
3431} 3452}
3432 3453
3454static int btrfs_uuid_scan_kthread(void *data)
3455{
3456 struct btrfs_fs_info *fs_info = data;
3457 struct btrfs_root *root = fs_info->tree_root;
3458 struct btrfs_key key;
3459 struct btrfs_key max_key;
3460 struct btrfs_path *path = NULL;
3461 int ret = 0;
3462 struct extent_buffer *eb;
3463 int slot;
3464 struct btrfs_root_item root_item;
3465 u32 item_size;
3466 struct btrfs_trans_handle *trans = NULL;
3467
3468 path = btrfs_alloc_path();
3469 if (!path) {
3470 ret = -ENOMEM;
3471 goto out;
3472 }
3473
3474 key.objectid = 0;
3475 key.type = BTRFS_ROOT_ITEM_KEY;
3476 key.offset = 0;
3477
3478 max_key.objectid = (u64)-1;
3479 max_key.type = BTRFS_ROOT_ITEM_KEY;
3480 max_key.offset = (u64)-1;
3481
3482 path->keep_locks = 1;
3483
3484 while (1) {
3485 ret = btrfs_search_forward(root, &key, &max_key, path, 0);
3486 if (ret) {
3487 if (ret > 0)
3488 ret = 0;
3489 break;
3490 }
3491
3492 if (key.type != BTRFS_ROOT_ITEM_KEY ||
3493 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
3494 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
3495 key.objectid > BTRFS_LAST_FREE_OBJECTID)
3496 goto skip;
3497
3498 eb = path->nodes[0];
3499 slot = path->slots[0];
3500 item_size = btrfs_item_size_nr(eb, slot);
3501 if (item_size < sizeof(root_item))
3502 goto skip;
3503
3504 read_extent_buffer(eb, &root_item,
3505 btrfs_item_ptr_offset(eb, slot),
3506 (int)sizeof(root_item));
3507 if (btrfs_root_refs(&root_item) == 0)
3508 goto skip;
3509
3510 if (!btrfs_is_empty_uuid(root_item.uuid) ||
3511 !btrfs_is_empty_uuid(root_item.received_uuid)) {
3512 if (trans)
3513 goto update_tree;
3514
3515 btrfs_release_path(path);
3516 /*
3517 * 1 - subvol uuid item
3518 * 1 - received_subvol uuid item
3519 */
3520 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
3521 if (IS_ERR(trans)) {
3522 ret = PTR_ERR(trans);
3523 break;
3524 }
3525 continue;
3526 } else {
3527 goto skip;
3528 }
3529update_tree:
3530 if (!btrfs_is_empty_uuid(root_item.uuid)) {
3531 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
3532 root_item.uuid,
3533 BTRFS_UUID_KEY_SUBVOL,
3534 key.objectid);
3535 if (ret < 0) {
3536 pr_warn("btrfs: uuid_tree_add failed %d\n",
3537 ret);
3538 break;
3539 }
3540 }
3541
3542 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
3543 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
3544 root_item.received_uuid,
3545 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3546 key.objectid);
3547 if (ret < 0) {
3548 pr_warn("btrfs: uuid_tree_add failed %d\n",
3549 ret);
3550 break;
3551 }
3552 }
3553
3554skip:
3555 if (trans) {
3556 ret = btrfs_end_transaction(trans, fs_info->uuid_root);
3557 trans = NULL;
3558 if (ret)
3559 break;
3560 }
3561
3562 btrfs_release_path(path);
3563 if (key.offset < (u64)-1) {
3564 key.offset++;
3565 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
3566 key.offset = 0;
3567 key.type = BTRFS_ROOT_ITEM_KEY;
3568 } else if (key.objectid < (u64)-1) {
3569 key.offset = 0;
3570 key.type = BTRFS_ROOT_ITEM_KEY;
3571 key.objectid++;
3572 } else {
3573 break;
3574 }
3575 cond_resched();
3576 }
3577
3578out:
3579 btrfs_free_path(path);
3580 if (trans && !IS_ERR(trans))
3581 btrfs_end_transaction(trans, fs_info->uuid_root);
3582 if (ret)
3583 pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
3584 else
3585 fs_info->update_uuid_tree_gen = 1;
3586 up(&fs_info->uuid_tree_rescan_sem);
3587 return 0;
3588}
3589
3590/*
3591 * Callback for btrfs_uuid_tree_iterate().
3592 * returns:
3593 * 0 check succeeded, the entry is not outdated.
3594 * < 0 if an error occured.
3595 * > 0 if the check failed, which means the caller shall remove the entry.
3596 */
3597static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
3598 u8 *uuid, u8 type, u64 subid)
3599{
3600 struct btrfs_key key;
3601 int ret = 0;
3602 struct btrfs_root *subvol_root;
3603
3604 if (type != BTRFS_UUID_KEY_SUBVOL &&
3605 type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
3606 goto out;
3607
3608 key.objectid = subid;
3609 key.type = BTRFS_ROOT_ITEM_KEY;
3610 key.offset = (u64)-1;
3611 subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
3612 if (IS_ERR(subvol_root)) {
3613 ret = PTR_ERR(subvol_root);
3614 if (ret == -ENOENT)
3615 ret = 1;
3616 goto out;
3617 }
3618
3619 switch (type) {
3620 case BTRFS_UUID_KEY_SUBVOL:
3621 if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
3622 ret = 1;
3623 break;
3624 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
3625 if (memcmp(uuid, subvol_root->root_item.received_uuid,
3626 BTRFS_UUID_SIZE))
3627 ret = 1;
3628 break;
3629 }
3630
3631out:
3632 return ret;
3633}
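
The 0 / > 0 / < 0 convention documented above is what the tree iteration uses to decide whether to keep an entry, remove it, or abort the walk. A hedged userspace sketch of a caller honouring that convention; check_entry() is a stand-in, not the real btrfs helper:

/* Illustrative only: how an iterator consumes the return convention above. */
#include <stdio.h>

static int check_entry(int id)
{
	if (id < 0)
		return -1;	/* error while checking the entry */
	if (id % 2)
		return 1;	/* pretend odd ids are outdated */
	return 0;		/* entry is still valid */
}

int main(void)
{
	int ids[] = { 2, 3, -4 };
	unsigned int i;

	for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++) {
		int ret = check_entry(ids[i]);

		if (ret < 0) {
			fprintf(stderr, "abort walk: %d\n", ret);
			return 1;
		}
		if (ret > 0)
			printf("remove entry %d\n", ids[i]);
		else
			printf("keep entry %d\n", ids[i]);
	}
	return 0;
}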
3634
3635static int btrfs_uuid_rescan_kthread(void *data)
3636{
3637 struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
3638 int ret;
3639
3640 /*
3641 * 1st step is to iterate through the existing UUID tree and
3642 * to delete all entries that contain outdated data.
3643 * 2nd step is to add all missing entries to the UUID tree.
3644 */
3645 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
3646 if (ret < 0) {
3647 pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
3648 up(&fs_info->uuid_tree_rescan_sem);
3649 return ret;
3650 }
3651 return btrfs_uuid_scan_kthread(data);
3652}
3653
3654int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
3655{
3656 struct btrfs_trans_handle *trans;
3657 struct btrfs_root *tree_root = fs_info->tree_root;
3658 struct btrfs_root *uuid_root;
3659 struct task_struct *task;
3660 int ret;
3661
3662 /*
3663 * 1 - root node
3664 * 1 - root item
3665 */
3666 trans = btrfs_start_transaction(tree_root, 2);
3667 if (IS_ERR(trans))
3668 return PTR_ERR(trans);
3669
3670 uuid_root = btrfs_create_tree(trans, fs_info,
3671 BTRFS_UUID_TREE_OBJECTID);
3672 if (IS_ERR(uuid_root)) {
3673 btrfs_abort_transaction(trans, tree_root,
3674 PTR_ERR(uuid_root));
3675 return PTR_ERR(uuid_root);
3676 }
3677
3678 fs_info->uuid_root = uuid_root;
3679
3680 ret = btrfs_commit_transaction(trans, tree_root);
3681 if (ret)
3682 return ret;
3683
3684 down(&fs_info->uuid_tree_rescan_sem);
3685 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
3686 if (IS_ERR(task)) {
3687 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
3688 pr_warn("btrfs: failed to start uuid_scan task\n");
3689 up(&fs_info->uuid_tree_rescan_sem);
3690 return PTR_ERR(task);
3691 }
3692
3693 return 0;
3694}
3695
3696int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
3697{
3698 struct task_struct *task;
3699
3700 down(&fs_info->uuid_tree_rescan_sem);
3701 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
3702 if (IS_ERR(task)) {
3703 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
3704 pr_warn("btrfs: failed to start uuid_rescan task\n");
3705 up(&fs_info->uuid_tree_rescan_sem);
3706 return PTR_ERR(task);
3707 }
3708
3709 return 0;
3710}
3711
3433/* 3712/*
3434 * shrinking a device means finding all of the device extents past 3713 * shrinking a device means finding all of the device extents past
3435 * the new size, and then following the back refs to the chunks. 3714 * the new size, and then following the back refs to the chunks.
@@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4194 * and exit, so return 1 so the callers don't try to use other copies. 4473 * and exit, so return 1 so the callers don't try to use other copies.
4195 */ 4474 */
4196 if (!em) { 4475 if (!em) {
4197 btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, 4476 btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
4198 logical+len); 4477 logical+len);
4199 return 1; 4478 return 1;
4200 } 4479 }
4201 4480
4202 if (em->start > logical || em->start + em->len < logical) { 4481 if (em->start > logical || em->start + em->len < logical) {
4203 btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " 4482 btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
4204 "%Lu-%Lu\n", logical, logical+len, em->start, 4483 "%Lu-%Lu\n", logical, logical+len, em->start,
4205 em->start + em->len); 4484 em->start + em->len);
4206 return 1; 4485 return 1;
@@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4375 4654
4376 if (!em) { 4655 if (!em) {
4377 btrfs_crit(fs_info, "unable to find logical %llu len %llu", 4656 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
4378 (unsigned long long)logical, 4657 logical, *length);
4379 (unsigned long long)*length);
4380 return -EINVAL; 4658 return -EINVAL;
4381 } 4659 }
4382 4660
@@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4671 } 4949 }
4672 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); 4950 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
4673 if (!bbio) { 4951 if (!bbio) {
4952 kfree(raid_map);
4674 ret = -ENOMEM; 4953 ret = -ENOMEM;
4675 goto out; 4954 goto out;
4676 } 4955 }
@@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5246 5525
5247 if (map_length < length) { 5526 if (map_length < length) {
5248 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", 5527 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
5249 (unsigned long long)logical, 5528 logical, length, map_length);
5250 (unsigned long long)length,
5251 (unsigned long long)map_length);
5252 BUG(); 5529 BUG();
5253 } 5530 }
5254 5531
@@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
5314 struct btrfs_device *device; 5591 struct btrfs_device *device;
5315 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 5592 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
5316 5593
5317 device = kzalloc(sizeof(*device), GFP_NOFS); 5594 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
5318 if (!device) 5595 if (IS_ERR(device))
5319 return NULL; 5596 return NULL;
5320 list_add(&device->dev_list, 5597
5321 &fs_devices->devices); 5598 list_add(&device->dev_list, &fs_devices->devices);
5322 device->devid = devid;
5323 device->work.func = pending_bios_fn;
5324 device->fs_devices = fs_devices; 5599 device->fs_devices = fs_devices;
5325 device->missing = 1;
5326 fs_devices->num_devices++; 5600 fs_devices->num_devices++;
5601
5602 device->missing = 1;
5327 fs_devices->missing_devices++; 5603 fs_devices->missing_devices++;
5328 spin_lock_init(&device->io_lock); 5604
5329 INIT_LIST_HEAD(&device->dev_alloc_list);
5330 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
5331 return device; 5605 return device;
5332} 5606}
5333 5607
5608/**
5609 * btrfs_alloc_device - allocate struct btrfs_device
5610 * @fs_info: used only for generating a new devid, can be NULL if
5611 * devid is provided (i.e. @devid != NULL).
5612 * @devid: a pointer to devid for this device. If NULL a new devid
5613 * is generated.
5614 * @uuid: a pointer to UUID for this device. If NULL a new UUID
5615 * is generated.
5616 *
5617 * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
5618 * on error. Returned struct is not linked onto any lists and can be
5619 * destroyed with kfree() right away.
5620 */
5621struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5622 const u64 *devid,
5623 const u8 *uuid)
5624{
5625 struct btrfs_device *dev;
5626 u64 tmp;
5627
5628 if (!devid && !fs_info) {
5629 WARN_ON(1);
5630 return ERR_PTR(-EINVAL);
5631 }
5632
5633 dev = __alloc_device();
5634 if (IS_ERR(dev))
5635 return dev;
5636
5637 if (devid)
5638 tmp = *devid;
5639 else {
5640 int ret;
5641
5642 ret = find_next_devid(fs_info, &tmp);
5643 if (ret) {
5644 kfree(dev);
5645 return ERR_PTR(ret);
5646 }
5647 }
5648 dev->devid = tmp;
5649
5650 if (uuid)
5651 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
5652 else
5653 generate_random_uuid(dev->uuid);
5654
5655 dev->work.func = pending_bios_fn;
5656
5657 return dev;
5658}
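
As the kerneldoc above notes, the allocator can either take an explicit devid/uuid pair or generate both. A hedged sketch of the two call shapes in kernel context (fs_info and the surrounding function are assumed; per the kerneldoc the result is unlinked and may be freed with kfree()):

	/* Illustrative only, kernel context assumed; not part of this patch. */
	struct btrfs_device *dev;
	u64 devid = 7;				/* caller-chosen device id */
	u8 uuid[BTRFS_UUID_SIZE] = { 0 };	/* placeholder UUID bytes */

	/* 1) caller supplies both devid and uuid, fs_info may be NULL */
	dev = btrfs_alloc_device(NULL, &devid, uuid);
	if (IS_ERR(dev))
		return PTR_ERR(dev);
	kfree(dev);				/* still unlinked, plain kfree */

	/* 2) let a new devid and a random uuid be generated */
	dev = btrfs_alloc_device(fs_info, NULL, NULL);
	if (IS_ERR(dev))
		return PTR_ERR(dev);
	kfree(dev);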
5659
5334static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, 5660static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5335 struct extent_buffer *leaf, 5661 struct extent_buffer *leaf,
5336 struct btrfs_chunk *chunk) 5662 struct btrfs_chunk *chunk)
@@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
5437 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); 5763 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
5438 device->is_tgtdev_for_dev_replace = 0; 5764 device->is_tgtdev_for_dev_replace = 0;
5439 5765
5440 ptr = (unsigned long)btrfs_device_uuid(dev_item); 5766 ptr = btrfs_device_uuid(dev_item);
5441 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 5767 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
5442} 5768}
5443 5769
@@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root,
5500 u8 dev_uuid[BTRFS_UUID_SIZE]; 5826 u8 dev_uuid[BTRFS_UUID_SIZE];
5501 5827
5502 devid = btrfs_device_id(leaf, dev_item); 5828 devid = btrfs_device_id(leaf, dev_item);
5503 read_extent_buffer(leaf, dev_uuid, 5829 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
5504 (unsigned long)btrfs_device_uuid(dev_item),
5505 BTRFS_UUID_SIZE); 5830 BTRFS_UUID_SIZE);
5506 read_extent_buffer(leaf, fs_uuid, 5831 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
5507 (unsigned long)btrfs_device_fsid(dev_item),
5508 BTRFS_UUID_SIZE); 5832 BTRFS_UUID_SIZE);
5509 5833
5510 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { 5834 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root,
5519 return -EIO; 5843 return -EIO;
5520 5844
5521 if (!device) { 5845 if (!device) {
5522 btrfs_warn(root->fs_info, "devid %llu missing", 5846 btrfs_warn(root->fs_info, "devid %llu missing", devid);
5523 (unsigned long long)devid);
5524 device = add_missing_dev(root, devid, dev_uuid); 5847 device = add_missing_dev(root, devid, dev_uuid);
5525 if (!device) 5848 if (!device)
5526 return -ENOMEM; 5849 return -ENOMEM;
@@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
5644 mutex_lock(&uuid_mutex); 5967 mutex_lock(&uuid_mutex);
5645 lock_chunks(root); 5968 lock_chunks(root);
5646 5969
5647 /* first we search for all of the device items, and then we 5970 /*
5648 * read in all of the chunk items. This way we can create chunk 5971 * Read all device items, and then all the chunk items. All
5649 * mappings that reference all of the devices that are afound 5972 * device items are found before any chunk item (their object id
5973 * is smaller than the lowest possible object id for a chunk
5974 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
5650 */ 5975 */
5651 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 5976 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
5652 key.offset = 0; 5977 key.offset = 0;
5653 key.type = 0; 5978 key.type = 0;
5654again:
5655 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5979 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5656 if (ret < 0) 5980 if (ret < 0)
5657 goto error; 5981 goto error;
@@ -5667,17 +5991,13 @@ again:
5667 break; 5991 break;
5668 } 5992 }
5669 btrfs_item_key_to_cpu(leaf, &found_key, slot); 5993 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5670 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 5994 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
5671 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) 5995 struct btrfs_dev_item *dev_item;
5672 break; 5996 dev_item = btrfs_item_ptr(leaf, slot,
5673 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
5674 struct btrfs_dev_item *dev_item;
5675 dev_item = btrfs_item_ptr(leaf, slot,
5676 struct btrfs_dev_item); 5997 struct btrfs_dev_item);
5677 ret = read_one_dev(root, leaf, dev_item); 5998 ret = read_one_dev(root, leaf, dev_item);
5678 if (ret) 5999 if (ret)
5679 goto error; 6000 goto error;
5680 }
5681 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { 6001 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
5682 struct btrfs_chunk *chunk; 6002 struct btrfs_chunk *chunk;
5683 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 6003 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -5687,11 +6007,6 @@ again:
5687 } 6007 }
5688 path->slots[0]++; 6008 path->slots[0]++;
5689 } 6009 }
5690 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
5691 key.objectid = 0;
5692 btrfs_release_path(path);
5693 goto again;
5694 }
5695 ret = 0; 6010 ret = 0;
5696error: 6011error:
5697 unlock_chunks(root); 6012 unlock_chunks(root);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 86705583480d..b72f540c8b29 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,8 @@ struct btrfs_fs_devices {
152 int rotating; 152 int rotating;
153}; 153};
154 154
155#define BTRFS_BIO_INLINE_CSUM_SIZE 64
156
155/* 157/*
156 * we need the mirror number and stripe index to be passed around 158 * we need the mirror number and stripe index to be passed around
157 * the call chain while we are processing end_io (especially errors). 159 * the call chain while we are processing end_io (especially errors).
@@ -161,9 +163,14 @@ struct btrfs_fs_devices {
161 * we allocate are actually btrfs_io_bios. We'll cram as much of 163 * we allocate are actually btrfs_io_bios. We'll cram as much of
162 * struct btrfs_bio as we can into this over time. 164 * struct btrfs_bio as we can into this over time.
163 */ 165 */
166typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
164struct btrfs_io_bio { 167struct btrfs_io_bio {
165 unsigned long mirror_num; 168 unsigned long mirror_num;
166 unsigned long stripe_index; 169 unsigned long stripe_index;
170 u8 *csum;
171 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
172 u8 *csum_allocated;
173 btrfs_io_bio_end_io_t *end_io;
167 struct bio bio; 174 struct bio bio;
168}; 175};
169 176
@@ -298,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
298int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, 305int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
299 char *device_path, 306 char *device_path,
300 struct btrfs_device **device); 307 struct btrfs_device **device);
308struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
309 const u64 *devid,
310 const u8 *uuid);
301int btrfs_rm_device(struct btrfs_root *root, char *device_path); 311int btrfs_rm_device(struct btrfs_root *root, char *device_path);
302void btrfs_cleanup_fs_uuids(void); 312void btrfs_cleanup_fs_uuids(void);
303int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 313int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -315,6 +325,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
315int btrfs_recover_balance(struct btrfs_fs_info *fs_info); 325int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
316int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 326int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
317int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 327int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
328int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
329int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
318int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 330int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
319int find_free_dev_extent(struct btrfs_trans_handle *trans, 331int find_free_dev_extent(struct btrfs_trans_handle *trans,
320 struct btrfs_device *device, u64 num_bytes, 332 struct btrfs_device *device, u64 num_bytes,
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
deleted file mode 100644
index ea940b1db77b..000000000000
--- a/fs/cifs/AUTHORS
+++ /dev/null
@@ -1,55 +0,0 @@
1Original Author
2===============
3Steve French (sfrench@samba.org)
4
5The author wishes to express his appreciation and thanks to:
6Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
7improvements. Thanks to IBM for allowing me time and test resources to pursue
8this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
9the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
10Jeremy Allison of the Samba team has done invaluable work in adding the server
11side of the original CIFS Unix extensions and reviewing and implementing
12portions of the newer CIFS POSIX extensions into the Samba 3 file server. Thank
13Dave Boutcher of IBM Rochester (author of the OS/400 smb/cifs filesystem client)
14for proving years ago that very good smb/cifs clients could be done on Unix-like
15operating systems. Volker Lendecke, Andrew Tridgell, Urban Widmark, John
16Newbigin and others for their work on the Linux smbfs module. Thanks to
17the other members of the Storage Network Industry Association CIFS Technical
18Workgroup for their work specifying this highly complex protocol and finally
19thanks to the Samba team for their technical advice and encouragement.
20
21Patch Contributors
22------------------
23Zwane Mwaikambo
24Andi Kleen
25Amrut Joshi
26Shobhit Dayal
27Sergey Vlasov
28Richard Hughes
29Yury Umanets
30Mark Hamzy (for some of the early cifs IPv6 work)
31Domen Puncer
32Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
33Vince Negri and Dave Stahl (for finding an important caching bug)
34Adrian Bunk (kcalloc cleanups)
35Miklos Szeredi
36Kazeon team for various fixes especially for 2.4 version.
37Asser Ferno (Change Notify support)
38Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
39Gunter Kukkukk (testing and suggestions for support of old servers)
40Igor Mammedov (DFS support)
41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
42
43Test case and Bug Report contributors
44-------------------------------------
45Thanks to those in the community who have submitted detailed bug reports
46and debug of problems they have found: Jochen Dolze, David Blaine,
47Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori,
48Lars Muller, Urban Widmark, Massimiliano Ferrero, Howard Owen,
49Olaf Kirch, Kieron Briggs, Nick Millington and others. Also special
50mention to the Stanford Checker (SWAT) which pointed out many minor
51bugs in error paths. Valuable suggestions also have come from Al Viro
52and Dave Miller.
53
54And thanks to the IBM LTC and Power test teams and SuSE testers for
55finding multiple bugs during excellent stress test runs.
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
deleted file mode 100644
index bc0025cdd1c9..000000000000
--- a/fs/cifs/CHANGES
+++ /dev/null
@@ -1,1065 +0,0 @@
1Version 1.62
2------------
3Add sockopt=TCP_NODELAY mount option. EA (xattr) routines hardened
4to more strictly handle corrupt frames.
5
6Version 1.61
7------------
8Fix append problem to Samba servers (files opened with O_APPEND could
9have duplicated data). Fix oops in cifs_lookup. Workaround problem
10mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
11Disable use of server inode numbers when server only
12partially supports them (e.g. for one server querying inode numbers on
13FindFirst fails but QPathInfo queries works). Fix oops with dfs in
14cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
15for OpenOffice when on forcedirectio mount e.g.)
16
17Version 1.60
18-------------
19Fix memory leak in reconnect. Fix oops in DFS mount error path.
20Set s_maxbytes to smaller (the max that vfs can handle) so that
21sendfile will now work over cifs mounts again. Add noforcegid
22and noforceuid mount parameters. Fix small mem leak when using
23ntlmv2. Fix 2nd mount to same server but with different port to
24be allowed (rather than reusing the 1st port) - only when the
25user explicitly overrides the port on the 2nd mount.
26
27Version 1.59
28------------
29Client uses server inode numbers (which are persistent) rather than
30client generated ones by default (mount option "serverino" turned
31on by default if server supports it). Add forceuid and forcegid
32mount options (so that when negotiating unix extensions specifying
33which uid mounted does not immediately force the server's reported
34uids to be overridden). Add support for scope mount parm. Improve
35hard link detection to use same inode for both. Do not set
36read-only dos attribute on directories (for chmod) since Windows
37explorer special cases this attribute bit for directories for
38a different purpose.
39
40Version 1.58
41------------
42Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
43when the UTF-8 string is composed of unusually long (more than 4 byte) converted
44characters. Add support for mounting root of a share which redirects immediately
45to DFS target. Convert string conversion functions from Unicode to more
46accurately mark string length before allocating memory (which may help the
47rare cases where a UTF-8 string is much larger than the UCS2 string that
48we converted from). Fix endianness of the vcnum field used during
49session setup to distinguish multiple mounts to same server from different
50userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
51flag to be set to 2, and mount must enable krb5 to turn on extended security).
52Performance of file create to Samba improved (posix create on lookup
53removes 1 of 2 network requests sent on file create)
54
55Version 1.57
56------------
57Improve support for multiple security contexts to the same server. We
58used to use the same "vcnumber" for all connections which could cause
59the server to treat subsequent connections, especially those that
60are authenticated as guest, as reconnections, invalidating the earlier
61user's smb session. This fix allows cifs to mount multiple times to the
62same server with different userids without risking invalidating earlier
63established security contexts. fsync now sends SMB Flush operation
64to better ensure that we wait for server to write all of the data to
65server disk (not just write it over the network). Add new mount
66parameter to allow user to disable sending the (slow) SMB flush on
67fsync if desired (fsync still flushes all cached write data to the server).
68Posix file open support added (turned off after one attempt if server
69fails to support it properly, as with Samba server versions prior to 3.3.2)
70Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
71little memory for the "nativeFileSystem" field returned by the server
72during mount). Endian convert inode numbers if necessary (makes it easier
73to compare inode numbers on network files from big endian systems).
74
75Version 1.56
76------------
77Add "forcemandatorylock" mount option to allow user to use mandatory
78rather than posix (advisory) byte range locks, even though server would
79support posix byte range locks. Fix query of root inode when prefixpath
80specified and user does not have access to query information about the
81top of the share. Fix problem in 2.6.28 resolving DFS paths to
82Samba servers (worked to Windows). Fix rmdir so that pending search
83(readdir) requests do not get invalid results which include the now
84removed directory. Fix oops in cifs_dfs_ref.c when prefixpath is not reachable
85when using DFS. Add better file create support to servers which support
86the CIFS POSIX protocol extensions (this adds support for new flags
87on create, and improves semantics for write of locked ranges).
88
89Version 1.55
90------------
91Various fixes to make delete of open files behavior more predictable
92(when delete of an open file fails we mark the file as "delete-on-close"
93in a way that more servers accept, but only if we can first rename the
94file to a temporary name). Add experimental support for more safely
95handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
96sends, and also let tcp autotune the socket send and receive buffers.
97This reduces the number of EAGAIN errors returned by TCP/IP in
98high stress workloads (and the number of retries on socket writes
99when sending large SMBWriteX requests). Fix case in which a portion of
100data can in some cases not get written to the file on the server before the
101file is closed. Fix DFS parsing to properly handle path consumed field,
102and to handle certain codepage conversions better. Fix mount and
103umount race that can cause oops in mount or umount or reconnect.
104
105Version 1.54
106------------
107Fix premature write failure on congested networks (we would give up
108on EAGAIN from the socket too quickly on large writes).
109Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
110Fix endian problems in acl (mode from/to cifs acl) on bigendian
111architectures. Fix problems with preserving timestamps on copying open
112files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
113on parent directory when server supports Unix Extensions but not POSIX
114create. Update cifs.upcall version to handle new Kerberos sec flags
115(this requires update of cifs.upcall program from Samba). Fix memory leak
116on dns_upcall (resolving DFS referralls). Fix plain text password
117authentication (requires setting SecurityFlags to 0x30030 to enable
118lanman and plain text though). Fix writes to be at correct offset when
119file is open with O_APPEND and file is on a directio (forcediretio) mount.
120Fix bug in rewinding readdir directory searches. Add nodfs mount option.
121
122Version 1.53
123------------
124DFS support added (Microsoft Distributed File System client support needed
125for referrals which enable a hierarchical name space among servers).
126Disable temporary caching of mode bits to servers which do not support
127storing of mode (e.g. Windows servers, when client mounts without cifsacl
128mount option) and add new "dynperm" mount option to enable temporary caching
129of mode (enable old behavior). Fix hang on mount caused when server crashes
130tcp session during negotiate protocol.
131
132Version 1.52
133------------
134Fix oops on second mount to server when null auth is used.
135Enable experimental Kerberos support. Return writebehind errors on flush
136and sync so that events like out of disk space get reported properly on
137cached files. Fix setxattr failure to certain Samba versions. Fix mount
138of second share to disconnected server session (autoreconnect on this).
139Add ability to modify cifs acls for handling chmod (when mounted with
140cifsacl flag). Fix prefixpath path separator so we can handle mounts
141with prefixpaths longer than one directory (one path component) when
142mounted to Windows servers. Fix slow file open when cifsacl
143enabled. Fix memory leak in FindNext when the SMB call returns -EBADF.
144
145
146Version 1.51
147------------
148Fix memory leak in statfs when mounted to very old servers (e.g.
149Windows 9x). Add new feature "POSIX open" which allows servers
150which support the current POSIX Extensions to provide better semantics
151(e.g. delete for open files opened with posix open). Take into
152account umask on posix mkdir not just older style mkdir. Add
153ability to mount to IPC$ share (which allows CIFS named pipes to be
154opened, read and written as if they were files). When 1st tree
155connect fails (e.g. due to signing negotiation failure) fix
156leak that causes cifsd not to stop and rmmod to fail to cleanup
157cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
158bigendian architectures. Fix possible memory corruption when
159EAGAIN returned on kern_recvmsg. Return better error if server
160requires packet signing but client has disabled it. When mounted
161with cifsacl mount option - mode bits are approximated based
162on the contents of the ACL of the file or directory. When cifs
163mount helper is missing convert make sure that UNC name
164has backslash (not forward slash) between ip address of server
165and the share name.
166
167Version 1.50
168------------
169Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
170done with "serverino" mount option). Add support for POSIX Unlink
171(helps with certain sharing violation cases when server such as
172Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
173mount option to allow disabling the CIFS Unix Extensions for just
174that mount. Fix hang on spinlock in find_writable_file (race when
175reopening file after session crash). Byte range unlock request to
176windows server could unlock more bytes (on server copy of file)
177than intended if start of unlock request is well before start of
178a previous byte range lock that we issued.
179
180Version 1.49
181------------
182IPv6 support. Enable ipv6 addresses to be passed on mount (put the ipv6
183address after the "ip=" mount option, at least until mount.cifs is fixed to
184handle DNS host to ipv6 name translation). Accept override of uid or gid
185on mount even when Unix Extensions are negotiated (it used to be ignored
186when Unix Extensions were ignored). This allows users to override the
187default uid and gid for files when they are certain that the uids or
188gids on the server do not match those of the client. Make "sec=none"
189mount override username (so that null user connection is attempted)
190to match what documentation said. Support for very large reads, over 127K,
191available to some newer servers (such as Samba 3.0.26 and later but
192note that it also requires setting CIFSMaxBufSize at module install
193time to a larger value which may hurt performance in some cases).
194Make sign option force signing (or fail if server does not support it).
195
196Version 1.48
197------------
198Fix mtime bouncing around from local idea of last write times to remote time.
199Fix hang (in i_size_read) when simultaneous size update of same remote file
200on smp system corrupts sequence number. Do not reread unnecessarily partial page
201(which we are about to overwrite anyway) when writing out file opened rw.
202When DOS attribute of file on non-Unix server's file changes on the server side
203from read-only back to read-write, reflect this change in default file mode
204(we had been leaving a file's mode read-only until the inode were reloaded).
205Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
206when archive dos attribute not set and we are changing mode back to writeable
207on server which does not support the Unix Extensions). Remove read only dos
208attribute on chmod when adding any write permission (ie on any of
209user/group/other (not all of user/group/other ie 0222) when
210mounted to windows. Add support for POSIX MkDir (slight performance
211enhancement and eliminates the network race between the mkdir and set
212path info of the mode).
213
214
215Version 1.47
216------------
217Fix oops in list_del during mount caused by unaligned string.
218Fix file corruption which could occur on some large file
219copies caused by writepages page i/o completion bug.
220Seek to SEEK_END forces check for update of file size for non-cached
221files. Allow file size to be updated on remote extend of locally open,
222non-cached file. Fix reconnect to newer Samba servers (or other servers
223which support the CIFS Unix/POSIX extensions) so that we again tell the
224server the Unix/POSIX cifs capabilities which we support (SetFSInfo).
225Add experimental support for new POSIX Open/Mkdir (which returns
226stat information on the open, and allows setting the mode).
227
228Version 1.46
229------------
230Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps.
231Allow null user to be specified on mount ("username="). Do not return
232EINVAL on readdir when filldir fails due to overwritten blocksize
233(fixes FC problem). Return error in rename 2nd attempt retry (ie report
234if rename by handle also fails, after rename by path fails, we were
235not reporting whether the retry worked or not). Fix NTLMv2 to
236work to Windows servers (mount with option "sec=ntlmv2").
237
238Version 1.45
239------------
240Do not time out lockw calls when using posix extensions. Do not
241time out requests if server still responding reasonably fast
242on requests on other threads. Improve POSIX locking emulation,
243(lock cancel now works, and unlock of merged range works even
244to Windows servers now). Fix oops on mount to lanman servers
245(win9x, os/2 etc.) when null password. Do not send listxattr
246(SMB to query all EAs) if nouser_xattr specified. Fix SE Linux
247problem (instantiate inodes/dentries in right order for readdir).
248
249Version 1.44
250------------
251Rewritten sessionsetup support, including support for legacy SMB
252session setup needed for OS/2 and older servers such as Windows 95 and 98.
253Fix oops on ls to OS/2 servers. Add support for level 1 FindFirst
254so we can do search (ls etc.) to OS/2. Do not send NTCreateX
255or recent levels of FindFirst unless server says it supports NT SMBs
256(instead use legacy equivalents from LANMAN dialect). Fix to allow
257NTLMv2 authentication support (now can use stronger password hashing
258on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004).
259Allow override of global cifs security flags on mount via "sec=" option(s).
260
261Version 1.43
262------------
263POSIX locking to servers which support CIFS POSIX Extensions
264(disabled by default controlled by proc/fs/cifs/Experimental).
265Handle conversion of long share names (especially Asian languages)
266to Unicode during mount. Fix memory leak in sess struct on reconnect.
267Fix rare oops after acpi suspend. Fix O_TRUNC opens to overwrite on
268cifs open which helps rare case when setpathinfo fails or server does
269not support it.
270
271Version 1.42
272------------
273Fix slow oplock break when mounted to different servers at the same time and
274the tids match and we try to find matching fid on wrong server. Fix read
275looping when signing required by server (2.6.16 kernel only). Fix readdir
276vs. rename race which could cause each to hang. Return . and .. even
277if server does not. Allow searches to skip first three entries and
278begin at any location. Fix oops in find_writeable_file.
279
280Version 1.41
281------------
282Fix NTLMv2 security (can be enabled in /proc/fs/cifs) so customers can
283configure stronger authentication. Fix sfu symlinks so they can
284be followed (not just recognized). Fix wraparound of bcc on
285read responses when buffer size over 64K and also fix wrap of
286max smb buffer size when CIFSMaxBufSize over 64K. Fix oops in
287cifs_user_read and cifs_readpages (when EAGAIN on send of smb
288on socket is returned over and over). Add POSIX (advisory) byte range
289locking support (requires server with newest CIFS UNIX Extensions
290to the protocol implemented). Slow down negprot slightly in port 139
291RFC1001 case to give session_init time on buggy servers.
292
293Version 1.40
294------------
295Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
296of readpages by eliminating one extra memcpy. Allow update of file size
297from remote server even if file is open for write as long as mount is
298directio. Recognize share mode security and send NTLM encrypted password
299on tree connect if share mode negotiated.
300
301Version 1.39
302------------
303Defer close of a file handle slightly if pending writes depend on that handle
304(this reduces the EBADF bad file handle errors that can be logged under heavy
305stress on writes). Modify cifs Kconfig options to expose CONFIG_CIFS_STATS2
306Fix SFU style symlinks and mknod needed for servers which do not support the
307CIFS Unix Extensions. Fix setfacl/getfacl on bigendian. Timeout negative
308dentries so files that the client sees as deleted but that later get created
309on the server will be recognized. Add client side permission check on setattr.
310Timeout stuck requests better (where server has never responded or sent corrupt
311responses)
312
313Version 1.38
314------------
315Fix tcp socket retransmission timeouts (e.g. on ENOSPACE from the socket)
316to be smaller at first (but increasing) so large write performance
317over GigE is better. Do not hang thread on illegal byte range lock response
318from Windows (Windows can send an RFC1001 size which does not match smb size) by
319allowing an SMBs TCP length to be up to a few bytes longer than it should be.
320wsize and rsize can now be larger than negotiated buffer size if server
321supports large readx/writex, even when directio mount flag not specified.
322Write size will in many cases now be 16K instead of 4K which greatly helps
323file copy performance on lightly loaded networks. Fix oops in dnotify
324when experimental config flag enabled. Make cifsFYI more granular.
325
326Version 1.37
327------------
328Fix readdir caching when unlink removes file in current search buffer,
329and this is followed by a rewind search to just before the deleted entry.
330Do not attempt to set ctime unless atime and/or mtime change requested
331(most servers throw it away anyway). Fix length check of received smbs
332to be more accurate. Fix big endian problem with mapchars mount option,
333and with a field returned by statfs.
334
335Version 1.36
336------------
337Add support for mounting to older pre-CIFS servers such as Windows9x and ME.
338For these older servers, add option for passing netbios name of server in
339on mount (servernetbiosname). Add suspend support for power management, to
340avoid cifsd thread preventing software suspend from working.
341Add mount option for disabling the default behavior of sending byte range lock
342requests to the server (necessary for certain applications which break with
343mandatory lock behavior such as Evolution), and also mount option for
344requesting case insensitive matching for path based requests (requesting
345case sensitive is the default).
346
347Version 1.35
348------------
349Add writepage performance improvements. Fix path name conversions
350for long filenames on mounts which were done with "mapchars" mount option
351specified. Ensure multiplex ids do not collide. Fix case in which
352rmmod can oops if done soon after last unmount. Fix truncated
353search (readdir) output when resume filename was a long filename.
354Fix filename conversion when mapchars mount option was specified and
355filename was a long filename.
356
357Version 1.34
358------------
359Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
360Do not oops if root user kills cifs oplock kernel thread or
361kills the cifsd thread (NB: killing the cifs kernel threads is not
362recommended, unmount and rmmod cifs will kill them when they are
363no longer needed). Fix readdir to ASCII servers (ie older servers
364which do not support Unicode) and also require asterisk.
365Fix out of memory case in which data could be written one page
366off in the page cache.
367
368Version 1.33
369------------
370Fix caching problem, in which readdir of directory containing a file
371which was cached could cause the file's time stamp to be updated
372without invalidating the readahead data (so we could get stale
373file data on the client for that file even as the server copy changed).
374Cleanup response processing so cifsd can not loop when abnormally
375terminated.
376
377
378Version 1.32
379------------
380Fix oops in ls when Transact2 FindFirst (or FindNext) returns more than one
381transact response for an SMB request and search entry split across two frames.
382Add support for lsattr (getting ext2/ext3/reiserfs attr flags from the server)
383as new protocol extensions. Do not send Get/Set calls for POSIX ACLs
384unless server explicitly claims to support them in CIFS Unix extensions
385POSIX ACL capability bit. Fix packet signing when multiuser mounting with
386different users from the same client to the same server. Fix oops in
387cifs_close. Add mount option for remapping reserved characters in
388filenames (also allow recognizing files with created by SFU which have any
389of these seven reserved characters, except backslash, to be recognized).
390Fix invalid transact2 message (we were sometimes trying to interpret
391oplock breaks as SMB responses). Add ioctl for checking that the
392current uid matches the uid of the mounter (needed by umount.cifs).
393Reduce the number of large buffer allocations in cifs response processing
394(significantly reduces memory pressure under heavy stress with multiple
395processes accessing the same server at the same time).
396
397Version 1.31
398------------
399Fix updates of DOS attributes and time fields so that files on NT4 servers
400do not get marked delete on close. Display sizes of cifs buffer pools in
401cifs stats. Fix oops in unmount when cifsd thread being killed by
402shutdown. Add generic readv/writev and aio support. Report inode numbers
403consistently in readdir and lookup (when serverino mount option is
404specified use the inode number that the server reports - for both lookup
405and readdir, otherwise by default the locally generated inode number is used
406for inodes created in either path since servers are not always able to
407provide unique inode numbers when exporting multiple volumes from under one
408sharename).
409
410Version 1.30
411------------
412Allow new nouser_xattr mount parm to disable xattr support for user namespace.
413Do not flag user_xattr mount parm in dmesg. Retry failures setting file time
414(mostly affects NT4 servers) by retry with handle based network operation.
415Add new POSIX Query FS Info for returning statfs info more accurately.
416Handle passwords with multiple commas in them.
417
418Version 1.29
419------------
420Fix default mode in sysfs of cifs module parms. Remove old readdir routine.
421Fix capabilities flags for large readx so as to allow reads larger than 64K.
422
423Version 1.28
424------------
425Add module init parm for large SMB buffer size (to allow it to be changed
426from its default of 16K) which is especially useful for large file copy
427when mounting with the directio mount option. Fix oops after
428returning from mount when experimental ExtendedSecurity enabled and
429SpnegoNegotiated returning invalid error. Fix case to retry better when
430peek returns from 1 to 3 bytes on socket which should have more data.
431Fixed path based calls (such as cifs lookup) to handle path names
432longer than 530 (now can handle PATH_MAX). Fix pass through authentication
433from Samba server to DC (Samba required dummy LM password).
434
435Version 1.27
436------------
437Turn off DNOTIFY (directory change notification support) by default
438(unless built with the experimental flag) to fix hang with KDE
439file browser. Fix DNOTIFY flag mappings. Fix hang (in wait_event
440waiting on an SMB response) in SendReceive when session dies but
441reconnects quickly from another task. Add module init parms for
442minimum number of large and small network buffers in the buffer pools,
443and for the maximum number of simultaneous requests.
444
445Version 1.26
446------------
447Add setfacl support to allow setting of ACLs remotely to Samba 3.10 and later
448and other POSIX CIFS compliant servers. Fix error mapping for getfacl
449to EOPNOTSUPP when server does not support posix acls on the wire. Fix
450improperly zeroed buffer in CIFS Unix extensions set times call.
451
452Version 1.25
453------------
454Fix internationalization problem in cifs readdir with filenames that map to
455longer UTF-8 strings than the string on the wire was in Unicode. Add workaround
456for readdir to netapp servers. Fix search rewind (seek into readdir to return
457non-consecutive entries). Do not do readdir when server negotiates
458buffer size to small to fit filename. Add support for reading POSIX ACLs from
459the server (add also acl and noacl mount options).
460
461Version 1.24
462------------
463Optionally allow using server side inode numbers, rather than client generated
464ones by specifying mount option "serverino" - this is required for some apps
465to work which double check hardlinked files and have persistent inode numbers.
466
467Version 1.23
468------------
469Multiple bigendian fixes. On little endian systems (for reconnect after
470network failure) fix tcp session reconnect code so we do not try first
471to reconnect on reverse of port 445. Treat reparse points (NTFS junctions)
472as directories rather than symlinks because we can do follow link on them.
473
474Version 1.22
475------------
476Add config option to enable XATTR (extended attribute) support, mapping
477xattr names in the "user." namespace space to SMB/CIFS EAs. Lots of
478minor fixes pointed out by the Stanford SWAT checker (mostly missing
479or out of order NULL pointer checks in little used error paths).
480
481Version 1.21
482------------
483Add new mount parm to control whether mode check (generic_permission) is done
484on the client. If Unix extensions are enabled and the uids on the client
485and server do not match, client permission checks are meaningless on
486server uids that do not exist on the client (this does not affect the
487normal ACL check which occurs on the server). Fix default uid
488on mknod to match create and mkdir. Add optional mount parm to allow
489override of the default uid behavior (in which the server sets the uid
490and gid of newly created files). Normally for network filesystem mounts
491user want the server to set the uid/gid on newly created files (rather than
492using uid of the client processes you would in a local filesystem).
493
494Version 1.20
495------------
496Make transaction counts more consistent. Merge /proc/fs/cifs/SimultaneousOps
497info into /proc/fs/cifs/DebugData. Fix rare oops in readdir
498(in build_wildcard_path_from_dentry). Fix mknod to pass type field
499(block/char/fifo) properly. Remove spurious mount warning log entry when
500credentials passed as mount argument. Set major/minor device number in
501inode for block and char devices when unix extensions enabled.
502
503Version 1.19
504------------
505Fix /proc/fs/cifs/Stats and DebugData display to handle larger
506amounts of return data. Properly limit requests to MAX_REQ (50
507is the usual maximum active multiplex SMB/CIFS requests per server).
508Do not kill cifsd (and thus hurt the other SMB session) when more than one
509session to the same server (but with different userids) exists and one
510of the two user's smb sessions is being removed while leaving the other.
511Do not loop reconnecting in cifsd demultiplex thread when admin
512kills the thread without going through unmount.
513
514Version 1.18
515------------
516Do not rename hardlinked files (since that should be a noop). Flush
517cached write behind data when reopening a file after session abend,
518except when already in write. Grab per socket sem during reconnect
519to avoid oops in sendmsg if overlapping with reconnect. Do not
520reset cached inode file size on readdir for files open for write on
521client.
522
523
524Version 1.17
525------------
526Update number of blocks in file so du command is happier (in Linux a fake
527blocksize of 512 is required for calculating number of blocks in inode).
528Fix prepare write of partial pages to read in data from server if possible.
529Fix race on tcpStatus field between unmount and reconnection code, causing
530cifsd process sometimes to hang around forever. Improve out of memory
531checks in cifs_filldir
532
533Version 1.16
534------------
535Fix incorrect file size in file handle based setattr on big endian hardware.
536Fix oops in build_path_from_dentry when out of memory. Add checks for invalid
537and closing file structs in writepage/partialpagewrite. Add statistics
538for each mounted share (new menuconfig option). Fix endianness problem in
539volume information displayed in /proc/fs/cifs/DebugData (only
540affects big endian architectures). Prevent renames while constructing
541path names for open, mkdir and rmdir.
542
543Version 1.15
544------------
545Change to mempools for alloc smb request buffers and multiplex structs
546to better handle low memory problems (and potential deadlocks).
547
548Version 1.14
549------------
550Fix incomplete listings of large directories on Samba servers when Unix
551extensions enabled. Fix oops when smb_buffer can not be allocated. Fix
552rename deadlock when writing out dirty pages at same time.
553
554Version 1.13
555------------
556Fix open of files in which O_CREATE can cause the mode to change in
557some cases. Fix case in which retry of write overlaps file close.
558Fix PPC64 build error. Reduce excessive stack usage in smb password
559hashing. Fix overwrite of Linux user's view of file mode to Windows servers.
560
561Version 1.12
562------------
563Fixes for large file copy, signal handling, socket retry, buffer
564allocation and low memory situations.
565
566Version 1.11
567------------
568Better port 139 support to Windows servers (RFC1001/RFC1002 Session_Initialize)
569also now allowing support for specifying client netbiosname. NT4 support added.
570
571Version 1.10
572------------
573Fix reconnection (and certain failed mounts) to properly wake up the
574blocked users thread so it does not seem hung (in some cases was blocked
575until the cifs receive timeout expired). Fix spurious error logging
576to kernel log when application with open network files killed.
577
578Version 1.09
579------------
580Fix /proc/fs module unload warning message (that could be logged
581to the kernel log). Fix intermittent failure in connectathon
582test7 (hardlink count not immediately refreshed in case in which
583inode metadata can be incorrectly kept cached when time near zero)
584
585Version 1.08
586------------
587Allow file_mode and dir_mode (specified at mount time) to be enforced
588locally (the server already enforced its own ACLs too) for servers
589that do not report the correct mode (do not support the
590CIFS Unix Extensions).
591
592Version 1.07
593------------
594Fix some small memory leaks in some unmount error paths. Fix major leak
595of cache pages in readpages causing multiple read oriented stress
596testcases (including fsx, and even large file copy) to fail over time.
597
598Version 1.06
599------------
600Send NTCreateX with ATTR_POSIX if Linux/Unix extensions negotiated with server.
601This allows files that differ only in case and improves performance of file
602creation and file open to such servers. Fix semaphore conflict which causes
603slow delete of open file to Samba (which unfortunately can cause an oplock
604break to self while vfs_unlink held i_sem) which can hang for 20 seconds.
605
606Version 1.05
607------------
608fixes to cifs_readpages for fsx test case
609
610Version 1.04
611------------
612Fix caching data integrity bug when extending file size especially when no
613oplock on file. Fix spurious logging of valid already parsed mount options
614that are parsed outside of the cifs vfs such as nosuid.
615
616
617Version 1.03
618------------
619Connect to server when port number override not specified, and tcp port
620uninitialized. Reset search to restart at correct file when kernel routine
621filldir returns error during large directory searches (readdir).
622
623Version 1.02
624------------
625Fix caching problem when files opened by multiple clients in which
626page cache could contain stale data, and write through did
627not occur often enough while file was still open when read ahead
628(read oplock) not allowed. Treat "sep=" when first mount option
629as an override of comma as the default separator between mount
630options.
631
632Version 1.01
633------------
634Allow passwords longer than 16 bytes. Allow null password string.
635
636Version 1.00
637------------
638Gracefully clean up failed mounts when attempting to mount to servers such as
639Windows 98 that terminate tcp sessions during protocol negotiation. Handle
640embedded commas in mount parsing of passwords.
641
642Version 0.99
643------------
644Invalidate local inode cached pages on oplock break and when last file
645instance is closed so that the client does not continue using stale local
646copy rather than later modified server copy of file. Do not reconnect
647when server drops the tcp session prematurely before negotiate
648protocol response. Fix oops in reopen_file when dentry freed. Allow
649the support for CIFS Unix Extensions to be disabled via proc interface.
650
651Version 0.98
652------------
653Fix hang in commit_write during reconnection of open files under heavy load.
654Fix unload_nls oops in a mount failure path. Serialize writes to same socket
655which also fixes any possible races when cifs signatures are enabled in SMBs
656being sent out of signature sequence number order.
657
658Version 0.97
659------------
660Fix byte range locking bug (endian problem) causing bad offset and
661length.
662
663Version 0.96
664------------
665Fix oops (in send_sig) caused by CIFS unmount code trying to
666wake up the demultiplex thread after it had exited. Do not log
667error on harmless oplock release of closed handle.
668
669Version 0.95
670------------
671Fix unsafe global variable usage and password hash failure on gcc 3.3.1
672Fix problem reconnecting secondary mounts to same server after session
673failure. Fix invalid dentry - race in mkdir when directory gets created
674by another client between the lookup and mkdir.
675
676Version 0.94
677------------
678Fix to list processing in reopen_files. Fix reconnection when server hung
679but tcpip session still alive. Set proper timeout on socket read.
680
681Version 0.93
682------------
683Add missing mount options including iocharset. SMP fixes in write and open.
684Fix errors in reconnecting after TCP session failure. Fix module unloading
685of default nls codepage
686
687Version 0.92
688------------
689Active smb transactions should never go negative (fix double FreeXid). Fix
690list processing in file routines. Check return code on kmalloc in open.
691Fix spinlock usage for SMP.
692
693Version 0.91
694------------
695Fix oops in reopen_files when invalid dentry. drop dentry on server rename
696and on revalidate errors. Fix cases where pid is now tgid. Fix return code
697on create hard link when server does not support them.
698
699Version 0.90
700------------
701Fix scheduling while atomic error in getting inode info on newly created file.
702Fix truncate of existing files opened with O_CREAT but not O_TRUNC set.
703
704Version 0.89
705------------
706Fix oops on write to dead tcp session. Remove error log write for case when file open
707O_CREAT but not O_EXCL
708
709Version 0.88
710------------
711Fix non-POSIX behavior on rename of open file and delete of open file by taking
712advantage of trans2 SetFileInfo rename facility if available on target server.
713Retry on ENOSPC and EAGAIN socket errors.
714
715Version 0.87
716------------
717Fix oops on big endian readdir. Set blksize to be even power of two (2**blkbits) to fix
718allocation size miscalculation. After oplock token lost do not read through
719cache.
720
721Version 0.86
722------------
723Fix oops on empty file readahead. Fix for file size handling for locally cached files.
724
725Version 0.85
726------------
727Fix oops in mkdir when server fails to return inode info. Fix oops in reopen_files
728during auto reconnection to server after server recovered from failure.
729
730Version 0.84
731------------
732Finish support for Linux 2.5 open/create changes, which removes the
733redundant NTCreate/QPathInfo/close that was sent during file create.
734Enable oplock by default. Enable packet signing by default (needed to
735access many recent Windows servers)
736
737Version 0.83
738------------
739Fix oops when mounting to long server names caused by inverted parms to kmalloc.
740Fix MultiuserMount (/proc/fs/cifs configuration setting) so that when enabled
741we will choose a cifs user session (smb uid) that better matches the local
742uid if a) the mount uid does not match the current uid and b) we have another
743session to the same server (ip address) for a different mount which
744matches the current local uid.
745
746Version 0.82
747------------
748Add support for mknod of block or character devices. Fix oplock
749code (distributed caching) to properly send response to oplock
750break from server.
751
752Version 0.81
753------------
754Finish up CIFS packet digital signing for the default
755NTLM security case. This should help Windows 2003
756network interoperability since it is common for
757packet signing to be required now. Fix statfs (stat -f)
758which recently started returning errors due to
759invalid value (-1 instead of 0) being set in the
760struct kstatfs f_ffiles field.
761
762Version 0.80
763------------
764Fix oops on stopping oplock thread when removing cifs when
765built as module.
766
767Version 0.79
768------------
769Fix mount options for ro (readonly), uid, gid and file and directory mode.
770
771Version 0.78
772------------
773Fix errors displayed on failed mounts to be more understandable.
774Fixed various incorrect or misleading smb to posix error code mappings.
775
776Version 0.77
777------------
778Fix display of NTFS DFS junctions to display as symlinks.
779They are the network equivalent. Fix oops in
780cifs_partialpagewrite caused by missing spinlock protection
781of openfile linked list. Allow writebehind caching errors to
782be returned to the application at file close.
783
784Version 0.76
785------------
786Clean up options displayed in /proc/mounts by show_options to
787be more consistent with other filesystems.
788
789Version 0.75
790------------
791Fix delete of readonly file to Windows servers. Reflect
792presence or absence of read only dos attribute in mode
793bits for servers that do not support CIFS Unix extensions.
794Fix shortened results on readdir of large directories to
795servers supporting CIFS Unix extensions (caused by
796incorrect resume key).
797
798Version 0.74
799------------
800Fix truncate bug (set file size) that could cause hangs e.g. running fsx
801
802Version 0.73
803------------
804unload nls if mount fails.
805
806Version 0.72
807------------
808Add resume key support to search (readdir) code to workaround
809Windows bug. Add /proc/fs/cifs/LookupCacheEnable which
810allows disabling caching of attribute information for
811lookups.
812
813Version 0.71
814------------
815Add more oplock handling (distributed caching code). Remove
816dead code. Remove excessive stack space utilization from
817symlink routines.
818
819Version 0.70
820------------
821Fix oops in get dfs referral (triggered when null path sent in to
822mount). Add support for overriding rsize at mount time.
823
824Version 0.69
825------------
826Fix buffer overrun in readdir which caused intermittent kernel oopses.
827Fix writepage code to release kmap on write data. Allow "-ip=" new
828mount option to be passed in on parameter distinct from the first part
829(server name portion of) the UNC name. Allow override of the
830tcp port of the target server via new mount option "-port="
831
832Version 0.68
833------------
834Fix search handle leak on rewind. Fix setuid and gid so that they are
835reflected in the local inode immediately. Cleanup of whitespace
836to make 2.4 and 2.5 versions more consistent.
837
838
839Version 0.67
840------------
841Fix signal sending so that captive thread (cifsd) exits on umount
842(which was causing the warning in kmem_cache_free of the request buffers
843at rmmod time). This had broken as a side effect of the recent global
844kernel change to daemonize. Fix memory leak in readdir code which
845showed up in "ls -R" (and applications that did search rewinding).
846
847Version 0.66
848------------
849Reconnect tids and fids after session reconnection (still do not
850reconnect byte range locks though). Fix problem caching
851lookup information for directory inodes, improving performance,
852especially in deep directory trees. Fix various build warnings.
853
854Version 0.65
855------------
856Finish fixes to commit write for caching/readahead consistency. fsx
857now works to Samba servers. Fix oops caused when readahead
858was interrupted by a signal.
859
860Version 0.64
861------------
862Fix data corruption (in partial page after truncate) that caused fsx to
863fail to Windows servers. Cleaned up some extraneous error logging in
864common error paths. Add generic sendfile support.
865
866Version 0.63
867------------
868Fix memory leak in AllocMidQEntry.
869Finish reconnection logic, so connection with server can be dropped
870(or server rebooted) and the cifs client will reconnect.
871
872Version 0.62
873------------
874Fix temporary socket leak when bad userid or password specified
875(or other SMBSessSetup failure). Increase maximum buffer size to slightly
876over 16K to allow negotiation of up to Samba and Windows server default read
877sizes. Add support for readpages
878
879Version 0.61
880------------
881Fix oops when username not passed in on mount. Extensive fixes and improvements
882to error logging (strip redundant newlines, change debug macros to ensure newline
883passed in and to be more consistent). Fix writepage wrong file handle problem,
884a readonly file handle could be incorrectly used to attempt to write out
885file updates through the page cache to multiply open files. This could cause
886the iozone benchmark to fail on the fwrite test. Fix bug mounting two different
887shares to the same Windows server when using different usernames
888(doing this to Samba servers worked but Windows was rejecting it) - now it is
889possible to use different userids when connecting to the same server from a
890Linux client. Fix oops when treeDisconnect called during unmount on
891previously freed socket.
892
893Version 0.60
894------------
895Fix oops in readpages caused by not setting address space operations in inode in
896rare code path.
897
898Version 0.59
899------------
900Includes support for deleting of open files and renaming over existing files (per POSIX
901requirement). Add readlink support for Windows junction points (directory symlinks).
902
903Version 0.58
904------------
905Changed read and write to go through pagecache. Added additional address space operations.
906Memory mapped operations now working.
907
908Version 0.57
909------------
910Added writepage code for additional memory mapping support. Fixed leak in xids causing
911the simultaneous operations counter (/proc/fs/cifs/SimultaneousOps) to increase on
912every stat call. Additional formatting cleanup.
913
914Version 0.56
915------------
916Fix bigendian bug in order of time conversion. Merge 2.5 to 2.4 version. Formatting cleanup.
917
918Version 0.55
919------------
920Fixes from Zwane Mwaikambo for adding missing return code checking in a few places.
921Also included a modified version of his fix to protect global list manipulation of
922the smb session and tree connection and mid related global variables.
923
924Version 0.54
925------------
926Fix problem with captive thread hanging around at unmount time. Adjust to 2.5.42-pre
927changes to superblock layout. Remove wasteful allocation of smb buffers (now the send
928buffer is reused for responses). Add more oplock handling. Additional minor cleanup.
929
930Version 0.53
931------------
932More stylistic updates to better match kernel style. Add additional statistics
933for filesystem which can be viewed via /proc/fs/cifs. Add more pieces of NTLMv2
934and CIFS Packet Signing enablement.
935
936Version 0.52
937------------
938Replace call to sleep_on with safer wait_on_event.
939Make stylistic changes to better match kernel style recommendations.
940Remove most typedef usage (except for the PDUs themselves).
941
942Version 0.51
943------------
944Update mount so the -unc mount option is no longer required (the ip address can be specified
945in a UNC style device name). Implementation of readpage/writepage started.
946
947Version 0.50
948------------
949Fix intermittent problem with incorrect smb header checking on badly
950fragmented tcp responses
951
952Version 0.49
953------------
954Fixes to setting of allocation size and file size.
955
956Version 0.48
957------------
958Various 2.5.38 fixes. Now works on 2.5.38
959
960Version 0.47
961------------
962Prepare for 2.5 kernel merge. Remove ifdefs.
963
964Version 0.46
965------------
966Socket buffer management fixes. Fix dual free.
967
968Version 0.45
969------------
970Various big endian fixes for hardlinks and symlinks and also for dfs.
971
972Version 0.44
973------------
974Various big endian fixes for servers with Unix extensions such as Samba
975
976Version 0.43
977------------
978Various FindNext fixes for incorrect filenames on large directory searches on big endian
979clients. basic posix file i/o tests now work on big endian machines, not just le
980
981Version 0.42
982------------
983SessionSetup and NegotiateProtocol now work from Big Endian machines.
984Various Big Endian fixes found during testing on the Linux on 390. Various fixes for compatibility with older
985versions of 2.4 kernel (now builds and works again on kernels at least as early as 2.4.7).
986
987Version 0.41
988------------
989Various minor fixes for Connectathon Posix "basic" file i/o test suite. Directory caching fixed so hardlinked
990files now return the correct number of links on fstat as they are repeatedly linked and unlinked.
991
992Version 0.40
993------------
994Implemented "Raw" (i.e. not encapsulated in SPNEGO) NTLMSSP (i.e. the Security Provider Interface used to negotiate
995advanced session authentication). Raw NTLMSSP is preferred by Windows 2000 Professional and Windows XP.
996Began implementing support for SPNEGO encapsulation of NTLMSSP based session authentication blobs
997(which is the mechanism preferred by Windows 2000 server in the absence of Kerberos).
998
999Version 0.38
1000------------
1001Introduced optional mount helper utility mount.cifs and made coreq changes to cifs vfs to enable
1002it. Fixed a few bugs in the DFS code (e.g. bcc two bytes too short and incorrect uid in PDU).
1003
1004Version 0.37
1005------------
1006Rewrote much of connection and mount/unmount logic to handle bugs with
1007multiple uses to same share, multiple users to same server etc.
1008
1009Version 0.36
1010------------
1011Fixed major problem with dentry corruption (missing call to dput)
1012
1013Version 0.35
1014------------
1015Rewrite of readdir code to fix bug. Various fixes for bigendian machines.
1016Begin adding oplock support. Multiusermount and oplockEnabled flags added to /proc/fs/cifs
1017although corresponding function not fully implemented in the vfs yet
1018
1019Version 0.34
1020------------
1021Fixed dentry caching bug, misc. cleanup
1022
1023Version 0.33
1024------------
1025Fixed 2.5 support to handle build and configure changes as well as misc. 2.5 changes. Now can build
1026on current 2.5 beta version (2.5.24) of the Linux kernel as well as on 2.4 Linux kernels.
1027Support for STATUS codes (newer 32 bit NT error codes) added. DFS support begun to be added.
1028
1029Version 0.32
1030------------
1031Unix extensions (symlink, readlink, hardlink, chmod and some chgrp and chown) implemented
1032and tested against Samba 2.2.5
1033
1034
1035Version 0.31
1036------------
10371) Fixed lockrange to be correct (it was one byte too short)
1038
10392) Fixed GETLK (i.e. the fcntl call to test a range of bytes in a file to see if locked) to correctly
1040show range as locked when there is a conflict with an existing lock.
1041
10423) default file perms are now 2767 (indicating support for mandatory locks) instead of 777 for directories
1043in most cases. Eventually will offer optional ability to query server for the correct perms.
1044
10454) Fixed eventual trap when mounting twice to different shares on the same server when the first succeeded
1046but the second one was invalid and failed (the second one was incorrectly disconnecting the tcp and smb
1047session)
1048
10495) Fixed error logging of valid mount options
1050
10516) Removed logging of password field.
1052
10537) Moved negotiate, treeDisconnect and ulogoffX (only tConx and SessSetup remain in connect.c) to cifssmb.c
1054and cleaned them up and made them more consistent with other cifs functions.
1055
10568) Server support for Unix extensions is now fully detected and FindFirst is implemented both ways
1057(with or without Unix extensions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
1058nor is the symlink support using the Unix extensions
1059
10609) Started adding the readlink and follow_link code
1061
1062Version 0.3
1063-----------
1064Initial drop
1065
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index aa0d68b086eb..1964d212ab08 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
	  cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o smb1ops.o
+	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o
 
 cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
deleted file mode 100644
index 2d5622f60e11..000000000000
--- a/fs/cifs/README
+++ /dev/null
@@ -1,753 +0,0 @@
1The CIFS VFS support for Linux supports many advanced network filesystem
2features such as hierarchical dfs like namespace, hardlinks, locking and more.
3It was designed to comply with the SNIA CIFS Technical Reference (which
4supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
5practical interoperability with Windows 2000, Windows XP, Samba and equivalent
6servers. This code was developed in participation with the Protocol Freedom
7Information Foundation.
8
9Please see
10 http://protocolfreedom.org/ and
11 http://samba.org/samba/PFIF/
12for more details.
13
14
15For questions or bug reports please contact:
16 sfrench@samba.org (sfrench@us.ibm.com)
17
18Build instructions:
19==================
20For Linux 2.4:
211) Get the kernel source (e.g. from http://www.kernel.org)
22and download the cifs vfs source (see the project page
23at http://us1.samba.org/samba/Linux_CIFS_client.html).
24Change directory into the top of the kernel directory,
25then patch the kernel (e.g. "patch -p1 < cifs_24.patch")
26to add the cifs vfs to your kernel configure options if
27it has not already been added (e.g. current SuSE and UL
28users do not need to apply the cifs_24.patch since the cifs vfs is
29already in the kernel configure menu). Then
30mkdir linux/fs/cifs and copy the current cifs vfs files from
31the cifs download to your kernel build directory e.g.
32
33 cp <cifs_download_dir>/fs/cifs/* <kernel_download_dir>/fs/cifs
34
352) make menuconfig (or make xconfig)
363) select cifs from within the network filesystem choices
374) save and exit
385) make dep
396) make modules (or "make" if CIFS VFS not to be built as a module)
40
41For Linux 2.6:
421) Download the kernel (e.g. from http://www.kernel.org)
43and change directory into the top of the kernel directory tree
44(e.g. /usr/src/linux-2.5.73)
452) make menuconfig (or make xconfig)
463) select cifs from within the network filesystem choices
474) save and exit
485) make
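
As a rough illustration only (the directory name and kernel version here are
just examples), the 2.6 build steps above might look like this from a shell:

	cd /usr/src/linux-2.5.73
	make menuconfig          # select cifs from the network filesystem choices
	make
	make modules_install     # only needed if cifs was configured as a module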
49
50
51Installation instructions:
52=========================
53If you have built the CIFS vfs as module (successfully) simply
54type "make modules_install" (or if you prefer, manually copy the file to
55the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o).
56
57If you have built the CIFS vfs into the kernel itself, follow the instructions
58for your distribution on how to install a new kernel (usually you
59would simply type "make install").
60
61If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on
62the CIFS VFS web site) copy it to the same directory in which mount.smbfs and
63similar files reside (usually /sbin). Although the helper software is not
64required, mount.cifs is recommended. Eventually the Samba 3.0 utility program
65"net" may also be helpful since it may someday provide easier mount syntax for
66users who are used to Windows e.g.
67 net use <mount point> <UNC name or cifs URL>
68Note that running the Winbind pam/nss module (logon service) on all of your
69Linux clients is useful in mapping Uids and Gids consistently across the
70domain to the proper network user. The mount.cifs mount helper can be
71trivially built from Samba 3.0 or later source e.g. by executing:
72
73 gcc samba/source/client/mount.cifs.c -o mount.cifs
74
75If cifs is built as a module, then the size and number of network buffers
76and maximum number of simultaneous requests to one server can be configured.
77Changing these from their defaults is not recommended. The list of settings
78that can be changed at module initialization time (by running insmod cifs.ko)
79can be seen by executing modinfo on the module e.g.
80 modinfo kernel/fs/cifs/cifs.ko
81
82Allowing User Mounts
83====================
84Permitting users to mount and unmount over directories they own is possible
85with the cifs vfs. A way to enable such mounting is to mark the mount.cifs
86utility as suid (e.g. "chmod +s /sbin/mount.cifs"). To enable users to
87umount shares they mount requires
881) mount.cifs version 1.4 or later
892) an entry for the share in /etc/fstab indicating that a user may
90unmount it e.g.
91//server/usersharename /mnt/username cifs user 0 0
92
93Note that when the mount.cifs utility is run suid (allowing user mounts),
94in order to reduce risks, the "nosuid" mount flag is passed in on mount to
95disallow execution of an suid program mounted on the remote target.
96When mount is executed as root, nosuid is not passed in by default,
97and execution of suid programs on the remote target would be enabled
98by default. This can be changed, as with nfs and other filesystems,
99by simply specifying "nosuid" among the mount options. For user mounts,
100though, being able to pass the suid flag to mount requires rebuilding
101mount.cifs with the following flag:
102
103 gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
104
105There is a corresponding manual page for cifs mounting in the Samba 3.0 and
106later source tree in docs/manpages/mount.cifs.8
107
108Allowing User Unmounts
109======================
110To permit users to unmount directories that they have user mounted (see above),
111the utility umount.cifs may be used. It may be invoked directly, or if
112umount.cifs is placed in /sbin, umount can invoke the cifs umount helper
113(at least for most versions of the umount utility) for umount of cifs
114mounts, unless umount is invoked with -i (which will avoid invoking a umount
115helper). As with mount.cifs, to enable user unmounts umount.cifs must be marked
116as suid (e.g. "chmod +s /sbin/umount.cifs") or equivalent (some distributions
117allow adding entries to the /etc/permissions file to achieve the
118equivalent suid effect). For this utility to succeed the target path
119must be a cifs mount, and the uid of the current user must match the uid
120of the user who mounted the resource.
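
As a sketch only, assuming the suid mount.cifs and umount.cifs helpers
described above plus the example /etc/fstab entry from the previous section,
a non-root user could then do:

	mount /mnt/username
	umount /mnt/username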
121
122Also note that the customary way of allowing user mounts and unmounts is
123(instead of using mount.cifs and umount.cifs as suid) to add a line
124to the file /etc/fstab for each //server/share you wish to mount, but
125this can become unwieldy when potential mount targets include many
126or unpredictable UNC names.
127
128Samba Considerations
129====================
130To get the maximum benefit from the CIFS VFS, we recommend using a server that
131supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or later or
132Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers.
133Note that uid, gid and file permissions will display default values if you do
134not have a server that supports the Unix extensions for CIFS (such as Samba
1352.2.5 or later). To enable the Unix CIFS Extensions in the Samba server, add
136the line:
137
138 unix extensions = yes
139
140to your smb.conf file on the server. Note that the following smb.conf settings
141are also useful (on the Samba server) when the majority of clients are Unix or
142Linux:
143
144 case sensitive = yes
145 delete readonly = yes
146 ea support = yes
147
148Note that server ea support is required for supporting xattrs from the Linux
149cifs client, and that EA support is present in later versions of Samba (e.g.
1503.0.6 and later). EA support also works in all versions of Windows, at least to
151shares on NTFS filesystems. Extended Attribute (xattr) support is an optional
152feature of most Linux filesystems which may require enabling via
153make menuconfig. Client support for extended attributes (user xattr) can be
154disabled on a per-mount basis by specifying "nouser_xattr" on mount.
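
As a brief illustration (the attribute name and path below are made up, and
the getfattr/setfattr utilities must be installed on the client), user xattrs
on a cifs mount can be exercised as follows:

	setfattr -n user.comment -v "some value" /mnt/somefile
	getfattr -n user.comment /mnt/somefile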
155
156The CIFS client can get and set POSIX ACLs (getfacl, setfacl) to Samba servers
157version 3.10 and later. Setting POSIX ACLs requires enabling both XATTR and
158then POSIX support in the CIFS configuration options when building the cifs
159module. POSIX ACL support can be disabled on a per mount basis by specifying
160"noacl" on mount.
161
162Some administrators may want to change Samba's smb.conf "map archive" and
163"create mask" parameters from the default. Unless the create mask is changed
164newly created files can end up with an unnecessarily restrictive default mode,
165which may not be what you want, although if the CIFS Unix extensions are
166enabled on the server and client, subsequent setattr calls (e.g. chmod) can
167fix the mode. Note that creating special devices (mknod) remotely
168may require specifying a mkdev function to Samba if you are not using
169Samba 3.0.6 or later. For more information on these see the manual pages
170("man smb.conf") on the Samba server system. Note that the cifs vfs,
171unlike the smbfs vfs, does not read the smb.conf on the client system
172(the few optional settings are passed in on mount via -o parameters instead).
173Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete
174open files (required for strict POSIX compliance). Windows Servers already
175supported this feature. Samba server does not allow symlinks that refer to files
176outside of the share, so in Samba versions prior to 3.0.6, most symlinks to
177files with absolute paths (ie beginning with slash) such as:
178 ln -s /mnt/foo bar
179would be forbidden. Samba 3.0.6 server or later includes the ability to create
180such symlinks safely by converting unsafe symlinks (ie symlinks to server
181files that are outside of the share) to a samba specific format on the server
182that is ignored by local server applications and non-cifs clients and that will
183not be traversed by the Samba server. This is opaque to the Linux client
184application using the cifs vfs. Absolute symlinks will work to Samba 3.0.5 or
185later, but only for remote clients using the CIFS Unix extensions, and will
186be invisible to Windows clients and typically will not affect local
187applications running on the same server as Samba.
188
189Use instructions:
190================
191Once the CIFS VFS support is built into the kernel or installed as a module
192(cifs.o), you can use mount syntax like the following to access Samba or Windows
193servers:
194
195 mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
196
197Before -o the option -v may be specified to make the mount.cifs
198mount helper display the mount steps more verbosely.
199After -o the following commonly used cifs vfs specific options
200are supported:
201
202 user=<username>
203 pass=<password>
204 domain=<domain name>
205
206Other cifs mount options are described below. Use of TCP names (in addition to
207ip addresses) is available if the mount helper (mount.cifs) is installed. If
208you do not trust the server to which you are mounted, or if you do not have
209cifs signing enabled (and the physical network is insecure), consider use
210of the standard mount options "noexec" and "nosuid" to reduce the risk of
211running an altered binary on your local system (downloaded from a hostile server
212or altered by a hostile router).
213
214Although mounting using a format corresponding to the CIFS URL specification is
215not possible in mount.cifs yet, it is possible to use an alternate format
216for the server and sharename (which is somewhat similar to NFS style mount
217syntax) instead of the more widely used UNC format (i.e. \\server\share):
218 mount -t cifs tcp_name_of_server:share_name /mnt -o user=myname,pass=mypasswd
219
220When using the mount helper mount.cifs, passwords may be specified via alternate
221mechanisms, instead of specifying it after -o using the normal "pass=" syntax
222on the command line:
2231) By including it in a credential file. Specify credentials=filename as one
224of the mount options. Credential files contain two lines
225 username=someuser
226 password=your_password
2272) By specifying the password in the PASSWD environment variable (similarly
228the user name can be taken from the USER environment variable).
2293) By specifying the password in a file by name via PASSWD_FILE
2304) By specifying the password in a file by file descriptor via PASSWD_FD
231
232If no password is provided, mount.cifs will prompt for password entry
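
For example (the file name and mount point are arbitrary), a credentials file
readable only by its owner could be used as follows:

	chmod 600 /root/creds.txt
	mount -t cifs //server/share /mnt -o credentials=/root/creds.txt

or the password could instead be supplied from the environment:

	PASSWD=mypassword mount -t cifs //server/share /mnt -o user=myname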
233
234Restrictions
235============
236Servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC
2371001/1002 support for "Netbios-Over-TCP/IP." This is not likely to be a
238problem as most servers support this.
239
240Valid filenames differ between Windows and Linux. Windows typically restricts
241filenames which contain certain reserved characters (e.g. the character :
242which is used to delimit the beginning of a stream name by Windows), while
243Linux allows a slightly wider set of valid characters in filenames. Windows
244servers can remap such characters when an explicit mapping is specified in
245the Server's registry. Samba starting with version 3.10 will allow such
246filenames (ie those which contain valid Linux characters, which normally
247would be forbidden for Windows/CIFS semantics) as long as the server is
248configured for Unix Extensions (and the client has not disabled
249/proc/fs/cifs/LinuxExtensionsEnabled).
250
251
252CIFS VFS Mount Options
253======================
254A partial list of the supported mount options follows (a sample mount command combining several of them appears after the list):
255 user The user name to use when trying to establish
256 the CIFS session.
257 password The user password. If the mount helper is
258 installed, the user will be prompted for password
259 if not supplied.
260 ip The ip address of the target server
261 unc The target server Universal Network Name (export) to
262 mount.
263 domain Set the SMB/CIFS workgroup name prepended to the
264 username during CIFS session establishment
265 forceuid Set the default uid for inodes to the uid
266 passed in on mount. For mounts to servers
267 which do support the CIFS Unix extensions, such as a
268 properly configured Samba server, the server provides
269 the uid, gid and mode so this parameter should not be
270 specified unless the server and clients uid and gid
271 numbering differ. If the server and client are in the
272 same domain (e.g. running winbind or nss_ldap) and
273 the server supports the Unix Extensions then the uid
274 and gid can be retrieved from the server (and uid
275 and gid would not have to be specified on the mount).
276 For servers which do not support the CIFS Unix
277 extensions, the default uid (and gid) returned on lookup
278 of existing files will be the uid (gid) of the person
279 who executed the mount (root, except when mount.cifs
280 is configured setuid for user mounts) unless the "uid="
281 (gid) mount option is specified. Also note that permission
282 checks (authorization checks) on accesses to a file occur
283 at the server, but there are cases in which an administrator
284 may want to restrict at the client as well. For those
285 servers which do not report a uid/gid owner
286 (such as Windows), permissions can also be checked at the
287 client, and a crude form of client side permission checking
288 can be enabled by specifying file_mode and dir_mode on
289 the client. (default)
290 forcegid (similar to above but for the groupid instead of uid) (default)
291 noforceuid Fill in file owner information (uid) by requesting it from
292 the server if possible. With this option, the value given in
293 the uid= option (on mount) will only be used if the server
294 can not support returning uids on inodes.
295 noforcegid (similar to above but for the group owner, gid, instead of uid)
296 uid Set the default uid for inodes, and indicate to the
297 cifs kernel driver which local user mounted. If the server
298 supports the unix extensions the default uid is
299 not used to fill in the owner fields of inodes (files)
300 unless the "forceuid" parameter is specified.
301 gid Set the default gid for inodes (similar to above).
302 file_mode If CIFS Unix extensions are not supported by the server
303 this overrides the default mode for file inodes.
304 fsc Enable local disk caching using FS-Cache (off by default). This
305 option could be useful to improve performance on a slow link,
306 heavily loaded server and/or network where reading from the
307 disk is faster than reading from the server (over the network).
308 This could also impact scalability positively as the
309 number of calls to the server are reduced. However, local
310 caching is not suitable for all workloads for e.g. read-once
311 type workloads. So, you need to consider carefully your
312 workload/scenario before using this option. Currently, local
313 disk caching is functional for CIFS files opened as read-only.
314 dir_mode If CIFS Unix extensions are not supported by the server
315 this overrides the default mode for directory inodes.
316 port attempt to contact the server on this tcp port, before
317 trying the usual ports (port 445, then 139).
318 iocharset Codepage used to convert local path names to and from
319 Unicode. Unicode is used by default for network path
320 names if the server supports it. If iocharset is
321 not specified then the nls_default specified
322 during the local client kernel build will be used.
323 If server does not support Unicode, this parameter is
324 unused.
325 rsize default read size (usually 16K). The client currently
326 can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize
327 defaults to 16K and may be changed (from 8K to the maximum
328 kmalloc size allowed by your kernel) at module install time
329 for cifs.ko. Setting CIFSMaxBufSize to a very large value
330 will cause cifs to use more memory and may reduce performance
331 in some cases. To use rsize greater than 127K (the original
332 cifs protocol maximum) also requires that the server support
333 a new Unix Capability flag (for very large read) which some
334 newer servers (e.g. Samba 3.0.26 or later) do. rsize can be
335 set from a minimum of 2048 to a maximum of 130048 (127K or
336 CIFSMaxBufSize, whichever is smaller)
337 wsize default write size (default 57344)
338 maximum wsize currently allowed by CIFS is 57344 (fourteen
339 4096 byte pages)
340 actimeo=n attribute cache timeout in seconds (default 1 second).
341 After this timeout, the cifs client requests fresh attribute
342 information from the server. This option allows tuning the
343 attribute cache timeout to suit the workload needs. Shorter
344 timeouts mean better cache coherency, but an increased number
345 of calls to the server. Longer timeouts mean a reduced number
346 of calls to the server at the expense of less strict cache
347 coherency checks (i.e. incorrect attribute cache for a short
348 period of time).
349 rw mount the network share read-write (note that the
350 server may still consider the share read-only)
351 ro mount network share read-only
352 version used to distinguish different versions of the
353 mount helper utility (not typically needed)
354 sep if first mount option (after the -o), overrides
355 the comma as the separator between the mount
356 parms. e.g.
357 -o user=myname,password=mypassword,domain=mydom
358 could be passed instead with period as the separator by
359 -o sep=.user=myname.password=mypassword.domain=mydom
360 this might be useful when comma is contained within username
361 or password or domain. This option is less important
362 when the cifs mount helper mount.cifs (version 1.1 or later)
363 is used.
364 nosuid Do not allow remote executables with the suid bit
365 to be executed. This is only meaningful for mounts
366 to servers such as Samba which support the CIFS Unix Extensions.
367 If you do not trust the servers in your network (your mount
368 targets) it is recommended that you specify this option for
369 greater security.
370 exec Permit execution of binaries on the mount.
371 noexec Do not permit execution of binaries on the mount.
372 dev Recognize block devices on the remote mount.
373 nodev Do not recognize devices on the remote mount.
374 suid Allow remote files on this mountpoint with suid enabled to
375 be executed (default for mounts when executed as root,
376 nosuid is default for user mounts).
377 credentials Although ignored by the cifs kernel component, it is used by
378 the mount helper, mount.cifs. When mount.cifs is installed it
379 opens and reads the credential file specified in order
380 to obtain the userid and password arguments which are passed to
381 the cifs vfs.
382 guest Although ignored by the kernel component, the mount.cifs
383 mount helper will not prompt the user for a password
384 if guest is specified on the mount options. If no
385 password is specified a null password will be used.
386 perm Client does permission checks (vfs_permission check of uid
387 and gid of the file against the mode and desired operation).
388 Note that this is in addition to the normal ACL check on the
389 target machine done by the server software.
390 Client permission checking is enabled by default.
391 noperm Client does not do permission checks. This can expose
392 files on this mount to access by other users on the local
393 client system. It is typically only needed when the server
394 supports the CIFS Unix Extensions but the UIDs/GIDs on the
395 client and server system do not match closely enough to allow
396 access by the user doing the mount, but it may be useful with
397 non CIFS Unix Extension mounts for cases in which the default
398 mode is specified on the mount but is not to be enforced on the
399 client (e.g. perhaps when MultiUserMount is enabled)
400 Note that this does not affect the normal ACL check on the
401 target machine done by the server software (of the server
402 ACL against the user name provided at mount time).
403 serverino Use server's inode numbers instead of generating automatically
404 incrementing inode numbers on the client. Although this will
405 make it easier to spot hardlinked files (as they will have
406 the same inode numbers) and inode numbers may be persistent,
407 note that the server does not guarantee that the inode numbers
408 are unique if multiple server side mounts are exported under a
409 single share (since inode numbers on the servers might not
410 be unique if multiple filesystems are mounted under the same
411 shared higher level directory). Note that some older servers
412 (e.g. pre-Windows 2000) do not support returning UniqueIDs
413 or the CIFS Unix Extensions equivalent and for those
414 this mount option will have no effect. Exporting cifs mounts
415 under nfsd requires this mount option on the cifs mount.
416 This is now the default if server supports the
417 required network operation.
418 noserverino Client generates inode numbers (rather than using the actual one
419 from the server). These inode numbers will vary after
420 unmount or reboot which can confuse some applications,
421 but not all server filesystems support unique inode
422 numbers.
423 setuids If the CIFS Unix extensions are negotiated with the server
424 the client will attempt to set the effective uid and gid of
425 the local process on newly created files, directories, and
426 devices (create, mkdir, mknod). If the CIFS Unix Extensions
427 are not negotiated, for newly created files and directories
428 instead of using the default uid and gid specified on
429 the mount, cache the new file's uid and gid locally which means
430 that the uid for the file can change when the inode is
431 reloaded (or the user remounts the share).
432 nosetuids The client will not attempt to set the uid and gid on
433 newly created files, directories, and devices (create,
434 mkdir, mknod) which will result in the server setting the
435 uid and gid to the default (usually the server uid of the
436 user who mounted the share). Letting the server (rather than
437 the client) set the uid and gid is the default. If the CIFS
438 Unix Extensions are not negotiated then the uid and gid for
439 new files will appear to be the uid (gid) of the mounter or the
440 uid (gid) parameter specified on the mount.
441 netbiosname When mounting to servers via port 139, specifies the RFC1001
442 source name to use to represent the client netbios machine
443 name when doing the RFC1001 netbios session initialize.
444 direct Do not do inode data caching on files opened on this mount.
445 This precludes mmapping files on this mount. In some cases
446 with fast networks and little or no caching benefits on the
447 client (e.g. when the application is doing large sequential
448 reads bigger than page size without rereading the same data)
449 this can provide better performance than the default
450 behavior which caches reads (readahead) and writes
451 (writebehind) through the local Linux client pagecache
452 if oplock (caching token) is granted and held. Note that
453 direct allows write operations larger than page size
454 to be sent to the server.
455 strictcache Use for switching on strict cache mode. In this mode the
456 client reads from the cache whenever it holds a Level II oplock,
457 otherwise it reads from the server. All written data is stored
458 in the cache, but if the client does not hold an Exclusive oplock,
459 it writes the data to the server.
460 rwpidforward Forward the pid of the process that opened a file to any read or write
461 operation on that file. This prevents applications like WINE
462 from failing on read and write if we use mandatory brlock style.
463 acl Allow setfacl and getfacl to manage posix ACLs if server
464 supports them. (default)
465 noacl Do not allow setfacl and getfacl calls on this mount
466 user_xattr Allow getting and setting user xattrs (those attributes whose
467 name begins with "user." or "os2.") as OS/2 EAs (extended
468 attributes) to the server. This allows support of the
469 setfattr and getfattr utilities. (default)
470 nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs
471 mapchars Translate six of the seven reserved characters (not backslash)
472 *?<>|:
473 to the remap range (above 0xF000), which also
474 allows the CIFS client to recognize files created with
475 such characters by Windows's POSIX emulation. This can
476 also be useful when mounting to most versions of Samba
477 (which also forbids creating and opening files
478 whose names contain any of these seven characters).
479 This has no effect if the server does not support
480 Unicode on the wire.
481 nomapchars Do not translate any of these seven characters (default).
482 nocase Request case insensitive path name matching (case
483 sensitive is the default if the server supports it).
484 (mount option "ignorecase" is identical to "nocase")
485 posixpaths If CIFS Unix extensions are supported, attempt to
486 negotiate posix path name support which allows certain
487 characters forbidden in typical CIFS filenames, without
488 requiring remapping. (default)
489 noposixpaths If CIFS Unix extensions are supported, do not request
490 posix path name support (this may cause servers to
491 reject creating files with certain reserved characters).
492 nounix Disable the CIFS Unix Extensions for this mount (tree
493 connection). This is rarely needed, but it may be useful
494 in order to turn off multiple settings all at once (ie
495 posix acls, posix locks, posix paths, symlink support
496 and retrieving uids/gids/mode from the server) or to
497 work around a bug in servers which implement the Unix
498 Extensions.
499 nobrl Do not send byte range lock requests to the server.
500 This is necessary for certain applications that break
501 with cifs style mandatory byte range locks (and most
502 cifs servers do not yet support requesting advisory
503 byte range locks).
504 forcemandatorylock Even if the server supports posix (advisory) byte range
505 locking, send only mandatory lock requests. For some
506 (presumably rare) applications, originally coded for
507 DOS/Windows, which require Windows style mandatory byte range
508 locking, they may be able to take advantage of this option,
509 forcing the cifs client to only send mandatory locks
510 even if the cifs server would support posix advisory locks.
511 "forcemand" is accepted as a shorter form of this mount
512 option.
513 nostrictsync If this mount option is set, when an application does an
514 fsync call then the cifs client does not send an SMB Flush
515 to the server (to force the server to write all dirty data
516 for this file immediately to disk), although cifs still sends
517 all dirty (cached) file data to the server and waits for the
518 server to respond to the write. Since SMB Flush can be
519 very slow, and some servers may be reliable enough (to risk
520 delaying slightly flushing the data to disk on the server),
521 turning on this option may be useful to improve performance for
522 applications that fsync too much, at a small risk of server
523 crash. If this mount option is not set, by default cifs will
524 send an SMB flush request (and wait for a response) on every
525 fsync call.
526 nodfs Disable DFS (global name space support) even if the
527 server claims to support it. This can help work around
528 a problem with parsing of DFS paths with Samba server
529 versions 3.0.24 and 3.0.25.
530 remount remount the share (often used to change from ro to rw mounts
531 or vice versa)
532 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
533 the file. (EXPERIMENTAL)
534 servern Specify the server's netbios name (RFC1001 name) to use
535 when attempting to setup a session to the server.
536 This is needed for mounting to some older servers (such
537 as OS/2 or Windows 98 and Windows ME) since they do not
538 support a default server name. A server name can be up
539 to 15 characters long and is usually uppercased.
540 sfu When the CIFS Unix Extensions are not negotiated, attempt to
541 create device files and fifos in a format compatible with
542 Services for Unix (SFU). In addition retrieve bits 10-12
543 of the mode via the SETFILEBITS extended attribute (as
544 SFU does). In the future the bottom 9 bits of the
545 mode also will be emulated using queries of the security
546 descriptor (ACL).
547 mfsymlinks Enable support for Minshall+French symlinks
548 (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks)
549 This option is ignored when specified together with the
550 'sfu' option. Minshall+French symlinks are used even if
551 the server supports the CIFS Unix Extensions.
552 sign Must use packet signing (helps avoid unwanted data modification
553 by intermediate systems in the route). Note that signing
554 does not work with lanman or plaintext authentication.
555 seal Must seal (encrypt) all data on this mounted share before
556 sending on the network. Requires support for Unix Extensions.
557 Note that this differs from the sign mount option in that it
558 causes encryption of data sent over this mounted share but other
559 shares mounted to the same server are unaffected.
560 locallease This option is rarely needed. Fcntl F_SETLEASE is
561 used by some applications such as Samba and NFSv4 server to
562 check to see whether a file is cacheable. CIFS has no way
563 to explicitly request a lease, but can check whether a file
564 is cacheable (oplocked). Unfortunately, even if a file
565 is not oplocked, it could still be cacheable (ie cifs client
566 could grant fcntl leases if no other local processes are using
567 the file) for cases for example such as when the server does not
568 support oplocks and the user is sure that the only updates to
569 the file will be from this client. Specifying this mount option
570 will allow the cifs client to check for leases (only) locally
571 for files which are not oplocked instead of denying leases
572 in that case. (EXPERIMENTAL)
573 sec Security mode. Allowed values are:
574 none attempt to connect as a null user (no name)
575 krb5 Use Kerberos version 5 authentication
576 krb5i Use Kerberos authentication and packet signing
577 ntlm Use NTLM password hashing (default)
578 ntlmi Use NTLM password hashing with signing (if
579 /proc/fs/cifs/PacketSigningEnabled on or if
580 server requires signing also can be the default)
581 ntlmv2 Use NTLMv2 password hashing
582 ntlmv2i Use NTLMv2 password hashing with packet signing
583 lanman (if configured in kernel config) use older
584 lanman hash
585 hard Retry file operations if the server is not responding
586 soft Limit retries to unresponsive servers (usually only
587 one retry) before returning an error. (default)
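
As an illustration (the server, share and option values are arbitrary), several
of the options above can be combined on a single mount command, e.g.

	mount -t cifs //server/share /mnt -o user=myname,domain=mydom,serverino,nosuid,sec=ntlmv2i,rsize=130048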
588
589The mount.cifs mount helper also accepts a few mount options before -o
590including:
591
592 -S take password from stdin (equivalent to setting the environment
593 variable "PASSWD_FD=0")
594 -V print mount.cifs version
595 -? display simple usage information
596
597With most 2.6 kernel versions of modutils, the version of the cifs kernel
598module can be displayed via modinfo.
599
600Misc /proc/fs/cifs Flags and Debug Info
601=======================================
602Informational pseudo-files:
603DebugData Displays information about active CIFS sessions and
604 shares, features enabled as well as the cifs.ko
605 version.
606Stats Lists summary resource usage information as well as per
607 share statistics, if CONFIG_CIFS_STATS is enabled
608 in the kernel configuration.
609
610Configuration pseudo-files:
611PacketSigningEnabled If set to one, cifs packet signing is enabled
612 and will be used if the server requires
613 it. If set to two, cifs packet signing is
614 required even if the server considers packet
615 signing optional. (default 1)
616SecurityFlags Flags which control security negotiation and
617 also packet signing. Authentication (may/must)
618 flags (e.g. for NTLM and/or NTLMv2) may be combined with
619 the signing flags. Specifying two different password
620 hashing mechanisms (as "must use") on the other hand
621 does not make much sense. Default flags are
622 0x07007
623 (NTLM, NTLMv2 and packet signing allowed). The maximum
624 allowable flags if you want to allow mounts to servers
625 using weaker password hashes is 0x37037 (lanman,
626 plaintext, ntlm, ntlmv2, signing allowed). Some
627 SecurityFlags require the corresponding menuconfig
628 options to be enabled (lanman and plaintext require
629 CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
630 plaintext authentication currently requires also
631 enabling lanman authentication in the security flags
632 because the cifs module only supports sending
633 plaintext passwords using the older lanman dialect
634 form of the session setup SMB. (e.g. for authentication
635 using plain text passwords, set the SecurityFlags
636 to 0x30030):
637
638 may use packet signing 0x00001
639 must use packet signing 0x01001
640 may use NTLM (most common password hash) 0x00002
641 must use NTLM 0x02002
642 may use NTLMv2 0x00004
643 must use NTLMv2 0x04004
644 may use Kerberos security 0x00008
645 must use Kerberos 0x08008
646 may use lanman (weak) password hash 0x00010
647 must use lanman password hash 0x10010
648 may use plaintext passwords 0x00020
649 must use plaintext passwords 0x20020
650 (reserved for future packet encryption) 0x00040
651
652cifsFYI If set to non-zero value, additional debug information
653 will be logged to the system error log. This field
654 contains three flags controlling different classes of
655 debugging entries. The maximum value it can be set
656 to is 7 which enables all debugging points (default 0).
657 Some debugging statements are not compiled into the
658 cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
659 kernel configuration. cifsFYI may be set to one or
660 more of the following flags (7 sets them all):
661
662 log cifs informational messages 0x01
663 log return codes from cifs entry points 0x02
664 log slow responses (ie which take longer than 1 second)
665 CONFIG_CIFS_STATS2 must be enabled in .config 0x04
666
667
668traceSMB If set to one, debug information is logged to the
669 system error log with the start of smb requests
670 and responses (default 0)
671LookupCacheEnable If set to one, inode information is kept cached
672 for one second improving performance of lookups
673 (default 1)
674OplockEnabled If set to one, safe distributed caching enabled.
675 (default 1)
676LinuxExtensionsEnabled If set to one then the client will attempt to
677 use the CIFS "UNIX" extensions which are optional
678 protocol enhancements that allow CIFS servers
679 to return accurate UID/GID information as well
680 as support symbolic links. If you use servers
681 such as Samba that support the CIFS Unix
682 extensions but do not want to use symbolic link
683 support and want to map the uid and gid fields
684 to values supplied at mount (rather than the
685 actual values), then set this to zero. (default 1)
686
687These experimental features and tracing can be enabled by changing flags in
688/proc/fs/cifs (after the cifs module has been installed or built into the
689kernel, e.g. insmod cifs). To enable a feature set it to 1 e.g. to enable
690tracing to the kernel message log type:
691
692 echo 7 > /proc/fs/cifs/cifsFYI
693
694cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
695logging of various informational messages. 2 enables logging of non-zero
696SMB return codes while 4 enables logging of requests that take longer
697than one second to complete (except for byte range lock requests).
698Setting it to 4 requires defining CONFIG_CIFS_STATS2 manually in the
699source code (typically by setting it in the beginning of cifsglob.h),
700and setting it to seven enables all three. Finally, tracing
701the start of smb requests and responses can be enabled via:
702
703 echo 1 > /proc/fs/cifs/traceSMB
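
Similarly, the SecurityFlags values documented above can be combined and
written to the corresponding pseudo-file. For example, to also allow the
weaker lanman and plaintext hashes (only meaningful if the matching kernel
config options are enabled), the maximum allowable flags could be set with:

	echo 0x37037 > /proc/fs/cifs/SecurityFlags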
704
705Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
706if the kernel was configured with cifs statistics enabled. The statistics
707represent the number of successful (ie non-zero return code from the server)
708SMB responses to some of the more common commands (open, delete, mkdir etc.).
709Also recorded is the total bytes read and bytes written to the server for
710that share. Note that due to client caching effects this can be less than the
711number of bytes read and written by the application running on the client.
712The statistics for the number of total SMBs and oplock breaks are different in
713that they represent all for that share, not just those for which the server
714returned success.
715
716Also note that "cat /proc/fs/cifs/DebugData" will display information about
717the active sessions and the shares that are mounted.
718
719Enabling Kerberos (extended security) works but requires version 1.2 or later
720of the helper program cifs.upcall to be present and to be configured in the
721/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
722project (http://www.samba.org). NTLM, NTLMv2 and LANMAN support do not
723require this helper. Note that NTLMv2 security (which does not require the
724cifs.upcall helper program), instead of using Kerberos, is sufficient for
725some use cases.
726
727DFS support allows transparent redirection to shares in an MS-DFS name space.
728In addition, DFS support for target shares which are specified as UNC
729names which begin with host names (rather than IP addresses) requires
730a user space helper (such as cifs.upcall) to be present in order to
731translate host names to ip addresses, and the user space helper must also
732be configured in the file /etc/request-key.conf. Samba, Windows servers and
733many NAS appliances support DFS as a way of constructing a global name
734space to ease network configuration and improve reliability.
735
736To use cifs Kerberos and DFS support, the Linux keyutils package should be
737installed and something like the following lines should be added to the
738/etc/request-key.conf file:
739
740create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
741create dns_resolver * * /usr/local/sbin/cifs.upcall %k
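
With those upcall entries in place and a valid Kerberos ticket on the client
(obtained with kinit), a Kerberos mount might then look roughly like the
following; the realm, server and share names are placeholders and additional
options may be needed depending on the environment:

	kinit someuser@EXAMPLE.COM
	mount -t cifs //server.example.com/share /mnt -o sec=krb5,user=someuser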
742
743CIFS kernel module parameters
744=============================
745These module parameters can be specified or modified either at module load
746time or at runtime by using the interface
747 /sys/module/cifs/parameters/<param>
748
749i.e. echo "value" > /sys/module/cifs/parameters/<param>
750
7511. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
752 [Y/y/1]. To disable use any of [N/n/0].
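
For example, oplocks could be disabled either when loading the module or later
at runtime (the two forms below are equivalent in effect):

	modprobe cifs enable_oplocks=0
	echo 0 > /sys/module/cifs/parameters/enable_oplocks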
753
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
deleted file mode 100644
index 355abcdcda98..000000000000
--- a/fs/cifs/TODO
+++ /dev/null
@@ -1,129 +0,0 @@
1Version 1.53 May 20, 2008
2
3A Partial List of Missing Features
4==================================
5
6Contributions are welcome. There are plenty of opportunities
7for visible, important contributions to this module. Here
8is a partial list of the known problems and missing features:
9
10a) Support for SecurityDescriptors (Windows/CIFS ACLs) for chmod/chgrp/chown
11so that these operations can be supported to Windows servers
12
13b) Mapping POSIX ACLs (and eventually NFSv4 ACLs) to CIFS
14SecurityDescriptors
15
16c) Better pam/winbind integration (e.g. to handle uid mapping
17better)
18
19d) Cleanup now unneeded SessSetup code in
20fs/cifs/connect.c and add back in NTLMSSP code if any servers
21need it
22
23e) fix NTLMv2 signing when two mounts with different users to same
24server.
25
26f) Directory entry caching relies on a 1 second timer, rather than
27using FindNotify or equivalent. - (started)
28
29g) quota support (needs minor kernel change for quota calls
30to make it to network filesystems or deviceless filesystems)
31
32h) investigate sync behavior (including syncpage) and check
33for proper behavior of intr/nointr
34
35i) improve support for very old servers (OS/2 and Win9x for example)
36Including support for changing the time remotely (utimes command).
37
38j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
39extra copy in/out of the socket buffers in some cases.
40
41k) Better optimize open (and pathbased setfilesize) to reduce the
42oplock breaks coming from windows srv. Piggyback identical file
43opens on top of each other by incrementing reference count rather
44than resending (helps reduce server resource utilization and avoid
45spurious oplock breaks).
46
47l) Improve performance of readpages by sending more than one read
48at a time when 8 pages or more are requested. In conjunction
49add support for async_cifs_readpages.
50
51m) Add support for storing symlink info to Windows servers
52in the Extended Attribute format their SFU clients would recognize.
53
54n) Finish fcntl D_NOTIFY support so kde and gnome file list windows
55will autorefresh (partially complete by Asser). Needs minor kernel
56vfs change to support removing D_NOTIFY on a file.
57
58o) Add GUI tool to configure /proc/fs/cifs settings and for display of
59the CIFS statistics (started)
60
61p) implement support for security and trusted categories of xattrs
62(requires minor protocol extension) to enable better support for SELINUX
63
64q) Implement O_DIRECT flag on open (already supported on mount)
65
66r) Create UID mapping facility so server UIDs can be mapped on a per
67mount or a per server basis to client UIDs or nobody if no mapping
68exists. This is helpful when Unix extensions are negotiated to
69allow better permission checking when UIDs differ on the server
70and client. Add new protocol request to the CIFS protocol
71standard for asking the server for the corresponding name of a
72particular uid.
73
74s) Add support for CIFS Unix and also the newer POSIX extensions to the
75server side for Samba 4.
76
77t) To support OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers) we
78need to add the ability to set the time on the server (utimes command)
79
80u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
81
82v) mount check for unmatched uids
83
84w) Add support for new vfs entry point for fallocate
85
86x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
87processes can proceed better in parallel (on the server)
88
89y) Fix Samba 3 to handle reads/writes over 127K (and remove the cifs mount
90restriction of wsize max being 127K)
91
92KNOWN BUGS (updated April 24, 2007)
93====================================
94See http://bugzilla.samba.org - search on product "CifsVFS" for
95current bug list.
96
971) existing symbolic links (Windows reparse points) are recognized but
98can not be created remotely. They are implemented for Samba and those that
99support the CIFS Unix extensions, although earlier versions of Samba
100overly restrict the pathnames.
1012) follow_link and readdir code does not follow dfs junctions
102but recognizes them
1033) create of new files to FAT partitions on Windows servers can
104succeed but still return access denied (appears to be Windows
105server not cifs client problem) and has not been reproduced recently.
106NTFS partitions do not have this problem.
1074) Unix/POSIX capabilities are reset after reconnection, and affect
108a few fields in the tree connection but we do not know which
109superblocks to apply these changes to. We should probably walk
110the list of superblocks to set these. Also need to check the
111flags on the second mount to the same share, and see if we
112can do the same trick that NFS does to remount duplicate shares.
113
114Misc testing to do
115==================
1161) check out max path names and max path name components against various server
117types. Try nested symlinks (8 deep). Return max path name in stat -f information
118
1192) Modify file portion of ltp so it can run against a mounted network
120share and run it against cifs vfs in automated fashion.
121
1223) Additional performance testing and optimization using iozone and similar -
123there are some easy changes that can be done to parallelize sequential writes,
124and when signing is disabled to request larger read sizes (larger than
125negotiated size) and send larger write sizes to modern servers.
126
1274) More exhaustively test against less common servers. More testing
128against Windows 9x, Windows ME servers.
129
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index fe8d6276410a..d8eac3b6cefb 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -91,6 +91,8 @@ extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
91#endif /* CONFIG_CIFS_SMB2 */ 91#endif /* CONFIG_CIFS_SMB2 */
92#endif 92#endif
93 93
94wchar_t cifs_toupper(wchar_t in);
95
94/* 96/*
95 * UniStrcat: Concatenate the second string to the first 97 * UniStrcat: Concatenate the second string to the first
96 * 98 *
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 85ea98d139fc..a16b4e58bcc6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -255,6 +255,7 @@ cifs_alloc_inode(struct super_block *sb)
255 cifs_inode->server_eof = 0; 255 cifs_inode->server_eof = 0;
256 cifs_inode->uniqueid = 0; 256 cifs_inode->uniqueid = 0;
257 cifs_inode->createtime = 0; 257 cifs_inode->createtime = 0;
258 cifs_inode->epoch = 0;
258#ifdef CONFIG_CIFS_SMB2 259#ifdef CONFIG_CIFS_SMB2
259 get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); 260 get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
260#endif 261#endif
@@ -357,6 +358,18 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
357 seq_printf(s, "loose"); 358 seq_printf(s, "loose");
358} 359}
359 360
361static void
362cifs_show_nls(struct seq_file *s, struct nls_table *cur)
363{
364 struct nls_table *def;
365
366 /* Display iocharset= option if it's not default charset */
367 def = load_nls_default();
368 if (def != cur)
369 seq_printf(s, ",iocharset=%s", cur->charset);
370 unload_nls(def);
371}
372
360/* 373/*
361 * cifs_show_options() is for displaying mount options in /proc/mounts. 374 * cifs_show_options() is for displaying mount options in /proc/mounts.
362 * Not all settable options are displayed but most of the important 375 * Not all settable options are displayed but most of the important
@@ -418,6 +431,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
418 seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho", 431 seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
419 cifs_sb->mnt_file_mode, 432 cifs_sb->mnt_file_mode,
420 cifs_sb->mnt_dir_mode); 433 cifs_sb->mnt_dir_mode);
434
435 cifs_show_nls(s, cifs_sb->local_nls);
436
421 if (tcon->seal) 437 if (tcon->seal)
422 seq_printf(s, ",seal"); 438 seq_printf(s, ",seal");
423 if (tcon->nocase) 439 if (tcon->nocase)
@@ -718,7 +734,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
718 734
719 written = generic_file_aio_write(iocb, iov, nr_segs, pos); 735 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
720 736
721 if (CIFS_I(inode)->clientCanCacheAll) 737 if (CIFS_CACHE_WRITE(CIFS_I(inode)))
722 return written; 738 return written;
723 739
724 rc = filemap_fdatawrite(inode->i_mapping); 740 rc = filemap_fdatawrite(inode->i_mapping);
@@ -743,7 +759,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
743 * We need to be sure that all dirty pages are written and the 759 * We need to be sure that all dirty pages are written and the
744 * server has the newest file length. 760 * server has the newest file length.
745 */ 761 */
746 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && 762 if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping &&
747 inode->i_mapping->nrpages != 0) { 763 inode->i_mapping->nrpages != 0) {
748 rc = filemap_fdatawait(inode->i_mapping); 764 rc = filemap_fdatawait(inode->i_mapping);
749 if (rc) { 765 if (rc) {
@@ -767,8 +783,10 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
767 783
768static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) 784static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
769{ 785{
770 /* note that this is called by vfs setlease with i_lock held 786 /*
771 to protect *lease from going away */ 787 * Note that this is called by vfs setlease with i_lock held to
788 * protect *lease from going away.
789 */
772 struct inode *inode = file_inode(file); 790 struct inode *inode = file_inode(file);
773 struct cifsFileInfo *cfile = file->private_data; 791 struct cifsFileInfo *cfile = file->private_data;
774 792
@@ -776,20 +794,19 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
776 return -EINVAL; 794 return -EINVAL;
777 795
778 /* check if file is oplocked */ 796 /* check if file is oplocked */
779 if (((arg == F_RDLCK) && 797 if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
780 (CIFS_I(inode)->clientCanCacheRead)) || 798 ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
781 ((arg == F_WRLCK) &&
782 (CIFS_I(inode)->clientCanCacheAll)))
783 return generic_setlease(file, arg, lease); 799 return generic_setlease(file, arg, lease);
784 else if (tlink_tcon(cfile->tlink)->local_lease && 800 else if (tlink_tcon(cfile->tlink)->local_lease &&
785 !CIFS_I(inode)->clientCanCacheRead) 801 !CIFS_CACHE_READ(CIFS_I(inode)))
786 /* If the server claims to support oplock on this 802 /*
787 file, then we still need to check oplock even 803 * If the server claims to support oplock on this file, then we
788 if the local_lease mount option is set, but there 804 * still need to check oplock even if the local_lease mount
789 are servers which do not support oplock for which 805 * option is set, but there are servers which do not support
790 this mount option may be useful if the user 806 * oplock for which this mount option may be useful if the user
791 knows that the file won't be changed on the server 807 * knows that the file won't be changed on the server by anyone
792 by anyone else */ 808 * else.
809 */
793 return generic_setlease(file, arg, lease); 810 return generic_setlease(file, arg, lease);
794 else 811 else
795 return -EAGAIN; 812 return -EAGAIN;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 52ca861ed35e..cfa14c80ef3b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -28,6 +28,7 @@
28#include "cifsacl.h" 28#include "cifsacl.h"
29#include <crypto/internal/hash.h> 29#include <crypto/internal/hash.h>
30#include <linux/scatterlist.h> 30#include <linux/scatterlist.h>
31#include <uapi/linux/cifs/cifs_mount.h>
31#ifdef CONFIG_CIFS_SMB2 32#ifdef CONFIG_CIFS_SMB2
32#include "smb2pdu.h" 33#include "smb2pdu.h"
33#endif 34#endif
@@ -41,12 +42,7 @@
41#define MAX_SES_INFO 2 42#define MAX_SES_INFO 2
42#define MAX_TCON_INFO 4 43#define MAX_TCON_INFO 4
43 44
44#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) 45#define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1)
45#define MAX_SERVER_SIZE 15
46#define MAX_SHARE_SIZE 80
47#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
48#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
49#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
50 46
51#define CIFS_MIN_RCV_POOL 4 47#define CIFS_MIN_RCV_POOL 4
52 48
@@ -135,6 +131,7 @@ struct cifs_secmech {
135 131
136/* per smb session structure/fields */ 132/* per smb session structure/fields */
137struct ntlmssp_auth { 133struct ntlmssp_auth {
134 bool sesskey_per_smbsess; /* whether session key is per smb session */
138 __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ 135 __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
139 __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ 136 __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
140 unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ 137 unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
@@ -308,6 +305,9 @@ struct smb_version_operations {
308 int (*create_hardlink)(const unsigned int, struct cifs_tcon *, 305 int (*create_hardlink)(const unsigned int, struct cifs_tcon *,
309 const char *, const char *, 306 const char *, const char *,
310 struct cifs_sb_info *); 307 struct cifs_sb_info *);
308 /* query symlink target */
309 int (*query_symlink)(const unsigned int, struct cifs_tcon *,
310 const char *, char **, struct cifs_sb_info *);
311 /* open a file for non-posix mounts */ 311 /* open a file for non-posix mounts */
312 int (*open)(const unsigned int, struct cifs_open_parms *, 312 int (*open)(const unsigned int, struct cifs_open_parms *,
313 __u32 *, FILE_ALL_INFO *); 313 __u32 *, FILE_ALL_INFO *);
@@ -361,18 +361,24 @@ struct smb_version_operations {
361 /* push brlocks from the cache to the server */ 361 /* push brlocks from the cache to the server */
362 int (*push_mand_locks)(struct cifsFileInfo *); 362 int (*push_mand_locks)(struct cifsFileInfo *);
363 /* get lease key of the inode */ 363 /* get lease key of the inode */
364 void (*get_lease_key)(struct inode *, struct cifs_fid *fid); 364 void (*get_lease_key)(struct inode *, struct cifs_fid *);
365 /* set lease key of the inode */ 365 /* set lease key of the inode */
366 void (*set_lease_key)(struct inode *, struct cifs_fid *fid); 366 void (*set_lease_key)(struct inode *, struct cifs_fid *);
367 /* generate new lease key */ 367 /* generate new lease key */
368 void (*new_lease_key)(struct cifs_fid *fid); 368 void (*new_lease_key)(struct cifs_fid *);
369 /* The next two functions will need to be changed to per smb session */ 369 int (*generate_signingkey)(struct cifs_ses *);
370 void (*generate_signingkey)(struct TCP_Server_Info *server); 370 int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
371 int (*calc_signature)(struct smb_rqst *rqst, 371 int (*query_mf_symlink)(const unsigned char *, char *, unsigned int *,
372 struct TCP_Server_Info *server); 372 struct cifs_sb_info *, unsigned int);
373 int (*query_mf_symlink)(const unsigned char *path, char *pbuf, 373 /* if we can do cache read operations */
374 unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, 374 bool (*is_read_op)(__u32);
375 unsigned int xid); 375 /* set oplock level for the inode */
376 void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
377 bool *);
378 /* create lease context buffer for CREATE request */
379 char * (*create_lease_buf)(u8 *, u8);
380 /* parse lease context buffer and return oplock/epoch info */
381 __u8 (*parse_lease_buf)(void *, unsigned int *);
376}; 382};
377 383
378struct smb_version_values { 384struct smb_version_values {
@@ -390,9 +396,9 @@ struct smb_version_values {
390 unsigned int cap_unix; 396 unsigned int cap_unix;
391 unsigned int cap_nt_find; 397 unsigned int cap_nt_find;
392 unsigned int cap_large_files; 398 unsigned int cap_large_files;
393 unsigned int oplock_read;
394 __u16 signing_enabled; 399 __u16 signing_enabled;
395 __u16 signing_required; 400 __u16 signing_required;
401 size_t create_lease_size;
396}; 402};
397 403
398#define HEADER_SIZE(server) (server->vals->header_size) 404#define HEADER_SIZE(server) (server->vals->header_size)
@@ -548,7 +554,6 @@ struct TCP_Server_Info {
548 int timeAdj; /* Adjust for difference in server time zone in sec */ 554 int timeAdj; /* Adjust for difference in server time zone in sec */
549 __u64 CurrentMid; /* multiplex id - rotating counter */ 555 __u64 CurrentMid; /* multiplex id - rotating counter */
550 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ 556 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
551 char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
552 /* 16th byte of RFC1001 workstation name is always null */ 557 /* 16th byte of RFC1001 workstation name is always null */
553 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 558 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
554 __u32 sequence_number; /* for signing, protected by srv_mutex */ 559 __u32 sequence_number; /* for signing, protected by srv_mutex */
@@ -731,6 +736,7 @@ struct cifs_ses {
731 bool need_reconnect:1; /* connection reset, uid now invalid */ 736 bool need_reconnect:1; /* connection reset, uid now invalid */
732#ifdef CONFIG_CIFS_SMB2 737#ifdef CONFIG_CIFS_SMB2
733 __u16 session_flags; 738 __u16 session_flags;
739 char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
734#endif /* CONFIG_CIFS_SMB2 */ 740#endif /* CONFIG_CIFS_SMB2 */
735}; 741};
736 742
@@ -935,6 +941,8 @@ struct cifs_fid {
935 __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ 941 __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */
936#endif 942#endif
937 struct cifs_pending_open *pending_open; 943 struct cifs_pending_open *pending_open;
944 unsigned int epoch;
945 bool purge_cache;
938}; 946};
939 947
940struct cifs_fid_locks { 948struct cifs_fid_locks {
@@ -1032,6 +1040,17 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
1032struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); 1040struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
1033void cifsFileInfo_put(struct cifsFileInfo *cifs_file); 1041void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
1034 1042
1043#define CIFS_CACHE_READ_FLG 1
1044#define CIFS_CACHE_HANDLE_FLG 2
1045#define CIFS_CACHE_RH_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG)
1046#define CIFS_CACHE_WRITE_FLG 4
1047#define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG)
1048#define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG)
1049
1050#define CIFS_CACHE_READ(cinode) (cinode->oplock & CIFS_CACHE_READ_FLG)
1051#define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG)
1052#define CIFS_CACHE_WRITE(cinode) (cinode->oplock & CIFS_CACHE_WRITE_FLG)
1053
1035/* 1054/*
1036 * One of these for each file inode 1055 * One of these for each file inode
1037 */ 1056 */
@@ -1043,8 +1062,8 @@ struct cifsInodeInfo {
1043 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 1062 /* BB add in lists for dirty pages i.e. write caching info for oplock */
1044 struct list_head openFileList; 1063 struct list_head openFileList;
1045 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 1064 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
1046 bool clientCanCacheRead; /* read oplock */ 1065 unsigned int oplock; /* oplock/lease level we have */
1047 bool clientCanCacheAll; /* read and writebehind oplock */ 1066 unsigned int epoch; /* used to track lease state changes */
1048 bool delete_pending; /* DELETE_ON_CLOSE is set */ 1067 bool delete_pending; /* DELETE_ON_CLOSE is set */
1049 bool invalid_mapping; /* pagecache is invalid */ 1068 bool invalid_mapping; /* pagecache is invalid */
1050 unsigned long time; /* jiffies of last update of inode */ 1069 unsigned long time; /* jiffies of last update of inode */
@@ -1502,7 +1521,7 @@ extern mempool_t *cifs_mid_poolp;
1502extern struct smb_version_operations smb1_operations; 1521extern struct smb_version_operations smb1_operations;
1503extern struct smb_version_values smb1_values; 1522extern struct smb_version_values smb1_values;
1504#define SMB20_VERSION_STRING "2.0" 1523#define SMB20_VERSION_STRING "2.0"
1505/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ 1524extern struct smb_version_operations smb20_operations;
1506extern struct smb_version_values smb20_values; 1525extern struct smb_version_values smb20_values;
1507#define SMB21_VERSION_STRING "2.1" 1526#define SMB21_VERSION_STRING "2.1"
1508extern struct smb_version_operations smb21_operations; 1527extern struct smb_version_operations smb21_operations;
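The cifsglob.h changes above replace the per-inode clientCanCacheRead and clientCanCacheAll booleans with a single oplock bit field queried through the new CIFS_CACHE_* macros. The following minimal user-space sketch illustrates the same flag scheme; the demo_inode structure and helper names are illustrative stand-ins, not code from the patch.

    #include <stdio.h>
    #include <stdbool.h>

    /* Same bit values as the new CIFS_CACHE_* flags in cifsglob.h. */
    #define CIFS_CACHE_READ_FLG   1
    #define CIFS_CACHE_HANDLE_FLG 2
    #define CIFS_CACHE_WRITE_FLG  4

    struct demo_inode {
            unsigned int oplock;  /* replaces clientCanCacheRead/All */
            unsigned int epoch;   /* tracks lease state changes */
    };

    static bool cache_read(const struct demo_inode *i)
    {
            return i->oplock & CIFS_CACHE_READ_FLG;
    }

    static bool cache_write(const struct demo_inode *i)
    {
            return i->oplock & CIFS_CACHE_WRITE_FLG;
    }

    int main(void)
    {
            struct demo_inode ino = { .oplock = 0, .epoch = 0 };

            /* An exclusive oplock grants both read and write caching. */
            ino.oplock = CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG;
            printf("read caching: %d, write caching: %d\n",
                   cache_read(&ino), cache_write(&ino));

            /* A level II (read) oplock only allows cached reads. */
            ino.oplock = CIFS_CACHE_READ_FLG;
            printf("read caching: %d, write caching: %d\n",
                   cache_read(&ino), cache_write(&ino));
            return 0;
    }

The handle-caching bit (CIFS_CACHE_HANDLE_FLG) only comes into play with SMB2.1-style leases, which appears to be why the combined RH and RHW masks are defined alongside the plain read and write flags.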
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 11ca24a8e054..948676db8e2e 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1495,11 +1495,12 @@ struct reparse_data {
1495 __u32 ReparseTag; 1495 __u32 ReparseTag;
1496 __u16 ReparseDataLength; 1496 __u16 ReparseDataLength;
1497 __u16 Reserved; 1497 __u16 Reserved;
1498 __u16 AltNameOffset; 1498 __u16 SubstituteNameOffset;
1499 __u16 AltNameLen; 1499 __u16 SubstituteNameLength;
1500 __u16 TargetNameOffset; 1500 __u16 PrintNameOffset;
1501 __u16 TargetNameLen; 1501 __u16 PrintNameLength;
1502 char LinkNamesBuf[1]; 1502 __u32 Flags;
1503 char PathBuffer[0];
1503} __attribute__((packed)); 1504} __attribute__((packed));
1504 1505
1505struct cifs_quota_data { 1506struct cifs_quota_data {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b29a012bed33..b5ec2a268f56 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -357,13 +357,9 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
357 struct cifs_tcon *tcon, 357 struct cifs_tcon *tcon,
358 const unsigned char *searchName, char **syminfo, 358 const unsigned char *searchName, char **syminfo,
359 const struct nls_table *nls_codepage); 359 const struct nls_table *nls_codepage);
360#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL 360extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
361extern int CIFSSMBQueryReparseLinkInfo(const unsigned int xid, 361 __u16 fid, char **symlinkinfo,
362 struct cifs_tcon *tcon, 362 const struct nls_table *nls_codepage);
363 const unsigned char *searchName,
364 char *symlinkinfo, const int buflen, __u16 fid,
365 const struct nls_table *nls_codepage);
366#endif /* temporarily unused until cifs_symlink fixed */
367extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, 363extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon,
368 const char *fileName, const int disposition, 364 const char *fileName, const int disposition,
369 const int access_flags, const int omode, 365 const int access_flags, const int omode,
@@ -435,7 +431,7 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
435extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); 431extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
436extern void cifs_crypto_shash_release(struct TCP_Server_Info *); 432extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
437extern int calc_seckey(struct cifs_ses *); 433extern int calc_seckey(struct cifs_ses *);
438extern void generate_smb3signingkey(struct TCP_Server_Info *); 434extern int generate_smb3signingkey(struct cifs_ses *);
439 435
440#ifdef CONFIG_CIFS_WEAK_PW_HASH 436#ifdef CONFIG_CIFS_WEAK_PW_HASH
441extern int calc_lanman_hash(const char *password, const char *cryptkey, 437extern int calc_lanman_hash(const char *password, const char *cryptkey,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a89c4cb4e6cf..a3d74fea1623 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3067,7 +3067,6 @@ querySymLinkRetry:
3067 return rc; 3067 return rc;
3068} 3068}
3069 3069
3070#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
3071/* 3070/*
3072 * Recent Windows versions now create symlinks more frequently 3071 * Recent Windows versions now create symlinks more frequently
3073 * and they use the "reparse point" mechanism below. We can of course 3072 * and they use the "reparse point" mechanism below. We can of course
@@ -3079,18 +3078,22 @@ querySymLinkRetry:
3079 * it is not compiled in by default until callers fixed up and more tested. 3078 * it is not compiled in by default until callers fixed up and more tested.
3080 */ 3079 */
3081int 3080int
3082CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, 3081CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
3083 const unsigned char *searchName, 3082 __u16 fid, char **symlinkinfo,
3084 char *symlinkinfo, const int buflen, __u16 fid, 3083 const struct nls_table *nls_codepage)
3085 const struct nls_table *nls_codepage)
3086{ 3084{
3087 int rc = 0; 3085 int rc = 0;
3088 int bytes_returned; 3086 int bytes_returned;
3089 struct smb_com_transaction_ioctl_req *pSMB; 3087 struct smb_com_transaction_ioctl_req *pSMB;
3090 struct smb_com_transaction_ioctl_rsp *pSMBr; 3088 struct smb_com_transaction_ioctl_rsp *pSMBr;
3089 bool is_unicode;
3090 unsigned int sub_len;
3091 char *sub_start;
3092 struct reparse_data *reparse_buf;
3093 __u32 data_offset, data_count;
3094 char *end_of_smb;
3091 3095
3092 cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n", 3096 cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid);
3093 searchName);
3094 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, 3097 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
3095 (void **) &pSMBr); 3098 (void **) &pSMBr);
3096 if (rc) 3099 if (rc)
@@ -3119,66 +3122,55 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon,
3119 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3122 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3120 if (rc) { 3123 if (rc) {
3121 cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); 3124 cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
3122 } else { /* decode response */ 3125 goto qreparse_out;
3123 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 3126 }
3124 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
3125 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
3126 /* BB also check enough total bytes returned */
3127 rc = -EIO; /* bad smb */
3128 goto qreparse_out;
3129 }
3130 if (data_count && (data_count < 2048)) {
3131 char *end_of_smb = 2 /* sizeof byte count */ +
3132 get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
3133
3134 struct reparse_data *reparse_buf =
3135 (struct reparse_data *)
3136 ((char *)&pSMBr->hdr.Protocol
3137 + data_offset);
3138 if ((char *)reparse_buf >= end_of_smb) {
3139 rc = -EIO;
3140 goto qreparse_out;
3141 }
3142 if ((reparse_buf->LinkNamesBuf +
3143 reparse_buf->TargetNameOffset +
3144 reparse_buf->TargetNameLen) > end_of_smb) {
3145 cifs_dbg(FYI, "reparse buf beyond SMB\n");
3146 rc = -EIO;
3147 goto qreparse_out;
3148 }
3149 3127
3150 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { 3128 data_offset = le32_to_cpu(pSMBr->DataOffset);
3151 cifs_from_ucs2(symlinkinfo, (__le16 *) 3129 data_count = le32_to_cpu(pSMBr->DataCount);
3152 (reparse_buf->LinkNamesBuf + 3130 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
3153 reparse_buf->TargetNameOffset), 3131 /* BB also check enough total bytes returned */
3154 buflen, 3132 rc = -EIO; /* bad smb */
3155 reparse_buf->TargetNameLen, 3133 goto qreparse_out;
3156 nls_codepage, 0); 3134 }
3157 } else { /* ASCII names */ 3135 if (!data_count || (data_count > 2048)) {
3158 strncpy(symlinkinfo, 3136 rc = -EIO;
3159 reparse_buf->LinkNamesBuf + 3137 cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
3160 reparse_buf->TargetNameOffset, 3138 goto qreparse_out;
3161 min_t(const int, buflen, 3139 }
3162 reparse_buf->TargetNameLen)); 3140 end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
3163 } 3141 reparse_buf = (struct reparse_data *)
3164 } else { 3142 ((char *)&pSMBr->hdr.Protocol + data_offset);
3165 rc = -EIO; 3143 if ((char *)reparse_buf >= end_of_smb) {
3166 cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); 3144 rc = -EIO;
3167 } 3145 goto qreparse_out;
3168 symlinkinfo[buflen] = 0; /* just in case so the caller
3169 does not go off the end of the buffer */
3170 cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo);
3171 } 3146 }
3147 if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset +
3148 reparse_buf->PrintNameLength) > end_of_smb) {
3149 cifs_dbg(FYI, "reparse buf beyond SMB\n");
3150 rc = -EIO;
3151 goto qreparse_out;
3152 }
3153 sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer;
3154 sub_len = reparse_buf->SubstituteNameLength;
3155 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3156 is_unicode = true;
3157 else
3158 is_unicode = false;
3172 3159
3160 /* BB FIXME investigate remapping reserved chars here */
3161 *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode,
3162 nls_codepage);
3163 if (!*symlinkinfo)
3164 rc = -ENOMEM;
3173qreparse_out: 3165qreparse_out:
3174 cifs_buf_release(pSMB); 3166 cifs_buf_release(pSMB);
3175 3167
3176 /* Note: On -EAGAIN error only caller can retry on handle based calls 3168 /*
3177 since file handle passed in no longer valid */ 3169 * Note: On -EAGAIN error only caller can retry on handle based calls
3178 3170 * since file handle passed in no longer valid.
3171 */
3179 return rc; 3172 return rc;
3180} 3173}
3181#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
3182 3174
3183#ifdef CONFIG_CIFS_POSIX 3175#ifdef CONFIG_CIFS_POSIX
3184 3176
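The cifssmb.c rework above switches struct reparse_data to the standard symlink reparse buffer layout (substitute name, print name, flags, then a shared PathBuffer) and has CIFSSMBQuerySymLink() return the substitute name located by offset. A small stand-alone sketch of that layout follows; it uses ASCII strings for brevity, whereas the real buffer carries UTF-16 names that the patch converts with cifs_strndup_from_utf16(), and the structure here is a simplified illustration rather than the on-the-wire definition.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Field names follow the updated struct reparse_data above; the fixed
     * PathBuffer size and ASCII strings are simplifications for the demo. */
    struct demo_reparse {
            uint32_t ReparseTag;
            uint16_t ReparseDataLength;
            uint16_t Reserved;
            uint16_t SubstituteNameOffset;
            uint16_t SubstituteNameLength;
            uint16_t PrintNameOffset;
            uint16_t PrintNameLength;
            uint32_t Flags;
            char     PathBuffer[64];
    };

    int main(void)
    {
            struct demo_reparse buf = { 0 };
            const char *sub   = "\\??\\C:\\share\\file";  /* substitute name */
            const char *print = "C:\\share\\file";        /* print name */

            /* Both names live back to back in PathBuffer; the offsets are
             * relative to the start of PathBuffer, which is why the hunk
             * computes sub_start as PathBuffer + SubstituteNameOffset. */
            memcpy(buf.PathBuffer, sub, strlen(sub));
            buf.SubstituteNameOffset = 0;
            buf.SubstituteNameLength = (uint16_t)strlen(sub);
            memcpy(buf.PathBuffer + strlen(sub), print, strlen(print));
            buf.PrintNameOffset = (uint16_t)strlen(sub);
            buf.PrintNameLength = (uint16_t)strlen(print);

            printf("substitute name: %.*s\n", buf.SubstituteNameLength,
                   buf.PathBuffer + buf.SubstituteNameOffset);
            return 0;
    }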
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d67c550c4980..a279ffc0bc29 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -379,6 +379,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
379 try_to_freeze(); 379 try_to_freeze();
380 380
381 /* we should try only the port we connected to before */ 381 /* we should try only the port we connected to before */
382 mutex_lock(&server->srv_mutex);
382 rc = generic_ip_connect(server); 383 rc = generic_ip_connect(server);
383 if (rc) { 384 if (rc) {
384 cifs_dbg(FYI, "reconnect error %d\n", rc); 385 cifs_dbg(FYI, "reconnect error %d\n", rc);
@@ -390,6 +391,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
390 server->tcpStatus = CifsNeedNegotiate; 391 server->tcpStatus = CifsNeedNegotiate;
391 spin_unlock(&GlobalMid_Lock); 392 spin_unlock(&GlobalMid_Lock);
392 } 393 }
394 mutex_unlock(&server->srv_mutex);
393 } while (server->tcpStatus == CifsNeedReconnect); 395 } while (server->tcpStatus == CifsNeedReconnect);
394 396
395 return rc; 397 return rc;
@@ -1114,7 +1116,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
1114 break; 1116 break;
1115#ifdef CONFIG_CIFS_SMB2 1117#ifdef CONFIG_CIFS_SMB2
1116 case Smb_20: 1118 case Smb_20:
1117 vol->ops = &smb21_operations; /* currently identical with 2.1 */ 1119 vol->ops = &smb20_operations;
1118 vol->vals = &smb20_values; 1120 vol->vals = &smb20_values;
1119 break; 1121 break;
1120 case Smb_21: 1122 case Smb_21:
@@ -1575,8 +1577,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1575 if (string == NULL) 1577 if (string == NULL)
1576 goto out_nomem; 1578 goto out_nomem;
1577 1579
1578 if (strnlen(string, MAX_USERNAME_SIZE) > 1580 if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
1579 MAX_USERNAME_SIZE) { 1581 CIFS_MAX_USERNAME_LEN) {
1580 printk(KERN_WARNING "CIFS: username too long\n"); 1582 printk(KERN_WARNING "CIFS: username too long\n");
1581 goto cifs_parse_mount_err; 1583 goto cifs_parse_mount_err;
1582 } 1584 }
@@ -2221,13 +2223,13 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
2221 /* anything else takes username/password */ 2223 /* anything else takes username/password */
2222 if (strncmp(ses->user_name, 2224 if (strncmp(ses->user_name,
2223 vol->username ? vol->username : "", 2225 vol->username ? vol->username : "",
2224 MAX_USERNAME_SIZE)) 2226 CIFS_MAX_USERNAME_LEN))
2225 return 0; 2227 return 0;
2226 if (strlen(vol->username) != 0 && 2228 if (strlen(vol->username) != 0 &&
2227 ses->password != NULL && 2229 ses->password != NULL &&
2228 strncmp(ses->password, 2230 strncmp(ses->password,
2229 vol->password ? vol->password : "", 2231 vol->password ? vol->password : "",
2230 MAX_PASSWORD_SIZE)) 2232 CIFS_MAX_PASSWORD_LEN))
2231 return 0; 2233 return 0;
2232 } 2234 }
2233 return 1; 2235 return 1;
@@ -2352,7 +2354,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2352 } 2354 }
2353 2355
2354 len = delim - payload; 2356 len = delim - payload;
2355 if (len > MAX_USERNAME_SIZE || len <= 0) { 2357 if (len > CIFS_MAX_USERNAME_LEN || len <= 0) {
2356 cifs_dbg(FYI, "Bad value from username search (len=%zd)\n", 2358 cifs_dbg(FYI, "Bad value from username search (len=%zd)\n",
2357 len); 2359 len);
2358 rc = -EINVAL; 2360 rc = -EINVAL;
@@ -2369,7 +2371,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
2369 cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username); 2371 cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username);
2370 2372
2371 len = key->datalen - (len + 1); 2373 len = key->datalen - (len + 1);
2372 if (len > MAX_PASSWORD_SIZE || len <= 0) { 2374 if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) {
2373 cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len); 2375 cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len);
2374 rc = -EINVAL; 2376 rc = -EINVAL;
2375 kfree(vol->username); 2377 kfree(vol->username);
@@ -3826,33 +3828,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
3826 if (server->ops->sess_setup) 3828 if (server->ops->sess_setup)
3827 rc = server->ops->sess_setup(xid, ses, nls_info); 3829 rc = server->ops->sess_setup(xid, ses, nls_info);
3828 3830
3829 if (rc) { 3831 if (rc)
3830 cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc); 3832 cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc);
3831 } else {
3832 mutex_lock(&server->srv_mutex);
3833 if (!server->session_estab) {
3834 server->session_key.response = ses->auth_key.response;
3835 server->session_key.len = ses->auth_key.len;
3836 server->sequence_number = 0x2;
3837 server->session_estab = true;
3838 ses->auth_key.response = NULL;
3839 if (server->ops->generate_signingkey)
3840 server->ops->generate_signingkey(server);
3841 }
3842 mutex_unlock(&server->srv_mutex);
3843
3844 cifs_dbg(FYI, "CIFS Session Established successfully\n");
3845 spin_lock(&GlobalMid_Lock);
3846 ses->status = CifsGood;
3847 ses->need_reconnect = false;
3848 spin_unlock(&GlobalMid_Lock);
3849 }
3850
3851 kfree(ses->auth_key.response);
3852 ses->auth_key.response = NULL;
3853 ses->auth_key.len = 0;
3854 kfree(ses->ntlmssp);
3855 ses->ntlmssp = NULL;
3856 3833
3857 return rc; 3834 return rc;
3858} 3835}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d62ce0d48141..d3e2eaa503a6 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -32,6 +32,7 @@
32#include "cifsproto.h" 32#include "cifsproto.h"
33#include "cifs_debug.h" 33#include "cifs_debug.h"
34#include "cifs_fs_sb.h" 34#include "cifs_fs_sb.h"
35#include "cifs_unicode.h"
35 36
36static void 37static void
37renew_parental_timestamps(struct dentry *direntry) 38renew_parental_timestamps(struct dentry *direntry)
@@ -834,12 +835,17 @@ static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q)
834{ 835{
835 struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; 836 struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
836 unsigned long hash; 837 unsigned long hash;
837 int i; 838 wchar_t c;
839 int i, charlen;
838 840
839 hash = init_name_hash(); 841 hash = init_name_hash();
840 for (i = 0; i < q->len; i++) 842 for (i = 0; i < q->len; i += charlen) {
841 hash = partial_name_hash(nls_tolower(codepage, q->name[i]), 843 charlen = codepage->char2uni(&q->name[i], q->len - i, &c);
842 hash); 844 /* error out if we can't convert the character */
845 if (unlikely(charlen < 0))
846 return charlen;
847 hash = partial_name_hash(cifs_toupper(c), hash);
848 }
843 q->hash = end_name_hash(hash); 849 q->hash = end_name_hash(hash);
844 850
845 return 0; 851 return 0;
@@ -849,11 +855,47 @@ static int cifs_ci_compare(const struct dentry *parent, const struct dentry *den
849 unsigned int len, const char *str, const struct qstr *name) 855 unsigned int len, const char *str, const struct qstr *name)
850{ 856{
851 struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; 857 struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls;
858 wchar_t c1, c2;
859 int i, l1, l2;
852 860
853 if ((name->len == len) && 861 /*
854 (nls_strnicmp(codepage, name->name, str, len) == 0)) 862 * We make the assumption here that uppercase characters in the local
855 return 0; 863 * codepage are always the same length as their lowercase counterparts.
856 return 1; 864 *
865 * If that's ever not the case, then this will fail to match it.
866 */
867 if (name->len != len)
868 return 1;
869
870 for (i = 0; i < len; i += l1) {
871 /* Convert characters in both strings to UTF-16. */
872 l1 = codepage->char2uni(&str[i], len - i, &c1);
873 l2 = codepage->char2uni(&name->name[i], name->len - i, &c2);
874
875 /*
876 * If we can't convert either character, just declare it to
877 * be 1 byte long and compare the original byte.
878 */
879 if (unlikely(l1 < 0 && l2 < 0)) {
880 if (str[i] != name->name[i])
881 return 1;
882 l1 = 1;
883 continue;
884 }
885
886 /*
887 * Here, we again ass|u|me that upper/lowercase versions of
888 * a character are the same length in the local NLS.
889 */
890 if (l1 != l2)
891 return 1;
892
893 /* Now compare uppercase versions of these characters */
894 if (cifs_toupper(c1) != cifs_toupper(c2))
895 return 1;
896 }
897
898 return 0;
857} 899}
858 900
859const struct dentry_operations cifs_ci_dentry_ops = { 901const struct dentry_operations cifs_ci_dentry_ops = {
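The dir.c hunk above makes the case-insensitive dentry hash and compare walk each name one multibyte character at a time, converting through the mount's NLS codepage and uppercasing with the new cifs_toupper() helper, rather than lowercasing byte by byte. A rough user-space analogue of the comparison loop, with mbtowc() and towupper() standing in for codepage->char2uni() and cifs_toupper(), might look like this (a sketch under those substitutions, not the kernel code):

    #include <stdio.h>
    #include <locale.h>
    #include <stdlib.h>
    #include <wchar.h>
    #include <wctype.h>

    static int ci_compare(const char *a, const char *b,
                          size_t len_a, size_t len_b)
    {
            size_t i;
            wchar_t c1, c2;
            int l1, l2;

            if (len_a != len_b)
                    return 1;

            for (i = 0; i < len_a; i += l1) {
                    /* Decode one character from each string. */
                    l1 = mbtowc(&c1, &a[i], len_a - i);
                    l2 = mbtowc(&c2, &b[i], len_b - i);

                    /* Undecodable bytes fall back to a raw byte compare. */
                    if (l1 < 0 && l2 < 0) {
                            if (a[i] != b[i])
                                    return 1;
                            l1 = 1;
                            continue;
                    }
                    if (l1 != l2)
                            return 1;
                    if (towupper(c1) != towupper(c2))
                            return 1;
            }
            return 0;
    }

    int main(void)
    {
            setlocale(LC_ALL, "");
            printf("%d\n", ci_compare("Datei.TXT", "datei.txt", 9, 9)); /* 0 */
            printf("%d\n", ci_compare("a", "b", 1, 1));                 /* 1 */
            return 0;
    }

As in the kernel version, a pair of undecodable bytes falls back to a raw byte comparison, and a length mismatch between corresponding characters is treated as a non-match.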
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9d0dd952ad79..d044b35ce228 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -313,8 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
313 * If the server returned a read oplock and we have mandatory brlocks, 313 * If the server returned a read oplock and we have mandatory brlocks,
314 * set oplock level to None. 314 * set oplock level to None.
315 */ 315 */
316 if (oplock == server->vals->oplock_read && 316 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
317 cifs_has_mand_locks(cinode)) {
318 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 317 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
319 oplock = 0; 318 oplock = 0;
320 } 319 }
@@ -324,6 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
324 oplock = fid->pending_open->oplock; 323 oplock = fid->pending_open->oplock;
325 list_del(&fid->pending_open->olist); 324 list_del(&fid->pending_open->olist);
326 325
326 fid->purge_cache = false;
327 server->ops->set_fid(cfile, fid, oplock); 327 server->ops->set_fid(cfile, fid, oplock);
328 328
329 list_add(&cfile->tlist, &tcon->openFileList); 329 list_add(&cfile->tlist, &tcon->openFileList);
@@ -334,6 +334,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
334 list_add_tail(&cfile->flist, &cinode->openFileList); 334 list_add_tail(&cfile->flist, &cinode->openFileList);
335 spin_unlock(&cifs_file_list_lock); 335 spin_unlock(&cifs_file_list_lock);
336 336
337 if (fid->purge_cache)
338 cifs_invalidate_mapping(inode);
339
337 file->private_data = cfile; 340 file->private_data = cfile;
338 return cfile; 341 return cfile;
339} 342}
@@ -1524,12 +1527,12 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1524 * read won't conflict with non-overlapted locks due to 1527 * read won't conflict with non-overlapted locks due to
1525 * pagereading. 1528 * pagereading.
1526 */ 1529 */
1527 if (!CIFS_I(inode)->clientCanCacheAll && 1530 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1528 CIFS_I(inode)->clientCanCacheRead) { 1531 CIFS_CACHE_READ(CIFS_I(inode))) {
1529 cifs_invalidate_mapping(inode); 1532 cifs_invalidate_mapping(inode);
1530 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 1533 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1531 inode); 1534 inode);
1532 CIFS_I(inode)->clientCanCacheRead = false; 1535 CIFS_I(inode)->oplock = 0;
1533 } 1536 }
1534 1537
1535 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1538 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
@@ -2213,7 +2216,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2213 cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", 2216 cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2214 file->f_path.dentry->d_name.name, datasync); 2217 file->f_path.dentry->d_name.name, datasync);
2215 2218
2216 if (!CIFS_I(inode)->clientCanCacheRead) { 2219 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2217 rc = cifs_invalidate_mapping(inode); 2220 rc = cifs_invalidate_mapping(inode);
2218 if (rc) { 2221 if (rc) {
2219 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 2222 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
@@ -2577,7 +2580,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2577 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2580 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2578 ssize_t written; 2581 ssize_t written;
2579 2582
2580 if (cinode->clientCanCacheAll) { 2583 if (CIFS_CACHE_WRITE(cinode)) {
2581 if (cap_unix(tcon->ses) && 2584 if (cap_unix(tcon->ses) &&
2582 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 2585 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2583 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2586 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
@@ -2591,7 +2594,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2591 * these pages but not on the region from pos to ppos+len-1. 2594 * these pages but not on the region from pos to ppos+len-1.
2592 */ 2595 */
2593 written = cifs_user_writev(iocb, iov, nr_segs, pos); 2596 written = cifs_user_writev(iocb, iov, nr_segs, pos);
2594 if (written > 0 && cinode->clientCanCacheRead) { 2597 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2595 /* 2598 /*
2596 * Windows 7 server can delay breaking level2 oplock if a write 2599 * Windows 7 server can delay breaking level2 oplock if a write
2597 * request comes - break it on the client to prevent reading 2600 * request comes - break it on the client to prevent reading
@@ -2600,7 +2603,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2600 cifs_invalidate_mapping(inode); 2603 cifs_invalidate_mapping(inode);
2601 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", 2604 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2602 inode); 2605 inode);
2603 cinode->clientCanCacheRead = false; 2606 cinode->oplock = 0;
2604 } 2607 }
2605 return written; 2608 return written;
2606} 2609}
@@ -2957,7 +2960,7 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2957 * on pages affected by this read but not on the region from pos to 2960 * on pages affected by this read but not on the region from pos to
2958 * pos+len-1. 2961 * pos+len-1.
2959 */ 2962 */
2960 if (!cinode->clientCanCacheRead) 2963 if (!CIFS_CACHE_READ(cinode))
2961 return cifs_user_readv(iocb, iov, nr_segs, pos); 2964 return cifs_user_readv(iocb, iov, nr_segs, pos);
2962 2965
2963 if (cap_unix(tcon->ses) && 2966 if (cap_unix(tcon->ses) &&
@@ -3093,7 +3096,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3093 3096
3094 xid = get_xid(); 3097 xid = get_xid();
3095 3098
3096 if (!CIFS_I(inode)->clientCanCacheRead) { 3099 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3097 rc = cifs_invalidate_mapping(inode); 3100 rc = cifs_invalidate_mapping(inode);
3098 if (rc) 3101 if (rc)
3099 return rc; 3102 return rc;
@@ -3526,7 +3529,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
3526 * is, when the page lies beyond the EOF, or straddles the EOF 3529 * is, when the page lies beyond the EOF, or straddles the EOF
3527 * and the write will cover all of the existing data. 3530 * and the write will cover all of the existing data.
3528 */ 3531 */
3529 if (CIFS_I(mapping->host)->clientCanCacheRead) { 3532 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3530 i_size = i_size_read(mapping->host); 3533 i_size = i_size_read(mapping->host);
3531 if (page_start >= i_size || 3534 if (page_start >= i_size ||
3532 (offset == 0 && (pos + len) >= i_size)) { 3535 (offset == 0 && (pos + len) >= i_size)) {
@@ -3609,20 +3612,20 @@ void cifs_oplock_break(struct work_struct *work)
3609 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3612 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3610 int rc = 0; 3613 int rc = 0;
3611 3614
3612 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead && 3615 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3613 cifs_has_mand_locks(cinode)) { 3616 cifs_has_mand_locks(cinode)) {
3614 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 3617 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3615 inode); 3618 inode);
3616 cinode->clientCanCacheRead = false; 3619 cinode->oplock = 0;
3617 } 3620 }
3618 3621
3619 if (inode && S_ISREG(inode->i_mode)) { 3622 if (inode && S_ISREG(inode->i_mode)) {
3620 if (cinode->clientCanCacheRead) 3623 if (CIFS_CACHE_READ(cinode))
3621 break_lease(inode, O_RDONLY); 3624 break_lease(inode, O_RDONLY);
3622 else 3625 else
3623 break_lease(inode, O_WRONLY); 3626 break_lease(inode, O_WRONLY);
3624 rc = filemap_fdatawrite(inode->i_mapping); 3627 rc = filemap_fdatawrite(inode->i_mapping);
3625 if (cinode->clientCanCacheRead == 0) { 3628 if (!CIFS_CACHE_READ(cinode)) {
3626 rc = filemap_fdatawait(inode->i_mapping); 3629 rc = filemap_fdatawait(inode->i_mapping);
3627 mapping_set_error(inode->i_mapping, rc); 3630 mapping_set_error(inode->i_mapping, rc);
3628 cifs_invalidate_mapping(inode); 3631 cifs_invalidate_mapping(inode);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 449b6cf09b09..e3bb6477c83f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -101,7 +101,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
101 } 101 }
102 102
103 /* don't bother with revalidation if we have an oplock */ 103 /* don't bother with revalidation if we have an oplock */
104 if (cifs_i->clientCanCacheRead) { 104 if (CIFS_CACHE_READ(cifs_i)) {
105 cifs_dbg(FYI, "%s: inode %llu is oplocked\n", 105 cifs_dbg(FYI, "%s: inode %llu is oplocked\n",
106 __func__, cifs_i->uniqueid); 106 __func__, cifs_i->uniqueid);
107 return; 107 return;
@@ -549,6 +549,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
549 * when Unix extensions are disabled - fake it. 549 * when Unix extensions are disabled - fake it.
550 */ 550 */
551 fattr->cf_nlink = 2; 551 fattr->cf_nlink = 2;
552 } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
553 fattr->cf_mode = S_IFLNK;
554 fattr->cf_dtype = DT_LNK;
555 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
552 } else { 556 } else {
553 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; 557 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
554 fattr->cf_dtype = DT_REG; 558 fattr->cf_dtype = DT_REG;
@@ -646,7 +650,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
646 cifs_dbg(FYI, "Getting info on %s\n", full_path); 650 cifs_dbg(FYI, "Getting info on %s\n", full_path);
647 651
648 if ((data == NULL) && (*inode != NULL)) { 652 if ((data == NULL) && (*inode != NULL)) {
649 if (CIFS_I(*inode)->clientCanCacheRead) { 653 if (CIFS_CACHE_READ(CIFS_I(*inode))) {
650 cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); 654 cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
651 goto cgii_exit; 655 goto cgii_exit;
652 } 656 }
@@ -1657,7 +1661,7 @@ cifs_inode_needs_reval(struct inode *inode)
1657 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1661 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1658 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1662 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1659 1663
1660 if (cifs_i->clientCanCacheRead) 1664 if (CIFS_CACHE_READ(cifs_i))
1661 return false; 1665 return false;
1662 1666
1663 if (!lookupCacheEnabled) 1667 if (!lookupCacheEnabled)
@@ -1800,7 +1804,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1800 * We need to be sure that all dirty pages are written and the server 1804 * We need to be sure that all dirty pages are written and the server
1801 * has actual ctime, mtime and file length. 1805 * has actual ctime, mtime and file length.
1802 */ 1806 */
1803 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && 1807 if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping &&
1804 inode->i_mapping->nrpages != 0) { 1808 inode->i_mapping->nrpages != 0) {
1805 rc = filemap_fdatawait(inode->i_mapping); 1809 rc = filemap_fdatawait(inode->i_mapping);
1806 if (rc) { 1810 if (rc) {
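With the inode.c change above, a path whose DOS attributes include ATTR_REPARSE is now presented to the VFS as a symlink (S_IFLNK/DT_LNK) instead of a regular file, which is what lets the new query_symlink path be exercised for it. A compact stand-alone illustration of that attribute-to-mode mapping; the attribute values are the documented FILE_ATTRIBUTE_DIRECTORY and FILE_ATTRIBUTE_REPARSE_POINT bits, and the function name is invented for the example:

    #include <stdio.h>
    #include <sys/stat.h>

    #define DEMO_ATTR_DIRECTORY 0x0010  /* FILE_ATTRIBUTE_DIRECTORY */
    #define DEMO_ATTR_REPARSE   0x0400  /* FILE_ATTRIBUTE_REPARSE_POINT */

    /* Directories win, then reparse points map to symlinks, and everything
     * else is a regular file, matching the precedence the hunk above adds
     * to cifs_all_info_to_fattr(). */
    static mode_t dos_attrs_to_type(unsigned int attrs)
    {
            if (attrs & DEMO_ATTR_DIRECTORY)
                    return S_IFDIR;
            if (attrs & DEMO_ATTR_REPARSE)
                    return S_IFLNK;
            return S_IFREG;
    }

    int main(void)
    {
            printf("0x%x -> %s\n", DEMO_ATTR_REPARSE,
                   dos_attrs_to_type(DEMO_ATTR_REPARSE) == S_IFLNK ?
                   "symlink" : "other");
            printf("0x%x -> %s\n", DEMO_ATTR_DIRECTORY,
                   dos_attrs_to_type(DEMO_ATTR_DIRECTORY) == S_IFDIR ?
                   "directory" : "other");
            return 0;
    }

The matching readdir.c hunk applies the same mapping when filling directory entries, so reparse points show up as symlinks in listings as well.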
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 562044f700e5..7e36ceba0c7a 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -509,6 +509,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
509 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 509 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
510 struct tcon_link *tlink = NULL; 510 struct tcon_link *tlink = NULL;
511 struct cifs_tcon *tcon; 511 struct cifs_tcon *tcon;
512 struct TCP_Server_Info *server;
512 513
513 xid = get_xid(); 514 xid = get_xid();
514 515
@@ -519,25 +520,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
519 goto out; 520 goto out;
520 } 521 }
521 tcon = tlink_tcon(tlink); 522 tcon = tlink_tcon(tlink);
522 523 server = tcon->ses->server;
523 /*
524 * For now, we just handle symlinks with unix extensions enabled.
525 * Eventually we should handle NTFS reparse points, and MacOS
526 * symlink support. For instance...
527 *
528 * rc = CIFSSMBQueryReparseLinkInfo(...)
529 *
530 * For now, just return -EACCES when the server doesn't support posix
531 * extensions. Note that we still allow querying symlinks when posix
532 * extensions are manually disabled. We could disable these as well
533 * but there doesn't seem to be any harm in allowing the client to
534 * read them.
535 */
536 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) &&
537 !cap_unix(tcon->ses)) {
538 rc = -EACCES;
539 goto out;
540 }
541 524
542 full_path = build_path_from_dentry(direntry); 525 full_path = build_path_from_dentry(direntry);
543 if (!full_path) 526 if (!full_path)
@@ -559,6 +542,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
559 if ((rc != 0) && cap_unix(tcon->ses)) 542 if ((rc != 0) && cap_unix(tcon->ses))
560 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, 543 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
561 cifs_sb->local_nls); 544 cifs_sb->local_nls);
545 else if (rc != 0 && server->ops->query_symlink)
546 rc = server->ops->query_symlink(xid, tcon, full_path,
547 &target_path, cifs_sb);
562 548
563 kfree(full_path); 549 kfree(full_path);
564out: 550out:
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index f7d4b2285efe..138a011633fe 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -105,6 +105,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
105 } 105 }
106 kfree(buf_to_free->user_name); 106 kfree(buf_to_free->user_name);
107 kfree(buf_to_free->domainName); 107 kfree(buf_to_free->domainName);
108 kfree(buf_to_free->auth_key.response);
108 kfree(buf_to_free); 109 kfree(buf_to_free);
109} 110}
110 111
@@ -545,19 +546,15 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
545 oplock &= 0xF; 546 oplock &= 0xF;
546 547
547 if (oplock == OPLOCK_EXCLUSIVE) { 548 if (oplock == OPLOCK_EXCLUSIVE) {
548 cinode->clientCanCacheAll = true; 549 cinode->oplock = CIFS_CACHE_WRITE_FLG | CIFS_CACHE_READ_FLG;
549 cinode->clientCanCacheRead = true;
550 cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", 550 cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
551 &cinode->vfs_inode); 551 &cinode->vfs_inode);
552 } else if (oplock == OPLOCK_READ) { 552 } else if (oplock == OPLOCK_READ) {
553 cinode->clientCanCacheAll = false; 553 cinode->oplock = CIFS_CACHE_READ_FLG;
554 cinode->clientCanCacheRead = true;
555 cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", 554 cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
556 &cinode->vfs_inode); 555 &cinode->vfs_inode);
557 } else { 556 } else
558 cinode->clientCanCacheAll = false; 557 cinode->oplock = 0;
559 cinode->clientCanCacheRead = false;
560 }
561} 558}
562 559
563bool 560bool
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 69d2c826a23b..42ef03be089f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -172,6 +172,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
172 if (cifs_dfs_is_possible(cifs_sb) && 172 if (cifs_dfs_is_possible(cifs_sb) &&
173 (fattr->cf_cifsattrs & ATTR_REPARSE)) 173 (fattr->cf_cifsattrs & ATTR_REPARSE))
174 fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; 174 fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
175 } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
176 fattr->cf_mode = S_IFLNK;
177 fattr->cf_dtype = DT_LNK;
175 } else { 178 } else {
176 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; 179 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
177 fattr->cf_dtype = DT_REG; 180 fattr->cf_dtype = DT_REG;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 08dd37bb23aa..5f99b7f19e78 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -226,7 +226,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
226 *(bcc_ptr+1) = 0; 226 *(bcc_ptr+1) = 0;
227 } else { 227 } else {
228 bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name, 228 bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name,
229 MAX_USERNAME_SIZE, nls_cp); 229 CIFS_MAX_USERNAME_LEN, nls_cp);
230 } 230 }
231 bcc_ptr += 2 * bytes_ret; 231 bcc_ptr += 2 * bytes_ret;
232 bcc_ptr += 2; /* account for null termination */ 232 bcc_ptr += 2; /* account for null termination */
@@ -246,8 +246,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
246 /* BB what about null user mounts - check that we do this BB */ 246 /* BB what about null user mounts - check that we do this BB */
247 /* copy user */ 247 /* copy user */
248 if (ses->user_name != NULL) { 248 if (ses->user_name != NULL) {
249 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); 249 strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN);
250 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); 250 bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
251 } 251 }
252 /* else null user mount */ 252 /* else null user mount */
253 *bcc_ptr = 0; 253 *bcc_ptr = 0;
@@ -428,7 +428,8 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
428 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 428 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
429 if (ses->server->sign) { 429 if (ses->server->sign) {
430 flags |= NTLMSSP_NEGOTIATE_SIGN; 430 flags |= NTLMSSP_NEGOTIATE_SIGN;
431 if (!ses->server->session_estab) 431 if (!ses->server->session_estab ||
432 ses->ntlmssp->sesskey_per_smbsess)
432 flags |= NTLMSSP_NEGOTIATE_KEY_XCH; 433 flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
433 } 434 }
434 435
@@ -466,7 +467,8 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
466 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 467 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
467 if (ses->server->sign) { 468 if (ses->server->sign) {
468 flags |= NTLMSSP_NEGOTIATE_SIGN; 469 flags |= NTLMSSP_NEGOTIATE_SIGN;
469 if (!ses->server->session_estab) 470 if (!ses->server->session_estab ||
471 ses->ntlmssp->sesskey_per_smbsess)
470 flags |= NTLMSSP_NEGOTIATE_KEY_XCH; 472 flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
471 } 473 }
472 474
@@ -501,7 +503,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
501 } else { 503 } else {
502 int len; 504 int len;
503 len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, 505 len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
504 MAX_USERNAME_SIZE, nls_cp); 506 CIFS_MAX_USERNAME_LEN, nls_cp);
505 len *= 2; /* unicode is 2 bytes each */ 507 len *= 2; /* unicode is 2 bytes each */
506 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 508 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
507 sec_blob->DomainName.Length = cpu_to_le16(len); 509 sec_blob->DomainName.Length = cpu_to_le16(len);
@@ -517,7 +519,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
517 } else { 519 } else {
518 int len; 520 int len;
519 len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, 521 len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
520 MAX_USERNAME_SIZE, nls_cp); 522 CIFS_MAX_USERNAME_LEN, nls_cp);
521 len *= 2; /* unicode is 2 bytes each */ 523 len *= 2; /* unicode is 2 bytes each */
522 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 524 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
523 sec_blob->UserName.Length = cpu_to_le16(len); 525 sec_blob->UserName.Length = cpu_to_le16(len);
@@ -629,7 +631,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
629 type = select_sectype(ses->server, ses->sectype); 631 type = select_sectype(ses->server, ses->sectype);
630 cifs_dbg(FYI, "sess setup type %d\n", type); 632 cifs_dbg(FYI, "sess setup type %d\n", type);
631 if (type == Unspecified) { 633 if (type == Unspecified) {
632 cifs_dbg(VFS, "Unable to select appropriate authentication method!"); 634 cifs_dbg(VFS,
635 "Unable to select appropriate authentication method!");
633 return -EINVAL; 636 return -EINVAL;
634 } 637 }
635 638
@@ -640,6 +643,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
640 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); 643 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
641 if (!ses->ntlmssp) 644 if (!ses->ntlmssp)
642 return -ENOMEM; 645 return -ENOMEM;
646 ses->ntlmssp->sesskey_per_smbsess = false;
647
643 } 648 }
644 649
645ssetup_ntlmssp_authenticate: 650ssetup_ntlmssp_authenticate:
@@ -815,8 +820,9 @@ ssetup_ntlmssp_authenticate:
815 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, 820 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
816 GFP_KERNEL); 821 GFP_KERNEL);
817 if (!ses->auth_key.response) { 822 if (!ses->auth_key.response) {
818 cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", 823 cifs_dbg(VFS,
819 msg->sesskey_len); 824 "Kerberos can't allocate (%u bytes) memory",
825 msg->sesskey_len);
820 rc = -ENOMEM; 826 rc = -ENOMEM;
821 goto ssetup_exit; 827 goto ssetup_exit;
822 } 828 }
@@ -1005,5 +1011,37 @@ ssetup_exit:
1005 if ((phase == NtLmChallenge) && (rc == 0)) 1011 if ((phase == NtLmChallenge) && (rc == 0))
1006 goto ssetup_ntlmssp_authenticate; 1012 goto ssetup_ntlmssp_authenticate;
1007 1013
1014 if (!rc) {
1015 mutex_lock(&ses->server->srv_mutex);
1016 if (!ses->server->session_estab) {
1017 if (ses->server->sign) {
1018 ses->server->session_key.response =
1019 kmemdup(ses->auth_key.response,
1020 ses->auth_key.len, GFP_KERNEL);
1021 if (!ses->server->session_key.response) {
1022 rc = -ENOMEM;
1023 mutex_unlock(&ses->server->srv_mutex);
1024 goto keycp_exit;
1025 }
1026 ses->server->session_key.len =
1027 ses->auth_key.len;
1028 }
1029 ses->server->sequence_number = 0x2;
1030 ses->server->session_estab = true;
1031 }
1032 mutex_unlock(&ses->server->srv_mutex);
1033
1034 cifs_dbg(FYI, "CIFS session established successfully\n");
1035 spin_lock(&GlobalMid_Lock);
1036 ses->status = CifsGood;
1037 ses->need_reconnect = false;
1038 spin_unlock(&GlobalMid_Lock);
1039 }
1040
1041keycp_exit:
1042 kfree(ses->auth_key.response);
1043 ses->auth_key.response = NULL;
1044 kfree(ses->ntlmssp);
1045
1008 return rc; 1046 return rc;
1009} 1047}
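
Note on the new tail of CIFS_SessSetup() above: only the first session established on a socket copies its authentication key into server->session_key (classic SMB1 signing keys the whole connection from that first session), and the per-session auth_key and ntlmssp state are always freed before returning. A condensed sketch of the pattern; the kmemdup error path and the GlobalMid_Lock around the status update are trimmed here but present in the hunk:

	if (!rc) {
		mutex_lock(&ses->server->srv_mutex);
		if (!ses->server->session_estab) {
			if (ses->server->sign)	/* first signed session supplies the key */
				ses->server->session_key.response =
					kmemdup(ses->auth_key.response,
						ses->auth_key.len, GFP_KERNEL);
			ses->server->sequence_number = 0x2;
			ses->server->session_estab = true;
		}
		mutex_unlock(&ses->server->srv_mutex);
		ses->status = CifsGood;
		ses->need_reconnect = false;
	}
	kfree(ses->auth_key.response);	/* per-session copy no longer needed here */
	ses->auth_key.response = NULL;
	kfree(ses->ntlmssp);
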
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 60943978aec3..8233b174de3d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -700,7 +700,7 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
700 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 700 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
701 cfile->fid.netfid = fid->netfid; 701 cfile->fid.netfid = fid->netfid;
702 cifs_set_oplock_level(cinode, oplock); 702 cifs_set_oplock_level(cinode, oplock);
703 cinode->can_cache_brlcks = cinode->clientCanCacheAll; 703 cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
704} 704}
705 705
706static void 706static void
@@ -837,7 +837,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
837{ 837{
838 return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, 838 return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
839 LOCKING_ANDX_OPLOCK_RELEASE, false, 839 LOCKING_ANDX_OPLOCK_RELEASE, false,
840 cinode->clientCanCacheRead ? 1 : 0); 840 CIFS_CACHE_READ(cinode) ? 1 : 0);
841} 841}
842 842
843static int 843static int
@@ -881,6 +881,43 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
881 (__u8)type, wait, 0); 881 (__u8)type, wait, 0);
882} 882}
883 883
884static int
885cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
886 const char *full_path, char **target_path,
887 struct cifs_sb_info *cifs_sb)
888{
889 int rc;
890 int oplock = 0;
891 __u16 netfid;
892
893 cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
894
895 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
896 FILE_READ_ATTRIBUTES, OPEN_REPARSE_POINT, &netfid,
897 &oplock, NULL, cifs_sb->local_nls,
898 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
899 if (rc)
900 return rc;
901
902 rc = CIFSSMBQuerySymLink(xid, tcon, netfid, target_path,
903 cifs_sb->local_nls);
904 if (rc) {
905 CIFSSMBClose(xid, tcon, netfid);
906 return rc;
907 }
908
909 convert_delimiter(*target_path, '/');
910 CIFSSMBClose(xid, tcon, netfid);
911 cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
912 return rc;
913}
914
915static bool
916cifs_is_read_op(__u32 oplock)
917{
918 return oplock == OPLOCK_READ;
919}
920
884struct smb_version_operations smb1_operations = { 921struct smb_version_operations smb1_operations = {
885 .send_cancel = send_nt_cancel, 922 .send_cancel = send_nt_cancel,
886 .compare_fids = cifs_compare_fids, 923 .compare_fids = cifs_compare_fids,
@@ -927,6 +964,7 @@ struct smb_version_operations smb1_operations = {
927 .rename_pending_delete = cifs_rename_pending_delete, 964 .rename_pending_delete = cifs_rename_pending_delete,
928 .rename = CIFSSMBRename, 965 .rename = CIFSSMBRename,
929 .create_hardlink = CIFSCreateHardLink, 966 .create_hardlink = CIFSCreateHardLink,
967 .query_symlink = cifs_query_symlink,
930 .open = cifs_open_file, 968 .open = cifs_open_file,
931 .set_fid = cifs_set_fid, 969 .set_fid = cifs_set_fid,
932 .close = cifs_close_file, 970 .close = cifs_close_file,
@@ -945,6 +983,7 @@ struct smb_version_operations smb1_operations = {
945 .mand_unlock_range = cifs_unlock_range, 983 .mand_unlock_range = cifs_unlock_range,
946 .push_mand_locks = cifs_push_mandatory_locks, 984 .push_mand_locks = cifs_push_mandatory_locks,
947 .query_mf_symlink = open_query_close_cifs_symlink, 985 .query_mf_symlink = open_query_close_cifs_symlink,
986 .is_read_op = cifs_is_read_op,
948}; 987};
949 988
950struct smb_version_values smb1_values = { 989struct smb_version_values smb1_values = {
@@ -960,7 +999,6 @@ struct smb_version_values smb1_values = {
960 .cap_unix = CAP_UNIX, 999 .cap_unix = CAP_UNIX,
961 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, 1000 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
962 .cap_large_files = CAP_LARGE_FILES, 1001 .cap_large_files = CAP_LARGE_FILES,
963 .oplock_read = OPLOCK_READ,
964 .signing_enabled = SECMODE_SIGN_ENABLED, 1002 .signing_enabled = SECMODE_SIGN_ENABLED,
965 .signing_required = SECMODE_SIGN_REQUIRED, 1003 .signing_required = SECMODE_SIGN_REQUIRED,
966}; 1004};
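
Two new SMB1 methods replace open-coded logic elsewhere: cifs_query_symlink() resolves a symlink target with an open/query/close round trip, and cifs_is_read_op() takes over the comparisons that used to test the now-removed vals->oplock_read field. A sketch of how dialect-independent code is expected to reach the new hook; get_symlink_target() is an illustrative wrapper, the real call sites live in the VFS-facing layers:

	static int get_symlink_target(unsigned int xid, struct cifs_tcon *tcon,
				      const char *full_path, char **target,
				      struct cifs_sb_info *cifs_sb)
	{
		struct TCP_Server_Info *server = tcon->ses->server;

		if (!server->ops->query_symlink)
			return -EOPNOTSUPP;
		return server->ops->query_symlink(xid, tcon, full_path,
						  target, cifs_sb);
	}
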
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 04a81a4142c3..3f17b4550831 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -34,29 +34,6 @@
34#include "fscache.h" 34#include "fscache.h"
35#include "smb2proto.h" 35#include "smb2proto.h"
36 36
37void
38smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
39{
40 oplock &= 0xFF;
41 if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
42 return;
43 if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
44 oplock == SMB2_OPLOCK_LEVEL_BATCH) {
45 cinode->clientCanCacheAll = true;
46 cinode->clientCanCacheRead = true;
47 cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
48 &cinode->vfs_inode);
49 } else if (oplock == SMB2_OPLOCK_LEVEL_II) {
50 cinode->clientCanCacheAll = false;
51 cinode->clientCanCacheRead = true;
52 cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
53 &cinode->vfs_inode);
54 } else {
55 cinode->clientCanCacheAll = false;
56 cinode->clientCanCacheRead = false;
57 }
58}
59
60int 37int
61smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, 38smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
62 __u32 *oplock, FILE_ALL_INFO *buf) 39 __u32 *oplock, FILE_ALL_INFO *buf)
@@ -86,7 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
86 if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) 63 if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
87 memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); 64 memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
88 65
89 rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data); 66 rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL);
90 if (rc) 67 if (rc)
91 goto out; 68 goto out;
92 69
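
SMB2_open() grows a trailing struct smb2_err_rsp ** argument so a caller can keep the server's error response; every pre-existing caller, as in smb2_open_file() here, simply passes NULL. The one consumer of the new argument is smb2_query_symlink() in smb2ops.c below, which needs the STATUS_STOPPED_ON_SYMLINK payload:

	/* existing callers that do not need the error payload: */
	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);

	/* smb2_query_symlink() asks for it instead: */
	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf);
	/* on failure, err_buf (if non-NULL) points at a kmemdup of the
	   server's error response, from which the symlink target is read */
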
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index c6ec1633309a..78ff88c467b9 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -60,7 +60,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
60 oparms.fid = &fid; 60 oparms.fid = &fid;
61 oparms.reconnect = false; 61 oparms.reconnect = false;
62 62
63 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); 63 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
64 if (rc) { 64 if (rc) {
65 kfree(utf16_path); 65 kfree(utf16_path);
66 return rc; 66 return rc;
@@ -136,7 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
136 return -ENOMEM; 136 return -ENOMEM;
137 137
138 rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, 138 rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
139 FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data, 139 FILE_READ_ATTRIBUTES, FILE_OPEN,
140 OPEN_REPARSE_POINT, smb2_data,
140 SMB2_OP_QUERY_INFO); 141 SMB2_OP_QUERY_INFO);
141 if (rc) 142 if (rc)
142 goto out; 143 goto out;
@@ -191,8 +192,8 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
191 struct cifs_sb_info *cifs_sb) 192 struct cifs_sb_info *cifs_sb)
192{ 193{
193 return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, 194 return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
194 CREATE_DELETE_ON_CLOSE, NULL, 195 CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
195 SMB2_OP_DELETE); 196 NULL, SMB2_OP_DELETE);
196} 197}
197 198
198static int 199static int
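
The query-info and unlink paths now pass OPEN_REPARSE_POINT in the create options, so the lookup and delete operate on the reparse point (symlink) itself rather than being redirected to, or blocked on, its target. The flag is simply OR-ed into the existing options, as in the unlink case above:

	rc = smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
				CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
				NULL, SMB2_OP_DELETE);
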
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b0c43345cd98..fb3966265b6e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -171,6 +171,10 @@ smb2_check_message(char *buf, unsigned int length)
171 if (4 + len != clc_len) { 171 if (4 + len != clc_len) {
172 cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n", 172 cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
173 clc_len, 4 + len, mid); 173 clc_len, 4 + len, mid);
174 /* create failed on symlink */
175 if (command == SMB2_CREATE_HE &&
176 hdr->Status == STATUS_STOPPED_ON_SYMLINK)
177 return 0;
174 /* Windows 7 server returns 24 bytes more */ 178 /* Windows 7 server returns 24 bytes more */
175 if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) 179 if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
176 return 0; 180 return 0;
@@ -376,23 +380,15 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
376__le32 380__le32
377smb2_get_lease_state(struct cifsInodeInfo *cinode) 381smb2_get_lease_state(struct cifsInodeInfo *cinode)
378{ 382{
379 if (cinode->clientCanCacheAll) 383 __le32 lease = 0;
380 return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; 384
381 else if (cinode->clientCanCacheRead) 385 if (CIFS_CACHE_WRITE(cinode))
382 return SMB2_LEASE_READ_CACHING; 386 lease |= SMB2_LEASE_WRITE_CACHING;
383 return 0; 387 if (CIFS_CACHE_HANDLE(cinode))
384} 388 lease |= SMB2_LEASE_HANDLE_CACHING;
385 389 if (CIFS_CACHE_READ(cinode))
386__u8 smb2_map_lease_to_oplock(__le32 lease_state) 390 lease |= SMB2_LEASE_READ_CACHING;
387{ 391 return lease;
388 if (lease_state & SMB2_LEASE_WRITE_CACHING) {
389 if (lease_state & SMB2_LEASE_HANDLE_CACHING)
390 return SMB2_OPLOCK_LEVEL_BATCH;
391 else
392 return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
393 } else if (lease_state & SMB2_LEASE_READ_CACHING)
394 return SMB2_OPLOCK_LEVEL_II;
395 return 0;
396} 392}
397 393
398struct smb2_lease_break_work { 394struct smb2_lease_break_work {
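
The clientCanCacheAll/clientCanCacheRead booleans are replaced across this series by a single cinode->oplock bitmask tested with the CIFS_CACHE_READ/HANDLE/WRITE() helpers, which is what lets handle caching be advertised at all. smb2_get_lease_state() now just translates those bits into lease-state flags, and the removed smb2_map_lease_to_oplock() is superseded by the per-dialect parse_lease_buf()/set_oplock_level() hooks added in smb2ops.c. Restated from the hunk above for clarity:

	__le32 lease = 0;

	if (CIFS_CACHE_WRITE(cinode))
		lease |= SMB2_LEASE_WRITE_CACHING;
	if (CIFS_CACHE_HANDLE(cinode))
		lease |= SMB2_LEASE_HANDLE_CACHING;
	if (CIFS_CACHE_READ(cinode))
		lease |= SMB2_LEASE_READ_CACHING;
	return lease;
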
@@ -417,96 +413,109 @@ cifs_ses_oplock_break(struct work_struct *work)
417} 413}
418 414
419static bool 415static bool
420smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) 416smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
417 struct smb2_lease_break_work *lw)
421{ 418{
422 struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; 419 bool found;
423 struct list_head *tmp, *tmp1, *tmp2; 420 __u8 lease_state;
424 struct cifs_ses *ses; 421 struct list_head *tmp;
425 struct cifs_tcon *tcon;
426 struct cifsInodeInfo *cinode;
427 struct cifsFileInfo *cfile; 422 struct cifsFileInfo *cfile;
423 struct TCP_Server_Info *server = tcon->ses->server;
428 struct cifs_pending_open *open; 424 struct cifs_pending_open *open;
429 struct smb2_lease_break_work *lw; 425 struct cifsInodeInfo *cinode;
430 bool found;
431 int ack_req = le32_to_cpu(rsp->Flags & 426 int ack_req = le32_to_cpu(rsp->Flags &
432 SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); 427 SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
433 428
434 lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); 429 lease_state = le32_to_cpu(rsp->NewLeaseState);
435 if (!lw)
436 return false;
437 430
438 INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); 431 list_for_each(tmp, &tcon->openFileList) {
439 lw->lease_state = rsp->NewLeaseState; 432 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
433 cinode = CIFS_I(cfile->dentry->d_inode);
440 434
441 cifs_dbg(FYI, "Checking for lease break\n"); 435 if (memcmp(cinode->lease_key, rsp->LeaseKey,
436 SMB2_LEASE_KEY_SIZE))
437 continue;
442 438
443 /* look up tcon based on tid & uid */ 439 cifs_dbg(FYI, "found in the open list\n");
444 spin_lock(&cifs_tcp_ses_lock); 440 cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
445 list_for_each(tmp, &server->smb_ses_list) { 441 le32_to_cpu(rsp->NewLeaseState));
446 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
447 442
448 spin_lock(&cifs_file_list_lock); 443 server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
449 list_for_each(tmp1, &ses->tcon_list) {
450 tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
451 444
452 cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); 445 if (ack_req)
453 list_for_each(tmp2, &tcon->openFileList) { 446 cfile->oplock_break_cancelled = false;
454 cfile = list_entry(tmp2, struct cifsFileInfo, 447 else
455 tlist); 448 cfile->oplock_break_cancelled = true;
456 cinode = CIFS_I(cfile->dentry->d_inode);
457 449
458 if (memcmp(cinode->lease_key, rsp->LeaseKey, 450 queue_work(cifsiod_wq, &cfile->oplock_break);
459 SMB2_LEASE_KEY_SIZE)) 451 kfree(lw);
460 continue; 452 return true;
453 }
461 454
462 cifs_dbg(FYI, "found in the open list\n"); 455 found = false;
463 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 456 list_for_each_entry(open, &tcon->pending_opens, olist) {
464 le32_to_cpu(rsp->NewLeaseState)); 457 if (memcmp(open->lease_key, rsp->LeaseKey,
458 SMB2_LEASE_KEY_SIZE))
459 continue;
460
461 if (!found && ack_req) {
462 found = true;
463 memcpy(lw->lease_key, open->lease_key,
464 SMB2_LEASE_KEY_SIZE);
465 lw->tlink = cifs_get_tlink(open->tlink);
466 queue_work(cifsiod_wq, &lw->lease_break);
467 }
465 468
466 smb2_set_oplock_level(cinode, 469 cifs_dbg(FYI, "found in the pending open list\n");
467 smb2_map_lease_to_oplock(rsp->NewLeaseState)); 470 cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
471 le32_to_cpu(rsp->NewLeaseState));
468 472
469 if (ack_req) 473 open->oplock = lease_state;
470 cfile->oplock_break_cancelled = false; 474 }
471 else 475 return found;
472 cfile->oplock_break_cancelled = true; 476}
473 477
474 queue_work(cifsiod_wq, &cfile->oplock_break); 478static bool
479smb2_is_valid_lease_break(char *buffer)
480{
481 struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
482 struct list_head *tmp, *tmp1, *tmp2;
483 struct TCP_Server_Info *server;
484 struct cifs_ses *ses;
485 struct cifs_tcon *tcon;
486 struct smb2_lease_break_work *lw;
475 487
476 spin_unlock(&cifs_file_list_lock); 488 lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
477 spin_unlock(&cifs_tcp_ses_lock); 489 if (!lw)
478 return true; 490 return false;
479 }
480 491
481 found = false; 492 INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
482 list_for_each_entry(open, &tcon->pending_opens, olist) { 493 lw->lease_state = rsp->NewLeaseState;
483 if (memcmp(open->lease_key, rsp->LeaseKey,
484 SMB2_LEASE_KEY_SIZE))
485 continue;
486 494
487 if (!found && ack_req) { 495 cifs_dbg(FYI, "Checking for lease break\n");
488 found = true;
489 memcpy(lw->lease_key, open->lease_key,
490 SMB2_LEASE_KEY_SIZE);
491 lw->tlink = cifs_get_tlink(open->tlink);
492 queue_work(cifsiod_wq,
493 &lw->lease_break);
494 }
495 496
496 cifs_dbg(FYI, "found in the pending open list\n"); 497 /* look up tcon based on tid & uid */
497 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 498 spin_lock(&cifs_tcp_ses_lock);
498 le32_to_cpu(rsp->NewLeaseState)); 499 list_for_each(tmp, &cifs_tcp_ses_list) {
500 server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list);
499 501
500 open->oplock = 502 list_for_each(tmp1, &server->smb_ses_list) {
501 smb2_map_lease_to_oplock(rsp->NewLeaseState); 503 ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
502 } 504
503 if (found) { 505 spin_lock(&cifs_file_list_lock);
504 spin_unlock(&cifs_file_list_lock); 506 list_for_each(tmp2, &ses->tcon_list) {
505 spin_unlock(&cifs_tcp_ses_lock); 507 tcon = list_entry(tmp2, struct cifs_tcon,
506 return true; 508 tcon_list);
509 cifs_stats_inc(
510 &tcon->stats.cifs_stats.num_oplock_brks);
511 if (smb2_tcon_has_lease(tcon, rsp, lw)) {
512 spin_unlock(&cifs_file_list_lock);
513 spin_unlock(&cifs_tcp_ses_lock);
514 return true;
515 }
507 } 516 }
517 spin_unlock(&cifs_file_list_lock);
508 } 518 }
509 spin_unlock(&cifs_file_list_lock);
510 } 519 }
511 spin_unlock(&cifs_tcp_ses_lock); 520 spin_unlock(&cifs_tcp_ses_lock);
512 kfree(lw); 521 kfree(lw);
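
The lease-break handler is split in two: smb2_is_valid_lease_break() now walks every connected server on cifs_tcp_ses_list rather than only the socket the break arrived on, and the per-tcon work (matching the lease key against open files and pending opens, applying the new state through server->ops->set_oplock_level(), queuing the break work) moves into smb2_tcon_has_lease(). The traversal reduces to the following shape; locking, the oplock-break statistics bump and the pending-open details are elided:

	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list)
		list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list)
			list_for_each_entry(tcon, &ses->tcon_list, tcon_list)
				if (smb2_tcon_has_lease(tcon, rsp, lw))
					return true;
	kfree(lw);	/* no lease matched: drop the preallocated work item */
	return false;
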
@@ -532,7 +541,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
532 if (rsp->StructureSize != 541 if (rsp->StructureSize !=
533 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { 542 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
534 if (le16_to_cpu(rsp->StructureSize) == 44) 543 if (le16_to_cpu(rsp->StructureSize) == 44)
535 return smb2_is_valid_lease_break(buffer, server); 544 return smb2_is_valid_lease_break(buffer);
536 else 545 else
537 return false; 546 return false;
538 } 547 }
@@ -560,14 +569,15 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
560 cifs_dbg(FYI, "file id match, oplock break\n"); 569 cifs_dbg(FYI, "file id match, oplock break\n");
561 cinode = CIFS_I(cfile->dentry->d_inode); 570 cinode = CIFS_I(cfile->dentry->d_inode);
562 571
563 if (!cinode->clientCanCacheAll && 572 if (!CIFS_CACHE_WRITE(cinode) &&
564 rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) 573 rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
565 cfile->oplock_break_cancelled = true; 574 cfile->oplock_break_cancelled = true;
566 else 575 else
567 cfile->oplock_break_cancelled = false; 576 cfile->oplock_break_cancelled = false;
568 577
569 smb2_set_oplock_level(cinode, 578 server->ops->set_oplock_level(cinode,
570 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); 579 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
580 0, NULL);
571 581
572 queue_work(cifsiod_wq, &cfile->oplock_break); 582 queue_work(cifsiod_wq, &cfile->oplock_break);
573 583
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f259e6cc8357..861b33214144 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -24,6 +24,7 @@
24#include "smb2proto.h" 24#include "smb2proto.h"
25#include "cifsproto.h" 25#include "cifsproto.h"
26#include "cifs_debug.h" 26#include "cifs_debug.h"
27#include "cifs_unicode.h"
27#include "smb2status.h" 28#include "smb2status.h"
28#include "smb2glob.h" 29#include "smb2glob.h"
29 30
@@ -229,7 +230,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
229 oparms.fid = &fid; 230 oparms.fid = &fid;
230 oparms.reconnect = false; 231 oparms.reconnect = false;
231 232
232 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); 233 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
233 if (rc) { 234 if (rc) {
234 kfree(utf16_path); 235 kfree(utf16_path);
235 return rc; 236 return rc;
@@ -376,10 +377,13 @@ static void
376smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) 377smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
377{ 378{
378 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 379 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
380 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
381
379 cfile->fid.persistent_fid = fid->persistent_fid; 382 cfile->fid.persistent_fid = fid->persistent_fid;
380 cfile->fid.volatile_fid = fid->volatile_fid; 383 cfile->fid.volatile_fid = fid->volatile_fid;
381 smb2_set_oplock_level(cinode, oplock); 384 server->ops->set_oplock_level(cinode, oplock, fid->epoch,
382 cinode->can_cache_brlcks = cinode->clientCanCacheAll; 385 &fid->purge_cache);
386 cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
383} 387}
384 388
385static void 389static void
@@ -463,7 +467,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
463 oparms.fid = fid; 467 oparms.fid = fid;
464 oparms.reconnect = false; 468 oparms.reconnect = false;
465 469
466 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); 470 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
467 kfree(utf16_path); 471 kfree(utf16_path);
468 if (rc) { 472 if (rc) {
469 cifs_dbg(VFS, "open dir failed\n"); 473 cifs_dbg(VFS, "open dir failed\n");
@@ -530,7 +534,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
530 534
531 return SMB2_oplock_break(0, tcon, fid->persistent_fid, 535 return SMB2_oplock_break(0, tcon, fid->persistent_fid,
532 fid->volatile_fid, 536 fid->volatile_fid,
533 cinode->clientCanCacheRead ? 1 : 0); 537 CIFS_CACHE_READ(cinode) ? 1 : 0);
534} 538}
535 539
536static int 540static int
@@ -550,7 +554,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
550 oparms.fid = &fid; 554 oparms.fid = &fid;
551 oparms.reconnect = false; 555 oparms.reconnect = false;
552 556
553 rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL); 557 rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
554 if (rc) 558 if (rc)
555 return rc; 559 return rc;
556 buf->f_type = SMB2_MAGIC_NUMBER; 560 buf->f_type = SMB2_MAGIC_NUMBER;
@@ -596,7 +600,245 @@ smb2_new_lease_key(struct cifs_fid *fid)
596 get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); 600 get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
597} 601}
598 602
599struct smb_version_operations smb21_operations = { 603static int
604smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
605 const char *full_path, char **target_path,
606 struct cifs_sb_info *cifs_sb)
607{
608 int rc;
609 __le16 *utf16_path;
610 __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
611 struct cifs_open_parms oparms;
612 struct cifs_fid fid;
613 struct smb2_err_rsp *err_buf = NULL;
614 struct smb2_symlink_err_rsp *symlink;
615 unsigned int sub_len, sub_offset;
616
617 cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
618
619 utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
620 if (!utf16_path)
621 return -ENOMEM;
622
623 oparms.tcon = tcon;
624 oparms.desired_access = FILE_READ_ATTRIBUTES;
625 oparms.disposition = FILE_OPEN;
626 oparms.create_options = 0;
627 oparms.fid = &fid;
628 oparms.reconnect = false;
629
630 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf);
631
632 if (!rc || !err_buf) {
633 kfree(utf16_path);
634 return -ENOENT;
635 }
636 /* open must fail on symlink - reset rc */
637 rc = 0;
638 symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData;
639 sub_len = le16_to_cpu(symlink->SubstituteNameLength);
640 sub_offset = le16_to_cpu(symlink->SubstituteNameOffset);
641 *target_path = cifs_strndup_from_utf16(
642 (char *)symlink->PathBuffer + sub_offset,
643 sub_len, true, cifs_sb->local_nls);
644 if (!(*target_path)) {
645 kfree(utf16_path);
646 return -ENOMEM;
647 }
648 convert_delimiter(*target_path, '/');
649 cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
650 kfree(utf16_path);
651 return rc;
652}
653
654static void
655smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
656 unsigned int epoch, bool *purge_cache)
657{
658 oplock &= 0xFF;
659 if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
660 return;
661 if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
662 cinode->oplock = CIFS_CACHE_RHW_FLG;
663 cifs_dbg(FYI, "Batch Oplock granted on inode %p\n",
664 &cinode->vfs_inode);
665 } else if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
666 cinode->oplock = CIFS_CACHE_RW_FLG;
667 cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
668 &cinode->vfs_inode);
669 } else if (oplock == SMB2_OPLOCK_LEVEL_II) {
670 cinode->oplock = CIFS_CACHE_READ_FLG;
671 cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
672 &cinode->vfs_inode);
673 } else
674 cinode->oplock = 0;
675}
676
677static void
678smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
679 unsigned int epoch, bool *purge_cache)
680{
681 char message[5] = {0};
682
683 oplock &= 0xFF;
684 if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
685 return;
686
687 cinode->oplock = 0;
688 if (oplock & SMB2_LEASE_READ_CACHING_HE) {
689 cinode->oplock |= CIFS_CACHE_READ_FLG;
690 strcat(message, "R");
691 }
692 if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) {
693 cinode->oplock |= CIFS_CACHE_HANDLE_FLG;
694 strcat(message, "H");
695 }
696 if (oplock & SMB2_LEASE_WRITE_CACHING_HE) {
697 cinode->oplock |= CIFS_CACHE_WRITE_FLG;
698 strcat(message, "W");
699 }
700 if (!cinode->oplock)
701 strcat(message, "None");
702 cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
703 &cinode->vfs_inode);
704}
705
706static void
707smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
708 unsigned int epoch, bool *purge_cache)
709{
710 unsigned int old_oplock = cinode->oplock;
711
712 smb21_set_oplock_level(cinode, oplock, epoch, purge_cache);
713
714 if (purge_cache) {
715 *purge_cache = false;
716 if (old_oplock == CIFS_CACHE_READ_FLG) {
717 if (cinode->oplock == CIFS_CACHE_READ_FLG &&
718 (epoch - cinode->epoch > 0))
719 *purge_cache = true;
720 else if (cinode->oplock == CIFS_CACHE_RH_FLG &&
721 (epoch - cinode->epoch > 1))
722 *purge_cache = true;
723 else if (cinode->oplock == CIFS_CACHE_RHW_FLG &&
724 (epoch - cinode->epoch > 1))
725 *purge_cache = true;
726 else if (cinode->oplock == 0 &&
727 (epoch - cinode->epoch > 0))
728 *purge_cache = true;
729 } else if (old_oplock == CIFS_CACHE_RH_FLG) {
730 if (cinode->oplock == CIFS_CACHE_RH_FLG &&
731 (epoch - cinode->epoch > 0))
732 *purge_cache = true;
733 else if (cinode->oplock == CIFS_CACHE_RHW_FLG &&
734 (epoch - cinode->epoch > 1))
735 *purge_cache = true;
736 }
737 cinode->epoch = epoch;
738 }
739}
740
741static bool
742smb2_is_read_op(__u32 oplock)
743{
744 return oplock == SMB2_OPLOCK_LEVEL_II;
745}
746
747static bool
748smb21_is_read_op(__u32 oplock)
749{
750 return (oplock & SMB2_LEASE_READ_CACHING_HE) &&
751 !(oplock & SMB2_LEASE_WRITE_CACHING_HE);
752}
753
754static __le32
755map_oplock_to_lease(u8 oplock)
756{
757 if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
758 return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING;
759 else if (oplock == SMB2_OPLOCK_LEVEL_II)
760 return SMB2_LEASE_READ_CACHING;
761 else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
762 return SMB2_LEASE_HANDLE_CACHING | SMB2_LEASE_READ_CACHING |
763 SMB2_LEASE_WRITE_CACHING;
764 return 0;
765}
766
767static char *
768smb2_create_lease_buf(u8 *lease_key, u8 oplock)
769{
770 struct create_lease *buf;
771
772 buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL);
773 if (!buf)
774 return NULL;
775
776 buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
777 buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
778 buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
779
780 buf->ccontext.DataOffset = cpu_to_le16(offsetof
781 (struct create_lease, lcontext));
782 buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
783 buf->ccontext.NameOffset = cpu_to_le16(offsetof
784 (struct create_lease, Name));
785 buf->ccontext.NameLength = cpu_to_le16(4);
786 buf->Name[0] = 'R';
787 buf->Name[1] = 'q';
788 buf->Name[2] = 'L';
789 buf->Name[3] = 's';
790 return (char *)buf;
791}
792
793static char *
794smb3_create_lease_buf(u8 *lease_key, u8 oplock)
795{
796 struct create_lease_v2 *buf;
797
798 buf = kzalloc(sizeof(struct create_lease_v2), GFP_KERNEL);
799 if (!buf)
800 return NULL;
801
802 buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
803 buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
804 buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
805
806 buf->ccontext.DataOffset = cpu_to_le16(offsetof
807 (struct create_lease_v2, lcontext));
808 buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
809 buf->ccontext.NameOffset = cpu_to_le16(offsetof
810 (struct create_lease_v2, Name));
811 buf->ccontext.NameLength = cpu_to_le16(4);
812 buf->Name[0] = 'R';
813 buf->Name[1] = 'q';
814 buf->Name[2] = 'L';
815 buf->Name[3] = 's';
816 return (char *)buf;
817}
818
819static __u8
820smb2_parse_lease_buf(void *buf, unsigned int *epoch)
821{
822 struct create_lease *lc = (struct create_lease *)buf;
823
824 *epoch = 0; /* not used */
825 if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
826 return SMB2_OPLOCK_LEVEL_NOCHANGE;
827 return le32_to_cpu(lc->lcontext.LeaseState);
828}
829
830static __u8
831smb3_parse_lease_buf(void *buf, unsigned int *epoch)
832{
833 struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
834
835 *epoch = le16_to_cpu(lc->lcontext.Epoch);
836 if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
837 return SMB2_OPLOCK_LEVEL_NOCHANGE;
838 return le32_to_cpu(lc->lcontext.LeaseState);
839}
840
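
The three set_oplock_level() variants above encode the dialect differences: SMB2.0 only understands the classic batch/exclusive/level II oplocks, SMB2.1 stores the granted lease read/handle/write bits directly in cinode->oplock, and SMB3 additionally compares the lease epoch against the cached one to report, via *purge_cache, when previously cached data can no longer be trusted. A hedged sketch of how a set_fid-style caller might consume that flag; the cifs_invalidate_mapping() call is an assumption for illustration, not taken from this series:

	bool purge_cache = false;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

	server->ops->set_oplock_level(cinode, oplock, fid->epoch, &purge_cache);
	cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
	if (purge_cache)
		cifs_invalidate_mapping(cfile->dentry->d_inode);	/* assumed follow-up */
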
841struct smb_version_operations smb20_operations = {
600 .compare_fids = smb2_compare_fids, 842 .compare_fids = smb2_compare_fids,
601 .setup_request = smb2_setup_request, 843 .setup_request = smb2_setup_request,
602 .setup_async_request = smb2_setup_async_request, 844 .setup_async_request = smb2_setup_async_request,
@@ -638,6 +880,7 @@ struct smb_version_operations smb21_operations = {
638 .unlink = smb2_unlink, 880 .unlink = smb2_unlink,
639 .rename = smb2_rename_path, 881 .rename = smb2_rename_path,
640 .create_hardlink = smb2_create_hardlink, 882 .create_hardlink = smb2_create_hardlink,
883 .query_symlink = smb2_query_symlink,
641 .open = smb2_open_file, 884 .open = smb2_open_file,
642 .set_fid = smb2_set_fid, 885 .set_fid = smb2_set_fid,
643 .close = smb2_close_file, 886 .close = smb2_close_file,
@@ -660,8 +903,82 @@ struct smb_version_operations smb21_operations = {
660 .set_lease_key = smb2_set_lease_key, 903 .set_lease_key = smb2_set_lease_key,
661 .new_lease_key = smb2_new_lease_key, 904 .new_lease_key = smb2_new_lease_key,
662 .calc_signature = smb2_calc_signature, 905 .calc_signature = smb2_calc_signature,
906 .is_read_op = smb2_is_read_op,
907 .set_oplock_level = smb2_set_oplock_level,
908 .create_lease_buf = smb2_create_lease_buf,
909 .parse_lease_buf = smb2_parse_lease_buf,
663}; 910};
664 911
912struct smb_version_operations smb21_operations = {
913 .compare_fids = smb2_compare_fids,
914 .setup_request = smb2_setup_request,
915 .setup_async_request = smb2_setup_async_request,
916 .check_receive = smb2_check_receive,
917 .add_credits = smb2_add_credits,
918 .set_credits = smb2_set_credits,
919 .get_credits_field = smb2_get_credits_field,
920 .get_credits = smb2_get_credits,
921 .get_next_mid = smb2_get_next_mid,
922 .read_data_offset = smb2_read_data_offset,
923 .read_data_length = smb2_read_data_length,
924 .map_error = map_smb2_to_linux_error,
925 .find_mid = smb2_find_mid,
926 .check_message = smb2_check_message,
927 .dump_detail = smb2_dump_detail,
928 .clear_stats = smb2_clear_stats,
929 .print_stats = smb2_print_stats,
930 .is_oplock_break = smb2_is_valid_oplock_break,
931 .need_neg = smb2_need_neg,
932 .negotiate = smb2_negotiate,
933 .negotiate_wsize = smb2_negotiate_wsize,
934 .negotiate_rsize = smb2_negotiate_rsize,
935 .sess_setup = SMB2_sess_setup,
936 .logoff = SMB2_logoff,
937 .tree_connect = SMB2_tcon,
938 .tree_disconnect = SMB2_tdis,
939 .is_path_accessible = smb2_is_path_accessible,
940 .can_echo = smb2_can_echo,
941 .echo = SMB2_echo,
942 .query_path_info = smb2_query_path_info,
943 .get_srv_inum = smb2_get_srv_inum,
944 .query_file_info = smb2_query_file_info,
945 .set_path_size = smb2_set_path_size,
946 .set_file_size = smb2_set_file_size,
947 .set_file_info = smb2_set_file_info,
948 .mkdir = smb2_mkdir,
949 .mkdir_setinfo = smb2_mkdir_setinfo,
950 .rmdir = smb2_rmdir,
951 .unlink = smb2_unlink,
952 .rename = smb2_rename_path,
953 .create_hardlink = smb2_create_hardlink,
954 .query_symlink = smb2_query_symlink,
955 .open = smb2_open_file,
956 .set_fid = smb2_set_fid,
957 .close = smb2_close_file,
958 .flush = smb2_flush_file,
959 .async_readv = smb2_async_readv,
960 .async_writev = smb2_async_writev,
961 .sync_read = smb2_sync_read,
962 .sync_write = smb2_sync_write,
963 .query_dir_first = smb2_query_dir_first,
964 .query_dir_next = smb2_query_dir_next,
965 .close_dir = smb2_close_dir,
966 .calc_smb_size = smb2_calc_size,
967 .is_status_pending = smb2_is_status_pending,
968 .oplock_response = smb2_oplock_response,
969 .queryfs = smb2_queryfs,
970 .mand_lock = smb2_mand_lock,
971 .mand_unlock_range = smb2_unlock_range,
972 .push_mand_locks = smb2_push_mandatory_locks,
973 .get_lease_key = smb2_get_lease_key,
974 .set_lease_key = smb2_set_lease_key,
975 .new_lease_key = smb2_new_lease_key,
976 .calc_signature = smb2_calc_signature,
977 .is_read_op = smb21_is_read_op,
978 .set_oplock_level = smb21_set_oplock_level,
979 .create_lease_buf = smb2_create_lease_buf,
980 .parse_lease_buf = smb2_parse_lease_buf,
981};
665 982
666struct smb_version_operations smb30_operations = { 983struct smb_version_operations smb30_operations = {
667 .compare_fids = smb2_compare_fids, 984 .compare_fids = smb2_compare_fids,
@@ -706,6 +1023,7 @@ struct smb_version_operations smb30_operations = {
706 .unlink = smb2_unlink, 1023 .unlink = smb2_unlink,
707 .rename = smb2_rename_path, 1024 .rename = smb2_rename_path,
708 .create_hardlink = smb2_create_hardlink, 1025 .create_hardlink = smb2_create_hardlink,
1026 .query_symlink = smb2_query_symlink,
709 .open = smb2_open_file, 1027 .open = smb2_open_file,
710 .set_fid = smb2_set_fid, 1028 .set_fid = smb2_set_fid,
711 .close = smb2_close_file, 1029 .close = smb2_close_file,
@@ -729,6 +1047,10 @@ struct smb_version_operations smb30_operations = {
729 .new_lease_key = smb2_new_lease_key, 1047 .new_lease_key = smb2_new_lease_key,
730 .generate_signingkey = generate_smb3signingkey, 1048 .generate_signingkey = generate_smb3signingkey,
731 .calc_signature = smb3_calc_signature, 1049 .calc_signature = smb3_calc_signature,
1050 .is_read_op = smb21_is_read_op,
1051 .set_oplock_level = smb3_set_oplock_level,
1052 .create_lease_buf = smb3_create_lease_buf,
1053 .parse_lease_buf = smb3_parse_lease_buf,
732}; 1054};
733 1055
734struct smb_version_values smb20_values = { 1056struct smb_version_values smb20_values = {
@@ -746,9 +1068,9 @@ struct smb_version_values smb20_values = {
746 .cap_unix = 0, 1068 .cap_unix = 0,
747 .cap_nt_find = SMB2_NT_FIND, 1069 .cap_nt_find = SMB2_NT_FIND,
748 .cap_large_files = SMB2_LARGE_FILES, 1070 .cap_large_files = SMB2_LARGE_FILES,
749 .oplock_read = SMB2_OPLOCK_LEVEL_II,
750 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, 1071 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
751 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, 1072 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
1073 .create_lease_size = sizeof(struct create_lease),
752}; 1074};
753 1075
754struct smb_version_values smb21_values = { 1076struct smb_version_values smb21_values = {
@@ -766,9 +1088,9 @@ struct smb_version_values smb21_values = {
766 .cap_unix = 0, 1088 .cap_unix = 0,
767 .cap_nt_find = SMB2_NT_FIND, 1089 .cap_nt_find = SMB2_NT_FIND,
768 .cap_large_files = SMB2_LARGE_FILES, 1090 .cap_large_files = SMB2_LARGE_FILES,
769 .oplock_read = SMB2_OPLOCK_LEVEL_II,
770 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, 1091 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
771 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, 1092 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
1093 .create_lease_size = sizeof(struct create_lease),
772}; 1094};
773 1095
774struct smb_version_values smb30_values = { 1096struct smb_version_values smb30_values = {
@@ -786,9 +1108,9 @@ struct smb_version_values smb30_values = {
786 .cap_unix = 0, 1108 .cap_unix = 0,
787 .cap_nt_find = SMB2_NT_FIND, 1109 .cap_nt_find = SMB2_NT_FIND,
788 .cap_large_files = SMB2_LARGE_FILES, 1110 .cap_large_files = SMB2_LARGE_FILES,
789 .oplock_read = SMB2_OPLOCK_LEVEL_II,
790 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, 1111 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
791 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, 1112 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
1113 .create_lease_size = sizeof(struct create_lease_v2),
792}; 1114};
793 1115
794struct smb_version_values smb302_values = { 1116struct smb_version_values smb302_values = {
@@ -806,7 +1128,7 @@ struct smb_version_values smb302_values = {
806 .cap_unix = 0, 1128 .cap_unix = 0,
807 .cap_nt_find = SMB2_NT_FIND, 1129 .cap_nt_find = SMB2_NT_FIND,
808 .cap_large_files = SMB2_LARGE_FILES, 1130 .cap_large_files = SMB2_LARGE_FILES,
809 .oplock_read = SMB2_OPLOCK_LEVEL_II,
810 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, 1131 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
811 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, 1132 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
1133 .create_lease_size = sizeof(struct create_lease_v2),
812}; 1134};
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index abc9c2809b51..eba0efde66d7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -478,12 +478,20 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
478 } 478 }
479 479
480 /* 480 /*
481 * If we are here due to reconnect, free per-smb session key
482 * in case signing was required.
483 */
484 kfree(ses->auth_key.response);
485 ses->auth_key.response = NULL;
486
487 /*
481 * If memory allocation is successful, caller of this function 488 * If memory allocation is successful, caller of this function
482 * frees it. 489 * frees it.
483 */ 490 */
484 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); 491 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
485 if (!ses->ntlmssp) 492 if (!ses->ntlmssp)
486 return -ENOMEM; 493 return -ENOMEM;
494 ses->ntlmssp->sesskey_per_smbsess = true;
487 495
488 /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ 496 /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
489 ses->sectype = RawNTLMSSP; 497 ses->sectype = RawNTLMSSP;
@@ -628,6 +636,40 @@ ssetup_exit:
628 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ 636 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
629 if ((phase == NtLmChallenge) && (rc == 0)) 637 if ((phase == NtLmChallenge) && (rc == 0))
630 goto ssetup_ntlmssp_authenticate; 638 goto ssetup_ntlmssp_authenticate;
639
640 if (!rc) {
641 mutex_lock(&server->srv_mutex);
642 if (server->sign && server->ops->generate_signingkey) {
643 rc = server->ops->generate_signingkey(ses);
644 kfree(ses->auth_key.response);
645 ses->auth_key.response = NULL;
646 if (rc) {
647 cifs_dbg(FYI,
648 "SMB3 session key generation failed\n");
649 mutex_unlock(&server->srv_mutex);
650 goto keygen_exit;
651 }
652 }
653 if (!server->session_estab) {
654 server->sequence_number = 0x2;
655 server->session_estab = true;
656 }
657 mutex_unlock(&server->srv_mutex);
658
659 cifs_dbg(FYI, "SMB2/3 session established successfully\n");
660 spin_lock(&GlobalMid_Lock);
661 ses->status = CifsGood;
662 ses->need_reconnect = false;
663 spin_unlock(&GlobalMid_Lock);
664 }
665
666keygen_exit:
667 if (!server->sign) {
668 kfree(ses->auth_key.response);
669 ses->auth_key.response = NULL;
670 }
671 kfree(ses->ntlmssp);
672
631 return rc; 673 return rc;
632} 674}
633 675
@@ -813,39 +855,6 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
813 return rc; 855 return rc;
814} 856}
815 857
816static struct create_lease *
817create_lease_buf(u8 *lease_key, u8 oplock)
818{
819 struct create_lease *buf;
820
821 buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL);
822 if (!buf)
823 return NULL;
824
825 buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
826 buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
827 if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
828 buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING |
829 SMB2_LEASE_READ_CACHING;
830 else if (oplock == SMB2_OPLOCK_LEVEL_II)
831 buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING;
832 else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
833 buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING |
834 SMB2_LEASE_READ_CACHING |
835 SMB2_LEASE_WRITE_CACHING;
836
837 buf->ccontext.DataOffset = cpu_to_le16(offsetof
838 (struct create_lease, lcontext));
839 buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
840 buf->ccontext.NameOffset = cpu_to_le16(offsetof
841 (struct create_lease, Name));
842 buf->ccontext.NameLength = cpu_to_le16(4);
843 buf->Name[0] = 'R';
844 buf->Name[1] = 'q';
845 buf->Name[2] = 'L';
846 buf->Name[3] = 's';
847 return buf;
848}
849 858
850static struct create_durable * 859static struct create_durable *
851create_durable_buf(void) 860create_durable_buf(void)
@@ -894,55 +903,49 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
894} 903}
895 904
896static __u8 905static __u8
897parse_lease_state(struct smb2_create_rsp *rsp) 906parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
907 unsigned int *epoch)
898{ 908{
899 char *data_offset; 909 char *data_offset;
900 struct create_lease *lc; 910 struct create_context *cc;
901 bool found = false;
902 unsigned int next = 0; 911 unsigned int next = 0;
903 char *name; 912 char *name;
904 913
905 data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); 914 data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
906 lc = (struct create_lease *)data_offset; 915 cc = (struct create_context *)data_offset;
907 do { 916 do {
908 lc = (struct create_lease *)((char *)lc + next); 917 cc = (struct create_context *)((char *)cc + next);
909 name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; 918 name = le16_to_cpu(cc->NameOffset) + (char *)cc;
910 if (le16_to_cpu(lc->ccontext.NameLength) != 4 || 919 if (le16_to_cpu(cc->NameLength) != 4 ||
911 strncmp(name, "RqLs", 4)) { 920 strncmp(name, "RqLs", 4)) {
912 next = le32_to_cpu(lc->ccontext.Next); 921 next = le32_to_cpu(cc->Next);
913 continue; 922 continue;
914 } 923 }
915 if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) 924 return server->ops->parse_lease_buf(cc, epoch);
916 return SMB2_OPLOCK_LEVEL_NOCHANGE;
917 found = true;
918 break;
919 } while (next != 0); 925 } while (next != 0);
920 926
921 if (!found) 927 return 0;
922 return 0;
923
924 return smb2_map_lease_to_oplock(lc->lcontext.LeaseState);
925} 928}
926 929
927static int 930static int
928add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock) 931add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
932 unsigned int *num_iovec, __u8 *oplock)
929{ 933{
930 struct smb2_create_req *req = iov[0].iov_base; 934 struct smb2_create_req *req = iov[0].iov_base;
931 unsigned int num = *num_iovec; 935 unsigned int num = *num_iovec;
932 936
933 iov[num].iov_base = create_lease_buf(oplock+1, *oplock); 937 iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock);
934 if (iov[num].iov_base == NULL) 938 if (iov[num].iov_base == NULL)
935 return -ENOMEM; 939 return -ENOMEM;
936 iov[num].iov_len = sizeof(struct create_lease); 940 iov[num].iov_len = server->vals->create_lease_size;
937 req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; 941 req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
938 if (!req->CreateContextsOffset) 942 if (!req->CreateContextsOffset)
939 req->CreateContextsOffset = cpu_to_le32( 943 req->CreateContextsOffset = cpu_to_le32(
940 sizeof(struct smb2_create_req) - 4 + 944 sizeof(struct smb2_create_req) - 4 +
941 iov[num - 1].iov_len); 945 iov[num - 1].iov_len);
942 req->CreateContextsLength = cpu_to_le32( 946 le32_add_cpu(&req->CreateContextsLength,
943 le32_to_cpu(req->CreateContextsLength) + 947 server->vals->create_lease_size);
944 sizeof(struct create_lease)); 948 inc_rfc1001_len(&req->hdr, server->vals->create_lease_size);
945 inc_rfc1001_len(&req->hdr, sizeof(struct create_lease));
946 *num_iovec = num + 1; 949 *num_iovec = num + 1;
947 return 0; 950 return 0;
948} 951}
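
parse_lease_state() no longer assumes the RqLs context is a struct create_lease: it walks the generic create_context chain by Next offsets and hands the matching context to server->ops->parse_lease_buf(), so the same walker serves both the v1 and v2 lease formats, and add_lease_context() sizes the emitted context from server->vals->create_lease_size. Condensed restatement of the new loop:

	struct create_context *cc = (struct create_context *)data_offset;
	unsigned int next = 0;
	char *name;

	do {
		cc = (struct create_context *)((char *)cc + next);
		name = (char *)cc + le16_to_cpu(cc->NameOffset);
		if (le16_to_cpu(cc->NameLength) == 4 &&
		    !strncmp(name, "RqLs", 4))
			return server->ops->parse_lease_buf(cc, epoch);
		next = le32_to_cpu(cc->Next);
	} while (next != 0);
	return 0;
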
@@ -967,9 +970,7 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
967 req->CreateContextsOffset = 970 req->CreateContextsOffset =
968 cpu_to_le32(sizeof(struct smb2_create_req) - 4 + 971 cpu_to_le32(sizeof(struct smb2_create_req) - 4 +
969 iov[1].iov_len); 972 iov[1].iov_len);
970 req->CreateContextsLength = 973 le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable));
971 cpu_to_le32(le32_to_cpu(req->CreateContextsLength) +
972 sizeof(struct create_durable));
973 inc_rfc1001_len(&req->hdr, sizeof(struct create_durable)); 974 inc_rfc1001_len(&req->hdr, sizeof(struct create_durable));
974 *num_iovec = num + 1; 975 *num_iovec = num + 1;
975 return 0; 976 return 0;
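
Both context builders also switch from the open-coded read-modify-write on CreateContextsLength to le32_add_cpu(), which performs the same endian-safe in-place addition:

	__le32 len = cpu_to_le32(100);

	len = cpu_to_le32(le32_to_cpu(len) + 8);	/* old style */
	le32_add_cpu(&len, 8);				/* equivalent helper */
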
@@ -977,7 +978,8 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
977 978
978int 979int
979SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, 980SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
980 __u8 *oplock, struct smb2_file_all_info *buf) 981 __u8 *oplock, struct smb2_file_all_info *buf,
982 struct smb2_err_rsp **err_buf)
981{ 983{
982 struct smb2_create_req *req; 984 struct smb2_create_req *req;
983 struct smb2_create_rsp *rsp; 985 struct smb2_create_rsp *rsp;
@@ -1048,11 +1050,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1048 if (!server->oplocks) 1050 if (!server->oplocks)
1049 *oplock = SMB2_OPLOCK_LEVEL_NONE; 1051 *oplock = SMB2_OPLOCK_LEVEL_NONE;
1050 1052
1051 if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || 1053 if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
1052 *oplock == SMB2_OPLOCK_LEVEL_NONE) 1054 *oplock == SMB2_OPLOCK_LEVEL_NONE)
1053 req->RequestedOplockLevel = *oplock; 1055 req->RequestedOplockLevel = *oplock;
1054 else { 1056 else {
1055 rc = add_lease_context(iov, &num_iovecs, oplock); 1057 rc = add_lease_context(server, iov, &num_iovecs, oplock);
1056 if (rc) { 1058 if (rc) {
1057 cifs_small_buf_release(req); 1059 cifs_small_buf_release(req);
1058 kfree(copy_path); 1060 kfree(copy_path);
@@ -1062,11 +1064,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1062 1064
1063 if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { 1065 if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
1064 /* need to set Next field of lease context if we request it */ 1066 /* need to set Next field of lease context if we request it */
1065 if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) { 1067 if (server->capabilities & SMB2_GLOBAL_CAP_LEASING) {
1066 struct create_context *ccontext = 1068 struct create_context *ccontext =
1067 (struct create_context *)iov[num_iovecs-1].iov_base; 1069 (struct create_context *)iov[num_iovecs-1].iov_base;
1068 ccontext->Next = 1070 ccontext->Next =
1069 cpu_to_le32(sizeof(struct create_lease)); 1071 cpu_to_le32(server->vals->create_lease_size);
1070 } 1072 }
1071 rc = add_durable_context(iov, &num_iovecs, oparms); 1073 rc = add_durable_context(iov, &num_iovecs, oparms);
1072 if (rc) { 1074 if (rc) {
@@ -1082,6 +1084,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1082 1084
1083 if (rc != 0) { 1085 if (rc != 0) {
1084 cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); 1086 cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
1087 if (err_buf)
1088 *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
1089 GFP_KERNEL);
1085 goto creat_exit; 1090 goto creat_exit;
1086 } 1091 }
1087 1092
@@ -1098,7 +1103,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1098 } 1103 }
1099 1104
1100 if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) 1105 if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
1101 *oplock = parse_lease_state(rsp); 1106 *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
1102 else 1107 else
1103 *oplock = rsp->OplockLevel; 1108 *oplock = rsp->OplockLevel;
1104creat_exit: 1109creat_exit:
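
Two smaller changes in SMB2_open() round this out: on a failed create the whole response (the 4-byte RFC1001 length prefix plus the PDU, hence get_rfc1002_length(rsp) + 4) can be duplicated into *err_buf for the caller, and on a granted lease the oplock value and the lease epoch come back through the dialect's parse hook:

	if (rc != 0) {
		cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
		if (err_buf)	/* caller wants the error PDU, e.g. for symlinks */
			*err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
					   GFP_KERNEL);
		goto creat_exit;
	}
	/* ... success path: copy fid, query info ... */
	if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
		*oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
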
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 36b0d37ea69b..b83d0118a757 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -150,6 +150,20 @@ struct smb2_err_rsp {
150 __u8 ErrorData[1]; /* variable length */ 150 __u8 ErrorData[1]; /* variable length */
151} __packed; 151} __packed;
152 152
153struct smb2_symlink_err_rsp {
154 __le32 SymLinkLength;
155 __le32 SymLinkErrorTag;
156 __le32 ReparseTag;
157 __le16 ReparseDataLength;
158 __le16 UnparsedPathLength;
159 __le16 SubstituteNameOffset;
160 __le16 SubstituteNameLength;
161 __le16 PrintNameOffset;
162 __le16 PrintNameLength;
163 __le32 Flags;
164 __u8 PathBuffer[0];
165} __packed;
166
153#define SMB2_CLIENT_GUID_SIZE 16 167#define SMB2_CLIENT_GUID_SIZE 16
154 168
155extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; 169extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
@@ -462,6 +476,10 @@ struct create_context {
462 __u8 Buffer[0]; 476 __u8 Buffer[0];
463} __packed; 477} __packed;
464 478
479#define SMB2_LEASE_READ_CACHING_HE 0x01
480#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
481#define SMB2_LEASE_WRITE_CACHING_HE 0x04
482
465#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) 483#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
466#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) 484#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
467#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) 485#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
@@ -479,12 +497,31 @@ struct lease_context {
479 __le64 LeaseDuration; 497 __le64 LeaseDuration;
480} __packed; 498} __packed;
481 499
500struct lease_context_v2 {
501 __le64 LeaseKeyLow;
502 __le64 LeaseKeyHigh;
503 __le32 LeaseState;
504 __le32 LeaseFlags;
505 __le64 LeaseDuration;
506 __le64 ParentLeaseKeyLow;
507 __le64 ParentLeaseKeyHigh;
508 __le16 Epoch;
509 __le16 Reserved;
510} __packed;
511
482struct create_lease { 512struct create_lease {
483 struct create_context ccontext; 513 struct create_context ccontext;
484 __u8 Name[8]; 514 __u8 Name[8];
485 struct lease_context lcontext; 515 struct lease_context lcontext;
486} __packed; 516} __packed;
487 517
518struct create_lease_v2 {
519 struct create_context ccontext;
520 __u8 Name[8];
521 struct lease_context_v2 lcontext;
522 __u8 Pad[4];
523} __packed;
524
488struct create_durable { 525struct create_durable {
489 struct create_context ccontext; 526 struct create_context ccontext;
490 __u8 Name[8]; 527 __u8 Name[8];
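
struct lease_context_v2 / create_lease_v2 carry the SMB3 lease create context: the v2 form adds a parent lease key, a 16-bit Epoch and four bytes of padding, and is what smb3_create_lease_buf()/smb3_parse_lease_buf() put on and take off the wire, with vals->create_lease_size selecting which structure a dialect emits. Illustrative check only, not added by the series, assuming the layout above:

	/* the v2 context must be strictly larger than the v1 context */
	BUILD_BUG_ON(sizeof(struct create_lease_v2) <=
		     sizeof(struct create_lease));
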
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 1a5ecbed40ed..e3fb4801ee96 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -53,7 +53,6 @@ extern int smb3_calc_signature(struct smb_rqst *rqst,
53 struct TCP_Server_Info *server); 53 struct TCP_Server_Info *server);
54extern void smb2_echo_request(struct work_struct *work); 54extern void smb2_echo_request(struct work_struct *work);
55extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); 55extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
56extern __u8 smb2_map_lease_to_oplock(__le32 lease_state);
57extern bool smb2_is_valid_oplock_break(char *buffer, 56extern bool smb2_is_valid_oplock_break(char *buffer,
58 struct TCP_Server_Info *srv); 57 struct TCP_Server_Info *srv);
59 58
@@ -87,7 +86,6 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
87extern int smb2_open_file(const unsigned int xid, 86extern int smb2_open_file(const unsigned int xid,
88 struct cifs_open_parms *oparms, 87 struct cifs_open_parms *oparms,
89 __u32 *oplock, FILE_ALL_INFO *buf); 88 __u32 *oplock, FILE_ALL_INFO *buf);
90extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
91extern int smb2_unlock_range(struct cifsFileInfo *cfile, 89extern int smb2_unlock_range(struct cifsFileInfo *cfile,
92 struct file_lock *flock, const unsigned int xid); 90 struct file_lock *flock, const unsigned int xid);
93extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); 91extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
@@ -106,7 +104,8 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses,
106extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); 104extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon);
107extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, 105extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
108 __le16 *path, __u8 *oplock, 106 __le16 *path, __u8 *oplock,
109 struct smb2_file_all_info *buf); 107 struct smb2_file_all_info *buf,
108 struct smb2_err_rsp **err_buf);
110extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, 109extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
111 u64 persistent_fid, u64 volatile_fid, u32 opcode, 110 u64 persistent_fid, u64 volatile_fid, u32 opcode,
112 bool is_fsctl, char *in_data, u32 indatalen, 111 bool is_fsctl, char *in_data, u32 indatalen,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 4f2300d020c7..340abca3aa52 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -114,6 +114,23 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
114 return 0; 114 return 0;
115} 115}
116 116
117static struct cifs_ses *
118smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server)
119{
120 struct cifs_ses *ses;
121
122 spin_lock(&cifs_tcp_ses_lock);
123 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
124 if (ses->Suid != smb2hdr->SessionId)
125 continue;
126 spin_unlock(&cifs_tcp_ses_lock);
127 return ses;
128 }
129 spin_unlock(&cifs_tcp_ses_lock);
130
131 return NULL;
132}
133
117 134
118int 135int
119smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) 136smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
@@ -124,6 +141,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
124 struct kvec *iov = rqst->rq_iov; 141 struct kvec *iov = rqst->rq_iov;
125 int n_vec = rqst->rq_nvec; 142 int n_vec = rqst->rq_nvec;
126 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; 143 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
144 struct cifs_ses *ses;
145
146 ses = smb2_find_smb_ses(smb2_pdu, server);
147 if (!ses) {
148 cifs_dbg(VFS, "%s: Could not find session\n", __func__);
149 return 0;
150 }
127 151
128 memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); 152 memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
129 memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); 153 memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
@@ -135,7 +159,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
135 } 159 }
136 160
137 rc = crypto_shash_setkey(server->secmech.hmacsha256, 161 rc = crypto_shash_setkey(server->secmech.hmacsha256,
138 server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); 162 ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
139 if (rc) { 163 if (rc) {
140 cifs_dbg(VFS, "%s: Could not update with response\n", __func__); 164 cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
141 return rc; 165 return rc;
@@ -198,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
198 return rc; 222 return rc;
199} 223}
200 224
201void 225int
202generate_smb3signingkey(struct TCP_Server_Info *server) 226generate_smb3signingkey(struct cifs_ses *ses)
203{ 227{
204 unsigned char zero = 0x0; 228 unsigned char zero = 0x0;
205 __u8 i[4] = {0, 0, 0, 1}; 229 __u8 i[4] = {0, 0, 0, 1};
@@ -209,90 +233,99 @@ generate_smb3signingkey(struct TCP_Server_Info *server)
209 unsigned char *hashptr = prfhash; 233 unsigned char *hashptr = prfhash;
210 234
211 memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); 235 memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
212 memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); 236 memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
213 237
214 rc = smb3_crypto_shash_allocate(server); 238 rc = smb3_crypto_shash_allocate(ses->server);
215 if (rc) { 239 if (rc) {
216 cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); 240 cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
217 goto smb3signkey_ret; 241 goto smb3signkey_ret;
218 } 242 }
219 243
220 rc = crypto_shash_setkey(server->secmech.hmacsha256, 244 rc = crypto_shash_setkey(ses->server->secmech.hmacsha256,
221 server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); 245 ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
222 if (rc) { 246 if (rc) {
223 cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); 247 cifs_dbg(VFS, "%s: Could not set with session key\n", __func__);
224 goto smb3signkey_ret; 248 goto smb3signkey_ret;
225 } 249 }
226 250
227 rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); 251 rc = crypto_shash_init(&ses->server->secmech.sdeschmacsha256->shash);
228 if (rc) { 252 if (rc) {
229 cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); 253 cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
230 goto smb3signkey_ret; 254 goto smb3signkey_ret;
231 } 255 }
232 256
233 rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, 257 rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
234 i, 4); 258 i, 4);
235 if (rc) { 259 if (rc) {
236 cifs_dbg(VFS, "%s: Could not update with n\n", __func__); 260 cifs_dbg(VFS, "%s: Could not update with n\n", __func__);
237 goto smb3signkey_ret; 261 goto smb3signkey_ret;
238 } 262 }
239 263
240 rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, 264 rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
241 "SMB2AESCMAC", 12); 265 "SMB2AESCMAC", 12);
242 if (rc) { 266 if (rc) {
243 cifs_dbg(VFS, "%s: Could not update with label\n", __func__); 267 cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
244 goto smb3signkey_ret; 268 goto smb3signkey_ret;
245 } 269 }
246 270
247 rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, 271 rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
248 &zero, 1); 272 &zero, 1);
249 if (rc) { 273 if (rc) {
250 cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); 274 cifs_dbg(VFS, "%s: Could not update with zero\n", __func__);
251 goto smb3signkey_ret; 275 goto smb3signkey_ret;
252 } 276 }
253 277
254 rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, 278 rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
255 "SmbSign", 8); 279 "SmbSign", 8);
256 if (rc) { 280 if (rc) {
257 cifs_dbg(VFS, "%s: Could not update with context\n", __func__); 281 cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
258 goto smb3signkey_ret; 282 goto smb3signkey_ret;
259 } 283 }
260 284
261 rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, 285 rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
262 L, 4); 286 L, 4);
263 if (rc) { 287 if (rc) {
264 cifs_dbg(VFS, "%s: Could not update with L\n", __func__); 288 cifs_dbg(VFS, "%s: Could not update with L\n", __func__);
265 goto smb3signkey_ret; 289 goto smb3signkey_ret;
266 } 290 }
267 291
268 rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, 292 rc = crypto_shash_final(&ses->server->secmech.sdeschmacsha256->shash,
269 hashptr); 293 hashptr);
270 if (rc) { 294 if (rc) {
271 cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); 295 cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
272 goto smb3signkey_ret; 296 goto smb3signkey_ret;
273 } 297 }
274 298
275 memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); 299 memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
276 300
277smb3signkey_ret: 301smb3signkey_ret:
278 return; 302 return rc;
279} 303}
280 304
281int 305int
282smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) 306smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
283{ 307{
284 int i, rc; 308 int i;
309 int rc = 0;
285 unsigned char smb3_signature[SMB2_CMACAES_SIZE]; 310 unsigned char smb3_signature[SMB2_CMACAES_SIZE];
286 unsigned char *sigptr = smb3_signature; 311 unsigned char *sigptr = smb3_signature;
287 struct kvec *iov = rqst->rq_iov; 312 struct kvec *iov = rqst->rq_iov;
288 int n_vec = rqst->rq_nvec; 313 int n_vec = rqst->rq_nvec;
289 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; 314 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
315 struct cifs_ses *ses;
316
317 ses = smb2_find_smb_ses(smb2_pdu, server);
318 if (!ses) {
319 cifs_dbg(VFS, "%s: Could not find session\n", __func__);
320 return 0;
321 }
290 322
291 memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); 323 memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
292 memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); 324 memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
293 325
294 rc = crypto_shash_setkey(server->secmech.cmacaes, 326 rc = crypto_shash_setkey(server->secmech.cmacaes,
295 server->smb3signingkey, SMB2_CMACAES_SIZE); 327 ses->smb3signingkey, SMB2_CMACAES_SIZE);
328
296 if (rc) { 329 if (rc) {
297 cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); 330 cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
298 return rc; 331 return rc;
@@ -389,6 +422,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
389 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; 422 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
390 423
391 if ((smb2_pdu->Command == SMB2_NEGOTIATE) || 424 if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
425 (smb2_pdu->Command == SMB2_SESSION_SETUP) ||
392 (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || 426 (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
393 (!server->session_estab)) 427 (!server->session_estab))
394 return 0; 428 return 0;
diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c
new file mode 100644
index 000000000000..1506d4fddb2c
--- /dev/null
+++ b/fs/cifs/winucase.c
@@ -0,0 +1,663 @@
1/*
2 * fs/cifs/winucase.c
3 *
4 * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
14 * the GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 * The const tables in this file were converted from the following info
21 * provided by Microsoft:
22 *
23 * 3.1.5.3 Mapping UTF-16 Strings to Upper Case:
24 *
25 * http://msdn.microsoft.com/en-us/library/hh877830.aspx
26 * http://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=10921
27 *
28 * In particular, the table in "Windows 8 Upper Case Mapping Table.txt" was
29 * post-processed using the winucase_convert.pl script.
30 */
31
32#include <linux/nls.h>
33
34wchar_t cifs_toupper(wchar_t in); /* quiet sparse */
35
36static const wchar_t t2_00[256] = {
37 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
38 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
39 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
40 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
41 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
42 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
43 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
44 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
45 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
46 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
47 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
48 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
49 0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
50 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
51 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
52 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
53 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
54 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
55 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
56 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
57 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
58 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
59 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
60 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
61 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
62 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
63 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
64 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
65 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
66 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
67 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000,
68 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178,
69};
70
71static const wchar_t t2_01[256] = {
72 0x0000, 0x0100, 0x0000, 0x0102, 0x0000, 0x0104, 0x0000, 0x0106,
73 0x0000, 0x0108, 0x0000, 0x010a, 0x0000, 0x010c, 0x0000, 0x010e,
74 0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116,
75 0x0000, 0x0118, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e,
76 0x0000, 0x0120, 0x0000, 0x0122, 0x0000, 0x0124, 0x0000, 0x0126,
77 0x0000, 0x0128, 0x0000, 0x012a, 0x0000, 0x012c, 0x0000, 0x012e,
78 0x0000, 0x0000, 0x0000, 0x0132, 0x0000, 0x0134, 0x0000, 0x0136,
79 0x0000, 0x0000, 0x0139, 0x0000, 0x013b, 0x0000, 0x013d, 0x0000,
80 0x013f, 0x0000, 0x0141, 0x0000, 0x0143, 0x0000, 0x0145, 0x0000,
81 0x0147, 0x0000, 0x0000, 0x014a, 0x0000, 0x014c, 0x0000, 0x014e,
82 0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
83 0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e,
84 0x0000, 0x0160, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
85 0x0000, 0x0168, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
86 0x0000, 0x0170, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176,
87 0x0000, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
88 0x0243, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0000, 0x0000,
89 0x0187, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x0000,
90 0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x01f6, 0x0000, 0x0000,
91 0x0000, 0x0198, 0x023d, 0x0000, 0x0000, 0x0000, 0x0220, 0x0000,
92 0x0000, 0x01a0, 0x0000, 0x01a2, 0x0000, 0x01a4, 0x0000, 0x0000,
93 0x01a7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ac, 0x0000, 0x0000,
94 0x01af, 0x0000, 0x0000, 0x0000, 0x01b3, 0x0000, 0x01b5, 0x0000,
95 0x0000, 0x01b8, 0x0000, 0x0000, 0x0000, 0x01bc, 0x0000, 0x01f7,
96 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c4, 0x0000,
97 0x0000, 0x01c7, 0x0000, 0x0000, 0x01ca, 0x0000, 0x01cd, 0x0000,
98 0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
99 0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x018e, 0x0000, 0x01de,
100 0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
101 0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
102 0x0000, 0x0000, 0x0000, 0x01f1, 0x0000, 0x01f4, 0x0000, 0x0000,
103 0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x01fe,
104};
105
106static const wchar_t t2_02[256] = {
107 0x0000, 0x0200, 0x0000, 0x0202, 0x0000, 0x0204, 0x0000, 0x0206,
108 0x0000, 0x0208, 0x0000, 0x020a, 0x0000, 0x020c, 0x0000, 0x020e,
109 0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0216,
110 0x0000, 0x0218, 0x0000, 0x021a, 0x0000, 0x021c, 0x0000, 0x021e,
111 0x0000, 0x0000, 0x0000, 0x0222, 0x0000, 0x0224, 0x0000, 0x0226,
112 0x0000, 0x0228, 0x0000, 0x022a, 0x0000, 0x022c, 0x0000, 0x022e,
113 0x0000, 0x0230, 0x0000, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000,
114 0x0000, 0x0000, 0x0000, 0x0000, 0x023b, 0x0000, 0x0000, 0x0000,
115 0x0000, 0x0000, 0x0241, 0x0000, 0x0000, 0x0000, 0x0000, 0x0246,
116 0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x0000, 0x024e,
117 0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a,
118 0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000,
119 0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000,
120 0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c,
121 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000,
122 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000,
123 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000,
124 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000,
125 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
126 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
127 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
128 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
129 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
130 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
131 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
132 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
133 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
134 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
135 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
136 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
137 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
138 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
139};
140
141static const wchar_t t2_03[256] = {
142 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
143 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
144 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
145 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
146 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
147 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
148 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
149 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
150 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
151 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
152 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
153 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
154 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
155 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
156 0x0000, 0x0370, 0x0000, 0x0372, 0x0000, 0x0000, 0x0000, 0x0376,
157 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000,
158 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
159 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
160 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
161 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
162 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
163 0x0000, 0x0000, 0x0000, 0x0000, 0x0386, 0x0388, 0x0389, 0x038a,
164 0x0000, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
165 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
166 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
167 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x0000,
168 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03cf,
169 0x0000, 0x03d8, 0x0000, 0x03da, 0x0000, 0x03dc, 0x0000, 0x03de,
170 0x0000, 0x03e0, 0x0000, 0x03e2, 0x0000, 0x03e4, 0x0000, 0x03e6,
171 0x0000, 0x03e8, 0x0000, 0x03ea, 0x0000, 0x03ec, 0x0000, 0x03ee,
172 0x0000, 0x0000, 0x03f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
173 0x03f7, 0x0000, 0x0000, 0x03fa, 0x0000, 0x0000, 0x0000, 0x0000,
174};
175
176static const wchar_t t2_04[256] = {
177 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
178 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
179 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
180 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
181 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
182 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
183 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
184 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
185 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
186 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
187 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
188 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f,
189 0x0000, 0x0460, 0x0000, 0x0462, 0x0000, 0x0464, 0x0000, 0x0466,
190 0x0000, 0x0468, 0x0000, 0x046a, 0x0000, 0x046c, 0x0000, 0x046e,
191 0x0000, 0x0470, 0x0000, 0x0472, 0x0000, 0x0474, 0x0000, 0x0476,
192 0x0000, 0x0478, 0x0000, 0x047a, 0x0000, 0x047c, 0x0000, 0x047e,
193 0x0000, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
194 0x0000, 0x0000, 0x0000, 0x048a, 0x0000, 0x048c, 0x0000, 0x048e,
195 0x0000, 0x0490, 0x0000, 0x0492, 0x0000, 0x0494, 0x0000, 0x0496,
196 0x0000, 0x0498, 0x0000, 0x049a, 0x0000, 0x049c, 0x0000, 0x049e,
197 0x0000, 0x04a0, 0x0000, 0x04a2, 0x0000, 0x04a4, 0x0000, 0x04a6,
198 0x0000, 0x04a8, 0x0000, 0x04aa, 0x0000, 0x04ac, 0x0000, 0x04ae,
199 0x0000, 0x04b0, 0x0000, 0x04b2, 0x0000, 0x04b4, 0x0000, 0x04b6,
200 0x0000, 0x04b8, 0x0000, 0x04ba, 0x0000, 0x04bc, 0x0000, 0x04be,
201 0x0000, 0x0000, 0x04c1, 0x0000, 0x04c3, 0x0000, 0x04c5, 0x0000,
202 0x04c7, 0x0000, 0x04c9, 0x0000, 0x04cb, 0x0000, 0x04cd, 0x04c0,
203 0x0000, 0x04d0, 0x0000, 0x04d2, 0x0000, 0x04d4, 0x0000, 0x04d6,
204 0x0000, 0x04d8, 0x0000, 0x04da, 0x0000, 0x04dc, 0x0000, 0x04de,
205 0x0000, 0x04e0, 0x0000, 0x04e2, 0x0000, 0x04e4, 0x0000, 0x04e6,
206 0x0000, 0x04e8, 0x0000, 0x04ea, 0x0000, 0x04ec, 0x0000, 0x04ee,
207 0x0000, 0x04f0, 0x0000, 0x04f2, 0x0000, 0x04f4, 0x0000, 0x04f6,
208 0x0000, 0x04f8, 0x0000, 0x04fa, 0x0000, 0x04fc, 0x0000, 0x04fe,
209};
210
211static const wchar_t t2_05[256] = {
212 0x0000, 0x0500, 0x0000, 0x0502, 0x0000, 0x0504, 0x0000, 0x0506,
213 0x0000, 0x0508, 0x0000, 0x050a, 0x0000, 0x050c, 0x0000, 0x050e,
214 0x0000, 0x0510, 0x0000, 0x0512, 0x0000, 0x0514, 0x0000, 0x0516,
215 0x0000, 0x0518, 0x0000, 0x051a, 0x0000, 0x051c, 0x0000, 0x051e,
216 0x0000, 0x0520, 0x0000, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000,
217 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
218 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
219 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
220 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
221 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
222 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
223 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
224 0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537,
225 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f,
226 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547,
227 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f,
228 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0000,
229 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
230 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
231 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
232 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
233 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
234 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
235 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
236 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
237 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
238 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
239 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
240 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
241 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
242 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
243 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
244};
245
246static const wchar_t t2_1d[256] = {
247 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
248 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
249 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
250 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
251 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
252 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
253 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
254 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
255 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
256 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
257 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
258 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
259 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
260 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
261 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
262 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000,
263 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
264 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
265 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
266 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
267 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
268 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
269 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
270 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
271 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
272 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
273 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
274 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
275 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
276 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
279};
280
281static const wchar_t t2_1e[256] = {
282 0x0000, 0x1e00, 0x0000, 0x1e02, 0x0000, 0x1e04, 0x0000, 0x1e06,
283 0x0000, 0x1e08, 0x0000, 0x1e0a, 0x0000, 0x1e0c, 0x0000, 0x1e0e,
284 0x0000, 0x1e10, 0x0000, 0x1e12, 0x0000, 0x1e14, 0x0000, 0x1e16,
285 0x0000, 0x1e18, 0x0000, 0x1e1a, 0x0000, 0x1e1c, 0x0000, 0x1e1e,
286 0x0000, 0x1e20, 0x0000, 0x1e22, 0x0000, 0x1e24, 0x0000, 0x1e26,
287 0x0000, 0x1e28, 0x0000, 0x1e2a, 0x0000, 0x1e2c, 0x0000, 0x1e2e,
288 0x0000, 0x1e30, 0x0000, 0x1e32, 0x0000, 0x1e34, 0x0000, 0x1e36,
289 0x0000, 0x1e38, 0x0000, 0x1e3a, 0x0000, 0x1e3c, 0x0000, 0x1e3e,
290 0x0000, 0x1e40, 0x0000, 0x1e42, 0x0000, 0x1e44, 0x0000, 0x1e46,
291 0x0000, 0x1e48, 0x0000, 0x1e4a, 0x0000, 0x1e4c, 0x0000, 0x1e4e,
292 0x0000, 0x1e50, 0x0000, 0x1e52, 0x0000, 0x1e54, 0x0000, 0x1e56,
293 0x0000, 0x1e58, 0x0000, 0x1e5a, 0x0000, 0x1e5c, 0x0000, 0x1e5e,
294 0x0000, 0x1e60, 0x0000, 0x1e62, 0x0000, 0x1e64, 0x0000, 0x1e66,
295 0x0000, 0x1e68, 0x0000, 0x1e6a, 0x0000, 0x1e6c, 0x0000, 0x1e6e,
296 0x0000, 0x1e70, 0x0000, 0x1e72, 0x0000, 0x1e74, 0x0000, 0x1e76,
297 0x0000, 0x1e78, 0x0000, 0x1e7a, 0x0000, 0x1e7c, 0x0000, 0x1e7e,
298 0x0000, 0x1e80, 0x0000, 0x1e82, 0x0000, 0x1e84, 0x0000, 0x1e86,
299 0x0000, 0x1e88, 0x0000, 0x1e8a, 0x0000, 0x1e8c, 0x0000, 0x1e8e,
300 0x0000, 0x1e90, 0x0000, 0x1e92, 0x0000, 0x1e94, 0x0000, 0x0000,
301 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
302 0x0000, 0x1ea0, 0x0000, 0x1ea2, 0x0000, 0x1ea4, 0x0000, 0x1ea6,
303 0x0000, 0x1ea8, 0x0000, 0x1eaa, 0x0000, 0x1eac, 0x0000, 0x1eae,
304 0x0000, 0x1eb0, 0x0000, 0x1eb2, 0x0000, 0x1eb4, 0x0000, 0x1eb6,
305 0x0000, 0x1eb8, 0x0000, 0x1eba, 0x0000, 0x1ebc, 0x0000, 0x1ebe,
306 0x0000, 0x1ec0, 0x0000, 0x1ec2, 0x0000, 0x1ec4, 0x0000, 0x1ec6,
307 0x0000, 0x1ec8, 0x0000, 0x1eca, 0x0000, 0x1ecc, 0x0000, 0x1ece,
308 0x0000, 0x1ed0, 0x0000, 0x1ed2, 0x0000, 0x1ed4, 0x0000, 0x1ed6,
309 0x0000, 0x1ed8, 0x0000, 0x1eda, 0x0000, 0x1edc, 0x0000, 0x1ede,
310 0x0000, 0x1ee0, 0x0000, 0x1ee2, 0x0000, 0x1ee4, 0x0000, 0x1ee6,
311 0x0000, 0x1ee8, 0x0000, 0x1eea, 0x0000, 0x1eec, 0x0000, 0x1eee,
312 0x0000, 0x1ef0, 0x0000, 0x1ef2, 0x0000, 0x1ef4, 0x0000, 0x1ef6,
313 0x0000, 0x1ef8, 0x0000, 0x1efa, 0x0000, 0x1efc, 0x0000, 0x1efe,
314};
315
316static const wchar_t t2_1f[256] = {
317 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f,
318 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
319 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000,
320 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
321 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f,
322 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
323 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f,
324 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
325 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000,
326 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
327 0x0000, 0x1f59, 0x0000, 0x1f5b, 0x0000, 0x1f5d, 0x0000, 0x1f5f,
328 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
329 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f,
330 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
331 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb,
332 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000,
333 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
334 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
335 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
336 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
337 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
338 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
339 0x1fb8, 0x1fb9, 0x0000, 0x1fbc, 0x0000, 0x0000, 0x0000, 0x0000,
340 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
341 0x0000, 0x0000, 0x0000, 0x1fcc, 0x0000, 0x0000, 0x0000, 0x0000,
342 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
343 0x1fd8, 0x1fd9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
344 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
345 0x1fe8, 0x1fe9, 0x0000, 0x0000, 0x0000, 0x1fec, 0x0000, 0x0000,
346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
347 0x0000, 0x0000, 0x0000, 0x1ffc, 0x0000, 0x0000, 0x0000, 0x0000,
348 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
349};
350
351static const wchar_t t2_21[256] = {
352 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
353 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
354 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
355 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
356 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
357 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
358 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
359 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
360 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
361 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000,
362 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
363 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
364 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
365 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
366 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167,
367 0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f,
368 0x0000, 0x0000, 0x0000, 0x0000, 0x2183, 0x0000, 0x0000, 0x0000,
369 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
370 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
371 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
372 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
373 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
374 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
375 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
376 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
377 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
378 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
379 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
380 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
381 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
382 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
383 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
384};
385
386static const wchar_t t2_24[256] = {
387 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
388 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
389 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
390 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
391 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
392 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
393 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
394 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
395 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
396 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
397 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
398 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
399 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
400 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
401 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
402 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
403 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
404 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
405 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
406 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
407 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
408 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
409 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
410 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
411 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
412 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
413 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, 0x24bb, 0x24bc, 0x24bd,
414 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, 0x24c3, 0x24c4, 0x24c5,
415 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, 0x24cb, 0x24cc, 0x24cd,
416 0x24ce, 0x24cf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
417 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
418 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
419};
420
421static const wchar_t t2_2c[256] = {
422 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
423 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
424 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
425 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
426 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
427 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
428 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07,
429 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f,
430 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17,
431 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f,
432 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27,
433 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000,
434 0x0000, 0x2c60, 0x0000, 0x0000, 0x0000, 0x023a, 0x023e, 0x0000,
435 0x2c67, 0x0000, 0x2c69, 0x0000, 0x2c6b, 0x0000, 0x0000, 0x0000,
436 0x0000, 0x0000, 0x0000, 0x2c72, 0x0000, 0x0000, 0x2c75, 0x0000,
437 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
438 0x0000, 0x2c80, 0x0000, 0x2c82, 0x0000, 0x2c84, 0x0000, 0x2c86,
439 0x0000, 0x2c88, 0x0000, 0x2c8a, 0x0000, 0x2c8c, 0x0000, 0x2c8e,
440 0x0000, 0x2c90, 0x0000, 0x2c92, 0x0000, 0x2c94, 0x0000, 0x2c96,
441 0x0000, 0x2c98, 0x0000, 0x2c9a, 0x0000, 0x2c9c, 0x0000, 0x2c9e,
442 0x0000, 0x2ca0, 0x0000, 0x2ca2, 0x0000, 0x2ca4, 0x0000, 0x2ca6,
443 0x0000, 0x2ca8, 0x0000, 0x2caa, 0x0000, 0x2cac, 0x0000, 0x2cae,
444 0x0000, 0x2cb0, 0x0000, 0x2cb2, 0x0000, 0x2cb4, 0x0000, 0x2cb6,
445 0x0000, 0x2cb8, 0x0000, 0x2cba, 0x0000, 0x2cbc, 0x0000, 0x2cbe,
446 0x0000, 0x2cc0, 0x0000, 0x2cc2, 0x0000, 0x2cc4, 0x0000, 0x2cc6,
447 0x0000, 0x2cc8, 0x0000, 0x2cca, 0x0000, 0x2ccc, 0x0000, 0x2cce,
448 0x0000, 0x2cd0, 0x0000, 0x2cd2, 0x0000, 0x2cd4, 0x0000, 0x2cd6,
449 0x0000, 0x2cd8, 0x0000, 0x2cda, 0x0000, 0x2cdc, 0x0000, 0x2cde,
450 0x0000, 0x2ce0, 0x0000, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000,
451 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
452 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
453 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
454};
455
456static const wchar_t t2_2d[256] = {
457 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7,
458 0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af,
459 0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7,
460 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf,
461 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000,
462 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
463 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
464 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
465 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
466 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
467 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
468 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
469 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
470 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
471 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
472 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
473 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
474 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
475 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
476 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
477 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
478 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
479 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
480 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
481 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
482 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
483 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
484 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
485 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
486 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
487 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
488 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
489};
490
491static const wchar_t t2_a6[256] = {
492 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
493 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
494 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
495 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
496 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
497 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
498 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
499 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
500 0x0000, 0xa640, 0x0000, 0xa642, 0x0000, 0xa644, 0x0000, 0xa646,
501 0x0000, 0xa648, 0x0000, 0xa64a, 0x0000, 0xa64c, 0x0000, 0xa64e,
502 0x0000, 0xa650, 0x0000, 0xa652, 0x0000, 0xa654, 0x0000, 0xa656,
503 0x0000, 0xa658, 0x0000, 0xa65a, 0x0000, 0xa65c, 0x0000, 0xa65e,
504 0x0000, 0x0000, 0x0000, 0xa662, 0x0000, 0xa664, 0x0000, 0xa666,
505 0x0000, 0xa668, 0x0000, 0xa66a, 0x0000, 0xa66c, 0x0000, 0x0000,
506 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
507 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
508 0x0000, 0xa680, 0x0000, 0xa682, 0x0000, 0xa684, 0x0000, 0xa686,
509 0x0000, 0xa688, 0x0000, 0xa68a, 0x0000, 0xa68c, 0x0000, 0xa68e,
510 0x0000, 0xa690, 0x0000, 0xa692, 0x0000, 0xa694, 0x0000, 0xa696,
511 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
512 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
513 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
514 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
515 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
516 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
517 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
518 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
519 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
520 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
521 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
522 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
523 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
524};
525
526static const wchar_t t2_a7[256] = {
527 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
528 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
529 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
530 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
531 0x0000, 0x0000, 0x0000, 0xa722, 0x0000, 0xa724, 0x0000, 0xa726,
532 0x0000, 0xa728, 0x0000, 0xa72a, 0x0000, 0xa72c, 0x0000, 0xa72e,
533 0x0000, 0x0000, 0x0000, 0xa732, 0x0000, 0xa734, 0x0000, 0xa736,
534 0x0000, 0xa738, 0x0000, 0xa73a, 0x0000, 0xa73c, 0x0000, 0xa73e,
535 0x0000, 0xa740, 0x0000, 0xa742, 0x0000, 0xa744, 0x0000, 0xa746,
536 0x0000, 0xa748, 0x0000, 0xa74a, 0x0000, 0xa74c, 0x0000, 0xa74e,
537 0x0000, 0xa750, 0x0000, 0xa752, 0x0000, 0xa754, 0x0000, 0xa756,
538 0x0000, 0xa758, 0x0000, 0xa75a, 0x0000, 0xa75c, 0x0000, 0xa75e,
539 0x0000, 0xa760, 0x0000, 0xa762, 0x0000, 0xa764, 0x0000, 0xa766,
540 0x0000, 0xa768, 0x0000, 0xa76a, 0x0000, 0xa76c, 0x0000, 0xa76e,
541 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
542 0x0000, 0x0000, 0xa779, 0x0000, 0xa77b, 0x0000, 0x0000, 0xa77e,
543 0x0000, 0xa780, 0x0000, 0xa782, 0x0000, 0xa784, 0x0000, 0xa786,
544 0x0000, 0x0000, 0x0000, 0x0000, 0xa78b, 0x0000, 0x0000, 0x0000,
545 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
546 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
547 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
548 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
549 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
550 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
551 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
552 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
553 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
554 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
555 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
556 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
557 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
558 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
559};
560
561static const wchar_t t2_ff[256] = {
562 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
563 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
564 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
565 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
566 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
567 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
568 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
569 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
570 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27,
571 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f,
572 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37,
573 0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
574 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
575 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
576 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
577 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
578 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
579 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
580 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
581 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
582 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
583 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
584 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
585 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
586 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
587 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
588 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
589 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
590 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
591 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
592 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
593 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
594};
595
596static const wchar_t *const toplevel[256] = {
597 t2_00, t2_01, t2_02, t2_03, t2_04, t2_05, NULL, NULL,
598 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
599 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
600 NULL, NULL, NULL, NULL, NULL, t2_1d, t2_1e, t2_1f,
601 NULL, t2_21, NULL, NULL, t2_24, NULL, NULL, NULL,
602 NULL, NULL, NULL, NULL, t2_2c, t2_2d, NULL, NULL,
603 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
604 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
605 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
606 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
607 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
608 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
609 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
610 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
611 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
612 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
613 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
614 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
615 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
616 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
617 NULL, NULL, NULL, NULL, NULL, NULL, t2_a6, t2_a7,
618 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
619 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
620 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
621 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
622 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
623 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
624 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
625 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
626 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
627 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
628 NULL, NULL, NULL, NULL, NULL, NULL, NULL, t2_ff,
629};
630
631/**
632 * cifs_toupper - convert a wchar_t from lower to uppercase
633 * @in: character to convert from lower to uppercase
634 *
635 * This function consults the static tables above to convert a wchar_t from
636 * lower to uppercase. In the event that there is no mapping, the original
637 * "in" character is returned.
638 */
639wchar_t
640cifs_toupper(wchar_t in)
641{
642 unsigned char idx;
643 const wchar_t *tbl;
644 wchar_t out;
645
646 /* grab upper byte */
647 idx = (in & 0xff00) >> 8;
648
649 /* find pointer to 2nd layer table */
650 tbl = toplevel[idx];
651 if (!tbl)
652 return in;
653
654 /* grab lower byte */
655 idx = in & 0xff;
656
657 /* look up character in table */
658 out = tbl[idx];
659 if (out)
660 return out;
661
662 return in;
663}
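
For reference, here is a standalone sketch of the two-level lookup that cifs_toupper() performs (the demo_* names and the one-entry tables are illustrative, not part of the patch): the upper byte of the character selects a second-level table, the lower byte indexes into it, and a zero entry means the character has no upper-case mapping and is returned unchanged.

#include <stdio.h>

/* stand-ins for the kernel tables above; only the slot for U+00E9 is shown */
static const unsigned short demo_t2_00[256] = { [0xe9] = 0x00c9 };
static const unsigned short *const demo_toplevel[256] = { [0x00] = demo_t2_00 };

static unsigned short demo_toupper(unsigned short in)
{
        const unsigned short *tbl = demo_toplevel[(in & 0xff00) >> 8];

        if (!tbl)
                return in;              /* no table: identity mapping */
        return tbl[in & 0xff] ? tbl[in & 0xff] : in;
}

int main(void)
{
        /* U+00E9 (e with acute) maps to U+00C9 via t2_00[0xe9] above */
        printf("0x%04x -> 0x%04x\n", 0x00e9, demo_toupper(0x00e9));
        return 0;
}
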
diff --git a/fs/coredump.c b/fs/coredump.c
index 72f816d6cad9..9bdeca12ae0e 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -190,6 +190,11 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
190 err = cn_printf(cn, "%d", 190 err = cn_printf(cn, "%d",
191 task_tgid_vnr(current)); 191 task_tgid_vnr(current));
192 break; 192 break;
193 /* global pid */
194 case 'P':
195 err = cn_printf(cn, "%d",
196 task_tgid_nr(current));
197 break;
193 /* uid */ 198 /* uid */
194 case 'u': 199 case 'u':
195 err = cn_printf(cn, "%d", cred->uid); 200 err = cn_printf(cn, "%d", cred->uid);
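
A small userspace illustration of the new specifier (assumed usage, not part of the patch): with '%P' in core_pattern the dump is named after the global, init-namespace tgid from task_tgid_nr(), while the existing '%p' keeps the namespace-local value. Writing the pattern requires root.

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/core_pattern", "w");

        if (!f)
                return 1;
        /* e.g. "core.1234.1000" even when the crashing task is in a pid namespace */
        fputs("core.%P.%u\n", f);
        fclose(f);
        return 0;
}
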
diff --git a/fs/dcache.c b/fs/dcache.c
index c932ed32c77b..1bd4614ce93b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -90,8 +90,8 @@ static struct kmem_cache *dentry_cache __read_mostly;
90 90
91/** 91/**
92 * read_seqbegin_or_lock - begin a sequence number check or locking block 92 * read_seqbegin_or_lock - begin a sequence number check or locking block
93 * lock: sequence lock 93 * @lock: sequence lock
94 * seq : sequence number to be checked 94 * @seq : sequence number to be checked
95 * 95 *
96 * First try it once optimistically without taking the lock. If that fails, 96 * First try it once optimistically without taking the lock. If that fails,
97 * take the lock. The sequence number is also used as a marker for deciding 97 * take the lock. The sequence number is also used as a marker for deciding
@@ -103,7 +103,7 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
103 if (!(*seq & 1)) /* Even */ 103 if (!(*seq & 1)) /* Even */
104 *seq = read_seqbegin(lock); 104 *seq = read_seqbegin(lock);
105 else /* Odd */ 105 else /* Odd */
106 write_seqlock(lock); 106 read_seqlock_excl(lock);
107} 107}
108 108
109static inline int need_seqretry(seqlock_t *lock, int seq) 109static inline int need_seqretry(seqlock_t *lock, int seq)
@@ -114,7 +114,7 @@ static inline int need_seqretry(seqlock_t *lock, int seq)
114static inline void done_seqretry(seqlock_t *lock, int seq) 114static inline void done_seqretry(seqlock_t *lock, int seq)
115{ 115{
116 if (seq & 1) 116 if (seq & 1)
117 write_sequnlock(lock); 117 read_sequnlock_excl(lock);
118} 118}
119 119
120/* 120/*
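
The two helpers above are used in a lockless-then-locked retry loop; a caller-side sketch (modeled on the dcache walkers in this file, not a verbatim excerpt) looks roughly like this. The point of switching to read_seqlock_excl()/read_sequnlock_excl() is that the locked retry pass no longer bumps the sequence count the way write_seqlock() did, so concurrent lockless readers are not forced to restart.

/* sketch; assumes fs/dcache.c context where rename_lock is defined */
static void walk_dentries_sketch(void)
{
        int seq = 0;

        rcu_read_lock();
restart:
        read_seqbegin_or_lock(&rename_lock, &seq);

        /* ... lockless traversal of the dentry tree ... */

        if (need_seqretry(&rename_lock, seq)) {
                seq = 1;        /* odd value: next pass holds the lock */
                goto restart;
        }
        done_seqretry(&rename_lock, seq);
        rcu_read_unlock();
}
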
@@ -2753,9 +2753,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2753 2753
2754/** 2754/**
2755 * prepend_name - prepend a pathname in front of current buffer pointer 2755 * prepend_name - prepend a pathname in front of current buffer pointer
2756 * buffer: buffer pointer 2756 * @buffer: buffer pointer
2757 * buflen: allocated length of the buffer 2757 * @buflen: allocated length of the buffer
2758 * name: name string and length qstr structure 2758 * @name: name string and length qstr structure
2759 * 2759 *
2760 * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to 2760 * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
2761 * make sure that either the old or the new name pointer and length are 2761 * make sure that either the old or the new name pointer and length are
@@ -2793,14 +2793,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2793 * @buffer: pointer to the end of the buffer 2793 * @buffer: pointer to the end of the buffer
2794 * @buflen: pointer to buffer length 2794 * @buflen: pointer to buffer length
2795 * 2795 *
2796 * The function tries to write out the pathname without taking any lock other 2796 * The function will first try to write out the pathname without taking any
2797 * than the RCU read lock to make sure that dentries won't go away. It only 2797 * lock other than the RCU read lock to make sure that dentries won't go away.
2798 * checks the sequence number of the global rename_lock as any change in the 2798 * It only checks the sequence number of the global rename_lock as any change
2799 * dentry's d_seq will be preceded by changes in the rename_lock sequence 2799 * in the dentry's d_seq will be preceded by changes in the rename_lock
2800 * number. If the sequence number had been change, it will restart the whole 2800 * sequence number. If the sequence number had been changed, it will restart
2801 * pathname back-tracing sequence again. It performs a total of 3 trials of 2801 * the whole pathname back-tracing sequence again by taking the rename_lock.
2802 * lockless back-tracing sequences before falling back to take the 2802 * In this case, there is no need to take the RCU read lock as the recursive
2803 * rename_lock. 2803 * parent pointer references will keep the dentry chain alive as long as no
2804 * rename operation is performed.
2804 */ 2805 */
2805static int prepend_path(const struct path *path, 2806static int prepend_path(const struct path *path,
2806 const struct path *root, 2807 const struct path *root,
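
To see why prepend_path() and prepend_name() pass around a buffer-end pointer and a shrinking length, here is a minimal userspace model of the prepend() helper (illustrative names; the kernel version returns -ENAMETOOLONG rather than -1): each component is copied in front of what is already in the buffer, so the finished path ends up at the final pointer.

#include <stdio.h>
#include <string.h>

static int prepend_sketch(char **buffer, int *buflen, const char *str, int len)
{
        *buflen -= len;
        if (*buflen < 0)
                return -1;              /* -ENAMETOOLONG in the kernel */
        *buffer -= len;
        memcpy(*buffer, str, len);
        return 0;
}

int main(void)
{
        char buf[64];
        char *end = buf + sizeof(buf);
        int buflen = sizeof(buf);

        /* built right to left: terminator, then "file", then "/dir" */
        prepend_sketch(&end, &buflen, "\0", 1);
        prepend_sketch(&end, &buflen, "file", 4);
        prepend_sketch(&end, &buflen, "/", 1);
        prepend_sketch(&end, &buflen, "dir", 3);
        prepend_sketch(&end, &buflen, "/", 1);
        printf("%s\n", end);            /* prints "/dir/file" */
        return 0;
}
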
@@ -2948,6 +2949,16 @@ static int prepend_unreachable(char **buffer, int *buflen)
2948 return prepend(buffer, buflen, "(unreachable)", 13); 2949 return prepend(buffer, buflen, "(unreachable)", 13);
2949} 2950}
2950 2951
2952static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
2953{
2954 unsigned seq;
2955
2956 do {
2957 seq = read_seqcount_begin(&fs->seq);
2958 *root = fs->root;
2959 } while (read_seqcount_retry(&fs->seq, seq));
2960}
2961
2951/** 2962/**
2952 * d_path - return the path of a dentry 2963 * d_path - return the path of a dentry
2953 * @path: path to report 2964 * @path: path to report
@@ -2980,13 +2991,15 @@ char *d_path(const struct path *path, char *buf, int buflen)
2980 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2991 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2981 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2992 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2982 2993
2983 get_fs_root(current->fs, &root); 2994 rcu_read_lock();
2995 get_fs_root_rcu(current->fs, &root);
2984 br_read_lock(&vfsmount_lock); 2996 br_read_lock(&vfsmount_lock);
2985 error = path_with_deleted(path, &root, &res, &buflen); 2997 error = path_with_deleted(path, &root, &res, &buflen);
2986 br_read_unlock(&vfsmount_lock); 2998 br_read_unlock(&vfsmount_lock);
2999 rcu_read_unlock();
3000
2987 if (error < 0) 3001 if (error < 0)
2988 res = ERR_PTR(error); 3002 res = ERR_PTR(error);
2989 path_put(&root);
2990 return res; 3003 return res;
2991} 3004}
2992EXPORT_SYMBOL(d_path); 3005EXPORT_SYMBOL(d_path);
@@ -3094,6 +3107,18 @@ Elong:
3094 return ERR_PTR(-ENAMETOOLONG); 3107 return ERR_PTR(-ENAMETOOLONG);
3095} 3108}
3096 3109
3110static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
3111 struct path *pwd)
3112{
3113 unsigned seq;
3114
3115 do {
3116 seq = read_seqcount_begin(&fs->seq);
3117 *root = fs->root;
3118 *pwd = fs->pwd;
3119 } while (read_seqcount_retry(&fs->seq, seq));
3120}
3121
3097/* 3122/*
3098 * NOTE! The user-level library version returns a 3123 * NOTE! The user-level library version returns a
3099 * character pointer. The kernel system call just 3124 * character pointer. The kernel system call just
@@ -3116,23 +3141,25 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3116{ 3141{
3117 int error; 3142 int error;
3118 struct path pwd, root; 3143 struct path pwd, root;
3119 char *page = (char *) __get_free_page(GFP_USER); 3144 char *page = __getname();
3120 3145
3121 if (!page) 3146 if (!page)
3122 return -ENOMEM; 3147 return -ENOMEM;
3123 3148
3124 get_fs_root_and_pwd(current->fs, &root, &pwd); 3149 rcu_read_lock();
3150 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
3125 3151
3126 error = -ENOENT; 3152 error = -ENOENT;
3127 br_read_lock(&vfsmount_lock); 3153 br_read_lock(&vfsmount_lock);
3128 if (!d_unlinked(pwd.dentry)) { 3154 if (!d_unlinked(pwd.dentry)) {
3129 unsigned long len; 3155 unsigned long len;
3130 char *cwd = page + PAGE_SIZE; 3156 char *cwd = page + PATH_MAX;
3131 int buflen = PAGE_SIZE; 3157 int buflen = PATH_MAX;
3132 3158
3133 prepend(&cwd, &buflen, "\0", 1); 3159 prepend(&cwd, &buflen, "\0", 1);
3134 error = prepend_path(&pwd, &root, &cwd, &buflen); 3160 error = prepend_path(&pwd, &root, &cwd, &buflen);
3135 br_read_unlock(&vfsmount_lock); 3161 br_read_unlock(&vfsmount_lock);
3162 rcu_read_unlock();
3136 3163
3137 if (error < 0) 3164 if (error < 0)
3138 goto out; 3165 goto out;
@@ -3145,7 +3172,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3145 } 3172 }
3146 3173
3147 error = -ERANGE; 3174 error = -ERANGE;
3148 len = PAGE_SIZE + page - cwd; 3175 len = PATH_MAX + page - cwd;
3149 if (len <= size) { 3176 if (len <= size) {
3150 error = len; 3177 error = len;
3151 if (copy_to_user(buf, cwd, len)) 3178 if (copy_to_user(buf, cwd, len))
@@ -3153,12 +3180,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3153 } 3180 }
3154 } else { 3181 } else {
3155 br_read_unlock(&vfsmount_lock); 3182 br_read_unlock(&vfsmount_lock);
3183 rcu_read_unlock();
3156 } 3184 }
3157 3185
3158out: 3186out:
3159 path_put(&pwd); 3187 __putname(page);
3160 path_put(&root);
3161 free_page((unsigned long) page);
3162 return error; 3188 return error;
3163} 3189}
3164 3190
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index d10757635b9c..c88e355f7635 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -609,39 +609,35 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
609 char *full_alg_name; 609 char *full_alg_name;
610 int rc = -EINVAL; 610 int rc = -EINVAL;
611 611
612 if (!crypt_stat->cipher) {
613 ecryptfs_printk(KERN_ERR, "No cipher specified\n");
614 goto out;
615 }
616 ecryptfs_printk(KERN_DEBUG, 612 ecryptfs_printk(KERN_DEBUG,
617 "Initializing cipher [%s]; strlen = [%d]; " 613 "Initializing cipher [%s]; strlen = [%d]; "
618 "key_size_bits = [%zd]\n", 614 "key_size_bits = [%zd]\n",
619 crypt_stat->cipher, (int)strlen(crypt_stat->cipher), 615 crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
620 crypt_stat->key_size << 3); 616 crypt_stat->key_size << 3);
617 mutex_lock(&crypt_stat->cs_tfm_mutex);
621 if (crypt_stat->tfm) { 618 if (crypt_stat->tfm) {
622 rc = 0; 619 rc = 0;
623 goto out; 620 goto out_unlock;
624 } 621 }
625 mutex_lock(&crypt_stat->cs_tfm_mutex);
626 rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, 622 rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
627 crypt_stat->cipher, "cbc"); 623 crypt_stat->cipher, "cbc");
628 if (rc) 624 if (rc)
629 goto out_unlock; 625 goto out_unlock;
630 crypt_stat->tfm = crypto_alloc_ablkcipher(full_alg_name, 0, 0); 626 crypt_stat->tfm = crypto_alloc_ablkcipher(full_alg_name, 0, 0);
631 kfree(full_alg_name);
632 if (IS_ERR(crypt_stat->tfm)) { 627 if (IS_ERR(crypt_stat->tfm)) {
633 rc = PTR_ERR(crypt_stat->tfm); 628 rc = PTR_ERR(crypt_stat->tfm);
634 crypt_stat->tfm = NULL; 629 crypt_stat->tfm = NULL;
635 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " 630 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
636 "Error initializing cipher [%s]\n", 631 "Error initializing cipher [%s]\n",
637 crypt_stat->cipher); 632 full_alg_name);
638 goto out_unlock; 633 goto out_free;
639 } 634 }
640 crypto_ablkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY); 635 crypto_ablkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
641 rc = 0; 636 rc = 0;
637out_free:
638 kfree(full_alg_name);
642out_unlock: 639out_unlock:
643 mutex_unlock(&crypt_stat->cs_tfm_mutex); 640 mutex_unlock(&crypt_stat->cs_tfm_mutex);
644out:
645 return rc; 641 return rc;
646} 642}
647 643
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 293f86741ddb..473e09da7d02 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -740,6 +740,7 @@ static void ep_free(struct eventpoll *ep)
740 epi = rb_entry(rbp, struct epitem, rbn); 740 epi = rb_entry(rbp, struct epitem, rbn);
741 741
742 ep_unregister_pollwait(ep, epi); 742 ep_unregister_pollwait(ep, epi);
743 cond_resched();
743 } 744 }
744 745
745 /* 746 /*
@@ -754,6 +755,7 @@ static void ep_free(struct eventpoll *ep)
754 while ((rbp = rb_first(&ep->rbr)) != NULL) { 755 while ((rbp = rb_first(&ep->rbr)) != NULL) {
755 epi = rb_entry(rbp, struct epitem, rbn); 756 epi = rb_entry(rbp, struct epitem, rbn);
756 ep_remove(ep, epi); 757 ep_remove(ep, epi);
758 cond_resched();
757 } 759 }
758 mutex_unlock(&ep->mtx); 760 mutex_unlock(&ep->mtx);
759 761
diff --git a/fs/exec.c b/fs/exec.c
index fd774c7cb483..8875dd10ae7a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -74,6 +74,8 @@ static DEFINE_RWLOCK(binfmt_lock);
74void __register_binfmt(struct linux_binfmt * fmt, int insert) 74void __register_binfmt(struct linux_binfmt * fmt, int insert)
75{ 75{
76 BUG_ON(!fmt); 76 BUG_ON(!fmt);
77 if (WARN_ON(!fmt->load_binary))
78 return;
77 write_lock(&binfmt_lock); 79 write_lock(&binfmt_lock);
78 insert ? list_add(&fmt->lh, &formats) : 80 insert ? list_add(&fmt->lh, &formats) :
79 list_add_tail(&fmt->lh, &formats); 81 list_add_tail(&fmt->lh, &formats);
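
The new WARN_ON refuses a format that forgets to set ->load_binary, which lets the reworked search_binary_handler() below call the hook unconditionally instead of skipping NULL entries. A bare-bones module sketch of a well-formed registration (demo_* names are made up for illustration):

#include <linux/binfmts.h>
#include <linux/errno.h>
#include <linux/module.h>

static int demo_load_binary(struct linux_binprm *bprm)
{
        return -ENOEXEC;        /* decline every image in this sketch */
}

static struct linux_binfmt demo_format = {
        .module         = THIS_MODULE,
        .load_binary    = demo_load_binary,     /* must not be NULL */
};

static int __init demo_init(void)
{
        register_binfmt(&demo_format);
        return 0;
}

static void __exit demo_exit(void)
{
        unregister_binfmt(&demo_format);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
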
@@ -266,7 +268,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
266 BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); 268 BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
267 vma->vm_end = STACK_TOP_MAX; 269 vma->vm_end = STACK_TOP_MAX;
268 vma->vm_start = vma->vm_end - PAGE_SIZE; 270 vma->vm_start = vma->vm_end - PAGE_SIZE;
269 vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; 271 vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
270 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 272 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
271 INIT_LIST_HEAD(&vma->anon_vma_chain); 273 INIT_LIST_HEAD(&vma->anon_vma_chain);
272 274
@@ -1365,18 +1367,18 @@ out:
1365} 1367}
1366EXPORT_SYMBOL(remove_arg_zero); 1368EXPORT_SYMBOL(remove_arg_zero);
1367 1369
1370#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
1368/* 1371/*
1369 * cycle through the list of binary format handlers until one recognizes the image 1372 * cycle through the list of binary format handlers until one recognizes the image
1370 */ 1373 */
1371int search_binary_handler(struct linux_binprm *bprm) 1374int search_binary_handler(struct linux_binprm *bprm)
1372{ 1375{
1373 unsigned int depth = bprm->recursion_depth; 1376 bool need_retry = IS_ENABLED(CONFIG_MODULES);
1374 int try,retval;
1375 struct linux_binfmt *fmt; 1377 struct linux_binfmt *fmt;
1376 pid_t old_pid, old_vpid; 1378 int retval;
1377 1379
1378 /* This allows 4 levels of binfmt rewrites before failing hard. */ 1380 /* This allows 4 levels of binfmt rewrites before failing hard. */
1379 if (depth > 5) 1381 if (bprm->recursion_depth > 5)
1380 return -ELOOP; 1382 return -ELOOP;
1381 1383
1382 retval = security_bprm_check(bprm); 1384 retval = security_bprm_check(bprm);
@@ -1387,71 +1389,67 @@ int search_binary_handler(struct linux_binprm *bprm)
1387 if (retval) 1389 if (retval)
1388 return retval; 1390 return retval;
1389 1391
1392 retval = -ENOENT;
1393 retry:
1394 read_lock(&binfmt_lock);
1395 list_for_each_entry(fmt, &formats, lh) {
1396 if (!try_module_get(fmt->module))
1397 continue;
1398 read_unlock(&binfmt_lock);
1399 bprm->recursion_depth++;
1400 retval = fmt->load_binary(bprm);
1401 bprm->recursion_depth--;
1402 if (retval >= 0 || retval != -ENOEXEC ||
1403 bprm->mm == NULL || bprm->file == NULL) {
1404 put_binfmt(fmt);
1405 return retval;
1406 }
1407 read_lock(&binfmt_lock);
1408 put_binfmt(fmt);
1409 }
1410 read_unlock(&binfmt_lock);
1411
1412 if (need_retry && retval == -ENOEXEC) {
1413 if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
1414 printable(bprm->buf[2]) && printable(bprm->buf[3]))
1415 return retval;
1416 if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
1417 return retval;
1418 need_retry = false;
1419 goto retry;
1420 }
1421
1422 return retval;
1423}
1424EXPORT_SYMBOL(search_binary_handler);
1425
1426static int exec_binprm(struct linux_binprm *bprm)
1427{
1428 pid_t old_pid, old_vpid;
1429 int ret;
1430
1390 /* Need to fetch pid before load_binary changes it */ 1431 /* Need to fetch pid before load_binary changes it */
1391 old_pid = current->pid; 1432 old_pid = current->pid;
1392 rcu_read_lock(); 1433 rcu_read_lock();
1393 old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); 1434 old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1394 rcu_read_unlock(); 1435 rcu_read_unlock();
1395 1436
1396 retval = -ENOENT; 1437 ret = search_binary_handler(bprm);
1397 for (try=0; try<2; try++) { 1438 if (ret >= 0) {
1398 read_lock(&binfmt_lock); 1439 trace_sched_process_exec(current, old_pid, bprm);
1399 list_for_each_entry(fmt, &formats, lh) { 1440 ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1400 int (*fn)(struct linux_binprm *) = fmt->load_binary; 1441 current->did_exec = 1;
1401 if (!fn) 1442 proc_exec_connector(current);
1402 continue; 1443
1403 if (!try_module_get(fmt->module)) 1444 if (bprm->file) {
1404 continue; 1445 allow_write_access(bprm->file);
1405 read_unlock(&binfmt_lock); 1446 fput(bprm->file);
1406 bprm->recursion_depth = depth + 1; 1447 bprm->file = NULL; /* to catch use-after-free */
1407 retval = fn(bprm);
1408 bprm->recursion_depth = depth;
1409 if (retval >= 0) {
1410 if (depth == 0) {
1411 trace_sched_process_exec(current, old_pid, bprm);
1412 ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1413 }
1414 put_binfmt(fmt);
1415 allow_write_access(bprm->file);
1416 if (bprm->file)
1417 fput(bprm->file);
1418 bprm->file = NULL;
1419 current->did_exec = 1;
1420 proc_exec_connector(current);
1421 return retval;
1422 }
1423 read_lock(&binfmt_lock);
1424 put_binfmt(fmt);
1425 if (retval != -ENOEXEC || bprm->mm == NULL)
1426 break;
1427 if (!bprm->file) {
1428 read_unlock(&binfmt_lock);
1429 return retval;
1430 }
1431 } 1448 }
1432 read_unlock(&binfmt_lock);
1433#ifdef CONFIG_MODULES
1434 if (retval != -ENOEXEC || bprm->mm == NULL) {
1435 break;
1436 } else {
1437#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
1438 if (printable(bprm->buf[0]) &&
1439 printable(bprm->buf[1]) &&
1440 printable(bprm->buf[2]) &&
1441 printable(bprm->buf[3]))
1442 break; /* -ENOEXEC */
1443 if (try)
1444 break; /* -ENOEXEC */
1445 request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
1446 }
1447#else
1448 break;
1449#endif
1450 } 1449 }
1451 return retval;
1452}
1453 1450
1454EXPORT_SYMBOL(search_binary_handler); 1451 return ret;
1452}
1455 1453
1456/* 1454/*
1457 * sys_execve() executes a new program. 1455 * sys_execve() executes a new program.
@@ -1541,7 +1539,7 @@ static int do_execve_common(const char *filename,
1541 if (retval < 0) 1539 if (retval < 0)
1542 goto out; 1540 goto out;
1543 1541
1544 retval = search_binary_handler(bprm); 1542 retval = exec_binprm(bprm);
1545 if (retval < 0) 1543 if (retval < 0)
1546 goto out; 1544 goto out;
1547 1545
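exec.c is reorganized so that search_binary_handler() only walks the registered handlers, while the PID bookkeeping, tracing, and release of bprm->file move into the new exec_binprm(). When every handler returns -ENOEXEC, the image does not start with four printable bytes, and modules are enabled, the loop now retries once after asking for a module named from bytes 2-3 of the header. A tiny illustration of how that alias string is formed (the buffer contents are invented):

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* First four bytes of some unrecognized image (invented values). */
        unsigned char buf[4] = { 0x7f, 0x45, 0x4c, 0x46 };
        unsigned short id;

        /* Same arithmetic as request_module("binfmt-%04x", ...): bytes 2-3
         * of the header, read in host byte order. */
        memcpy(&id, buf + 2, sizeof(id));
        printf("binfmt-%04x\n", id);    /* prints "binfmt-464c" on little-endian */
        return 0;
}
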
diff --git a/fs/file_table.c b/fs/file_table.c
index 322cd37626cb..abdd15ad13c9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -311,8 +311,7 @@ void fput(struct file *file)
311 return; 311 return;
312 /* 312 /*
313 * After this task has run exit_task_work(), 313 * After this task has run exit_task_work(),
314 * task_work_add() will fail. free_ipc_ns()-> 314 * task_work_add() will fail. Fall through to delayed
315 * shm_destroy() can do this. Fall through to delayed
316 * fput to avoid leaking *file. 315 * fput to avoid leaking *file.
317 */ 316 */
318 } 317 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 68851ff2fd41..30f6f27d5a59 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
723 return wrote; 723 return wrote;
724} 724}
725 725
726long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, 726static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
727 enum wb_reason reason) 727 enum wb_reason reason)
728{ 728{
729 struct wb_writeback_work work = { 729 struct wb_writeback_work work = {
@@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1049{ 1049{
1050 struct backing_dev_info *bdi; 1050 struct backing_dev_info *bdi;
1051 1051
1052 if (!nr_pages) { 1052 if (!nr_pages)
1053 nr_pages = global_page_state(NR_FILE_DIRTY) + 1053 nr_pages = get_nr_dirty_pages();
1054 global_page_state(NR_UNSTABLE_NFS);
1055 }
1056 1054
1057 rcu_read_lock(); 1055 rcu_read_lock();
1058 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { 1056 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
@@ -1173,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1173 bool wakeup_bdi = false; 1171 bool wakeup_bdi = false;
1174 bdi = inode_to_bdi(inode); 1172 bdi = inode_to_bdi(inode);
1175 1173
1174 spin_unlock(&inode->i_lock);
1175 spin_lock(&bdi->wb.list_lock);
1176 if (bdi_cap_writeback_dirty(bdi)) { 1176 if (bdi_cap_writeback_dirty(bdi)) {
1177 WARN(!test_bit(BDI_registered, &bdi->state), 1177 WARN(!test_bit(BDI_registered, &bdi->state),
1178 "bdi-%s not registered\n", bdi->name); 1178 "bdi-%s not registered\n", bdi->name);
@@ -1187,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1187 wakeup_bdi = true; 1187 wakeup_bdi = true;
1188 } 1188 }
1189 1189
1190 spin_unlock(&inode->i_lock);
1191 spin_lock(&bdi->wb.list_lock);
1192 inode->dirtied_when = jiffies; 1190 inode->dirtied_when = jiffies;
1193 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1191 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1194 spin_unlock(&bdi->wb.list_lock); 1192 spin_unlock(&bdi->wb.list_lock);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 8702b732109a..73899c1c3449 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -913,7 +913,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
913 (1 << FSCACHE_OP_WAITING) | 913 (1 << FSCACHE_OP_WAITING) |
914 (1 << FSCACHE_OP_UNUSE_COOKIE); 914 (1 << FSCACHE_OP_UNUSE_COOKIE);
915 915
916 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 916 ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM);
917 if (ret < 0) 917 if (ret < 0)
918 goto nomem_free; 918 goto nomem_free;
919 919
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e0fe703ee3d6..84434594e80e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -930,7 +930,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
930 fc->bdi.name = "fuse"; 930 fc->bdi.name = "fuse";
931 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 931 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
932 /* fuse does its own writeback accounting */ 932 /* fuse does its own writeback accounting */
933 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; 933 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
934 934
935 err = bdi_init(&fc->bdi); 935 err = bdi_init(&fc->bdi);
936 if (err) 936 if (err)
diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig
index a63371815aab..24bc20fd42f7 100644
--- a/fs/hfsplus/Kconfig
+++ b/fs/hfsplus/Kconfig
@@ -11,3 +11,21 @@ config HFSPLUS_FS
11 MacOS 8. It includes all Mac specific filesystem data such as 11 MacOS 8. It includes all Mac specific filesystem data such as
12 data forks and creator codes, but it also has several UNIX 12 data forks and creator codes, but it also has several UNIX
13 style features such as file ownership and permissions. 13 style features such as file ownership and permissions.
14
15config HFSPLUS_FS_POSIX_ACL
16 bool "HFS+ POSIX Access Control Lists"
17 depends on HFSPLUS_FS
18 select FS_POSIX_ACL
19 help
20 POSIX Access Control Lists (ACLs) support permissions for users and
21 groups beyond the owner/group/world scheme.
22
23 To learn more about Access Control Lists, visit the POSIX ACLs for
24 Linux website <http://acl.bestbits.at/>.
25
26 Note that POSIX ACLs are handled only under Linux; they have no
27 meaning under Mac OS X. Mac OS X, beginning with version 10.4
28 ("Tiger"), supports NFSv4 ACLs, which are part of the NFSv4
29 standard.
30
31 If you don't know what Access Control Lists are, say N
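With the new option enabled, the access ACL of a file on an HFS+ volume becomes visible through the standard POSIX.1e interfaces. A small illustrative check from userspace, assuming libacl is installed (build with -lacl); the program and its path argument are not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <sys/acl.h>            /* libacl; link with -lacl */

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : ".";
        acl_t acl;
        char *text;

        /* ACL_TYPE_ACCESS corresponds to the system.posix_acl_access
         * xattr served by the new hfsplus handlers. */
        acl = acl_get_file(path, ACL_TYPE_ACCESS);
        if (!acl) {
                perror("acl_get_file");
                return EXIT_FAILURE;
        }

        text = acl_to_text(acl, NULL);
        if (text) {
                printf("%s", text);
                acl_free(text);
        }
        acl_free(acl);
        return EXIT_SUCCESS;
}

getfacl(1) and setfacl(1) wrap these same library calls.
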
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index 09d278bb7b91..683fca2e5e65 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ 7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ 8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
9 attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o 9 attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
10
11hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o
diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h
new file mode 100644
index 000000000000..07c0d4947527
--- /dev/null
+++ b/fs/hfsplus/acl.h
@@ -0,0 +1,30 @@
1/*
2 * linux/fs/hfsplus/acl.h
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for Posix Access Control Lists (ACLs) support.
7 */
8
9#include <linux/posix_acl_xattr.h>
10
11#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
12
13/* posix_acl.c */
14struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type);
15extern int hfsplus_posix_acl_chmod(struct inode *);
16extern int hfsplus_init_posix_acl(struct inode *, struct inode *);
17
18#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */
19#define hfsplus_get_posix_acl NULL
20
21static inline int hfsplus_posix_acl_chmod(struct inode *inode)
22{
23 return 0;
24}
25
26static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
27{
28 return 0;
29}
30#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d8ce4bd17fc5..4a4fea002673 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -16,6 +16,7 @@
16#include "hfsplus_fs.h" 16#include "hfsplus_fs.h"
17#include "hfsplus_raw.h" 17#include "hfsplus_raw.h"
18#include "xattr.h" 18#include "xattr.h"
19#include "acl.h"
19 20
20static inline void hfsplus_instantiate(struct dentry *dentry, 21static inline void hfsplus_instantiate(struct dentry *dentry,
21 struct inode *inode, u32 cnid) 22 struct inode *inode, u32 cnid)
@@ -529,6 +530,9 @@ const struct inode_operations hfsplus_dir_inode_operations = {
529 .getxattr = generic_getxattr, 530 .getxattr = generic_getxattr,
530 .listxattr = hfsplus_listxattr, 531 .listxattr = hfsplus_listxattr,
531 .removexattr = hfsplus_removexattr, 532 .removexattr = hfsplus_removexattr,
533#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
534 .get_acl = hfsplus_get_posix_acl,
535#endif
532}; 536};
533 537
534const struct file_operations hfsplus_dir_operations = { 538const struct file_operations hfsplus_dir_operations = {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index ede79317cfb8..2b9cd01696e2 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -30,6 +30,7 @@
30#define DBG_EXTENT 0x00000020 30#define DBG_EXTENT 0x00000020
31#define DBG_BITMAP 0x00000040 31#define DBG_BITMAP 0x00000040
32#define DBG_ATTR_MOD 0x00000080 32#define DBG_ATTR_MOD 0x00000080
33#define DBG_ACL_MOD 0x00000100
33 34
34#if 0 35#if 0
35#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) 36#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index f833d35630ab..4d2edaea891c 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -19,6 +19,7 @@
19#include "hfsplus_fs.h" 19#include "hfsplus_fs.h"
20#include "hfsplus_raw.h" 20#include "hfsplus_raw.h"
21#include "xattr.h" 21#include "xattr.h"
22#include "acl.h"
22 23
23static int hfsplus_readpage(struct file *file, struct page *page) 24static int hfsplus_readpage(struct file *file, struct page *page)
24{ 25{
@@ -316,6 +317,13 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
316 317
317 setattr_copy(inode, attr); 318 setattr_copy(inode, attr);
318 mark_inode_dirty(inode); 319 mark_inode_dirty(inode);
320
321 if (attr->ia_valid & ATTR_MODE) {
322 error = hfsplus_posix_acl_chmod(inode);
323 if (unlikely(error))
324 return error;
325 }
326
319 return 0; 327 return 0;
320} 328}
321 329
@@ -383,6 +391,9 @@ static const struct inode_operations hfsplus_file_inode_operations = {
383 .getxattr = generic_getxattr, 391 .getxattr = generic_getxattr,
384 .listxattr = hfsplus_listxattr, 392 .listxattr = hfsplus_listxattr,
385 .removexattr = hfsplus_removexattr, 393 .removexattr = hfsplus_removexattr,
394#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
395 .get_acl = hfsplus_get_posix_acl,
396#endif
386}; 397};
387 398
388static const struct file_operations hfsplus_file_operations = { 399static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
new file mode 100644
index 000000000000..b609cc14c72e
--- /dev/null
+++ b/fs/hfsplus/posix_acl.c
@@ -0,0 +1,274 @@
1/*
2 * linux/fs/hfsplus/posix_acl.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for Posix Access Control Lists (ACLs) support.
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11#include "acl.h"
12
13struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
14{
15 struct posix_acl *acl;
16 char *xattr_name;
17 char *value = NULL;
18 ssize_t size;
19
20 acl = get_cached_acl(inode, type);
21 if (acl != ACL_NOT_CACHED)
22 return acl;
23
24 switch (type) {
25 case ACL_TYPE_ACCESS:
26 xattr_name = POSIX_ACL_XATTR_ACCESS;
27 break;
28 case ACL_TYPE_DEFAULT:
29 xattr_name = POSIX_ACL_XATTR_DEFAULT;
30 break;
31 default:
32 return ERR_PTR(-EINVAL);
33 }
34
35 size = __hfsplus_getxattr(inode, xattr_name, NULL, 0);
36
37 if (size > 0) {
38 value = (char *)hfsplus_alloc_attr_entry();
39 if (unlikely(!value))
40 return ERR_PTR(-ENOMEM);
41 size = __hfsplus_getxattr(inode, xattr_name, value, size);
42 }
43
44 if (size > 0)
45 acl = posix_acl_from_xattr(&init_user_ns, value, size);
46 else if (size == -ENODATA)
47 acl = NULL;
48 else
49 acl = ERR_PTR(size);
50
51 hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
52
53 if (!IS_ERR(acl))
54 set_cached_acl(inode, type, acl);
55
56 return acl;
57}
58
59static int hfsplus_set_posix_acl(struct inode *inode,
60 int type,
61 struct posix_acl *acl)
62{
63 int err;
64 char *xattr_name;
65 size_t size = 0;
66 char *value = NULL;
67
68 if (S_ISLNK(inode->i_mode))
69 return -EOPNOTSUPP;
70
71 switch (type) {
72 case ACL_TYPE_ACCESS:
73 xattr_name = POSIX_ACL_XATTR_ACCESS;
74 if (acl) {
75 err = posix_acl_equiv_mode(acl, &inode->i_mode);
76 if (err < 0)
77 return err;
78 }
79 err = 0;
80 break;
81
82 case ACL_TYPE_DEFAULT:
83 xattr_name = POSIX_ACL_XATTR_DEFAULT;
84 if (!S_ISDIR(inode->i_mode))
85 return acl ? -EACCES : 0;
86 break;
87
88 default:
89 return -EINVAL;
90 }
91
92 if (acl) {
93 size = posix_acl_xattr_size(acl->a_count);
94 if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE))
95 return -ENOMEM;
96 value = (char *)hfsplus_alloc_attr_entry();
97 if (unlikely(!value))
98 return -ENOMEM;
99 err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
100 if (unlikely(err < 0))
101 goto end_set_acl;
102 }
103
104 err = __hfsplus_setxattr(inode, xattr_name, value, size, 0);
105
106end_set_acl:
107 hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
108
109 if (!err)
110 set_cached_acl(inode, type, acl);
111
112 return err;
113}
114
115int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
116{
117 int err = 0;
118 struct posix_acl *acl = NULL;
119
120 hfs_dbg(ACL_MOD,
121 "[%s]: ino %lu, dir->ino %lu\n",
122 __func__, inode->i_ino, dir->i_ino);
123
124 if (S_ISLNK(inode->i_mode))
125 return 0;
126
127 acl = hfsplus_get_posix_acl(dir, ACL_TYPE_DEFAULT);
128 if (IS_ERR(acl))
129 return PTR_ERR(acl);
130
131 if (acl) {
132 if (S_ISDIR(inode->i_mode)) {
133 err = hfsplus_set_posix_acl(inode,
134 ACL_TYPE_DEFAULT,
135 acl);
136 if (unlikely(err))
137 goto init_acl_cleanup;
138 }
139
140 err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
141 if (unlikely(err < 0))
142 return err;
143
144 if (err > 0)
145 err = hfsplus_set_posix_acl(inode,
146 ACL_TYPE_ACCESS,
147 acl);
148 } else
149 inode->i_mode &= ~current_umask();
150
151init_acl_cleanup:
152 posix_acl_release(acl);
153 return err;
154}
155
156int hfsplus_posix_acl_chmod(struct inode *inode)
157{
158 int err;
159 struct posix_acl *acl;
160
161 hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
162
163 if (S_ISLNK(inode->i_mode))
164 return -EOPNOTSUPP;
165
166 acl = hfsplus_get_posix_acl(inode, ACL_TYPE_ACCESS);
167 if (IS_ERR(acl) || !acl)
168 return PTR_ERR(acl);
169
170 err = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
171 if (unlikely(err))
172 return err;
173
174 err = hfsplus_set_posix_acl(inode, ACL_TYPE_ACCESS, acl);
175 posix_acl_release(acl);
176 return err;
177}
178
179static int hfsplus_xattr_get_posix_acl(struct dentry *dentry,
180 const char *name,
181 void *buffer,
182 size_t size,
183 int type)
184{
185 int err = 0;
186 struct posix_acl *acl;
187
188 hfs_dbg(ACL_MOD,
189 "[%s]: ino %lu, buffer %p, size %zu, type %#x\n",
190 __func__, dentry->d_inode->i_ino, buffer, size, type);
191
192 if (strcmp(name, "") != 0)
193 return -EINVAL;
194
195 acl = hfsplus_get_posix_acl(dentry->d_inode, type);
196 if (IS_ERR(acl))
197 return PTR_ERR(acl);
198 if (acl == NULL)
199 return -ENODATA;
200
201 err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
202 posix_acl_release(acl);
203
204 return err;
205}
206
207static int hfsplus_xattr_set_posix_acl(struct dentry *dentry,
208 const char *name,
209 const void *value,
210 size_t size,
211 int flags,
212 int type)
213{
214 int err = 0;
215 struct inode *inode = dentry->d_inode;
216 struct posix_acl *acl = NULL;
217
218 hfs_dbg(ACL_MOD,
219 "[%s]: ino %lu, value %p, size %zu, flags %#x, type %#x\n",
220 __func__, inode->i_ino, value, size, flags, type);
221
222 if (strcmp(name, "") != 0)
223 return -EINVAL;
224
225 if (!inode_owner_or_capable(inode))
226 return -EPERM;
227
228 if (value) {
229 acl = posix_acl_from_xattr(&init_user_ns, value, size);
230 if (IS_ERR(acl))
231 return PTR_ERR(acl);
232 else if (acl) {
233 err = posix_acl_valid(acl);
234 if (err)
235 goto end_xattr_set_acl;
236 }
237 }
238
239 err = hfsplus_set_posix_acl(inode, type, acl);
240
241end_xattr_set_acl:
242 posix_acl_release(acl);
243 return err;
244}
245
246static size_t hfsplus_xattr_list_posix_acl(struct dentry *dentry,
247 char *list,
248 size_t list_size,
249 const char *name,
250 size_t name_len,
251 int type)
252{
253 /*
254 * This method is not used.
255 * hfsplus_listxattr() is used instead of generic_listxattr().
256 */
257 return -EOPNOTSUPP;
258}
259
260const struct xattr_handler hfsplus_xattr_acl_access_handler = {
261 .prefix = POSIX_ACL_XATTR_ACCESS,
262 .flags = ACL_TYPE_ACCESS,
263 .list = hfsplus_xattr_list_posix_acl,
264 .get = hfsplus_xattr_get_posix_acl,
265 .set = hfsplus_xattr_set_posix_acl,
266};
267
268const struct xattr_handler hfsplus_xattr_acl_default_handler = {
269 .prefix = POSIX_ACL_XATTR_DEFAULT,
270 .flags = ACL_TYPE_DEFAULT,
271 .list = hfsplus_xattr_list_posix_acl,
272 .get = hfsplus_xattr_get_posix_acl,
273 .set = hfsplus_xattr_set_posix_acl,
274};
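hfsplus_init_posix_acl() above gives a new inode the usual default-ACL inheritance: if the parent directory carries an ACL_TYPE_DEFAULT entry, posix_acl_create() derives the child's access ACL from it (and a new directory also copies the default ACL itself); otherwise the umask is applied. A hedged userspace sketch of that behaviour, again with libacl; the mount point and test directory are hypothetical and assumed to already exist:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/acl.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        const char *dir = "/mnt/hfsplus/dir";           /* assumed to exist */
        const char *file = "/mnt/hfsplus/dir/newfile";
        acl_t def, acc;
        char *text;
        int fd;

        /* Give the directory a default ACL granting uid 1000 rwx. */
        def = acl_from_text("u::rwx,g::r-x,o::r-x,u:1000:rwx,m::rwx");
        if (!def) {
                perror("acl_from_text");
                return EXIT_FAILURE;
        }
        if (acl_set_file(dir, ACL_TYPE_DEFAULT, def) < 0) {
                perror("acl_set_file");
                acl_free(def);
                return EXIT_FAILURE;
        }
        acl_free(def);

        /* Files created below now inherit their access ACL from it. */
        fd = open(file, O_CREAT | O_WRONLY, 0666);
        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }
        close(fd);

        acc = acl_get_file(file, ACL_TYPE_ACCESS);
        if (acc && (text = acl_to_text(acc, NULL))) {
                printf("%s", text);     /* should include the u:1000 entry */
                acl_free(text);
        }
        if (acc)
                acl_free(acc);
        return EXIT_SUCCESS;
}
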
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index f66346155df5..bd8471fb9a6a 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -8,11 +8,16 @@
8 8
9#include "hfsplus_fs.h" 9#include "hfsplus_fs.h"
10#include "xattr.h" 10#include "xattr.h"
11#include "acl.h"
11 12
12const struct xattr_handler *hfsplus_xattr_handlers[] = { 13const struct xattr_handler *hfsplus_xattr_handlers[] = {
13 &hfsplus_xattr_osx_handler, 14 &hfsplus_xattr_osx_handler,
14 &hfsplus_xattr_user_handler, 15 &hfsplus_xattr_user_handler,
15 &hfsplus_xattr_trusted_handler, 16 &hfsplus_xattr_trusted_handler,
17#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
18 &hfsplus_xattr_acl_access_handler,
19 &hfsplus_xattr_acl_default_handler,
20#endif
16 &hfsplus_xattr_security_handler, 21 &hfsplus_xattr_security_handler,
17 NULL 22 NULL
18}; 23};
@@ -46,11 +51,58 @@ static inline int is_known_namespace(const char *name)
46 return true; 51 return true;
47} 52}
48 53
54static int can_set_system_xattr(struct inode *inode, const char *name,
55 const void *value, size_t size)
56{
57#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
58 struct posix_acl *acl;
59 int err;
60
61 if (!inode_owner_or_capable(inode))
62 return -EPERM;
63
64 /*
65 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
66 */
67 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
68 acl = posix_acl_from_xattr(&init_user_ns, value, size);
69 if (IS_ERR(acl))
70 return PTR_ERR(acl);
71 if (acl) {
72 err = posix_acl_equiv_mode(acl, &inode->i_mode);
73 posix_acl_release(acl);
74 if (err < 0)
75 return err;
76 mark_inode_dirty(inode);
77 }
78 /*
79 * We're changing the ACL. Get rid of the cached one
80 */
81 forget_cached_acl(inode, ACL_TYPE_ACCESS);
82
83 return 0;
84 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
85 acl = posix_acl_from_xattr(&init_user_ns, value, size);
86 if (IS_ERR(acl))
87 return PTR_ERR(acl);
88 posix_acl_release(acl);
89
90 /*
91 * We're changing the default ACL. Get rid of the cached one
92 */
93 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
94
95 return 0;
96 }
97#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
98 return -EOPNOTSUPP;
99}
100
49static int can_set_xattr(struct inode *inode, const char *name, 101static int can_set_xattr(struct inode *inode, const char *name,
50 const void *value, size_t value_len) 102 const void *value, size_t value_len)
51{ 103{
52 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 104 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
53 return -EOPNOTSUPP; /* TODO: implement ACL support */ 105 return can_set_system_xattr(inode, name, value, value_len);
54 106
55 if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { 107 if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) {
56 /* 108 /*
@@ -253,11 +305,10 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len)
253 return len; 305 return len;
254} 306}
255 307
256static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, 308static ssize_t hfsplus_getxattr_finder_info(struct inode *inode,
257 void *value, size_t size) 309 void *value, size_t size)
258{ 310{
259 ssize_t res = 0; 311 ssize_t res = 0;
260 struct inode *inode = dentry->d_inode;
261 struct hfs_find_data fd; 312 struct hfs_find_data fd;
262 u16 entry_type; 313 u16 entry_type;
263 u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); 314 u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo);
@@ -304,10 +355,9 @@ end_getxattr_finder_info:
304 return res; 355 return res;
305} 356}
306 357
307ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, 358ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
308 void *value, size_t size) 359 void *value, size_t size)
309{ 360{
310 struct inode *inode = dentry->d_inode;
311 struct hfs_find_data fd; 361 struct hfs_find_data fd;
312 hfsplus_attr_entry *entry; 362 hfsplus_attr_entry *entry;
313 __be32 xattr_record_type; 363 __be32 xattr_record_type;
@@ -333,7 +383,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
333 } 383 }
334 384
335 if (!strcmp_xattr_finder_info(name)) 385 if (!strcmp_xattr_finder_info(name))
336 return hfsplus_getxattr_finder_info(dentry, value, size); 386 return hfsplus_getxattr_finder_info(inode, value, size);
337 387
338 if (!HFSPLUS_SB(inode->i_sb)->attr_tree) 388 if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
339 return -EOPNOTSUPP; 389 return -EOPNOTSUPP;
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index 847b695b984d..841b5698c0fc 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -14,8 +14,8 @@
14extern const struct xattr_handler hfsplus_xattr_osx_handler; 14extern const struct xattr_handler hfsplus_xattr_osx_handler;
15extern const struct xattr_handler hfsplus_xattr_user_handler; 15extern const struct xattr_handler hfsplus_xattr_user_handler;
16extern const struct xattr_handler hfsplus_xattr_trusted_handler; 16extern const struct xattr_handler hfsplus_xattr_trusted_handler;
17/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ 17extern const struct xattr_handler hfsplus_xattr_acl_access_handler;
18/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ 18extern const struct xattr_handler hfsplus_xattr_acl_default_handler;
19extern const struct xattr_handler hfsplus_xattr_security_handler; 19extern const struct xattr_handler hfsplus_xattr_security_handler;
20 20
21extern const struct xattr_handler *hfsplus_xattr_handlers[]; 21extern const struct xattr_handler *hfsplus_xattr_handlers[];
@@ -29,9 +29,17 @@ static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
29 return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); 29 return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
30} 30}
31 31
32ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, 32ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
33 void *value, size_t size); 33 void *value, size_t size);
34 34
35static inline ssize_t hfsplus_getxattr(struct dentry *dentry,
36 const char *name,
37 void *value,
38 size_t size)
39{
40 return __hfsplus_getxattr(dentry->d_inode, name, value, size);
41}
42
35ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); 43ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
36 44
37int hfsplus_removexattr(struct dentry *dentry, const char *name); 45int hfsplus_removexattr(struct dentry *dentry, const char *name);
@@ -39,22 +47,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name);
39int hfsplus_init_security(struct inode *inode, struct inode *dir, 47int hfsplus_init_security(struct inode *inode, struct inode *dir,
40 const struct qstr *qstr); 48 const struct qstr *qstr);
41 49
42static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) 50int hfsplus_init_inode_security(struct inode *inode, struct inode *dir,
43{ 51 const struct qstr *qstr);
44 /*TODO: implement*/
45 return 0;
46}
47
48static inline int hfsplus_init_inode_security(struct inode *inode,
49 struct inode *dir,
50 const struct qstr *qstr)
51{
52 int err;
53
54 err = hfsplus_init_acl(inode, dir);
55 if (!err)
56 err = hfsplus_init_security(inode, dir, qstr);
57 return err;
58}
59 52
60#endif 53#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index 83b842f113c5..00722765ea79 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -9,6 +9,7 @@
9#include <linux/security.h> 9#include <linux/security.h>
10#include "hfsplus_fs.h" 10#include "hfsplus_fs.h"
11#include "xattr.h" 11#include "xattr.h"
12#include "acl.h"
12 13
13static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, 14static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
14 void *buffer, size_t size, int type) 15 void *buffer, size_t size, int type)
@@ -96,6 +97,18 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir,
96 &hfsplus_initxattrs, NULL); 97 &hfsplus_initxattrs, NULL);
97} 98}
98 99
100int hfsplus_init_inode_security(struct inode *inode,
101 struct inode *dir,
102 const struct qstr *qstr)
103{
104 int err;
105
106 err = hfsplus_init_posix_acl(inode, dir);
107 if (!err)
108 err = hfsplus_init_security(inode, dir, qstr);
109 return err;
110}
111
99const struct xattr_handler hfsplus_xattr_security_handler = { 112const struct xattr_handler hfsplus_xattr_security_handler = {
100 .prefix = XATTR_SECURITY_PREFIX, 113 .prefix = XATTR_SECURITY_PREFIX,
101 .list = hfsplus_security_listxattr, 114 .list = hfsplus_security_listxattr,
diff --git a/fs/namespace.c b/fs/namespace.c
index 25845d1b300b..da5c49483430 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,7 +17,7 @@
17#include <linux/security.h> 17#include <linux/security.h>
18#include <linux/idr.h> 18#include <linux/idr.h>
19#include <linux/acct.h> /* acct_auto_close_mnt */ 19#include <linux/acct.h> /* acct_auto_close_mnt */
20#include <linux/ramfs.h> /* init_rootfs */ 20#include <linux/init.h> /* init_rootfs */
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f520a1113b38..28842abafab4 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -279,15 +279,15 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
279 if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { 279 if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
280 spin_lock(&clp->cl_lock); 280 spin_lock(&clp->cl_lock);
281 if (clp->cl_machine_cred != NULL) 281 if (clp->cl_machine_cred != NULL)
282 newcred = get_rpccred(clp->cl_machine_cred); 282 /* don't call get_rpccred on the machine cred -
283 * a reference will be held for the life of clp */
284 newcred = clp->cl_machine_cred;
283 spin_unlock(&clp->cl_lock); 285 spin_unlock(&clp->cl_lock);
284 if (msg->rpc_cred)
285 put_rpccred(msg->rpc_cred);
286 msg->rpc_cred = newcred; 286 msg->rpc_cred = newcred;
287 287
288 flavor = clp->cl_rpcclient->cl_auth->au_flavor; 288 flavor = clp->cl_rpcclient->cl_auth->au_flavor;
289 WARN_ON(flavor != RPC_AUTH_GSS_KRB5I && 289 WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I &&
290 flavor != RPC_AUTH_GSS_KRB5P); 290 flavor != RPC_AUTH_GSS_KRB5P);
291 *clntp = clp->cl_rpcclient; 291 *clntp = clp->cl_rpcclient;
292 292
293 return true; 293 return true;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 39b6cf2d1683..989bb9d3074d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6001,10 +6001,12 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
6001 .rpc_resp = &res, 6001 .rpc_resp = &res,
6002 }; 6002 };
6003 struct rpc_clnt *clnt = NFS_SERVER(dir)->client; 6003 struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
6004 struct rpc_cred *cred = NULL;
6004 6005
6005 if (use_integrity) { 6006 if (use_integrity) {
6006 clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; 6007 clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
6007 msg.rpc_cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); 6008 cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
6009 msg.rpc_cred = cred;
6008 } 6010 }
6009 6011
6010 dprintk("NFS call secinfo %s\n", name->name); 6012 dprintk("NFS call secinfo %s\n", name->name);
@@ -6016,8 +6018,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
6016 &res.seq_res, 0); 6018 &res.seq_res, 0);
6017 dprintk("NFS reply secinfo: %d\n", status); 6019 dprintk("NFS reply secinfo: %d\n", status);
6018 6020
6019 if (msg.rpc_cred) 6021 if (cred)
6020 put_rpccred(msg.rpc_cred); 6022 put_rpccred(cred);
6021 6023
6022 return status; 6024 return status;
6023} 6025}
@@ -6151,11 +6153,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
6151 }, 6153 },
6152 .allow.u.words = { 6154 .allow.u.words = {
6153 [0] = 1 << (OP_CLOSE) | 6155 [0] = 1 << (OP_CLOSE) |
6154 1 << (OP_LOCKU), 6156 1 << (OP_LOCKU) |
6157 1 << (OP_COMMIT),
6155 [1] = 1 << (OP_SECINFO - 32) | 6158 [1] = 1 << (OP_SECINFO - 32) |
6156 1 << (OP_SECINFO_NO_NAME - 32) | 6159 1 << (OP_SECINFO_NO_NAME - 32) |
6157 1 << (OP_TEST_STATEID - 32) | 6160 1 << (OP_TEST_STATEID - 32) |
6158 1 << (OP_FREE_STATEID - 32) 6161 1 << (OP_FREE_STATEID - 32) |
6162 1 << (OP_WRITE - 32)
6159 } 6163 }
6160}; 6164};
6161 6165
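The allow bitmap above is two 32-bit words indexed by NFSv4 operation number: word [0] covers operations 0-31 and word [1] covers 32-63, which is why the entries in the second group subtract 32 before shifting. A trivial sketch of that mapping (the operation number is arbitrary, not one of the real OP_* constants):

#include <stdio.h>

int main(void)
{
        unsigned int op = 38;                   /* arbitrary illustration value */
        unsigned int word = op / 32;            /* index into allow.u.words[] */
        unsigned int bit = op % 32;             /* shift within that word */

        printf("words[%u] |= 1 << %u;\n", word, bit);
        return 0;
}
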
@@ -7496,11 +7500,13 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
7496 .rpc_resp = &res, 7500 .rpc_resp = &res,
7497 }; 7501 };
7498 struct rpc_clnt *clnt = server->client; 7502 struct rpc_clnt *clnt = server->client;
7503 struct rpc_cred *cred = NULL;
7499 int status; 7504 int status;
7500 7505
7501 if (use_integrity) { 7506 if (use_integrity) {
7502 clnt = server->nfs_client->cl_rpcclient; 7507 clnt = server->nfs_client->cl_rpcclient;
7503 msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client); 7508 cred = nfs4_get_clid_cred(server->nfs_client);
7509 msg.rpc_cred = cred;
7504 } 7510 }
7505 7511
7506 dprintk("--> %s\n", __func__); 7512 dprintk("--> %s\n", __func__);
@@ -7508,8 +7514,8 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
7508 &res.seq_res, 0); 7514 &res.seq_res, 0);
7509 dprintk("<-- %s status=%d\n", __func__, status); 7515 dprintk("<-- %s status=%d\n", __func__, status);
7510 7516
7511 if (msg.rpc_cred) 7517 if (cred)
7512 put_rpccred(msg.rpc_cred); 7518 put_rpccred(cred);
7513 7519
7514 return status; 7520 return status;
7515} 7521}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fbdad9e1719f..79210d23f607 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -414,7 +414,7 @@ static int nfs4_stat_to_errno(int);
414#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) 414#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
415#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ 415#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
416 XDR_QUADLEN(NFS4_STATEID_SIZE)) 416 XDR_QUADLEN(NFS4_STATEID_SIZE))
417#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) 417#define decode_free_stateid_maxsz (op_decode_hdr_maxsz)
418#else /* CONFIG_NFS_V4_1 */ 418#else /* CONFIG_NFS_V4_1 */
419#define encode_sequence_maxsz 0 419#define encode_sequence_maxsz 0
420#define decode_sequence_maxsz 0 420#define decode_sequence_maxsz 0
@@ -5966,21 +5966,8 @@ out:
5966static int decode_free_stateid(struct xdr_stream *xdr, 5966static int decode_free_stateid(struct xdr_stream *xdr,
5967 struct nfs41_free_stateid_res *res) 5967 struct nfs41_free_stateid_res *res)
5968{ 5968{
5969 __be32 *p; 5969 res->status = decode_op_hdr(xdr, OP_FREE_STATEID);
5970 int status;
5971
5972 status = decode_op_hdr(xdr, OP_FREE_STATEID);
5973 if (status)
5974 return status;
5975
5976 p = xdr_inline_decode(xdr, 4);
5977 if (unlikely(!p))
5978 goto out_overflow;
5979 res->status = be32_to_cpup(p++);
5980 return res->status; 5970 return res->status;
5981out_overflow:
5982 print_overflow_msg(__func__, xdr);
5983 return -EIO;
5984} 5971}
5985#endif /* CONFIG_NFS_V4_1 */ 5972#endif /* CONFIG_NFS_V4_1 */
5986 5973
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 105a3b080d12..e0a65a9e37e9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -173,8 +173,6 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
173 int status; 173 int status;
174 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 174 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
175 175
176 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
177
178 if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 176 if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
179 return; 177 return;
180 if (!nn->rec_file) 178 if (!nn->rec_file)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 43f42290e5df..0874998a49cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -368,11 +368,8 @@ static struct nfs4_delegation *
368alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) 368alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
369{ 369{
370 struct nfs4_delegation *dp; 370 struct nfs4_delegation *dp;
371 struct nfs4_file *fp = stp->st_file;
372 371
373 dprintk("NFSD alloc_init_deleg\n"); 372 dprintk("NFSD alloc_init_deleg\n");
374 if (fp->fi_had_conflict)
375 return NULL;
376 if (num_delegations > max_delegations) 373 if (num_delegations > max_delegations)
377 return NULL; 374 return NULL;
378 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 375 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
@@ -389,8 +386,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
389 INIT_LIST_HEAD(&dp->dl_perfile); 386 INIT_LIST_HEAD(&dp->dl_perfile);
390 INIT_LIST_HEAD(&dp->dl_perclnt); 387 INIT_LIST_HEAD(&dp->dl_perclnt);
391 INIT_LIST_HEAD(&dp->dl_recall_lru); 388 INIT_LIST_HEAD(&dp->dl_recall_lru);
392 get_nfs4_file(fp); 389 dp->dl_file = NULL;
393 dp->dl_file = fp;
394 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 390 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
395 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 391 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
396 dp->dl_time = 0; 392 dp->dl_time = 0;
@@ -3035,7 +3031,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
3035 if (status) { 3031 if (status) {
3036 list_del_init(&dp->dl_perclnt); 3032 list_del_init(&dp->dl_perclnt);
3037 locks_free_lock(fl); 3033 locks_free_lock(fl);
3038 return -ENOMEM; 3034 return status;
3039 } 3035 }
3040 fp->fi_lease = fl; 3036 fp->fi_lease = fl;
3041 fp->fi_deleg_file = get_file(fl->fl_file); 3037 fp->fi_deleg_file = get_file(fl->fl_file);
@@ -3044,22 +3040,35 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
3044 return 0; 3040 return 0;
3045} 3041}
3046 3042
3047static int nfs4_set_delegation(struct nfs4_delegation *dp) 3043static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
3048{ 3044{
3049 struct nfs4_file *fp = dp->dl_file; 3045 int status;
3050 3046
3051 if (!fp->fi_lease) 3047 if (fp->fi_had_conflict)
3052 return nfs4_setlease(dp); 3048 return -EAGAIN;
3049 get_nfs4_file(fp);
3050 dp->dl_file = fp;
3051 if (!fp->fi_lease) {
3052 status = nfs4_setlease(dp);
3053 if (status)
3054 goto out_free;
3055 return 0;
3056 }
3053 spin_lock(&recall_lock); 3057 spin_lock(&recall_lock);
3054 if (fp->fi_had_conflict) { 3058 if (fp->fi_had_conflict) {
3055 spin_unlock(&recall_lock); 3059 spin_unlock(&recall_lock);
3056 return -EAGAIN; 3060 status = -EAGAIN;
3061 goto out_free;
3057 } 3062 }
3058 atomic_inc(&fp->fi_delegees); 3063 atomic_inc(&fp->fi_delegees);
3059 list_add(&dp->dl_perfile, &fp->fi_delegations); 3064 list_add(&dp->dl_perfile, &fp->fi_delegations);
3060 spin_unlock(&recall_lock); 3065 spin_unlock(&recall_lock);
3061 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); 3066 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
3062 return 0; 3067 return 0;
3068out_free:
3069 put_nfs4_file(fp);
3070 dp->dl_file = fp;
3071 return status;
3063} 3072}
3064 3073
3065static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) 3074static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3134,7 +3143,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3134 dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); 3143 dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh);
3135 if (dp == NULL) 3144 if (dp == NULL)
3136 goto out_no_deleg; 3145 goto out_no_deleg;
3137 status = nfs4_set_delegation(dp); 3146 status = nfs4_set_delegation(dp, stp->st_file);
3138 if (status) 3147 if (status)
3139 goto out_free; 3148 goto out_free;
3140 3149
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 8a404576fb26..b4f788e0ca31 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
51 return ERR_PTR(-EINVAL); 51 return ERR_PTR(-EINVAL);
52 52
53 count = size / sizeof(struct posix_acl_entry); 53 count = size / sizeof(struct posix_acl_entry);
54 if (count < 0)
55 return ERR_PTR(-EINVAL);
56 if (count == 0)
57 return NULL;
58 54
59 acl = posix_acl_alloc(count, GFP_NOFS); 55 acl = posix_acl_alloc(count, GFP_NOFS);
60 if (!acl) 56 if (!acl)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 94417a85ce6e..f37d3c0e2053 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
2044 2044
2045out_write_size: 2045out_write_size:
2046 pos += copied; 2046 pos += copied;
2047 if (pos > inode->i_size) { 2047 if (pos > i_size_read(inode)) {
2048 i_size_write(inode, pos); 2048 i_size_write(inode, pos);
2049 mark_inode_dirty(inode); 2049 mark_inode_dirty(inode);
2050 } 2050 }
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 5c1c864e81cc..363f0dcc924f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
628 struct o2nm_node *node, 628 struct o2nm_node *node,
629 int idx) 629 int idx)
630{ 630{
631 struct list_head *iter;
632 struct o2hb_callback_func *f; 631 struct o2hb_callback_func *f;
633 632
634 list_for_each(iter, &hbcall->list) { 633 list_for_each_entry(f, &hbcall->list, hc_item) {
635 f = list_entry(iter, struct o2hb_callback_func, hc_item);
636 mlog(ML_HEARTBEAT, "calling funcs %p\n", f); 634 mlog(ML_HEARTBEAT, "calling funcs %p\n", f);
637 (f->hc_func)(node, idx, f->hc_data); 635 (f->hc_func)(node, idx, f->hc_data);
638 } 636 }
@@ -641,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
641/* Will run the list in order until we process the passed event */ 639/* Will run the list in order until we process the passed event */
642static void o2hb_run_event_list(struct o2hb_node_event *queued_event) 640static void o2hb_run_event_list(struct o2hb_node_event *queued_event)
643{ 641{
644 int empty;
645 struct o2hb_callback *hbcall; 642 struct o2hb_callback *hbcall;
646 struct o2hb_node_event *event; 643 struct o2hb_node_event *event;
647 644
648 spin_lock(&o2hb_live_lock);
649 empty = list_empty(&queued_event->hn_item);
650 spin_unlock(&o2hb_live_lock);
651 if (empty)
652 return;
653
654 /* Holding callback sem assures we don't alter the callback 645 /* Holding callback sem assures we don't alter the callback
655 * lists when doing this, and serializes ourselves with other 646 * lists when doing this, and serializes ourselves with other
656 * processes wanting callbacks. */ 647 * processes wanting callbacks. */
@@ -709,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
709 struct o2hb_node_event event = 700 struct o2hb_node_event event =
710 { .hn_item = LIST_HEAD_INIT(event.hn_item), }; 701 { .hn_item = LIST_HEAD_INIT(event.hn_item), };
711 struct o2nm_node *node; 702 struct o2nm_node *node;
703 int queued = 0;
712 704
713 node = o2nm_get_node_by_num(slot->ds_node_num); 705 node = o2nm_get_node_by_num(slot->ds_node_num);
714 if (!node) 706 if (!node)
@@ -726,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
726 718
727 o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, 719 o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node,
728 slot->ds_node_num); 720 slot->ds_node_num);
721 queued = 1;
729 } 722 }
730 } 723 }
731 spin_unlock(&o2hb_live_lock); 724 spin_unlock(&o2hb_live_lock);
732 725
733 o2hb_run_event_list(&event); 726 if (queued)
727 o2hb_run_event_list(&event);
734 728
735 o2nm_node_put(node); 729 o2nm_node_put(node);
736} 730}
@@ -790,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
790 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; 784 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
791 unsigned int slot_dead_ms; 785 unsigned int slot_dead_ms;
792 int tmp; 786 int tmp;
787 int queued = 0;
793 788
794 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); 789 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
795 790
@@ -883,6 +878,7 @@ fire_callbacks:
883 slot->ds_node_num); 878 slot->ds_node_num);
884 879
885 changed = 1; 880 changed = 1;
881 queued = 1;
886 } 882 }
887 883
888 list_add_tail(&slot->ds_live_item, 884 list_add_tail(&slot->ds_live_item,
@@ -934,6 +930,7 @@ fire_callbacks:
934 node, slot->ds_node_num); 930 node, slot->ds_node_num);
935 931
936 changed = 1; 932 changed = 1;
933 queued = 1;
937 } 934 }
938 935
939 /* We don't clear this because the node is still 936 /* We don't clear this because the node is still
@@ -949,7 +946,8 @@ fire_callbacks:
949out: 946out:
950 spin_unlock(&o2hb_live_lock); 947 spin_unlock(&o2hb_live_lock);
951 948
952 o2hb_run_event_list(&event); 949 if (queued)
950 o2hb_run_event_list(&event);
953 951
954 if (node) 952 if (node)
955 o2nm_node_put(node); 953 o2nm_node_put(node);
@@ -2516,8 +2514,7 @@ unlock:
2516int o2hb_register_callback(const char *region_uuid, 2514int o2hb_register_callback(const char *region_uuid,
2517 struct o2hb_callback_func *hc) 2515 struct o2hb_callback_func *hc)
2518{ 2516{
2519 struct o2hb_callback_func *tmp; 2517 struct o2hb_callback_func *f;
2520 struct list_head *iter;
2521 struct o2hb_callback *hbcall; 2518 struct o2hb_callback *hbcall;
2522 int ret; 2519 int ret;
2523 2520
@@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid,
2540 2537
2541 down_write(&o2hb_callback_sem); 2538 down_write(&o2hb_callback_sem);
2542 2539
2543 list_for_each(iter, &hbcall->list) { 2540 list_for_each_entry(f, &hbcall->list, hc_item) {
2544 tmp = list_entry(iter, struct o2hb_callback_func, hc_item); 2541 if (hc->hc_priority < f->hc_priority) {
2545 if (hc->hc_priority < tmp->hc_priority) { 2542 list_add_tail(&hc->hc_item, &f->hc_item);
2546 list_add_tail(&hc->hc_item, iter);
2547 break; 2543 break;
2548 } 2544 }
2549 } 2545 }
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index d644dc611425..2cd2406b4140 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
543 } 543 }
544 544
545 if (was_valid && !valid) { 545 if (was_valid && !valid) {
546 printk(KERN_NOTICE "o2net: No longer connected to " 546 if (old_sc)
547 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); 547 printk(KERN_NOTICE "o2net: No longer connected to "
548 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
548 o2net_complete_nodes_nsw(nn); 549 o2net_complete_nodes_nsw(nn);
549 } 550 }
550 551
@@ -765,32 +766,32 @@ static struct o2net_msg_handler *
765o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, 766o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p,
766 struct rb_node **ret_parent) 767 struct rb_node **ret_parent)
767{ 768{
768 struct rb_node **p = &o2net_handler_tree.rb_node; 769 struct rb_node **p = &o2net_handler_tree.rb_node;
769 struct rb_node *parent = NULL; 770 struct rb_node *parent = NULL;
770 struct o2net_msg_handler *nmh, *ret = NULL; 771 struct o2net_msg_handler *nmh, *ret = NULL;
771 int cmp; 772 int cmp;
772 773
773 while (*p) { 774 while (*p) {
774 parent = *p; 775 parent = *p;
775 nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); 776 nmh = rb_entry(parent, struct o2net_msg_handler, nh_node);
776 cmp = o2net_handler_cmp(nmh, msg_type, key); 777 cmp = o2net_handler_cmp(nmh, msg_type, key);
777 778
778 if (cmp < 0) 779 if (cmp < 0)
779 p = &(*p)->rb_left; 780 p = &(*p)->rb_left;
780 else if (cmp > 0) 781 else if (cmp > 0)
781 p = &(*p)->rb_right; 782 p = &(*p)->rb_right;
782 else { 783 else {
783 ret = nmh; 784 ret = nmh;
784 break; 785 break;
785 } 786 }
786 } 787 }
787 788
788 if (ret_p != NULL) 789 if (ret_p != NULL)
789 *ret_p = p; 790 *ret_p = p;
790 if (ret_parent != NULL) 791 if (ret_parent != NULL)
791 *ret_parent = parent; 792 *ret_parent = parent;
792 793
793 return ret; 794 return ret;
794} 795}
795 796
796static void o2net_handler_kref_release(struct kref *kref) 797static void o2net_handler_kref_release(struct kref *kref)
@@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work)
1695 ret = 0; 1696 ret = 0;
1696 1697
1697out: 1698out:
1698 if (ret) { 1699 if (ret && sc) {
1699 printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT 1700 printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT
1700 " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); 1701 " failed with errno %d\n", SC_NODEF_ARGS(sc), ret);
1701 /* 0 err so that another will be queued and attempted 1702 /* 0 err so that another will be queued and attempted
1702 * from set_nn_state */ 1703 * from set_nn_state */
1703 if (sc) 1704 o2net_ensure_shutdown(nn, sc, 0);
1704 o2net_ensure_shutdown(nn, sc, 0);
1705 } 1705 }
1706 if (sc) 1706 if (sc)
1707 sc_put(sc); 1707 sc_put(sc);
@@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock)
1873 1873
1874 if (o2nm_this_node() >= node->nd_num) { 1874 if (o2nm_this_node() >= node->nd_num) {
1875 local_node = o2nm_get_node_by_num(o2nm_this_node()); 1875 local_node = o2nm_get_node_by_num(o2nm_this_node());
1876 printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " 1876 if (local_node)
1877 "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " 1877 printk(KERN_NOTICE "o2net: Unexpected connect attempt "
1878 "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, 1878 "seen at node '%s' (%u, %pI4:%d) from "
1879 &(local_node->nd_ipv4_address), 1879 "node '%s' (%u, %pI4:%d)\n",
1880 ntohs(local_node->nd_ipv4_port), node->nd_name, 1880 local_node->nd_name, local_node->nd_num,
1881 node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 1881 &(local_node->nd_ipv4_address),
1882 ntohs(local_node->nd_ipv4_port),
1883 node->nd_name,
1884 node->nd_num, &sin.sin_addr.s_addr,
1885 ntohs(sin.sin_port));
1882 ret = -EINVAL; 1886 ret = -EINVAL;
1883 goto out; 1887 goto out;
1884 } 1888 }
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index fbec0be62326..b46278f9ae44 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
292 struct dlm_lock *lock = NULL; 292 struct dlm_lock *lock = NULL;
293 struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; 293 struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf;
294 char *name; 294 char *name;
295 struct list_head *iter, *head=NULL; 295 struct list_head *head = NULL;
296 __be64 cookie; 296 __be64 cookie;
297 u32 flags; 297 u32 flags;
298 u8 node; 298 u8 node;
@@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
373 /* try convert queue for both ast/bast */ 373 /* try convert queue for both ast/bast */
374 head = &res->converting; 374 head = &res->converting;
375 lock = NULL; 375 lock = NULL;
376 list_for_each(iter, head) { 376 list_for_each_entry(lock, head, list) {
377 lock = list_entry (iter, struct dlm_lock, list);
378 if (lock->ml.cookie == cookie) 377 if (lock->ml.cookie == cookie)
379 goto do_ast; 378 goto do_ast;
380 } 379 }
@@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
385 else 384 else
386 head = &res->granted; 385 head = &res->granted;
387 386
388 list_for_each(iter, head) { 387 list_for_each_entry(lock, head, list) {
389 lock = list_entry (iter, struct dlm_lock, list);
390 if (lock->ml.cookie == cookie) 388 if (lock->ml.cookie == cookie)
391 goto do_ast; 389 goto do_ast;
392 } 390 }
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index de854cca12a2..e0517762fcc0 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request)
1079static inline int dlm_lock_on_list(struct list_head *head, 1079static inline int dlm_lock_on_list(struct list_head *head,
1080 struct dlm_lock *lock) 1080 struct dlm_lock *lock)
1081{ 1081{
1082 struct list_head *iter;
1083 struct dlm_lock *tmplock; 1082 struct dlm_lock *tmplock;
1084 1083
1085 list_for_each(iter, head) { 1084 list_for_each_entry(tmplock, head, list) {
1086 tmplock = list_entry(iter, struct dlm_lock, list);
1087 if (tmplock == lock) 1085 if (tmplock == lock)
1088 return 1; 1086 return 1;
1089 } 1087 }
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 29a886d1e82c..e36d63ff1783 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
123 int *kick_thread) 123 int *kick_thread)
124{ 124{
125 enum dlm_status status = DLM_NORMAL; 125 enum dlm_status status = DLM_NORMAL;
126 struct list_head *iter;
127 struct dlm_lock *tmplock=NULL; 126 struct dlm_lock *tmplock=NULL;
128 127
129 assert_spin_locked(&res->spinlock); 128 assert_spin_locked(&res->spinlock);
@@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
185 184
186 /* upconvert from here on */ 185 /* upconvert from here on */
187 status = DLM_NORMAL; 186 status = DLM_NORMAL;
188 list_for_each(iter, &res->granted) { 187 list_for_each_entry(tmplock, &res->granted, list) {
189 tmplock = list_entry(iter, struct dlm_lock, list);
190 if (tmplock == lock) 188 if (tmplock == lock)
191 continue; 189 continue;
192 if (!dlm_lock_compatible(tmplock->ml.type, type)) 190 if (!dlm_lock_compatible(tmplock->ml.type, type))
193 goto switch_queues; 191 goto switch_queues;
194 } 192 }
195 193
196 list_for_each(iter, &res->converting) { 194 list_for_each_entry(tmplock, &res->converting, list) {
197 tmplock = list_entry(iter, struct dlm_lock, list);
198 if (!dlm_lock_compatible(tmplock->ml.type, type)) 195 if (!dlm_lock_compatible(tmplock->ml.type, type))
199 goto switch_queues; 196 goto switch_queues;
200 /* existing conversion requests take precedence */ 197 /* existing conversion requests take precedence */
@@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
424 struct dlm_ctxt *dlm = data; 421 struct dlm_ctxt *dlm = data;
425 struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; 422 struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf;
426 struct dlm_lock_resource *res = NULL; 423 struct dlm_lock_resource *res = NULL;
427 struct list_head *iter;
428 struct dlm_lock *lock = NULL; 424 struct dlm_lock *lock = NULL;
425 struct dlm_lock *tmp_lock;
429 struct dlm_lockstatus *lksb; 426 struct dlm_lockstatus *lksb;
430 enum dlm_status status = DLM_NORMAL; 427 enum dlm_status status = DLM_NORMAL;
431 u32 flags; 428 u32 flags;
@@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
471 dlm_error(status); 468 dlm_error(status);
472 goto leave; 469 goto leave;
473 } 470 }
474 list_for_each(iter, &res->granted) { 471 list_for_each_entry(tmp_lock, &res->granted, list) {
475 lock = list_entry(iter, struct dlm_lock, list); 472 if (tmp_lock->ml.cookie == cnv->cookie &&
476 if (lock->ml.cookie == cnv->cookie && 473 tmp_lock->ml.node == cnv->node_idx) {
477 lock->ml.node == cnv->node_idx) { 474 lock = tmp_lock;
478 dlm_lock_get(lock); 475 dlm_lock_get(lock);
479 break; 476 break;
480 } 477 }
481 lock = NULL;
482 } 478 }
483 spin_unlock(&res->spinlock); 479 spin_unlock(&res->spinlock);
484 if (!lock) { 480 if (!lock) {
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 0e28e242226d..e33cd7a3c582 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock)
96 96
97void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) 97void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
98{ 98{
99 struct list_head *iter2;
100 struct dlm_lock *lock; 99 struct dlm_lock *lock;
101 char buf[DLM_LOCKID_NAME_MAX]; 100 char buf[DLM_LOCKID_NAME_MAX];
102 101
@@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
118 res->inflight_locks, atomic_read(&res->asts_reserved)); 117 res->inflight_locks, atomic_read(&res->asts_reserved));
119 dlm_print_lockres_refmap(res); 118 dlm_print_lockres_refmap(res);
120 printk(" granted queue:\n"); 119 printk(" granted queue:\n");
121 list_for_each(iter2, &res->granted) { 120 list_for_each_entry(lock, &res->granted, list) {
122 lock = list_entry(iter2, struct dlm_lock, list);
123 __dlm_print_lock(lock); 121 __dlm_print_lock(lock);
124 } 122 }
125 printk(" converting queue:\n"); 123 printk(" converting queue:\n");
126 list_for_each(iter2, &res->converting) { 124 list_for_each_entry(lock, &res->converting, list) {
127 lock = list_entry(iter2, struct dlm_lock, list);
128 __dlm_print_lock(lock); 125 __dlm_print_lock(lock);
129 } 126 }
130 printk(" blocked queue:\n"); 127 printk(" blocked queue:\n");
131 list_for_each(iter2, &res->blocked) { 128 list_for_each_entry(lock, &res->blocked, list) {
132 lock = list_entry(iter2, struct dlm_lock, list);
133 __dlm_print_lock(lock); 129 __dlm_print_lock(lock);
134 } 130 }
135} 131}
@@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
446{ 442{
447 struct dlm_master_list_entry *mle; 443 struct dlm_master_list_entry *mle;
448 struct hlist_head *bucket; 444 struct hlist_head *bucket;
449 struct hlist_node *list;
450 int i, out = 0; 445 int i, out = 0;
451 unsigned long total = 0, longest = 0, bucket_count = 0; 446 unsigned long total = 0, longest = 0, bucket_count = 0;
452 447
@@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
456 spin_lock(&dlm->master_lock); 451 spin_lock(&dlm->master_lock);
457 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 452 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
458 bucket = dlm_master_hash(dlm, i); 453 bucket = dlm_master_hash(dlm, i);
459 hlist_for_each(list, bucket) { 454 hlist_for_each_entry(mle, bucket, master_hash_node) {
460 mle = hlist_entry(list, struct dlm_master_list_entry,
461 master_hash_node);
462 ++total; 455 ++total;
463 ++bucket_count; 456 ++bucket_count;
464 if (len - out < 200) 457 if (len - out < 200)
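The hash-bucket walks get the same treatment: hlist_for_each() plus an explicit hlist_entry() becomes hlist_for_each_entry(), which after the 3.9 hlist iterator rework takes the typed entry pointer directly instead of a separate struct hlist_node cursor. A sketch with a made-up bucket entry type:

#include <linux/list.h>

struct bucket_entry {
        unsigned long key;
        struct hlist_node node;
};

static struct bucket_entry *bucket_find(struct hlist_head *bucket,
                                        unsigned long key)
{
        struct bucket_entry *e;

        /* Typed iterator: no separate struct hlist_node cursor needed. */
        hlist_for_each_entry(e, bucket, node) {
                if (e->key == key)
                        return e;
        }
        return NULL;
}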
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index dbb17c07656a..8b3382abf840 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
193 unsigned int hash) 193 unsigned int hash)
194{ 194{
195 struct hlist_head *bucket; 195 struct hlist_head *bucket;
196 struct hlist_node *list; 196 struct dlm_lock_resource *res;
197 197
198 mlog(0, "%.*s\n", len, name); 198 mlog(0, "%.*s\n", len, name);
199 199
@@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
201 201
202 bucket = dlm_lockres_hash(dlm, hash); 202 bucket = dlm_lockres_hash(dlm, hash);
203 203
204 hlist_for_each(list, bucket) { 204 hlist_for_each_entry(res, bucket, hash_node) {
205 struct dlm_lock_resource *res = hlist_entry(list,
206 struct dlm_lock_resource, hash_node);
207 if (res->lockname.name[0] != name[0]) 205 if (res->lockname.name[0] != name[0])
208 continue; 206 continue;
209 if (unlikely(res->lockname.len != len)) 207 if (unlikely(res->lockname.len != len))
@@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
262 260
263static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) 261static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len)
264{ 262{
265 struct dlm_ctxt *tmp = NULL; 263 struct dlm_ctxt *tmp;
266 struct list_head *iter;
267 264
268 assert_spin_locked(&dlm_domain_lock); 265 assert_spin_locked(&dlm_domain_lock);
269 266
270 /* tmp->name here is always NULL terminated, 267 /* tmp->name here is always NULL terminated,
271 * but domain may not be! */ 268 * but domain may not be! */
272 list_for_each(iter, &dlm_domains) { 269 list_for_each_entry(tmp, &dlm_domains, list) {
273 tmp = list_entry (iter, struct dlm_ctxt, list);
274 if (strlen(tmp->name) == len && 270 if (strlen(tmp->name) == len &&
275 memcmp(tmp->name, domain, len)==0) 271 memcmp(tmp->name, domain, len)==0)
276 break; 272 return tmp;
277 tmp = NULL;
278 } 273 }
279 274
280 return tmp; 275 return NULL;
281} 276}
282 277
283/* For null terminated domain strings ONLY */ 278/* For null terminated domain strings ONLY */
@@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm)
366 * you shouldn't trust your pointer. */ 361 * you shouldn't trust your pointer. */
367struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) 362struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm)
368{ 363{
369 struct list_head *iter; 364 struct dlm_ctxt *target;
370 struct dlm_ctxt *target = NULL; 365 struct dlm_ctxt *ret = NULL;
371 366
372 spin_lock(&dlm_domain_lock); 367 spin_lock(&dlm_domain_lock);
373 368
374 list_for_each(iter, &dlm_domains) { 369 list_for_each_entry(target, &dlm_domains, list) {
375 target = list_entry (iter, struct dlm_ctxt, list);
376
377 if (target == dlm) { 370 if (target == dlm) {
378 __dlm_get(target); 371 __dlm_get(target);
372 ret = target;
379 break; 373 break;
380 } 374 }
381
382 target = NULL;
383 } 375 }
384 376
385 spin_unlock(&dlm_domain_lock); 377 spin_unlock(&dlm_domain_lock);
386 378
387 return target; 379 return ret;
388} 380}
389 381
390int dlm_domain_fully_joined(struct dlm_ctxt *dlm) 382int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
@@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem);
2296void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 2288void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
2297 int node_num) 2289 int node_num)
2298{ 2290{
2299 struct list_head *iter;
2300 struct dlm_eviction_cb *cb; 2291 struct dlm_eviction_cb *cb;
2301 2292
2302 down_read(&dlm_callback_sem); 2293 down_read(&dlm_callback_sem);
2303 list_for_each(iter, &dlm->dlm_eviction_callbacks) { 2294 list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) {
2304 cb = list_entry(iter, struct dlm_eviction_cb, ec_item);
2305
2306 cb->ec_func(node_num, cb->ec_data); 2295 cb->ec_func(node_num, cb->ec_data);
2307 } 2296 }
2308 up_read(&dlm_callback_sem); 2297 up_read(&dlm_callback_sem);
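A subtlety worth noting in the __dlm_lookup_domain_full() and dlm_grab() conversions above: when list_for_each_entry() runs to completion the cursor ends up pointing at the container of the list head, not at NULL, so the old idiom of resetting the cursor to NULL on every miss no longer works. The patch therefore either returns from inside the loop or records the hit in a separate result pointer. Both shapes, sketched with a hypothetical ctx type:

#include <linux/list.h>

struct ctx {
        int id;
        struct list_head list;
};

/* Shape 1: return straight out of the loop (lookup-by-key case). */
static struct ctx *lookup_ctx(struct list_head *head, int id)
{
        struct ctx *tmp;

        list_for_each_entry(tmp, head, list) {
                if (tmp->id == id)
                        return tmp;
        }
        return NULL;    /* do not return 'tmp' here: it is not NULL */
}

/* Shape 2: separate result pointer when work must happen after the loop. */
static struct ctx *grab_ctx(struct list_head *head, struct ctx *wanted)
{
        struct ctx *cur, *ret = NULL;

        list_for_each_entry(cur, head, list) {
                if (cur == wanted) {
                        ret = cur;
                        break;
                }
        }
        return ret;
}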
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 47e67c2d228f..5d32f7511f74 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void)
91static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, 91static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
92 struct dlm_lock *lock) 92 struct dlm_lock *lock)
93{ 93{
94 struct list_head *iter;
95 struct dlm_lock *tmplock; 94 struct dlm_lock *tmplock;
96 95
97 list_for_each(iter, &res->granted) { 96 list_for_each_entry(tmplock, &res->granted, list) {
98 tmplock = list_entry(iter, struct dlm_lock, list);
99
100 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) 97 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
101 return 0; 98 return 0;
102 } 99 }
103 100
104 list_for_each(iter, &res->converting) { 101 list_for_each_entry(tmplock, &res->converting, list) {
105 tmplock = list_entry(iter, struct dlm_lock, list);
106
107 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) 102 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
108 return 0; 103 return 0;
109 if (!dlm_lock_compatible(tmplock->ml.convert_type, 104 if (!dlm_lock_compatible(tmplock->ml.convert_type,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 33ecbe0e6734..cf0f103963b1 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
342{ 342{
343 struct dlm_master_list_entry *tmpmle; 343 struct dlm_master_list_entry *tmpmle;
344 struct hlist_head *bucket; 344 struct hlist_head *bucket;
345 struct hlist_node *list;
346 unsigned int hash; 345 unsigned int hash;
347 346
348 assert_spin_locked(&dlm->master_lock); 347 assert_spin_locked(&dlm->master_lock);
349 348
350 hash = dlm_lockid_hash(name, namelen); 349 hash = dlm_lockid_hash(name, namelen);
351 bucket = dlm_master_hash(dlm, hash); 350 bucket = dlm_master_hash(dlm, hash);
352 hlist_for_each(list, bucket) { 351 hlist_for_each_entry(tmpmle, bucket, master_hash_node) {
353 tmpmle = hlist_entry(list, struct dlm_master_list_entry,
354 master_hash_node);
355 if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) 352 if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
356 continue; 353 continue;
357 dlm_get_mle(tmpmle); 354 dlm_get_mle(tmpmle);
@@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
3183 struct dlm_master_list_entry *mle; 3180 struct dlm_master_list_entry *mle;
3184 struct dlm_lock_resource *res; 3181 struct dlm_lock_resource *res;
3185 struct hlist_head *bucket; 3182 struct hlist_head *bucket;
3186 struct hlist_node *list; 3183 struct hlist_node *tmp;
3187 unsigned int i; 3184 unsigned int i;
3188 3185
3189 mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); 3186 mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
@@ -3194,10 +3191,7 @@ top:
3194 spin_lock(&dlm->master_lock); 3191 spin_lock(&dlm->master_lock);
3195 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 3192 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
3196 bucket = dlm_master_hash(dlm, i); 3193 bucket = dlm_master_hash(dlm, i);
3197 hlist_for_each(list, bucket) { 3194 hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
3198 mle = hlist_entry(list, struct dlm_master_list_entry,
3199 master_hash_node);
3200
3201 BUG_ON(mle->type != DLM_MLE_BLOCK && 3195 BUG_ON(mle->type != DLM_MLE_BLOCK &&
3202 mle->type != DLM_MLE_MASTER && 3196 mle->type != DLM_MLE_MASTER &&
3203 mle->type != DLM_MLE_MIGRATION); 3197 mle->type != DLM_MLE_MIGRATION);
@@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
3378 int i; 3372 int i;
3379 struct hlist_head *bucket; 3373 struct hlist_head *bucket;
3380 struct dlm_master_list_entry *mle; 3374 struct dlm_master_list_entry *mle;
3381 struct hlist_node *tmp, *list; 3375 struct hlist_node *tmp;
3382 3376
3383 /* 3377 /*
3384 * We notified all other nodes that we are exiting the domain and 3378 * We notified all other nodes that we are exiting the domain and
@@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
3394 3388
3395 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 3389 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
3396 bucket = dlm_master_hash(dlm, i); 3390 bucket = dlm_master_hash(dlm, i);
3397 hlist_for_each_safe(list, tmp, bucket) { 3391 hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
3398 mle = hlist_entry(list, struct dlm_master_list_entry,
3399 master_hash_node);
3400 if (mle->type != DLM_MLE_BLOCK) { 3392 if (mle->type != DLM_MLE_BLOCK) {
3401 mlog(ML_ERROR, "bad mle: %p\n", mle); 3393 mlog(ML_ERROR, "bad mle: %p\n", mle);
3402 dlm_print_one_mle(mle); 3394 dlm_print_one_mle(mle);
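dlm_clean_master_list() and dlm_force_free_mles() may unhash and free entries while walking a bucket, so they use the _safe iterator variant, which caches the next node before the loop body runs. A minimal sketch of that pattern (the entry type and kfree() teardown are illustrative only):

#include <linux/list.h>
#include <linux/slab.h>

struct mle_like {
        struct hlist_node node;
        /* ... payload ... */
};

static void purge_bucket(struct hlist_head *bucket)
{
        struct mle_like *e;
        struct hlist_node *tmp;

        /* 'tmp' holds the next node, so 'e' may be removed and freed. */
        hlist_for_each_entry_safe(e, tmp, bucket, node) {
                hlist_del(&e->node);
                kfree(e);
        }
}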
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 773bd32bfd8c..0b5adca1b178 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
787{ 787{
788 struct dlm_lock_request lr; 788 struct dlm_lock_request lr;
789 int ret; 789 int ret;
790 int status;
790 791
791 mlog(0, "\n"); 792 mlog(0, "\n");
792 793
@@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
800 801
801 // send message 802 // send message
802 ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, 803 ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
803 &lr, sizeof(lr), request_from, NULL); 804 &lr, sizeof(lr), request_from, &status);
804 805
805 /* negative status is handled by caller */ 806 /* negative status is handled by caller */
806 if (ret < 0) 807 if (ret < 0)
807 mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " 808 mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u "
808 "to recover dead node %u\n", dlm->name, ret, 809 "to recover dead node %u\n", dlm->name, ret,
809 request_from, dead_node); 810 request_from, dead_node);
811 else
812 ret = status;
810 // return from here, then 813 // return from here, then
811 // sleep until all received or error 814 // sleep until all received or error
812 return ret; 815 return ret;
@@ -2328,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2328 } else if (res->owner == dlm->node_num) { 2331 } else if (res->owner == dlm->node_num) {
2329 dlm_free_dead_locks(dlm, res, dead_node); 2332 dlm_free_dead_locks(dlm, res, dead_node);
2330 __dlm_lockres_calc_usage(dlm, res); 2333 __dlm_lockres_calc_usage(dlm, res);
2334 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2335 if (test_bit(dead_node, res->refmap)) {
2336 mlog(0, "%s:%.*s: dead node %u had a ref, but had "
2337 "no locks and had not purged before dying\n",
2338 dlm->name, res->lockname.len,
2339 res->lockname.name, dead_node);
2340 dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
2341 }
2331 } 2342 }
2332 spin_unlock(&res->spinlock); 2343 spin_unlock(&res->spinlock);
2333 } 2344 }
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index e73c833fc2a1..9db869de829d 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
286 struct dlm_lock_resource *res) 286 struct dlm_lock_resource *res)
287{ 287{
288 struct dlm_lock *lock, *target; 288 struct dlm_lock *lock, *target;
289 struct list_head *iter;
290 struct list_head *head;
291 int can_grant = 1; 289 int can_grant = 1;
292 290
293 /* 291 /*
@@ -314,9 +312,7 @@ converting:
314 dlm->name, res->lockname.len, res->lockname.name); 312 dlm->name, res->lockname.len, res->lockname.name);
315 BUG(); 313 BUG();
316 } 314 }
317 head = &res->granted; 315 list_for_each_entry(lock, &res->granted, list) {
318 list_for_each(iter, head) {
319 lock = list_entry(iter, struct dlm_lock, list);
320 if (lock==target) 316 if (lock==target)
321 continue; 317 continue;
322 if (!dlm_lock_compatible(lock->ml.type, 318 if (!dlm_lock_compatible(lock->ml.type,
@@ -333,9 +329,8 @@ converting:
333 target->ml.convert_type; 329 target->ml.convert_type;
334 } 330 }
335 } 331 }
336 head = &res->converting; 332
337 list_for_each(iter, head) { 333 list_for_each_entry(lock, &res->converting, list) {
338 lock = list_entry(iter, struct dlm_lock, list);
339 if (lock==target) 334 if (lock==target)
340 continue; 335 continue;
341 if (!dlm_lock_compatible(lock->ml.type, 336 if (!dlm_lock_compatible(lock->ml.type,
@@ -384,9 +379,7 @@ blocked:
384 goto leave; 379 goto leave;
385 target = list_entry(res->blocked.next, struct dlm_lock, list); 380 target = list_entry(res->blocked.next, struct dlm_lock, list);
386 381
387 head = &res->granted; 382 list_for_each_entry(lock, &res->granted, list) {
388 list_for_each(iter, head) {
389 lock = list_entry(iter, struct dlm_lock, list);
390 if (lock==target) 383 if (lock==target)
391 continue; 384 continue;
392 if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 385 if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
@@ -400,9 +393,7 @@ blocked:
400 } 393 }
401 } 394 }
402 395
403 head = &res->converting; 396 list_for_each_entry(lock, &res->converting, list) {
404 list_for_each(iter, head) {
405 lock = list_entry(iter, struct dlm_lock, list);
406 if (lock==target) 397 if (lock==target)
407 continue; 398 continue;
408 if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 399 if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 850aa7e87537..5698b52cf5c9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
388 struct dlm_ctxt *dlm = data; 388 struct dlm_ctxt *dlm = data;
389 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; 389 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
390 struct dlm_lock_resource *res = NULL; 390 struct dlm_lock_resource *res = NULL;
391 struct list_head *iter;
392 struct dlm_lock *lock = NULL; 391 struct dlm_lock *lock = NULL;
393 enum dlm_status status = DLM_NORMAL; 392 enum dlm_status status = DLM_NORMAL;
394 int found = 0, i; 393 int found = 0, i;
@@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
458 } 457 }
459 458
460 for (i=0; i<3; i++) { 459 for (i=0; i<3; i++) {
461 list_for_each(iter, queue) { 460 list_for_each_entry(lock, queue, list) {
462 lock = list_entry(iter, struct dlm_lock, list);
463 if (lock->ml.cookie == unlock->cookie && 461 if (lock->ml.cookie == unlock->cookie &&
464 lock->ml.node == unlock->node_idx) { 462 lock->ml.node == unlock->node_idx) {
465 dlm_lock_get(lock); 463 dlm_lock_get(lock);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 12bafb7265ce..efa2b3d339e3 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
401{ 401{
402 struct inode *inode = new_inode(sb); 402 struct inode *inode = new_inode(sb);
403 umode_t mode = S_IFDIR | 0755; 403 umode_t mode = S_IFDIR | 0755;
404 struct dlmfs_inode_private *ip;
405 404
406 if (inode) { 405 if (inode) {
407 ip = DLMFS_I(inode);
408
409 inode->i_ino = get_next_ino(); 406 inode->i_ino = get_next_ino();
410 inode_init_owner(inode, NULL, mode); 407 inode_init_owner(inode, NULL, mode);
411 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 408 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2487116d0d33..767370b656ca 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
781 cpos = map_start >> osb->s_clustersize_bits; 781 cpos = map_start >> osb->s_clustersize_bits;
782 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 782 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
783 map_start + map_len); 783 map_start + map_len);
784 mapping_end -= cpos;
785 is_last = 0; 784 is_last = 0;
786 while (cpos < mapping_end && !is_last) { 785 while (cpos < mapping_end && !is_last) {
787 u32 fe_flags; 786 u32 fe_flags;
@@ -852,20 +851,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
852 851
853 down_read(&OCFS2_I(inode)->ip_alloc_sem); 852 down_read(&OCFS2_I(inode)->ip_alloc_sem);
854 853
855 if (*offset >= inode->i_size) { 854 if (*offset >= i_size_read(inode)) {
856 ret = -ENXIO; 855 ret = -ENXIO;
857 goto out_unlock; 856 goto out_unlock;
858 } 857 }
859 858
860 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 859 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
861 if (whence == SEEK_HOLE) 860 if (whence == SEEK_HOLE)
862 *offset = inode->i_size; 861 *offset = i_size_read(inode);
863 goto out_unlock; 862 goto out_unlock;
864 } 863 }
865 864
866 clen = 0; 865 clen = 0;
867 cpos = *offset >> cs_bits; 866 cpos = *offset >> cs_bits;
868 cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); 867 cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
869 868
870 while (cpos < cend && !is_last) { 869 while (cpos < cend && !is_last) {
871 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, 870 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
@@ -904,8 +903,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
904 extlen = clen; 903 extlen = clen;
905 extlen <<= cs_bits; 904 extlen <<= cs_bits;
906 905
907 if ((extoff + extlen) > inode->i_size) 906 if ((extoff + extlen) > i_size_read(inode))
908 extlen = inode->i_size - extoff; 907 extlen = i_size_read(inode) - extoff;
909 extoff += extlen; 908 extoff += extlen;
910 if (extoff > *offset) 909 if (extoff > *offset)
911 *offset = extoff; 910 *offset = extoff;
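The extent_map.c hunks above, and several ocfs2 hunks that follow, replace direct inode->i_size reads with i_size_read(). On 64-bit kernels the two are equivalent; on 32-bit SMP or preemptible kernels i_size is not read atomically, and i_size_read() pairs with i_size_write() over a seqcount to avoid torn loads. A short sketch of the intended usage, with a hypothetical helper name:

#include <linux/fs.h>

/* How far past 'off' the file extends, or -ENXIO past EOF. */
static loff_t bytes_after(struct inode *inode, loff_t off)
{
        loff_t size = i_size_read(inode);       /* safe against torn reads */

        if (off >= size)
                return -ENXIO;
        return size - off;
}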
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3261d71319ee..4f8197caa487 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -671,11 +671,7 @@ restarted_transaction:
671 } else { 671 } else {
672 BUG_ON(why != RESTART_TRANS); 672 BUG_ON(why != RESTART_TRANS);
673 673
674 /* TODO: This can be more intelligent. */ 674 status = ocfs2_allocate_extend_trans(handle, 1);
675 credits = ocfs2_calc_extend_credits(osb->sb,
676 &fe->id2.i_list,
677 clusters_to_add);
678 status = ocfs2_extend_trans(handle, credits);
679 if (status < 0) { 675 if (status < 0) {
680 /* handle still has to be committed at 676 /* handle still has to be committed at
681 * this point. */ 677 * this point. */
@@ -1800,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1800 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1796 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
1801 1797
1802out: 1798out:
1799 ocfs2_free_path(path);
1803 ocfs2_schedule_truncate_log_flush(osb, 1); 1800 ocfs2_schedule_truncate_log_flush(osb, 1);
1804 ocfs2_run_deallocs(osb, &dealloc); 1801 ocfs2_run_deallocs(osb, &dealloc);
1805 1802
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 0c60ef2d8056..fa32ce9b455d 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
303 if (o2info_from_user(oij, req)) 303 if (o2info_from_user(oij, req))
304 goto bail; 304 goto bail;
305 305
306 oij.ij_journal_size = osb->journal->j_inode->i_size; 306 oij.ij_journal_size = i_size_read(osb->journal->j_inode);
307 307
308 o2info_set_request_filled(&oij.ij_req); 308 o2info_set_request_filled(&oij.ij_req);
309 309
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 242170d83971..44fc3e530c3d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -455,6 +455,41 @@ bail:
455 return status; 455 return status;
456} 456}
457 457
458/*
459 * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
460 * If that fails, restart the transaction & regain write access for the
461 * buffer head which is used for metadata modifications.
462 * Taken from Ext4: extend_or_restart_transaction()
463 */
464int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
465{
466 int status, old_nblks;
467
468 BUG_ON(!handle);
469
470 old_nblks = handle->h_buffer_credits;
471 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
472
473 if (old_nblks < thresh)
474 return 0;
475
476 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
477 if (status < 0) {
478 mlog_errno(status);
479 goto bail;
480 }
481
482 if (status > 0) {
483 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
484 if (status < 0)
485 mlog_errno(status);
486 }
487
488bail:
489 return status;
490}
491
492
458struct ocfs2_triggers { 493struct ocfs2_triggers {
459 struct jbd2_buffer_trigger_type ot_triggers; 494 struct jbd2_buffer_trigger_type ot_triggers;
460 int ot_offset; 495 int ot_offset;
@@ -801,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
801 inode_lock = 1; 836 inode_lock = 1;
802 di = (struct ocfs2_dinode *)bh->b_data; 837 di = (struct ocfs2_dinode *)bh->b_data;
803 838
804 if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { 839 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
805 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", 840 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
806 inode->i_size); 841 i_size_read(inode));
807 status = -EINVAL; 842 status = -EINVAL;
808 goto done; 843 goto done;
809 } 844 }
810 845
811 trace_ocfs2_journal_init(inode->i_size, 846 trace_ocfs2_journal_init(i_size_read(inode),
812 (unsigned long long)inode->i_blocks, 847 (unsigned long long)inode->i_blocks,
813 OCFS2_I(inode)->ip_clusters); 848 OCFS2_I(inode)->ip_clusters);
814 849
@@ -1096,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
1096 1131
1097 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); 1132 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
1098 1133
1099 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); 1134 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1100 v_blkno = 0; 1135 v_blkno = 0;
1101 while (v_blkno < num_blocks) { 1136 while (v_blkno < num_blocks) {
1102 status = ocfs2_extent_map_get_blocks(inode, v_blkno, 1137 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
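ocfs2_allocate_extend_trans() above is built on the jbd2 extend-or-restart idiom borrowed from ext4: jbd2_journal_extend() returns 0 when the running handle could be stretched in place, a positive value when the journal has no room and the handle must be restarted with jbd2_journal_restart(), and a negative errno on error. The core idiom in isolation, under a hypothetical name:

#include <linux/jbd2.h>

static int extend_or_restart(handle_t *handle, int nblocks)
{
        int status;

        status = jbd2_journal_extend(handle, nblocks);
        if (status < 0)         /* hard error */
                return status;
        if (status > 0)         /* no room left: commit and start over */
                status = jbd2_journal_restart(handle, nblocks);
        return status;
}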
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 0a992737dcaf..0b479bab3671 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -258,6 +258,17 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb,
258int ocfs2_commit_trans(struct ocfs2_super *osb, 258int ocfs2_commit_trans(struct ocfs2_super *osb,
259 handle_t *handle); 259 handle_t *handle);
260int ocfs2_extend_trans(handle_t *handle, int nblocks); 260int ocfs2_extend_trans(handle_t *handle, int nblocks);
261int ocfs2_allocate_extend_trans(handle_t *handle,
262 int thresh);
263
264/*
265 * Define an arbitrary limit for the amount of data we will anticipate
266 * writing to any given transaction. For unbounded transactions such as
267 * fallocate(2) we can write more than this, but we always
268 * start off at the maximum transaction size and grow the transaction
269 * optimistically as we go.
270 */
271#define OCFS2_MAX_TRANS_DATA 64U
261 272
262/* 273/*
263 * Create access is for when we get a newly created buffer and we're 274 * Create access is for when we get a newly created buffer and we're
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index aebeacd807c3..cd5496b7a0a3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
1082 } 1082 }
1083 1083
1084retry_enospc: 1084retry_enospc:
1085 (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; 1085 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
1086 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1086 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
1087 if (status == -ENOSPC) { 1087 if (status == -ENOSPC) {
1088 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1088 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
@@ -1154,7 +1154,7 @@ retry_enospc:
1154 OCFS2_LA_DISABLED) 1154 OCFS2_LA_DISABLED)
1155 goto bail; 1155 goto bail;
1156 1156
1157 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1157 ac->ac_bits_wanted = osb->local_alloc_bits;
1158 status = ocfs2_claim_clusters(handle, ac, 1158 status = ocfs2_claim_clusters(handle, ac,
1159 osb->local_alloc_bits, 1159 osb->local_alloc_bits,
1160 &cluster_off, 1160 &cluster_off,
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 452068b45749..3d3f3c83065c 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle,
152 } 152 }
153 153
154out: 154out:
155 ocfs2_free_path(path);
155 return ret; 156 return ret;
156} 157}
157 158
@@ -845,7 +846,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
845 struct ocfs2_move_extents *range = context->range; 846 struct ocfs2_move_extents *range = context->range;
846 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 847 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
847 848
848 if ((inode->i_size == 0) || (range->me_len == 0)) 849 if ((i_size_read(inode) == 0) || (range->me_len == 0))
849 return 0; 850 return 0;
850 851
851 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 852 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 3b481f490633..1b60c62aa9d6 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
2579 2579
2580DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); 2580DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
2581 2581
2582DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
2583
2582DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); 2584DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access);
2583 2585
2584DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); 2586DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 332a281f217e..aaa50611ec66 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; 234 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
235 } 235 }
236 236
237 if (gqinode->i_size < off + len) { 237 if (i_size_read(gqinode) < off + len) {
238 loff_t rounded_end = 238 loff_t rounded_end =
239 ocfs2_align_bytes_to_blocks(sb, off + len); 239 ocfs2_align_bytes_to_blocks(sb, off + len);
240 240
@@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
778 */ 778 */
779 WARN_ON(journal_current_handle()); 779 WARN_ON(journal_current_handle());
780 status = ocfs2_extend_no_holes(gqinode, NULL, 780 status = ocfs2_extend_no_holes(gqinode, NULL,
781 gqinode->i_size + (need_alloc << sb->s_blocksize_bits), 781 i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits),
782 gqinode->i_size); 782 i_size_read(gqinode));
783 if (status < 0) 783 if (status < 0)
784 goto out_dq; 784 goto out_dq;
785 } 785 }
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 27fe7ee4874c..2e4344be3b96 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
982 982
983 /* We are protected by dqio_sem so no locking needed */ 983 /* We are protected by dqio_sem so no locking needed */
984 status = ocfs2_extend_no_holes(lqinode, NULL, 984 status = ocfs2_extend_no_holes(lqinode, NULL,
985 lqinode->i_size + 2 * sb->s_blocksize, 985 i_size_read(lqinode) + 2 * sb->s_blocksize,
986 lqinode->i_size); 986 i_size_read(lqinode));
987 if (status < 0) { 987 if (status < 0) {
988 mlog_errno(status); 988 mlog_errno(status);
989 goto out; 989 goto out;
990 } 990 }
991 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, 991 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
992 lqinode->i_size + 2 * sb->s_blocksize); 992 i_size_read(lqinode) + 2 * sb->s_blocksize);
993 if (status < 0) { 993 if (status < 0) {
994 mlog_errno(status); 994 mlog_errno(status);
995 goto out; 995 goto out;
@@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1125 1125
1126 /* We are protected by dqio_sem so no locking needed */ 1126 /* We are protected by dqio_sem so no locking needed */
1127 status = ocfs2_extend_no_holes(lqinode, NULL, 1127 status = ocfs2_extend_no_holes(lqinode, NULL,
1128 lqinode->i_size + sb->s_blocksize, 1128 i_size_read(lqinode) + sb->s_blocksize,
1129 lqinode->i_size); 1129 i_size_read(lqinode));
1130 if (status < 0) { 1130 if (status < 0) {
1131 mlog_errno(status); 1131 mlog_errno(status);
1132 goto out; 1132 goto out;
1133 } 1133 }
1134 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, 1134 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
1135 lqinode->i_size + sb->s_blocksize); 1135 i_size_read(lqinode) + sb->s_blocksize);
1136 if (status < 0) { 1136 if (status < 0) {
1137 mlog_errno(status); 1137 mlog_errno(status);
1138 goto out; 1138 goto out;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a70d604593b6..bf4dfc14bb2c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
3854 while (cpos < clusters) { 3854 while (cpos < clusters) {
3855 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, 3855 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3856 &num_clusters, &ext_flags); 3856 &num_clusters, &ext_flags);
3857 3857 if (ret) {
3858 mlog_errno(ret);
3859 goto unlock;
3860 }
3858 if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { 3861 if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
3859 ret = ocfs2_add_refcount_flag(inode, &di_et, 3862 ret = ocfs2_add_refcount_flag(inode, &di_et,
3860 &ref_tree->rf_ci, 3863 &ref_tree->rf_ci,
@@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode,
4025 while (cpos < clusters) { 4028 while (cpos < clusters) {
4026 ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, 4029 ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
4027 &num_clusters, &ext_flags); 4030 &num_clusters, &ext_flags);
4028 4031 if (ret) {
4032 mlog_errno(ret);
4033 goto out;
4034 }
4029 if (p_cluster) { 4035 if (p_cluster) {
4030 ret = ocfs2_add_refcounted_extent(t_inode, &et, 4036 ret = ocfs2_add_refcounted_extent(t_inode, &et,
4031 ref_ci, ref_root_bh, 4037 ref_ci, ref_root_bh,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 317ef0abccbb..6ce0686eab72 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode,
3505 int ret, credits, ref_meta = 0, ref_credits = 0; 3505 int ret, credits, ref_meta = 0, ref_credits = 0;
3506 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3506 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3507 struct inode *tl_inode = osb->osb_tl_inode; 3507 struct inode *tl_inode = osb->osb_tl_inode;
3508 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 3508 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3509 struct ocfs2_refcount_tree *ref_tree = NULL; 3509 struct ocfs2_refcount_tree *ref_tree = NULL;
3510 3510
3511 struct ocfs2_xattr_info xi = { 3511 struct ocfs2_xattr_info xi = {
@@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode,
3609 if (IS_ERR(ctxt.handle)) { 3609 if (IS_ERR(ctxt.handle)) {
3610 ret = PTR_ERR(ctxt.handle); 3610 ret = PTR_ERR(ctxt.handle);
3611 mlog_errno(ret); 3611 mlog_errno(ret);
3612 goto cleanup; 3612 goto out_free_ac;
3613 } 3613 }
3614 3614
3615 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3615 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3616 3616
3617 ocfs2_commit_trans(osb, ctxt.handle); 3617 ocfs2_commit_trans(osb, ctxt.handle);
3618 3618
3619out_free_ac:
3619 if (ctxt.data_ac) 3620 if (ctxt.data_ac)
3620 ocfs2_free_alloc_context(ctxt.data_ac); 3621 ocfs2_free_alloc_context(ctxt.data_ac);
3621 if (ctxt.meta_ac) 3622 if (ctxt.meta_ac)
@@ -5881,6 +5882,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5881 while (cpos < clusters) { 5882 while (cpos < clusters) {
5882 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 5883 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5883 &num_clusters, el, &ext_flags); 5884 &num_clusters, el, &ext_flags);
5885 if (ret) {
5886 mlog_errno(ret);
5887 break;
5888 }
5884 5889
5885 cpos += num_clusters; 5890 cpos += num_clusters;
5886 if ((ext_flags & OCFS2_EXT_REFCOUNTED)) 5891 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
@@ -6797,7 +6802,7 @@ out:
6797 if (ret) { 6802 if (ret) {
6798 if (*meta_ac) { 6803 if (*meta_ac) {
6799 ocfs2_free_alloc_context(*meta_ac); 6804 ocfs2_free_alloc_context(*meta_ac);
6800 meta_ac = NULL; 6805 *meta_ac = NULL;
6801 } 6806 }
6802 } 6807 }
6803 6808
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 0ff80f9b930f..985ea881b5bc 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -286,7 +286,7 @@ int proc_fd_permission(struct inode *inode, int mask)
286 int rv = generic_permission(inode, mask); 286 int rv = generic_permission(inode, mask);
287 if (rv == 0) 287 if (rv == 0)
288 return 0; 288 return 0;
289 if (task_pid(current) == proc_pid(inode)) 289 if (task_tgid(current) == proc_pid(inode))
290 rv = 0; 290 rv = 0;
291 return rv; 291 return rv;
292} 292}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 107d026f5d6e..7366e9d63cee 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
740 ptent = pte_file_clear_soft_dirty(ptent); 740 ptent = pte_file_clear_soft_dirty(ptent);
741 } 741 }
742 742
743 if (vma->vm_flags & VM_SOFTDIRTY)
744 vma->vm_flags &= ~VM_SOFTDIRTY;
745
743 set_pte_at(vma->vm_mm, addr, pte, ptent); 746 set_pte_at(vma->vm_mm, addr, pte, ptent);
744#endif 747#endif
745} 748}
@@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
949 if (is_migration_entry(entry)) 952 if (is_migration_entry(entry))
950 page = migration_entry_to_page(entry); 953 page = migration_entry_to_page(entry);
951 } else { 954 } else {
952 *pme = make_pme(PM_NOT_PRESENT(pm->v2)); 955 if (vma->vm_flags & VM_SOFTDIRTY)
956 flags2 |= __PM_SOFT_DIRTY;
957 *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
953 return; 958 return;
954 } 959 }
955 960
956 if (page && !PageAnon(page)) 961 if (page && !PageAnon(page))
957 flags |= PM_FILE; 962 flags |= PM_FILE;
958 if (pte_soft_dirty(pte)) 963 if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
959 flags2 |= __PM_SOFT_DIRTY; 964 flags2 |= __PM_SOFT_DIRTY;
960 965
961 *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); 966 *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
@@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p
974 *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) 979 *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
975 | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); 980 | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
976 else 981 else
977 *pme = make_pme(PM_NOT_PRESENT(pm->v2)); 982 *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
978} 983}
979#else 984#else
980static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, 985static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
@@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
997 if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { 1002 if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
998 int pmd_flags2; 1003 int pmd_flags2;
999 1004
1000 pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); 1005 if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
1006 pmd_flags2 = __PM_SOFT_DIRTY;
1007 else
1008 pmd_flags2 = 0;
1009
1001 for (; addr != end; addr += PAGE_SIZE) { 1010 for (; addr != end; addr += PAGE_SIZE) {
1002 unsigned long offset; 1011 unsigned long offset;
1003 1012
@@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
1015 if (pmd_trans_unstable(pmd)) 1024 if (pmd_trans_unstable(pmd))
1016 return 0; 1025 return 0;
1017 for (; addr != end; addr += PAGE_SIZE) { 1026 for (; addr != end; addr += PAGE_SIZE) {
1027 int flags2;
1018 1028
1019 /* check to see if we've left 'vma' behind 1029 /* check to see if we've left 'vma' behind
1020 * and need a new, higher one */ 1030 * and need a new, higher one */
1021 if (vma && (addr >= vma->vm_end)) { 1031 if (vma && (addr >= vma->vm_end)) {
1022 vma = find_vma(walk->mm, addr); 1032 vma = find_vma(walk->mm, addr);
1023 pme = make_pme(PM_NOT_PRESENT(pm->v2)); 1033 if (vma && (vma->vm_flags & VM_SOFTDIRTY))
1034 flags2 = __PM_SOFT_DIRTY;
1035 else
1036 flags2 = 0;
1037 pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
1024 } 1038 }
1025 1039
1026 /* check that 'vma' actually covers this address, 1040 /* check that 'vma' actually covers this address,
@@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
1044 1058
1045#ifdef CONFIG_HUGETLB_PAGE 1059#ifdef CONFIG_HUGETLB_PAGE
1046static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, 1060static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
1047 pte_t pte, int offset) 1061 pte_t pte, int offset, int flags2)
1048{ 1062{
1049 if (pte_present(pte)) 1063 if (pte_present(pte))
1050 *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) 1064 *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
1051 | PM_STATUS2(pm->v2, 0) | PM_PRESENT); 1065 PM_STATUS2(pm->v2, flags2) |
1066 PM_PRESENT);
1052 else 1067 else
1053 *pme = make_pme(PM_NOT_PRESENT(pm->v2)); 1068 *pme = make_pme(PM_NOT_PRESENT(pm->v2) |
1069 PM_STATUS2(pm->v2, flags2));
1054} 1070}
1055 1071
1056/* This function walks within one hugetlb entry in the single call */ 1072/* This function walks within one hugetlb entry in the single call */
@@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
1059 struct mm_walk *walk) 1075 struct mm_walk *walk)
1060{ 1076{
1061 struct pagemapread *pm = walk->private; 1077 struct pagemapread *pm = walk->private;
1078 struct vm_area_struct *vma;
1062 int err = 0; 1079 int err = 0;
1080 int flags2;
1063 pagemap_entry_t pme; 1081 pagemap_entry_t pme;
1064 1082
1083 vma = find_vma(walk->mm, addr);
1084 WARN_ON_ONCE(!vma);
1085
1086 if (vma && (vma->vm_flags & VM_SOFTDIRTY))
1087 flags2 = __PM_SOFT_DIRTY;
1088 else
1089 flags2 = 0;
1090
1065 for (; addr != end; addr += PAGE_SIZE) { 1091 for (; addr != end; addr += PAGE_SIZE) {
1066 int offset = (addr & ~hmask) >> PAGE_SHIFT; 1092 int offset = (addr & ~hmask) >> PAGE_SHIFT;
1067 huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); 1093 huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
1068 err = add_to_pagemap(addr, &pme, pm); 1094 err = add_to_pagemap(addr, &pme, pm);
1069 if (err) 1095 if (err)
1070 return err; 1096 return err;
@@ -1376,8 +1402,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1376 walk.mm = mm; 1402 walk.mm = mm;
1377 1403
1378 pol = get_vma_policy(task, vma, vma->vm_start); 1404 pol = get_vma_policy(task, vma, vma->vm_start);
1379 mpol_to_str(buffer, sizeof(buffer), pol); 1405 n = mpol_to_str(buffer, sizeof(buffer), pol);
1380 mpol_cond_put(pol); 1406 mpol_cond_put(pol);
1407 if (n < 0)
1408 return n;
1381 1409
1382 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1410 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1383 1411
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a1a16eb97c7b..9100d6959886 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -21,6 +21,7 @@
21#include <linux/crash_dump.h> 21#include <linux/crash_dump.h>
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/pagemap.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include <asm/io.h> 26#include <asm/io.h>
26#include "internal.h" 27#include "internal.h"
@@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
123 return read; 124 return read;
124} 125}
125 126
127/*
128 * Architectures may override this function to allocate ELF header in 2nd kernel
129 */
130int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
131{
132 return 0;
133}
134
135/*
136 * Architectures may override this function to free header
137 */
138void __weak elfcorehdr_free(unsigned long long addr)
139{}
140
141/*
142 * Architectures may override this function to read from ELF header
143 */
144ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
145{
146 return read_from_oldmem(buf, count, ppos, 0);
147}
148
149/*
150 * Architectures may override this function to read from notes sections
151 */
152ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
153{
154 return read_from_oldmem(buf, count, ppos, 0);
155}
156
157/*
158 * Architectures may override this function to map oldmem
159 */
160int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
161 unsigned long from, unsigned long pfn,
162 unsigned long size, pgprot_t prot)
163{
164 return remap_pfn_range(vma, from, pfn, size, prot);
165}
166
167/*
168 * Copy to either kernel or user space
169 */
170static int copy_to(void *target, void *src, size_t size, int userbuf)
171{
172 if (userbuf) {
173 if (copy_to_user((char __user *) target, src, size))
174 return -EFAULT;
175 } else {
176 memcpy(target, src, size);
177 }
178 return 0;
179}
180
126/* Read from the ELF header and then the crash dump. On error, negative value is 181/* Read from the ELF header and then the crash dump. On error, negative value is
127 * returned otherwise number of bytes read are returned. 182 * returned otherwise number of bytes read are returned.
128 */ 183 */
129static ssize_t read_vmcore(struct file *file, char __user *buffer, 184static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
130 size_t buflen, loff_t *fpos) 185 int userbuf)
131{ 186{
132 ssize_t acc = 0, tmp; 187 ssize_t acc = 0, tmp;
133 size_t tsz; 188 size_t tsz;
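The elfcorehdr_*() and remap_oldmem_pfn_range() fallbacks added above rely on weak linkage: the __weak definitions in vmcore.c are used only if no architecture supplies a strong definition of the same symbol, so an arch can take over ELF-header handling without any registration hook. A hedged sketch of what an architecture-side override could look like; the arch_hdr_buf buffer and the file it lives in are hypothetical, not part of this patch:

/* Hypothetical arch/<arch>/kernel/crash_dump.c */
#include <linux/crash_dump.h>
#include <linux/string.h>

static char *arch_hdr_buf;      /* hypothetical kernel-allocated ELF header copy */

/* Strong definition: the linker prefers this over the __weak one in vmcore.c. */
ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
        memcpy(buf, arch_hdr_buf + *ppos, count);
        *ppos += count;
        return count;
}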
@@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
144 /* Read ELF core header */ 199 /* Read ELF core header */
145 if (*fpos < elfcorebuf_sz) { 200 if (*fpos < elfcorebuf_sz) {
146 tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); 201 tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
147 if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) 202 if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
148 return -EFAULT; 203 return -EFAULT;
149 buflen -= tsz; 204 buflen -= tsz;
150 *fpos += tsz; 205 *fpos += tsz;
@@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
162 217
163 tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); 218 tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
164 kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; 219 kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
165 if (copy_to_user(buffer, kaddr, tsz)) 220 if (copy_to(buffer, kaddr, tsz, userbuf))
166 return -EFAULT; 221 return -EFAULT;
167 buflen -= tsz; 222 buflen -= tsz;
168 *fpos += tsz; 223 *fpos += tsz;
@@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
178 if (*fpos < m->offset + m->size) { 233 if (*fpos < m->offset + m->size) {
179 tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); 234 tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
180 start = m->paddr + *fpos - m->offset; 235 start = m->paddr + *fpos - m->offset;
181 tmp = read_from_oldmem(buffer, tsz, &start, 1); 236 tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
182 if (tmp < 0) 237 if (tmp < 0)
183 return tmp; 238 return tmp;
184 buflen -= tsz; 239 buflen -= tsz;
@@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
195 return acc; 250 return acc;
196} 251}
197 252
253static ssize_t read_vmcore(struct file *file, char __user *buffer,
254 size_t buflen, loff_t *fpos)
255{
256 return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
257}
258
259/*
260 * The vmcore fault handler uses the page cache and fills data using the
261 * standard __vmcore_read() function.
262 *
263 * On s390 the fault handler is used for memory regions that can't be mapped
264 * directly with remap_pfn_range().
265 */
266static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
267{
268#ifdef CONFIG_S390
269 struct address_space *mapping = vma->vm_file->f_mapping;
270 pgoff_t index = vmf->pgoff;
271 struct page *page;
272 loff_t offset;
273 char *buf;
274 int rc;
275
276 page = find_or_create_page(mapping, index, GFP_KERNEL);
277 if (!page)
278 return VM_FAULT_OOM;
279 if (!PageUptodate(page)) {
280 offset = (loff_t) index << PAGE_CACHE_SHIFT;
281 buf = __va((page_to_pfn(page) << PAGE_SHIFT));
282 rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
283 if (rc < 0) {
284 unlock_page(page);
285 page_cache_release(page);
286 return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
287 }
288 SetPageUptodate(page);
289 }
290 unlock_page(page);
291 vmf->page = page;
292 return 0;
293#else
294 return VM_FAULT_SIGBUS;
295#endif
296}
297
298static const struct vm_operations_struct vmcore_mmap_ops = {
299 .fault = mmap_vmcore_fault,
300};
301
198/** 302/**
199 * alloc_elfnotes_buf - allocate buffer for ELF note segment in 303 * alloc_elfnotes_buf - allocate buffer for ELF note segment in
200 * vmalloc memory 304 * vmalloc memory
@@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
223 * regions in the 1st kernel pointed to by PT_LOAD entries) into 327 * regions in the 1st kernel pointed to by PT_LOAD entries) into
224 * virtually contiguous user-space in ELF layout. 328 * virtually contiguous user-space in ELF layout.
225 */ 329 */
226#if defined(CONFIG_MMU) && !defined(CONFIG_S390) 330#ifdef CONFIG_MMU
227static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) 331static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
228{ 332{
229 size_t size = vma->vm_end - vma->vm_start; 333 size_t size = vma->vm_end - vma->vm_start;
@@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
241 345
242 vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); 346 vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
243 vma->vm_flags |= VM_MIXEDMAP; 347 vma->vm_flags |= VM_MIXEDMAP;
348 vma->vm_ops = &vmcore_mmap_ops;
244 349
245 len = 0; 350 len = 0;
246 351
@@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
282 387
283 tsz = min_t(size_t, m->offset + m->size - start, size); 388 tsz = min_t(size_t, m->offset + m->size - start, size);
284 paddr = m->paddr + start - m->offset; 389 paddr = m->paddr + start - m->offset;
285 if (remap_pfn_range(vma, vma->vm_start + len, 390 if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
286 paddr >> PAGE_SHIFT, tsz, 391 paddr >> PAGE_SHIFT, tsz,
287 vma->vm_page_prot)) 392 vma->vm_page_prot))
288 goto fail; 393 goto fail;
289 size -= tsz; 394 size -= tsz;
290 start += tsz; 395 start += tsz;
@@ -357,7 +462,7 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
357 notes_section = kmalloc(max_sz, GFP_KERNEL); 462 notes_section = kmalloc(max_sz, GFP_KERNEL);
358 if (!notes_section) 463 if (!notes_section)
359 return -ENOMEM; 464 return -ENOMEM;
360 rc = read_from_oldmem(notes_section, max_sz, &offset, 0); 465 rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
361 if (rc < 0) { 466 if (rc < 0) {
362 kfree(notes_section); 467 kfree(notes_section);
363 return rc; 468 return rc;
@@ -444,7 +549,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
444 if (phdr_ptr->p_type != PT_NOTE) 549 if (phdr_ptr->p_type != PT_NOTE)
445 continue; 550 continue;
446 offset = phdr_ptr->p_offset; 551 offset = phdr_ptr->p_offset;
447 rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); 552 rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
553 &offset);
448 if (rc < 0) 554 if (rc < 0)
449 return rc; 555 return rc;
450 notes_buf += phdr_ptr->p_memsz; 556 notes_buf += phdr_ptr->p_memsz;
@@ -536,7 +642,7 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
536 notes_section = kmalloc(max_sz, GFP_KERNEL); 642 notes_section = kmalloc(max_sz, GFP_KERNEL);
537 if (!notes_section) 643 if (!notes_section)
538 return -ENOMEM; 644 return -ENOMEM;
539 rc = read_from_oldmem(notes_section, max_sz, &offset, 0); 645 rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
540 if (rc < 0) { 646 if (rc < 0) {
541 kfree(notes_section); 647 kfree(notes_section);
542 return rc; 648 return rc;
@@ -623,7 +729,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
623 if (phdr_ptr->p_type != PT_NOTE) 729 if (phdr_ptr->p_type != PT_NOTE)
624 continue; 730 continue;
625 offset = phdr_ptr->p_offset; 731 offset = phdr_ptr->p_offset;
626 rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); 732 rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
733 &offset);
627 if (rc < 0) 734 if (rc < 0)
628 return rc; 735 return rc;
629 notes_buf += phdr_ptr->p_memsz; 736 notes_buf += phdr_ptr->p_memsz;
@@ -810,7 +917,7 @@ static int __init parse_crash_elf64_headers(void)
810 addr = elfcorehdr_addr; 917 addr = elfcorehdr_addr;
811 918
812 /* Read Elf header */ 919 /* Read Elf header */
813 rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); 920 rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
814 if (rc < 0) 921 if (rc < 0)
815 return rc; 922 return rc;
816 923
@@ -837,7 +944,7 @@ static int __init parse_crash_elf64_headers(void)
837 if (!elfcorebuf) 944 if (!elfcorebuf)
838 return -ENOMEM; 945 return -ENOMEM;
839 addr = elfcorehdr_addr; 946 addr = elfcorehdr_addr;
840 rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); 947 rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
841 if (rc < 0) 948 if (rc < 0)
842 goto fail; 949 goto fail;
843 950
@@ -866,7 +973,7 @@ static int __init parse_crash_elf32_headers(void)
866 addr = elfcorehdr_addr; 973 addr = elfcorehdr_addr;
867 974
868 /* Read Elf header */ 975 /* Read Elf header */
869 rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); 976 rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
870 if (rc < 0) 977 if (rc < 0)
871 return rc; 978 return rc;
872 979
@@ -892,7 +999,7 @@ static int __init parse_crash_elf32_headers(void)
892 if (!elfcorebuf) 999 if (!elfcorebuf)
893 return -ENOMEM; 1000 return -ENOMEM;
894 addr = elfcorehdr_addr; 1001 addr = elfcorehdr_addr;
895 rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); 1002 rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
896 if (rc < 0) 1003 if (rc < 0)
897 goto fail; 1004 goto fail;
898 1005
@@ -919,7 +1026,7 @@ static int __init parse_crash_elf_headers(void)
919 int rc=0; 1026 int rc=0;
920 1027
921 addr = elfcorehdr_addr; 1028 addr = elfcorehdr_addr;
922 rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); 1029 rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
923 if (rc < 0) 1030 if (rc < 0)
924 return rc; 1031 return rc;
925 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { 1032 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
@@ -952,7 +1059,14 @@ static int __init vmcore_init(void)
952{ 1059{
953 int rc = 0; 1060 int rc = 0;
954 1061
955 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ 1062 /* Allow architectures to allocate ELF header in 2nd kernel */
1063 rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
1064 if (rc)
1065 return rc;
1066 /*
1067 * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
1068 * then capture the dump.
1069 */
956 if (!(is_vmcore_usable())) 1070 if (!(is_vmcore_usable()))
957 return rc; 1071 return rc;
958 rc = parse_crash_elf_headers(); 1072 rc = parse_crash_elf_headers();
@@ -960,6 +1074,8 @@ static int __init vmcore_init(void)
960 pr_warn("Kdump: vmcore not initialized\n"); 1074 pr_warn("Kdump: vmcore not initialized\n");
961 return rc; 1075 return rc;
962 } 1076 }
1077 elfcorehdr_free(elfcorehdr_addr);
1078 elfcorehdr_addr = ELFCORE_ADDR_ERR;
963 1079
964 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); 1080 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
965 if (proc_vmcore) 1081 if (proc_vmcore)
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c24f1e10b946..39d14659a8d3 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -244,12 +244,6 @@ struct dentry *ramfs_mount(struct file_system_type *fs_type,
244 return mount_nodev(fs_type, flags, data, ramfs_fill_super); 244 return mount_nodev(fs_type, flags, data, ramfs_fill_super);
245} 245}
246 246
247static struct dentry *rootfs_mount(struct file_system_type *fs_type,
248 int flags, const char *dev_name, void *data)
249{
250 return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
251}
252
253static void ramfs_kill_sb(struct super_block *sb) 247static void ramfs_kill_sb(struct super_block *sb)
254{ 248{
255 kfree(sb->s_fs_info); 249 kfree(sb->s_fs_info);
@@ -262,29 +256,23 @@ static struct file_system_type ramfs_fs_type = {
 	.kill_sb	= ramfs_kill_sb,
 	.fs_flags	= FS_USERNS_MOUNT,
 };
-static struct file_system_type rootfs_fs_type = {
-	.name		= "rootfs",
-	.mount		= rootfs_mount,
-	.kill_sb	= kill_litter_super,
-};
 
-static int __init init_ramfs_fs(void)
-{
-	return register_filesystem(&ramfs_fs_type);
-}
-module_init(init_ramfs_fs)
-
-int __init init_rootfs(void)
+int __init init_ramfs_fs(void)
 {
+	static unsigned long once;
 	int err;
 
+	if (test_and_set_bit(0, &once))
+		return 0;
+
 	err = bdi_init(&ramfs_backing_dev_info);
 	if (err)
 		return err;
 
-	err = register_filesystem(&rootfs_fs_type);
+	err = register_filesystem(&ramfs_fs_type);
 	if (err)
 		bdi_destroy(&ramfs_backing_dev_info);
 
 	return err;
 }
+module_init(init_ramfs_fs)
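
With rootfs_fs_type and init_rootfs() gone from this file and init_ramfs_fs() exported behind a run-once guard, rootfs registration presumably moves to early init code that reuses the ramfs implementation. A hypothetical caller, with rootfs_fs_type assumed to live in that init code rather than anything shown in this diff:

/* Hypothetical early-init caller: register rootfs on top of ramfs.
 * rootfs_fs_type and its mount helper are assumed to be defined in the
 * init code; only init_ramfs_fs() comes from the hunk above. */
int __init init_rootfs(void)
{
	int err = register_filesystem(&rootfs_fs_type);

	if (!err)
		err = init_ramfs_fs();	/* safe: guarded by test_and_set_bit() */
	return err;
}

The test_and_set_bit() guard lets both the module_init() path and such an early caller invoke init_ramfs_fs() without initializing the backing_dev_info or registering the filesystem twice.
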
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index fb50652e4e11..41d108ecc9be 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -167,17 +167,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
 		/*
 		 * Block is uncompressed.
 		 */
-		int i, in, pg_offset = 0;
-
-		for (i = 0; i < b; i++) {
-			wait_on_buffer(bh[i]);
-			if (!buffer_uptodate(bh[i]))
-				goto block_release;
-		}
+		int in, pg_offset = 0;
 
 		for (bytes = length; k < b; k++) {
 			in = min(bytes, msblk->devblksize - offset);
 			bytes -= in;
+			wait_on_buffer(bh[k]);
+			if (!buffer_uptodate(bh[k]))
+				goto block_release;
 			while (in) {
 				if (pg_offset == PAGE_CACHE_SIZE) {
 					page++;
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index f7f527bf8c10..d8c2d747be28 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
 	int err, i, index, length = 0;
+	unsigned int size;
 	struct squashfs_dir_index dir_index;
 
 	TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
@@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb,
 			 */
 			break;
 
+		size = le32_to_cpu(dir_index.size) + 1;
+
+		/* size should never be larger than SQUASHFS_NAME_LEN */
+		if (size > SQUASHFS_NAME_LEN)
+			break;
+
 		err = squashfs_read_metadata(sb, NULL, &index_start,
-				&index_offset, le32_to_cpu(dir_index.size) + 1);
+				&index_offset, size);
 		if (err < 0)
 			break;
 
@@ -105,9 +112,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx)
 	struct inode *inode = file_inode(file);
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
 	u64 block = squashfs_i(inode)->start + msblk->directory_table;
-	int offset = squashfs_i(inode)->offset, length, dir_count, size,
-			type, err;
-	unsigned int inode_number;
+	int offset = squashfs_i(inode)->offset, length, err;
+	unsigned int inode_number, dir_count, size, type;
 	struct squashfs_dir_header dirh;
 	struct squashfs_dir_entry *dire;
 
@@ -200,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx)
 				((short) le16_to_cpu(dire->inode_number));
 			type = le16_to_cpu(dire->type);
 
+			if (type > SQUASHFS_MAX_DIR_TYPE)
+				goto failed_read;
+
 			if (!dir_emit(ctx, dire->name, size,
 					inode_number,
 					squashfs_filetype_table[type]))
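
The new type bound matters because the on-disk type is used directly as an index into squashfs_filetype_table[]. A sketch of that table, as used by the squashfs readdir code and shown here only to make the relationship to SQUASHFS_MAX_DIR_TYPE explicit:

/* Lookup table indexed by the on-disk directory-entry type; valid indices
 * run 0..7, so SQUASHFS_MAX_DIR_TYPE must match the last valid index and
 * the "type > SQUASHFS_MAX_DIR_TYPE" check prevents an out-of-bounds read
 * on a corrupted or crafted filesystem image. */
static const unsigned char squashfs_filetype_table[] = {
	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
};
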
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 7834a517f7f4..67cad77fefb4 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb,
 			int len)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	int i, size, length = 0, err;
+	int i, length = 0, err;
+	unsigned int size;
 	struct squashfs_dir_index *index;
 	char *str;
 
@@ -103,6 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb,
 
 
 		size = le32_to_cpu(index->size) + 1;
+		if (size > SQUASHFS_NAME_LEN)
+			break;
 
 		err = squashfs_read_metadata(sb, index->name, &index_start,
 				&index_offset, size);
@@ -144,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct squashfs_dir_entry *dire;
 	u64 block = squashfs_i(dir)->start + msblk->directory_table;
 	int offset = squashfs_i(dir)->offset;
-	int err, length, dir_count, size;
+	int err, length;
+	unsigned int dir_count, size;
 
 	TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
 
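
Switching size and dir_count from int to unsigned int is what makes the SQUASHFS_NAME_LEN bound reliable: a hostile 32-bit on-disk value can end up negative when stored in a signed int and then slip past a signed comparison. A small standalone illustration (userspace sketch, values chosen only for demonstration):

#include <stdio.h>

#define SQUASHFS_NAME_LEN 256

int main(void)
{
	unsigned int raw = 0xfffffff0u;	/* hostile on-disk index->size */
	unsigned int size_u = raw + 1;	/* 0xfffffff1 */
	int size_s = (int)size_u;	/* typically wraps to a negative value */

	/* A negative size is not "> 256", so a signed bound check lets the
	 * bogus length through to the metadata reader; the unsigned check
	 * rejects it as intended. */
	printf("signed check rejects:   %d\n", size_s > SQUASHFS_NAME_LEN);
	printf("unsigned check rejects: %d\n", size_u > SQUASHFS_NAME_LEN);
	return 0;
}
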
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 9e2349d07cb1..4b2beda49498 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -87,7 +87,7 @@
 #define SQUASHFS_COMP_OPTS(flags)	SQUASHFS_BIT(flags, \
 						SQUASHFS_COMP_OPT)
 
-/* Max number of types and file types */
+/* Inode types including extended types */
 #define SQUASHFS_DIR_TYPE		1
 #define SQUASHFS_REG_TYPE		2
 #define SQUASHFS_SYMLINK_TYPE		3
@@ -103,6 +103,9 @@
 #define SQUASHFS_LFIFO_TYPE		13
 #define SQUASHFS_LSOCKET_TYPE		14
 
+/* Max type value stored in directory entry */
+#define SQUASHFS_MAX_DIR_TYPE		7
+
 /* Xattr types */
 #define SQUASHFS_XATTR_USER		0
 #define SQUASHFS_XATTR_TRUSTED		1